From 1420aaefddc8f7ad081766da6d43354e50b2ddb1 Mon Sep 17 00:00:00 2001 From: Kevin Vizhalil Date: Mon, 9 Sep 2024 15:46:39 -0400 Subject: [PATCH 01/21] #2352 Improve quality of xCRG Paths --- code/ARAX/ARAXQuery/ARAX_infer.py | 24 +++++- .../ARAXQuery/Infer/scripts/creativeCRG.py | 84 +++++++++++++++---- code/ARAX/BiolinkHelper/biolink_helper.py | 48 +++++++++-- 3 files changed, 129 insertions(+), 27 deletions(-) diff --git a/code/ARAX/ARAXQuery/ARAX_infer.py b/code/ARAX/ARAXQuery/ARAX_infer.py index b2be0e7a9..73d358624 100644 --- a/code/ARAX/ARAXQuery/ARAX_infer.py +++ b/code/ARAX/ARAXQuery/ARAX_infer.py @@ -27,6 +27,9 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) from openapi_server.models.edge import Edge from openapi_server.models.attribute import Attribute as EdgeAttribute from openapi_server.models.node import Node +from openapi_server.models.qualifier import Qualifier +from openapi_server.models.qualifier_constraint import QualifierConstraint as QConstraint + sys.path.append(os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code', 'ARAX', 'NodeSynonymizer'])) from node_synonymizer import NodeSynonymizer @@ -36,6 +39,7 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) # from creativeDTD import creativeDTD from creativeCRG import creativeCRG from ExplianableDTD_db import ExplainableDTD + # from ExplianableCRG import ExplianableCRG # sys.path.append(os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code'])) @@ -615,7 +619,7 @@ def __chemical_gene_regulation_graph_expansion(self, describe=False): f"The `n_result_curies` value must be a positive integer. The provided value was {self.parameters['n_result_curies']}.", error_code="ValueError") else: - self.parameters['n_result_curies'] = 10 + self.parameters['n_result_curies'] = 30 if 'n_paths' in self.parameters: if isinstance(self.parameters['n_paths'], str): @@ -678,9 +682,26 @@ def __chemical_gene_regulation_graph_expansion(self, describe=False): if not preferred_subject_curie and not preferred_object_curie: self.response.error(f"Both parameters 'subject_curie' and 'object_curie' are not provided. Please provide the curie for either one of them") return self.response + qedges = message.query_graph.edges + else: self.response.error(f"The 'query_graph' is detected. 
One of 'subject_qnode_id' or 'object_qnode_id' should be specified.") + + if self.parameters['regulation_type'] == 'increase': + edge_qualifier_direction = 'increased' + else: + edge_qualifier_direction = 'decreased' + edge_qualifier_list = [ + Qualifier(qualifier_type_id='biolink:object_aspect_qualifier', qualifier_value='activity_or_abundance'), + Qualifier(qualifier_type_id='biolink:object_direction_qualifier', qualifier_value=edge_qualifier_direction)] + + for qedge in qedges: + edge = message.query_graph.edges[qedge] + edge.knowledge_type = "inferred" + edge.predicates = ["biolink:affects"] + edge.qualifier_constraints = [QConstraint(qualifier_set=edge_qualifier_list)] + else: if 'subject_curie' in parameters or 'object_curie' in parameters: @@ -763,6 +784,7 @@ def __chemical_gene_regulation_graph_expansion(self, describe=False): iu = InferUtilities() qedge_id = self.parameters.get('qedge_id') + self.response, self.kedge_global_iter, self.qedge_global_iter, self.qnode_global_iter, self.option_global_iter = iu.genrete_regulate_subgraphs(self.response, None, normalized_object_curie, top_predictions, top_paths, qedge_id, self.parameters['regulation_type'], self.kedge_global_iter, self.qedge_global_iter, self.qnode_global_iter, self.option_global_iter) return self.response diff --git a/code/ARAX/ARAXQuery/Infer/scripts/creativeCRG.py b/code/ARAX/ARAXQuery/Infer/scripts/creativeCRG.py index d94f7d3e0..75c40d043 100644 --- a/code/ARAX/ARAXQuery/Infer/scripts/creativeCRG.py +++ b/code/ARAX/ARAXQuery/Infer/scripts/creativeCRG.py @@ -8,7 +8,6 @@ import requests # import graph_tool.all as gt from tqdm import tqdm, trange - pathlist = os.getcwd().split(os.path.sep) RTXindex = pathlist.index("RTX") sys.path.append(os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code', 'ARAX', 'ARAXQuery'])) @@ -23,7 +22,9 @@ from RTXConfiguration import RTXConfiguration RTXConfig = RTXConfiguration() sys.path.append(os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code', 'ARAX', 'ARAXQuery',''])) - +sys.path.append(os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code', 'ARAX', 'BiolinkHelper',''])) +from biolink_helper import BiolinkHelper +def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) def call_plover(curies: List, respect_predicate_symmetry: bool=False): json = {} plover_url = RTXConfig.plover_url @@ -164,6 +165,16 @@ def __init__(self, response: ARAXResponse, data_path: str): ## set up parameters self.response = response + self.bh = BiolinkHelper() + self.predicate_depth_map = self.bh.get_predicate_depth_map() + self.relevant_node_categories = ['biolink:Drug', 'biolink:PathologicalProcess', 'biolink:GeneOrGeneProduct', 'biolink:ChemicalEntity', + 'biolink:SmallMolecule', 'biolink:Gene', 'biolink:BiologicalProcess', 'biolink:Pathway', 'biolink:Disease', + 'biolink:Transcript', 'biolink:Cell', 'biolink:GeneFamily', 'biolink:GeneProduct', 'biolink:Exon', + 'biolink:DiseaseOrPhenotypicFeature', 'biolink:PhenotypicFeature', 'biolink:MolecularActivity', 'biolink:GeneGroupingMixin', + 'biolink:CellularComponent', 'biolink:RNAProduct', 'biolink:Protein', 'biolink:BiologicalProcessOrActivity', 'biolink:PhysiologicalProcess', + 'biolink:NoncodingRNAProduct', 'biolink:ProteinFamily', 'biolink:ProteinDomain'] + self.relevant_node_categories = self.bh.get_descendants(self.relevant_node_categories) + self.data_path = data_path self.chemical_type = ['biolink:ChemicalEntity', 'biolink:ChemicalMixture','biolink:SmallMolecule'] self.gene_type = ['biolink:Gene','biolink:Protein'] @@ -212,6 +223,7 @@ def 
get_tf_neighbors(self): for edge in edges.keys(): c1 = edges[edge][0] c2 = edges[edge][1] + depth = self.predicate_depth_map[edges[edge][2]] if 'subclass' in edges[edge][2]: continue if c1 == c2: @@ -219,11 +231,11 @@ def get_tf_neighbors(self): if c1 in self.tf_list: curie = c2 tf = c1 - answer_tf_neigbor_data.append({"edge_id": edge, "transcription_factor":tf, "neighbour": curie}) + answer_tf_neigbor_data.append({"edge_id": edge, "transcription_factor":tf, "neighbour": curie, "depth": depth}) if c2 in self.tf_list: curie = c1 tf = c2 - query_tf_neighbor_data.append({"edge_id": edge, "transcription_factor":tf, "neighbour": curie}) + query_tf_neighbor_data.append({"edge_id": edge, "transcription_factor":tf, "neighbour": curie, "depth": depth}) return query_tf_neighbor_data,answer_tf_neigbor_data, edges def add_node_ids_to_path(self, paths, tf_edges,chemical_edges, gene_edges): @@ -507,62 +519,90 @@ def _check_params(query_chemical: Optional[str], query_gene: Optional[str], mode else: top_paths = dict() + gene_neighbors = call_plover([preferred_query_gene]) answers = res['chemical_id'].tolist() self.preferred_curies = self.get_preferred_curies(answers) valid_chemicals = [item for item in self.preferred_curies.values() if item] chemical_neighbors = call_plover(valid_chemicals) query_tf_neighbors, answer_tf_neigbors, tf_edges = self.get_tf_neighbors() - paths = self.get_paths(preferred_query_gene, res['chemical_id'].tolist(), gene_neighbors, chemical_neighbors, query_tf_neighbors, answer_tf_neigbors,self.tf_list, M) final_paths = self.add_node_ids_to_path(paths, tf_edges, chemical_neighbors, gene_neighbors) return final_paths def get_paths(self, query_curie, answer_curies, query_neighbors, answer_neighbors, query_tf_neighbors, answer_tf_neighbors, tf_list,n_paths): - query_neighbors_curies = list(query_neighbors['nodes']['n01'].keys()) query_tf_neighbors_dict = {} answer_tf_neighbors_dict = {} query_path = {} answer_path = {} combined_path = dict() - one_hop_from_query = set(tf_list).intersection(query_neighbors_curies) + valid_query_tf_list = [] + valid_answer_tf_list = {} + for answer in answer_curies: + valid_answer_tf_list[answer] = [] + for record in query_tf_neighbors: - query_tf_neighbors_dict[record['neighbour']] = query_tf_neighbors_dict.get(record['neighbour'],[]) + [(record['edge_id'],record['transcription_factor'])] + query_tf_neighbors_dict[record['neighbour']] = query_tf_neighbors_dict.get(record['neighbour'],[]) + [(record['edge_id'],record['transcription_factor'], record['depth'])] for record in answer_tf_neighbors: - answer_tf_neighbors_dict[record['neighbour']] = answer_tf_neighbors_dict.get(record['neighbour'],[]) + [(record['edge_id'],record['transcription_factor'])] + answer_tf_neighbors_dict[record['neighbour']] = answer_tf_neighbors_dict.get(record['neighbour'],[]) + [(record['edge_id'],record['transcription_factor'], record['depth'])] # one hop from query for edge_id, edge in query_neighbors['edges']['e00'].items(): if edge[1] in tf_list and edge[1] not in query_path: - query_path[edge[1]] = [edge_id] - + valid_query_tf_list.append(edge[1]) + query_path[edge[1]] = [edge_id,self.predicate_depth_map[edge[2]]] + elif edge[1] in tf_list and edge[1] in query_path: + if query_path[edge[1]][-1] < self.predicate_depth_map[edge[2]]: + query_path[edge[1]] = [edge_id,self.predicate_depth_map[edge[2]]] # two hop from query for edge_id, edge in query_neighbors['edges']['e00'].items(): if edge[0] != query_curie: continue + relevant_node = False neighbor = edge[1] + neighbor_category 
= query_neighbors['nodes']['n01'][neighbor][1] + if neighbor_category in self.relevant_node_categories: + relevant_node = True + for item in query_tf_neighbors_dict.get(neighbor,[]): + if item[1] not in valid_query_tf_list and relevant_node: + valid_query_tf_list.append(item[1]) if item[1] not in query_path: - query_path[item[1]] = [edge_id,item[0]] + query_path[item[1]] = [edge_id,item[0], item[2]] + elif query_path[item[1]][-1] < min(item[2],self.predicate_depth_map[edge[2]]) and ((item[1] not in valid_query_tf_list) or relevant_node) : + query_path[item[1]] = [edge_id,item[0], min(item[2],self.predicate_depth_map[edge[2]])] + for edge_id, edge in answer_neighbors['edges']['e00'].items(): if edge[1] not in self.preferred_curies.values(): continue + relevant_node = False answer = edge[1] neighbor = edge[0] + neighbor_category = answer_neighbors['nodes']['n01'][neighbor][1] + if neighbor_category in self.relevant_node_categories: + relevant_node = True # one hop from answer if answer not in answer_path: answer_path[answer] = dict() - if neighbor in tf_list: - answer_path[answer][neighbor] = [edge_id] + if neighbor in tf_list and neighbor not in answer_path[answer]: + valid_answer_tf_list[answer].append(neighbor) + answer_path[answer][neighbor] = [edge_id, self.predicate_depth_map[edge[2]]] + elif neighbor in tf_list and neighbor in answer_path[answer]: + if answer_path[answer][neighbor][-1] < self.predicate_depth_map[edge[2]]: + answer_path[answer][neighbor] = [edge_id, self.predicate_depth_map[edge[2]]] # two hop from answer for item in answer_tf_neighbors_dict.get(neighbor,[]): + neighbor_category = answer_neighbors['nodes']['n01'][neighbor][1] + if relevant_node and item[1] not in valid_answer_tf_list[answer]: + valid_answer_tf_list[answer].append(item[1]) if item[1] not in answer_path[answer]: - answer_path[answer][item[1]] = [item[0],edge_id] - + answer_path[answer][item[1]] = [item[0],edge_id, item[2]] + elif answer_path[answer][item[1]][-1] < item[2] and ((item[1] not in valid_answer_tf_list[answer]) or relevant_node): + answer_path[answer][item[1]] = [item[0], edge_id, min(item[2],self.predicate_depth_map[edge[2]])] # joining paths for answer in answer_curies: combined_path[(query_curie,answer)] = list() @@ -573,13 +613,21 @@ def get_paths(self, query_curie, answer_curies, query_neighbors, answer_neighbor continue path_counter = 0 - for tf in tf_list: + relevant_tf = list(set(valid_query_tf_list).intersection(valid_answer_tf_list[answer])) + irrelevant_tf = [tf for tf in tf_list if tf not in relevant_tf] + for tf in relevant_tf: if path_counter > n_paths: break if tf in query_path and tf in answer_path[key]: - combined_path[(query_curie,answer)].append(query_path[tf] + answer_path[key][tf]) + combined_path[(query_curie,answer)].append(query_path[tf][:-1] + answer_path[key][tf][:-1]) path_counter += 1 + for tf in irrelevant_tf: + if path_counter > n_paths: + break + if tf in query_path and tf in answer_path[key]: + combined_path[(query_curie,answer)].append(query_path[tf][:-1] + answer_path[key][tf][:-1]) + path_counter += 1 return combined_path diff --git a/code/ARAX/BiolinkHelper/biolink_helper.py b/code/ARAX/BiolinkHelper/biolink_helper.py index f0b001767..bbb693be5 100644 --- a/code/ARAX/BiolinkHelper/biolink_helper.py +++ b/code/ARAX/BiolinkHelper/biolink_helper.py @@ -125,7 +125,18 @@ def get_canonical_predicates(self, predicates: Union[str, List[str], Set[str]]) for predicate in valid_predicates} canonical_predicates.update(invalid_predicates) # Go ahead and include those we 
don't have canonical info for return list(canonical_predicates) - + + def get_predicate_depth_map(self)->Dict[str,int]: + response = self._download_biolink_model() + if response.status_code == 200: + biolink_model = yaml.safe_load(response.text) + predicate_dag = self._build_predicate_dag(biolink_model) + + else: + raise RuntimeError(f"ERROR: Request to get Biolink {self.biolink_version} YAML file returned " + f"{response.status_code} response. Cannot load BiolinkHelper.") + return self._get_depths_from_root(predicate_dag) + def is_symmetric(self, predicate: str) -> Optional[bool]: if predicate in self.biolink_lookup_map["predicates"]: return self.biolink_lookup_map["predicates"][predicate]["is_symmetric"] @@ -198,7 +209,15 @@ def _load_biolink_lookup_map(self, is_test: bool = False): with open(self.biolink_lookup_map_path, "rb") as biolink_map_file: biolink_lookup_map = pickle.load(biolink_map_file) return biolink_lookup_map - + + def _download_biolink_model(self): + response = requests.get(f"https://raw.githubusercontent.com/biolink/biolink-model/{self.biolink_version}/biolink-model.yaml", + timeout=10) + if response.status_code != 200: # Sometimes Biolink's tags start with 'v', so try that + response = requests.get(f"https://raw.githubusercontent.com/biolink/biolink-model/v{self.biolink_version}/biolink-model.yaml", + timeout=10) + return response + def _create_biolink_lookup_map(self) -> Dict[str, Dict[str, Dict[str, Union[str, List[str], bool]]]]: timestamp = str(datetime.datetime.now().isoformat()) eprint(f"{timestamp}: INFO: Building local Biolink {self.biolink_version} ancestor/descendant lookup map " @@ -206,17 +225,14 @@ def _create_biolink_lookup_map(self) -> Dict[str, Dict[str, Dict[str, Union[str, biolink_lookup_map = {"predicates": dict(), "categories": dict(), "aspects": dict(), "directions": dict()} # Grab the relevant Biolink yaml file - response = requests.get(f"https://raw.githubusercontent.com/biolink/biolink-model/{self.biolink_version}/biolink-model.yaml", - timeout=10) - if response.status_code != 200: # Sometimes Biolink's tags start with 'v', so try that - response = requests.get(f"https://raw.githubusercontent.com/biolink/biolink-model/v{self.biolink_version}/biolink-model.yaml", - timeout=10) + response = self._download_biolink_model() if response.status_code == 200: biolink_model = yaml.safe_load(response.text) # -------------------------------- PREDICATES --------------------------------- # predicate_dag = self._build_predicate_dag(biolink_model) + import pdb;pdb.set_trace() # Build our map of predicate ancestors/descendants for easy lookup, first WITH mixins for node_id in list(predicate_dag.nodes): node_info = predicate_dag.nodes[node_id] @@ -382,7 +398,23 @@ def _build_direction_dag(self, biolink_model: dict) -> nx.DiGraph: direction_dag.add_edge(parent_name_trapi, direction_name_trapi) return direction_dag - + + def _get_depths_from_root(self, dag)-> Dict[str,int]: + node_depths = {} + for node in nx.topological_sort(dag): + # Skip if the node is the start node + + # Get all predecessors of the current node + predecessors = list(dag.predecessors(node)) + + # If the node has predecessors, calculate its depth as max(depth of predecessors) + 1 + if predecessors: + node_depths[node] = max(node_depths[pred] for pred in predecessors) + 1 + else: + node_depths[node] = 0 # Handle nodes that have no predecessors (if any) + + return node_depths + @staticmethod def _get_ancestors_nx(nx_graph: nx.DiGraph, node_id: str) -> List[str]: return list(nx.ancestors(nx_graph, 
node_id).union({node_id})) From fd5a0a0f57c9a1136f450f545196a7af8f25af39 Mon Sep 17 00:00:00 2001 From: amykglen Date: Mon, 9 Sep 2024 12:47:25 -0700 Subject: [PATCH 02/21] Ignore nodes/edges with empty ID/subject/object --- code/ARAX/ARAXQuery/Expand/trapi_querier.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/code/ARAX/ARAXQuery/Expand/trapi_querier.py b/code/ARAX/ARAXQuery/Expand/trapi_querier.py index 9c256e0df..c02f82447 100644 --- a/code/ARAX/ARAXQuery/Expand/trapi_querier.py +++ b/code/ARAX/ARAXQuery/Expand/trapi_querier.py @@ -368,9 +368,20 @@ def _load_kp_json_response(self, json_response: dict, qg: QueryGraph) -> QGOrgan # Build a map that indicates which qnodes/qedges a given node/edge fulfills kg_to_qg_mappings, query_curie_mappings = self._get_kg_to_qg_mappings_from_results(kp_message.results, qg) - # Populate our final KG with the returned nodes and edges + # Populate our final KG with the returned edges returned_edge_keys_missing_qg_bindings = set() + nodes_dict = kp_message.knowledge_graph.nodes for returned_edge_key, returned_edge in kp_message.knowledge_graph.edges.items(): + # Catch invalid subject/object + if not returned_edge.subject or not returned_edge.object: + self.log.warning(f"{self.kp_infores_curie}: Edge has empty subject/object, skipping. " + f"subject: '{returned_edge.subject}', object: '{returned_edge.object}'") + continue + if returned_edge.subject not in nodes_dict or returned_edge.object not in nodes_dict: + self.log.warning(f"{self.kp_infores_curie}: Edge is an orphan, skipping. " + f"subject: '{returned_edge.subject}', object: '{returned_edge.object}'") + continue + arax_edge_key = self._get_arax_edge_key(returned_edge) # Convert to an ID that's unique for us # Put in a placeholder for missing required attribute fields to try to keep our answer TRAPI-compliant @@ -399,9 +410,13 @@ def _load_kp_json_response(self, json_response: dict, qg: QueryGraph) -> QGOrgan self.log.warning(f"{self.kp_infores_curie}: {len(returned_edge_keys_missing_qg_bindings)} edges in the KP's answer " f"KG have no bindings to the QG: {returned_edge_keys_missing_qg_bindings}") + # Populate our final KG with the returned nodes returned_node_keys_missing_qg_bindings = set() for returned_node_key, returned_node in kp_message.knowledge_graph.nodes.items(): - if returned_node_key not in kg_to_qg_mappings['nodes']: + if not returned_node_key: + self.log.warning(f"{self.kp_infores_curie}: Node has empty ID, skipping. 
Node key is: " + f"'{returned_node_key}'") + elif returned_node_key not in kg_to_qg_mappings['nodes']: returned_node_keys_missing_qg_bindings.add(returned_node_key) else: for qnode_key in kg_to_qg_mappings['nodes'][returned_node_key]: From d37f15a73bfb8fea1826634d23254b787cc9445e Mon Sep 17 00:00:00 2001 From: Kevin Vizhalil Date: Mon, 9 Sep 2024 16:05:10 -0400 Subject: [PATCH 03/21] removing pdb --- code/ARAX/BiolinkHelper/biolink_helper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/code/ARAX/BiolinkHelper/biolink_helper.py b/code/ARAX/BiolinkHelper/biolink_helper.py index bbb693be5..cb0e4162b 100644 --- a/code/ARAX/BiolinkHelper/biolink_helper.py +++ b/code/ARAX/BiolinkHelper/biolink_helper.py @@ -232,7 +232,6 @@ def _create_biolink_lookup_map(self) -> Dict[str, Dict[str, Dict[str, Union[str, # -------------------------------- PREDICATES --------------------------------- # predicate_dag = self._build_predicate_dag(biolink_model) - import pdb;pdb.set_trace() # Build our map of predicate ancestors/descendants for easy lookup, first WITH mixins for node_id in list(predicate_dag.nodes): node_info = predicate_dag.nodes[node_id] From 9a0307466b2d3dca00c8902dc48329e4b394ed30 Mon Sep 17 00:00:00 2001 From: Eric Deutsch Date: Thu, 12 Sep 2024 08:46:16 -0700 Subject: [PATCH 04/21] Update Python version in the template script. Anticipating no actual change to deployed code in arax.ncats.io or any ITRB instances. #2348 --- code/UI/OpenAPI/python-flask-server/RTX_OpenAPI.start | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/UI/OpenAPI/python-flask-server/RTX_OpenAPI.start b/code/UI/OpenAPI/python-flask-server/RTX_OpenAPI.start index 283fd67a7..d833c5d5c 100755 --- a/code/UI/OpenAPI/python-flask-server/RTX_OpenAPI.start +++ b/code/UI/OpenAPI/python-flask-server/RTX_OpenAPI.start @@ -17,7 +17,7 @@ fi cd /mnt/data/orangeboard/$DEVAREA/RTX/code/UI/OpenAPI/python-flask-server -export PATH=/mnt/data/python/Python-3.9.16/bin:$PATH +export PATH=/mnt/data/python/Python-3.9.18/bin:$PATH exec python3 -u -m openapi_server 1>$LOGFILE 2>$ELOGFILE From a1b38ed75b009803c256ac8ed1e6c700f0cab64e Mon Sep 17 00:00:00 2001 From: Eric Deutsch Date: Thu, 12 Sep 2024 09:08:33 -0700 Subject: [PATCH 05/21] Remove the STDERR emission of an error message during a non-error condition #2141 --- code/ARAX/ARAXQuery/ARAX_query.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/code/ARAX/ARAXQuery/ARAX_query.py b/code/ARAX/ARAXQuery/ARAX_query.py index d7934ea34..44cd84b48 100644 --- a/code/ARAX/ARAXQuery/ARAX_query.py +++ b/code/ARAX/ARAXQuery/ARAX_query.py @@ -283,7 +283,8 @@ def track_query_finish(self): if hasattr(self.response, 'job_id'): query_tracker.update_tracker_entry(self.response.job_id, attributes) else: - eprint("*******ERROR: self.response has no job_id attr! 
E275") + # Sometimes we finish without a job_id having been created, and that's okay + pass From 641d570da2acbabcf6e80ffcd34f9b608cbf380f Mon Sep 17 00:00:00 2001 From: isbluis Date: Sat, 14 Sep 2024 05:57:38 +0000 Subject: [PATCH 06/21] - Rename quick_defs.json to .js since it is a Javascript file (#2365) - Add quick-links to external URLs for ClinicalTrials and DrugApprovals KPs in Settings (#2346) - Update some of those URLs to TRAPI 1.5 endpoints - Update ARAXi DSL helper JSON - Add "Total" row to Testing summary view --- code/UI/interactive/araxi.js | 14 +++++++------- code/UI/interactive/dsl2json.py | 1 + code/UI/interactive/index.html | 10 ++++++---- code/UI/interactive/rtx.js | 8 ++++++++ code/UI/interactive/rtx.version | 2 +- code/autocomplete/data/quick_def.json | 1 - 6 files changed, 23 insertions(+), 13 deletions(-) delete mode 100644 code/autocomplete/data/quick_def.json diff --git a/code/UI/interactive/araxi.js b/code/UI/interactive/araxi.js index 178da2f24..75d1bff36 100644 --- a/code/UI/interactive/araxi.js +++ b/code/UI/interactive/araxi.js @@ -1,5 +1,5 @@ // WARNING: -// This file was auto-generated by /mnt/data/orangeboard/devLM/RTX/code/UI/interactive/./dsl2json.py on: 2024-07-25 01:50:08 +// This file was auto-generated by /mnt/data/orangeboard/devLM/RTX/code/UI/interactive/./dsl2json.py on: 2024-09-11 06:23:47 // var araxi_commands = { "add_qedge()": { @@ -208,7 +208,7 @@ var araxi_commands = { "description": "Whether to omit supporting data on nodes/edges in the results (e.g., publications, description, etc.)." } }, - "description": "This command will expand (aka, answer/fill) your query graph in an edge-by-edge fashion, intelligently selecting which KPs to use for each edge. It selects KPs from the SmartAPI Registry based on the meta information provided by their TRAPI APIs, whether they have an endpoint running a matching TRAPI version, and whether they have an endpoint with matching maturity. For each QEdge, it queries the selected KPs concurrently; it will timeout for a particular KP if it decides it's taking too long to respond (this KP timeout can be controlled by the user). You may also optionally specify a particular KP to use via the 'kp' parameter (described below).\n\nCurrent candidate KPs include (for TRAPI 1.5, maturity 'development'): \ninfores:answer-coalesce, infores:automat-binding-db, infores:automat-cam-kp, infores:automat-ctd, infores:automat-drug-central, infores:automat-genome-alliance, infores:automat-gtex, infores:automat-gtopdb, infores:automat-gwas-catalog, infores:automat-hetionet, infores:automat-hgnc, infores:automat-hmdb, infores:automat-human-goa, infores:automat-icees-kg, infores:automat-intact, infores:automat-monarchinitiative, infores:automat-panther, infores:automat-pharos, infores:automat-reactome, infores:automat-robokop, infores:automat-string-db, infores:automat-ubergraph, infores:automat-viral-proteome, infores:cohd, infores:connections-hypothesis, infores:gelinea, infores:genetics-data-provider, infores:knowledge-collaboratory, infores:molepro, infores:openpredict, infores:rtx-kg2, infores:service-provider-trapi, infores:spoke. \n\n(Note that this list of KPs may change unexpectedly based on the SmartAPI registry.)" + "description": "This command will expand (aka, answer/fill) your query graph in an edge-by-edge fashion, intelligently selecting which KPs to use for each edge. 
It selects KPs from the SmartAPI Registry based on the meta information provided by their TRAPI APIs, whether they have an endpoint running a matching TRAPI version, and whether they have an endpoint with matching maturity. For each QEdge, it queries the selected KPs concurrently; it will timeout for a particular KP if it decides it's taking too long to respond (this KP timeout can be controlled by the user). You may also optionally specify a particular KP to use via the 'kp' parameter (described below).\n\nCurrent candidate KPs include (for TRAPI 1.5, maturity 'development'): \ninfores:answer-coalesce, infores:automat-binding-db, infores:automat-cam-kp, infores:automat-ctd, infores:automat-drug-central, infores:automat-genome-alliance, infores:automat-gtex, infores:automat-gtopdb, infores:automat-gwas-catalog, infores:automat-hetionet, infores:automat-hgnc, infores:automat-hmdb, infores:automat-human-goa, infores:automat-icees-kg, infores:automat-intact, infores:automat-monarchinitiative, infores:automat-panther, infores:automat-pharos, infores:automat-reactome, infores:automat-robokop, infores:automat-string-db, infores:automat-ubergraph, infores:automat-viral-proteome, infores:cohd, infores:connections-hypothesis, infores:gelinea, infores:genetics-data-provider, infores:knowledge-collaboratory, infores:molepro, infores:multiomics-clinicaltrials, infores:multiomics-drugapprovals, infores:multiomics-microbiome, infores:multiomics-multiomics, infores:openpredict, infores:rtx-kg2, infores:service-provider-trapi, infores:spoke. \n\n(Note that this list of KPs may change unexpectedly based on the SmartAPI registry.)" }, "overlay(action=add_node_pmids)": { "parameters": { @@ -1374,15 +1374,15 @@ var araxi_commands = { "type": "list", "description": "List with just two qnode keys to connect. example: [n1, n2]" }, - "result_as": { + "node_category_constraint": { "is_required": false, "examples": [ - "betweenness_centrality", - "all_in_one", - "one_by_one" + "biolink:Disease", + "biolink:Gene", + "biolink:ChemicalEntity" ], "type": "string", - "description": "It determines how to receive the results. For instance, one_by_one means that it will return each path in one subgraph. The default value is betweenness_centrality" + "description": "This constraint will display paths that only pass through the user-specified category." } }, "description": "\nconnect_nodes adds paths between two nodes specified in the query.\n " diff --git a/code/UI/interactive/dsl2json.py b/code/UI/interactive/dsl2json.py index ecca50a5f..771880765 100755 --- a/code/UI/interactive/dsl2json.py +++ b/code/UI/interactive/dsl2json.py @@ -1,3 +1,4 @@ +#!/mnt/data/python/Python-3.9.18/bin/python3 import datetime import importlib import json diff --git a/code/UI/interactive/index.html b/code/UI/interactive/index.html index 93ffa494d..3dc4534a3 100644 --- a/code/UI/interactive/index.html +++ b/code/UI/interactive/index.html @@ -14,7 +14,7 @@ - + @@ -552,13 +552,15 @@

 EXTERNAL API (post query):
 [hunk body garbled in extraction: the HTML link markup was stripped, leaving only bare diff markers; per the commit message, this hunk updates the external API quick-links to TRAPI 1.5 endpoints and adds links for the ClinicalTrials and DrugApprovals KPs]
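The Total-row bookkeeping added to displayARSResults() in the rtx.js diff below seeds a stats[agent]['TOTAL'] bucket and then accumulates every non-TOTAL status count into it. A minimal Python sketch of the same tallying logic (the agent names and counts here are hypothetical, not taken from the UI code):

    # Hypothetical per-agent status counts, mirroring the stats object in rtx.js
    stats = {
        "ara-arax": {"PASSED": 12, "FAILED": 3},
        "ara-bte": {"PASSED": 9, "ERROR": 1},
    }

    for agent, counts in stats.items():
        counts["TOTAL"] = 0  # seed the Total row for this agent
        for status, count in list(counts.items()):
            # skip the TOTAL bucket itself and any missing counts
            if status != "TOTAL" and count is not None:
                counts["TOTAL"] += count

    print(stats["ara-arax"]["TOTAL"])  # 15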

diff --git a/code/UI/interactive/rtx.js b/code/UI/interactive/rtx.js index eaf4e6363..fe1b493c4 100644 --- a/code/UI/interactive/rtx.js +++ b/code/UI/interactive/rtx.js @@ -7025,6 +7025,7 @@ function displayARSResults(parentnode,arsdata) { for (var agent of arsdata['ara_list']) { stats[agent] = {}; stats[agent]['PASSED'] = 0; + stats[agent]['TOTAL'] = 0; } var tdiv = document.createElement("div"); @@ -7212,6 +7213,7 @@ function displayARSResults(parentnode,arsdata) { } sumtable.appendChild(tr); + stats.status_list['TOTAL'] = 1; for (var status in stats.status_list) { tr = document.createElement("tr"); tr.className = 'hoverable'; @@ -7238,6 +7240,9 @@ function displayARSResults(parentnode,arsdata) { span.innerHTML = status.split(":")[1]; span.className = 'explevel p3'; } + else if (status == 'TOTAL') { + tr.style.borderTop = "2px solid black"; + } td.appendChild(span); tr.appendChild(td); @@ -7267,6 +7272,9 @@ function displayARSResults(parentnode,arsdata) { td.appendChild(span); } tr.appendChild(td); + + if (status != 'TOTAL' && stats[agent][status]!=null) + stats[agent]['TOTAL'] += stats[agent][status]; } sumtable.appendChild(tr); } diff --git a/code/UI/interactive/rtx.version b/code/UI/interactive/rtx.version index 5112da1a8..bb7a2cd1b 100644 --- a/code/UI/interactive/rtx.version +++ b/code/UI/interactive/rtx.version @@ -1 +1 @@ -Demanding Praying mantis \ No newline at end of file +Urban Otter \ No newline at end of file diff --git a/code/autocomplete/data/quick_def.json b/code/autocomplete/data/quick_def.json deleted file mode 100644 index 4374bf3ff..000000000 --- a/code/autocomplete/data/quick_def.json +++ /dev/null @@ -1 +0,0 @@ -var quick_def = {"hemoglobin": "Hemoglobin or haemoglobin, abbreviated Hb or Hgb, is the iron-containing oxygen-transport metalloprotein in the red blood cells of all vertebrates (with the exception of the fish family Channichthyidae) as well as the tissues of some invertebrates.", "cancer": "Cancer is a group of diseases involving abnormal cell growth with the potential to invade or spread to other parts of the body.", "lovastatin": "Lovastatin is a statin drug, used for lowering cholesterol in those with hypercholesterolemia to reduce risk of cardiovascular disease."} \ No newline at end of file From 051c4af8d1030769cc093e591b031dbba9163080 Mon Sep 17 00:00:00 2001 From: isbluis Date: Sat, 14 Sep 2024 06:02:40 +0000 Subject: [PATCH 07/21] Oops, forgot to add this to the previous commit... 
--- code/autocomplete/data/quick_def.js | 1 + 1 file changed, 1 insertion(+) create mode 100644 code/autocomplete/data/quick_def.js diff --git a/code/autocomplete/data/quick_def.js b/code/autocomplete/data/quick_def.js new file mode 100644 index 000000000..4374bf3ff --- /dev/null +++ b/code/autocomplete/data/quick_def.js @@ -0,0 +1 @@ +var quick_def = {"hemoglobin": "Hemoglobin or haemoglobin, abbreviated Hb or Hgb, is the iron-containing oxygen-transport metalloprotein in the red blood cells of all vertebrates (with the exception of the fish family Channichthyidae) as well as the tissues of some invertebrates.", "cancer": "Cancer is a group of diseases involving abnormal cell growth with the potential to invade or spread to other parts of the body.", "lovastatin": "Lovastatin is a statin drug, used for lowering cholesterol in those with hypercholesterolemia to reduce risk of cardiovascular disease."} \ No newline at end of file From 28429383554889e2bafbdb600b184514c1361581 Mon Sep 17 00:00:00 2001 From: isbluis Date: Sat, 14 Sep 2024 06:18:48 +0000 Subject: [PATCH 08/21] Revert back helper link to RTX/KG2 from 1.5 to 1.4... --- code/UI/interactive/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/UI/interactive/index.html b/code/UI/interactive/index.html index 3dc4534a3..33dbb1f9a 100644 --- a/code/UI/interactive/index.html +++ b/code/UI/interactive/index.html @@ -556,7 +556,7 @@

 EXTERNAL API (post query):
 [hunk body garbled in extraction: the HTML link markup was stripped; the one changed line, which reverts the RTX/KG2 helper link from the TRAPI 1.5 back to the 1.4 endpoint, survives only as the bare markers below]
- + From 97d2915de799580229e4ff8cee0723f6ceeb1bdb Mon Sep 17 00:00:00 2001 From: amykglen Date: Mon, 16 Sep 2024 10:01:33 -0700 Subject: [PATCH 09/21] Don't override categories now that synonymizer is clean --- code/ARAX/ARAXQuery/ARAX_expander.py | 31 ++--------------------- code/ARAX/ARAXQuery/Expand/kp_selector.py | 5 +--- 2 files changed, 3 insertions(+), 33 deletions(-) diff --git a/code/ARAX/ARAXQuery/ARAX_expander.py b/code/ARAX/ARAXQuery/ARAX_expander.py index 197831546..4ecc59b97 100644 --- a/code/ARAX/ARAXQuery/ARAX_expander.py +++ b/code/ARAX/ARAXQuery/ARAX_expander.py @@ -353,7 +353,8 @@ def apply(self, response, input_parameters, mode: str = "ARAX"): if inferred_qedge_keys and len(query_graph.edges) == 1: for edge in query_sub_graph.edges.keys(): query_sub_graph.edges[edge].knowledge_type = 'lookup' - # Expand the query graph edge-by-edge + + # Expand the query graph edge-by-edge (in regular 'lookup' fashion) for qedge_key in ordered_qedge_keys_to_expand: log.debug(f"Expanding qedge {qedge_key}") response.update_query_plan(qedge_key, 'edge_properties', 'status', 'Expanding') @@ -583,9 +584,6 @@ def apply(self, response, input_parameters, mode: str = "ARAX"): decorator = ARAXDecorator() decorator.decorate_nodes(response) decorator.decorate_edges(response, kind="RTX-KG2") - - # Override node types to only include descendants of what was asked for in the QG (where applicable) #1360 - self._override_node_categories(message.knowledge_graph, message.query_graph, log) elif mode == "RTXKG2": decorator = ARAXDecorator() decorator.decorate_edges(response, kind="SEMMEDDB") @@ -1400,31 +1398,6 @@ def _load_fda_approved_drug_ids() -> Set[str]: fda_approved_drug_ids = pickle.load(fda_pickle) return fda_approved_drug_ids - def _override_node_categories(self, kg: KnowledgeGraph, qg: QueryGraph, log: ARAXResponse): - # Clean up what we list as the TRAPI node.categories; list descendants of what was asked for in the QG - log.debug(f"Overriding node categories to better align with what's in the QG") - qnode_descendant_categories_map = {qnode_key: set(self.bh.get_descendants(qnode.categories)) - for qnode_key, qnode in qg.nodes.items() if qnode.categories} - for node_key, node in kg.nodes.items(): - final_categories = set() - for qnode_key in node.qnode_keys: - # If qnode has categories specified, use node's all_categories that are descendants of qnode categories - if qnode_key in qnode_descendant_categories_map: - all_categories_attributes = [attribute for attribute in eu.convert_to_list(node.attributes) - if attribute.attribute_type_id == "biolink:category"] - node_categories = all_categories_attributes[0].value if all_categories_attributes else node.categories - relevant_categories = set(node_categories).intersection(qnode_descendant_categories_map[qnode_key]) - # Otherwise just use what's already in the node's categories (for KG2 this is the 'preferred' category) - else: - relevant_categories = set(node.categories) - final_categories = final_categories.union(relevant_categories) - if final_categories: - node.categories = list(final_categories) - else: - # Leave categories as they are but issue a warning - log.warning(f"None of the categories KPs gave node {node_key} ({node.categories}) are descendants of " - f"those asked for in the QG (for qnode {node.qnode_keys})") - @staticmethod def _map_back_to_input_curies(kg: KnowledgeGraph, qg: QueryGraph, log: ARAXResponse): """ diff --git a/code/ARAX/ARAXQuery/Expand/kp_selector.py b/code/ARAX/ARAXQuery/Expand/kp_selector.py index 
8c556d086..692520716 100644 --- a/code/ARAX/ARAXQuery/Expand/kp_selector.py +++ b/code/ARAX/ARAXQuery/Expand/kp_selector.py @@ -42,10 +42,7 @@ def _load_cached_kp_info(self) -> tuple: self.log.error(f"Failed to load KP info caches due to {e}", error_code="LoadKPCachesFailed") return None, None, None, None - # Record None URLs for our local KPs - allowed_kp_urls = smart_api_info["allowed_kp_urls"] - - return (meta_map, allowed_kp_urls, smart_api_info["kps_excluded_by_version"], + return (meta_map, smart_api_info["allowed_kp_urls"], smart_api_info["kps_excluded_by_version"], smart_api_info["kps_excluded_by_maturity"]) def get_kps_for_single_hop_qg(self, qg: QueryGraph) -> Optional[Set[str]]: From 3549a71c4d97fcad7028ae94c46b9f415405a471 Mon Sep 17 00:00:00 2001 From: amykglen Date: Mon, 16 Sep 2024 10:57:27 -0700 Subject: [PATCH 10/21] Get rid of no-longer-relevant test --- code/ARAX/test/test_ARAX_expand.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/code/ARAX/test/test_ARAX_expand.py b/code/ARAX/test/test_ARAX_expand.py index 02207a0e4..17acba75a 100644 --- a/code/ARAX/test/test_ARAX_expand.py +++ b/code/ARAX/test/test_ARAX_expand.py @@ -337,18 +337,6 @@ def test_873_consider_both_gene_and_protein(): assert set(nodes_by_qg_id_protein['n01']) == set(nodes_by_qg_id_gene['n01']) -def test_987_override_node_categories(): - actions_list = [ - "add_qnode(name=DOID:8398, key=n00)", - "add_qnode(categories=biolink:PhenotypicFeature, key=n01)", - "add_qedge(subject=n00, object=n01, predicates=biolink:has_phenotype, key=e00)", - "expand(edge_key=e00, kp=infores:rtx-kg2)", - "return(message=true, store=false)" - ] - nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list) - assert all('biolink:PhenotypicFeature' in node.categories for node in nodes_by_qg_id['n01'].values()) - - @pytest.mark.external def test_cohd_expand(): actions_list = [ From eb61a669abcdf3e6e92ff04b636e58bf15900a00 Mon Sep 17 00:00:00 2001 From: Chunyu Ma Date: Mon, 16 Sep 2024 14:44:23 -0400 Subject: [PATCH 11/21] update xDTD edge description to KG2.10.0c --- code/ARAX/ARAXQuery/Infer/scripts/infer_utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/ARAX/ARAXQuery/Infer/scripts/infer_utilities.py b/code/ARAX/ARAXQuery/Infer/scripts/infer_utilities.py index fe92d4376..19056adc4 100644 --- a/code/ARAX/ARAXQuery/Infer/scripts/infer_utilities.py +++ b/code/ARAX/ARAXQuery/Infer/scripts/infer_utilities.py @@ -350,7 +350,7 @@ def genrete_treat_subgraphs(self, response: ARAXResponse, top_drugs: pd.DataFram ] else: edge_attribute_list += [ - Attribute(original_attribute_name=None, value=True, attribute_type_id="EDAM-DATA:1772", attribute_source="infores:arax", value_type_id="metatype:Boolean", value_url=None, description="This edge was extracted from RTX-KG2.8.4c by ARAXInfer."), + Attribute(original_attribute_name=None, value=True, attribute_type_id="EDAM-DATA:1772", attribute_source="infores:arax", value_type_id="metatype:Boolean", value_url=None, description="This edge was extracted from RTX-KG2.10.0c by ARAXInfer."), ] retrieval_source = [ RetrievalSource(resource_id=primary_knowledge_source, resource_role="primary_knowledge_source"), From 5f92cc812055fabecf90ea5755f54918bec85d46 Mon Sep 17 00:00:00 2001 From: isbluis Date: Mon, 16 Sep 2024 22:18:47 +0000 Subject: [PATCH 12/21] Verify that we indeed connected to a valid autocomplete SQLite db file #2365 --- code/autocomplete/rtxcomplete.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/code/autocomplete/rtxcomplete.py b/code/autocomplete/rtxcomplete.py index 384665e97..3e9f13e2d 100644 --- a/code/autocomplete/rtxcomplete.py +++ b/code/autocomplete/rtxcomplete.py @@ -28,7 +28,11 @@ def load(): database_name = f"{autocomplete_filepath}{os.path.sep}{RTXConfig.autocomplete_path.split('/')[-1]}" conn = sqlite3.connect(database_name) cursor = conn.cursor() - print(f"INFO: Connected to {database_name}",file=sys.stderr) + try: + conn.execute(f"SELECT term FROM terms LIMIT 1") + print(f"INFO: Connected to {database_name}",file=sys.stderr) + except: + print(f"WARN: Could NOT connect to {database_name}. Please check that file and database exist!",file=sys.stderr) cache_database_name = os.path.dirname(os.path.abspath(__file__)) + '/rtxcomplete_cache.sqlite' cache_conn = sqlite3.connect(cache_database_name) From 3589d123333013199960733f512b949eba205cae Mon Sep 17 00:00:00 2001 From: Chunyu Ma Date: Mon, 16 Sep 2024 19:40:38 -0400 Subject: [PATCH 13/21] update ranking algorithm to exclude xDTD/xCRG support graph edges and the max flows --- code/ARAX/ARAXQuery/ARAX_ranker.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/code/ARAX/ARAXQuery/ARAX_ranker.py b/code/ARAX/ARAXQuery/ARAX_ranker.py index 63f7f99f7..31c843739 100644 --- a/code/ARAX/ARAXQuery/ARAX_ranker.py +++ b/code/ARAX/ARAXQuery/ARAX_ranker.py @@ -33,8 +33,8 @@ def _get_nx_edges_by_attr(G: Union[nx.MultiDiGraph, nx.MultiGraph], key: str, va def _get_query_graph_networkx_from_query_graph(query_graph: QueryGraph) -> nx.MultiDiGraph: query_graph_nx = nx.MultiDiGraph() - query_graph_nx.add_nodes_from([key for key,node in query_graph.nodes.items()]) - edge_list = [[edge.subject, edge.object, key, {'weight': 0.0}] for key,edge in query_graph.edges.items()] + query_graph_nx.add_nodes_from([key for key, node in query_graph.nodes.items() if 'creative_DTD_qnode' not in key and 'creative_CRG_qnode' not in key]) + edge_list = [[edge.subject, edge.object, key, {'weight': 0.0}] for key,edge in query_graph.edges.items() if 'creative_DTD_qedge' not in key and 'creative_CRG_qedge' not in key] query_graph_nx.add_edges_from(edge_list) return query_graph_nx @@ -124,8 +124,9 @@ def _get_weighted_graph_networkx_from_result_graph(kg_edge_id_to_edge: Dict[str, qg_edge_key_to_edge_tuple = {edge_tuple[2]: edge_tuple for edge_tuple in qg_edge_tuples} for analysis in result.analyses: # For now we only ever have one Analysis per Result for qedge_key, edge_binding_list in analysis.edge_bindings.items(): - qedge_tuple = qg_edge_key_to_edge_tuple[qedge_key] - res_graph[qedge_tuple[0]][qedge_tuple[1]][qedge_tuple[2]]['weight'] = _calculate_final_result_score(kg_edge_id_to_edge, edge_binding_list) + if 'creative_DTD_qedge' not in qedge_key and 'creative_CRG_qedge' not in qedge_key: + qedge_tuple = qg_edge_key_to_edge_tuple[qedge_key] + res_graph[qedge_tuple[0]][qedge_tuple[1]][qedge_tuple[2]]['weight'] = _calculate_final_result_score(kg_edge_id_to_edge, edge_binding_list) return res_graph @@ -187,13 +188,13 @@ def _score_networkx_graphs_by_max_flow(result_graphs_nx: List[Union[nx.MultiDiGr capacity="weight")) max_flow_value = 0.0 if len(max_flow_values_for_node_pairs) > 0: - max_flow_value = sum(max_flow_values_for_node_pairs)/float(len(max_flow_values_for_node_pairs)) + max_flow_value = _calculate_final_individual_edge_confidence(0, max_flow_values_for_node_pairs) else: max_flow_value = 1.0 max_flow_values.append(max_flow_value) return max_flow_values - +_calculate_final_individual_edge_confidence def 
_score_networkx_graphs_by_longest_path(result_graphs_nx: List[Union[nx.MultiDiGraph, nx.MultiGraph]]) -> List[float]: result_scores = [] @@ -209,7 +210,7 @@ def _score_networkx_graphs_by_longest_path(result_graphs_nx: List[Union[nx.Multi adj_matrix_power = np.linalg.matrix_power(adj_matrix, max_path_len)/math.factorial(max_path_len) score_list = [adj_matrix_power[map_node_name_to_index[node_i], map_node_name_to_index[node_j]] for node_i, node_j in pairs_with_max_path_len] - result_score = np.mean(score_list) + result_score = _calculate_final_individual_edge_confidence(0, score_list) result_scores.append(result_score) return result_scores @@ -365,7 +366,7 @@ def edge_attribute_score_combiner(self, edge_key, edge): elif 'infores' in edge_key.split('--')[-1]: # default score for other data sources base = edge_default_base else: # virtual edges or inferred edges - base = 0 # no base score for these edges. Its core is based on + base = 0 # no base score for these edges. Its score is based on its attribute scores. if edge.attributes is not None: for edge_attribute in edge.attributes: From d4cc37051a800efb3dd57c6392776ab5174a23d4 Mon Sep 17 00:00:00 2001 From: mohamad mohsen Date: Mon, 16 Sep 2024 20:42:19 -0400 Subject: [PATCH 14/21] test_connect_resveratrol_glyoxalase Failure --- code/ARAX/test/test_ARAX_connect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/ARAX/test/test_ARAX_connect.py b/code/ARAX/test/test_ARAX_connect.py index cb1536e1e..de4d6eef5 100644 --- a/code/ARAX/test/test_ARAX_connect.py +++ b/code/ARAX/test/test_ARAX_connect.py @@ -98,7 +98,7 @@ def test_connect_resveratrol_glyoxalase(): "create_message", "add_qnode(ids=PUBCHEM.COMPOUND:445154, key=n00)", "add_qnode(ids=NCBIGene:2739, key=n01)", - "connect(action=connect_nodes, max_path_length=3)", + "connect(action=connect_nodes, max_path_length=4)", "return(message=true, store=false)" ]}} [response, message] = _do_arax_query(query) From 9a316d7ff32188da8f5b4513c6ae67ffa6082458 Mon Sep 17 00:00:00 2001 From: amykglen Date: Mon, 16 Sep 2024 18:24:56 -0700 Subject: [PATCH 15/21] Adjust KG2 treats patch now that BiolinkHelper is fixed #2328 --- code/ARAX/ARAXQuery/ARAX_expander.py | 8 +++----- code/ARAX/ARAXQuery/Expand/trapi_querier.py | 6 ++---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/code/ARAX/ARAXQuery/ARAX_expander.py b/code/ARAX/ARAXQuery/ARAX_expander.py index 4ecc59b97..040246be7 100644 --- a/code/ARAX/ARAXQuery/ARAX_expander.py +++ b/code/ARAX/ARAXQuery/ARAX_expander.py @@ -60,9 +60,7 @@ def __init__(self): "aggregator_knowledge_source": {"==": "*"}} self.supported_qedge_qualifier_constraints = {"biolink:qualified_predicate", "biolink:object_direction_qualifier", "biolink:object_aspect_qualifier"} - self.higher_level_treats_predicates = {"biolink:treats_or_applied_or_studied_to_treat", - "biolink:applied_to_treat", - "biolink:studied_to_treat"} + self.treats_like_predicates = set(self.bh.get_descendants("biolink:treats_or_applied_or_studied_to_treat")).difference({"biolink:treats"}) def describe_me(self): """ @@ -512,7 +510,7 @@ def apply(self, response, input_parameters, mode: str = "ARAX"): # Remove KG2 SemMedDB treats_or_applied-type edges if this is an inferred treats query if alter_kg2_treats_edges: edge_keys_to_remove = {edge_key for edge_key, edge in overarching_kg.edges_by_qg_id[qedge_key].items() - if edge.predicate in self.higher_level_treats_predicates and + if edge.predicate in self.treats_like_predicates and any(source.resource_id == "infores:rtx-kg2" for source 
in edge.sources) and any(source.resource_id == "infores:semmeddb" for source in edge.sources)} log.debug(f"Removing {len(edge_keys_to_remove)} KG2 semmeddb treats_or_applied-type edges " @@ -593,7 +591,7 @@ def apply(self, response, input_parameters, mode: str = "ARAX"): num_edges_altered = 0 for edge in message.knowledge_graph.edges.values(): is_kg2_edge = any(source.resource_id == "infores:rtx-kg2" for source in edge.sources) - if is_kg2_edge and edge.predicate in self.higher_level_treats_predicates: + if is_kg2_edge and edge.predicate in self.treats_like_predicates: # Record the original KG2 predicate in an attribute edge.attributes.append(Attribute(attribute_type_id="biolink:original_predicate", value=edge.predicate, diff --git a/code/ARAX/ARAXQuery/Expand/trapi_querier.py b/code/ARAX/ARAXQuery/Expand/trapi_querier.py index c02f82447..a0ee1c477 100644 --- a/code/ARAX/ARAXQuery/Expand/trapi_querier.py +++ b/code/ARAX/ARAXQuery/Expand/trapi_querier.py @@ -93,10 +93,8 @@ async def answer_one_hop_query_async(self, query_graph: QueryGraph, # Patch to address lack of answers from KG2 for treats queries after treats refactor #2328 if alter_kg2_treats_edges and self.kp_infores_curie == "infores:rtx-kg2": for qedge in qg_copy.edges.values(): # Note there's only ever one qedge per QG here - qedge.predicates = list(set(qedge.predicates).union({"biolink:treats_or_applied_or_studied_to_treat", - "biolink:applied_to_treat", - "biolink:studied_to_treat"})) - log.info(f"For querying infores:rtx-kg2, edited {qedge_key} to use higher treats-type predicates: " + qedge.predicates = list(set(qedge.predicates).union({"biolink:treats_or_applied_or_studied_to_treat"})) + log.info(f"For querying infores:rtx-kg2, edited {qedge_key} to use higher treats-type predicate: " f"{qedge.predicates}") # Answer the query using the KP and load its answers into our object model From 0b8f2fb1c7f19599178201fdd65113247c23306d Mon Sep 17 00:00:00 2001 From: Chunyu Ma Date: Tue, 17 Sep 2024 13:38:34 -0500 Subject: [PATCH 16/21] Clear some lines --- code/ARAX/ARAXQuery/ARAX_ranker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/ARAX/ARAXQuery/ARAX_ranker.py b/code/ARAX/ARAXQuery/ARAX_ranker.py index 31c843739..85ed4f91b 100644 --- a/code/ARAX/ARAXQuery/ARAX_ranker.py +++ b/code/ARAX/ARAXQuery/ARAX_ranker.py @@ -194,7 +194,7 @@ def _score_networkx_graphs_by_max_flow(result_graphs_nx: List[Union[nx.MultiDiGr max_flow_values.append(max_flow_value) return max_flow_values -_calculate_final_individual_edge_confidence + def _score_networkx_graphs_by_longest_path(result_graphs_nx: List[Union[nx.MultiDiGraph, nx.MultiGraph]]) -> List[float]: result_scores = [] From 0adb0f943860a2ac710ccb7aff56235d6349cdf4 Mon Sep 17 00:00:00 2001 From: amykglen Date: Tue, 17 Sep 2024 18:40:20 -0700 Subject: [PATCH 17/21] List kg2cplover3 as dev KG2 endpoint (has new pathfinder endpoints) --- code/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml b/code/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml index 2a1a3ed2d..5af6ac810 100644 --- a/code/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml +++ b/code/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml @@ -51,7 +51,7 @@ servers: url: https://kg2.ci.transltr.io/api/rtxkg2/v1.4 x-maturity: staging - description: RTX-KG2 TRAPI 1.5 endpoint - development - url: https://kg2cplover.rtx.ai:9990 + url: https://kg2cplover3.rtx.ai:9990 
x-maturity: development tags: - description: Retrieve the meta knowledge graph representation of this TRAPI web From 59b201ef3417e9033dc635f73b76aa7706cefc54 Mon Sep 17 00:00:00 2001 From: Chunyu Ma Date: Tue, 17 Sep 2024 22:05:16 -0400 Subject: [PATCH 18/21] notch the confidence down a bit for the manual_agent edges --- code/ARAX/ARAXQuery/ARAX_ranker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/code/ARAX/ARAXQuery/ARAX_ranker.py b/code/ARAX/ARAXQuery/ARAX_ranker.py index 31c843739..1a8c7d6a3 100644 --- a/code/ARAX/ARAXQuery/ARAX_ranker.py +++ b/code/ARAX/ARAXQuery/ARAX_ranker.py @@ -20,7 +20,7 @@ from openapi_server.models.edge import Edge from openapi_server.models.attribute import Attribute -edge_confidence_manual_agent = 0.999 +edge_confidence_manual_agent = 0.99 def _get_nx_edges_by_attr(G: Union[nx.MultiDiGraph, nx.MultiGraph], key: str, val: str) -> Set[tuple]: res_set = set() @@ -194,7 +194,7 @@ def _score_networkx_graphs_by_max_flow(result_graphs_nx: List[Union[nx.MultiDiGr max_flow_values.append(max_flow_value) return max_flow_values -_calculate_final_individual_edge_confidence + def _score_networkx_graphs_by_longest_path(result_graphs_nx: List[Union[nx.MultiDiGraph, nx.MultiGraph]]) -> List[float]: result_scores = [] @@ -455,7 +455,7 @@ def edge_attribute_publication_normalizer(self, attribute_type_id: str, edge_att pub_value = np.log(n_publications) max_value = 1.0 curve_steepness = 3.16993 - logistic_midpoint = 1.38629 + logistic_midpoint = 1.60943 # log(5) = 1.60943 meaning having 5 publications is a mid point normalized_value = max_value / float(1 + np.exp(-curve_steepness * (pub_value - logistic_midpoint))) return normalized_value From f9317f413727875d715d479e0eabcfbe776060cc Mon Sep 17 00:00:00 2001 From: Amy Glen <49423686+amykglen@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:43:39 -0700 Subject: [PATCH 19/21] Update README.md; fix subbullet issue --- code/kg2c/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/code/kg2c/README.md b/code/kg2c/README.md index 80e8783a6..0a1391650 100644 --- a/code/kg2c/README.md +++ b/code/kg2c/README.md @@ -142,12 +142,12 @@ flags/options. 1. once the build finishes, run the regression test suite: 1. `pytest -vs test_synonymizer.py --synonymizername node_synonymizer_v1.0_KG2.X.Y.sqlite` 1. **Do a test KG2c build**: If you're satisfied with the synonymizer, proceed with a test KG2c build: - 2. `screen -S kg2c` - 3. `pyenv activate rtx` if you're using buildkg2c.rtx.ai; otherwise activate your python environment however necessary + 1. `screen -S kg2c` + 1. `pyenv activate rtx` if you're using buildkg2c.rtx.ai; otherwise activate your python environment however necessary 1. `cd RTX/code/kg2c` - 4. `python build_kg2c.py 2.10.0 v1.0 4.2.0 --uploadartifacts --test` + 1. `python build_kg2c.py 2.10.0 v1.0 4.2.0 --uploadartifacts --test` 1. **Do the full KG2c build**: Then, if everything went smoothly, do the full build (we're assuming you're in the same `screen` session): - 4. `python build_kg2c.py 2.10.0 v1.0 4.2.0 --uploadartifacts` + 1. `python build_kg2c.py 2.10.0 v1.0 4.2.0 --uploadartifacts` The synonymizer build should take around 5 hours and the KG2c build should take around 10 hours. 
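As a sanity check on the ranker change in PATCH 18 above: the publication normalizer is a logistic curve over the log of the publication count, and the constants in the diff place the curve's midpoint at five publications. A small standalone sketch (not the ARAX code itself) reproducing the arithmetic:

    import numpy as np

    def normalized_publication_score(n_publications: int) -> float:
        # Constants from ARAX_ranker.py: steepness 3.16993, midpoint log(5)
        curve_steepness = 3.16993
        logistic_midpoint = 1.60943  # log(5), so 5 publications scores 0.5
        pub_value = np.log(n_publications)
        return 1.0 / (1 + np.exp(-curve_steepness * (pub_value - logistic_midpoint)))

    for n in (1, 5, 50):
        print(n, round(float(normalized_publication_score(n)), 3))
    # 1 0.006, 5 0.5, 50 0.999: scores stay low until the count clears the midpoint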
From b28db5c4a388df7579805ce3db711dd3e6e172a1 Mon Sep 17 00:00:00 2001 From: Abhilash Reddy Date: Mon, 23 Sep 2024 09:50:52 -0400 Subject: [PATCH 20/21] update resource limits for arax and kg2 --- deploy/arax/values.yaml | 4 ++-- deploy/kg2/values.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deploy/arax/values.yaml b/deploy/arax/values.yaml index 2bb081879..e40850f34 100644 --- a/deploy/arax/values.yaml +++ b/deploy/arax/values.yaml @@ -74,8 +74,8 @@ affinity: resources: requests: memory: 50Gi - cpu: 10000m + cpu: 8000m limits: memory: 58Gi - cpu: 14000m + cpu: 11000m diff --git a/deploy/kg2/values.yaml b/deploy/kg2/values.yaml index 7fc012048..8c45e3d6e 100644 --- a/deploy/kg2/values.yaml +++ b/deploy/kg2/values.yaml @@ -81,7 +81,7 @@ storage: resources: requests: memory: 50Gi - cpu: 10000m + cpu: 6000m limits: memory: 58Gi - cpu: 14000m + cpu: 8000m From 00dc4b77795d950753356fe6b39664cff212cf47 Mon Sep 17 00:00:00 2001 From: Kevin Vizhalil Date: Mon, 23 Sep 2024 13:08:47 -0400 Subject: [PATCH 21/21] Fixing #2378 --- .../python-flask-server/KG2/openapi_server/__main__.py | 3 ++- .../OpenAPI/python-flask-server/openapi_server/__main__.py | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py b/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py index 0dd657848..a3cb8c1c1 100644 --- a/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py +++ b/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py @@ -55,7 +55,8 @@ def instrument(app, host, port): ) ) ) - trace.get_tracer_provider().get_tracer(__name__) + # trace.get_tracer_provider().get_tracer(__name__) + tracer_provider = trace.get_tracer(__name__) FlaskInstrumentor().instrument_app(app=app.app) RequestsInstrumentor().instrument() AioHttpClientInstrumentor().instrument() diff --git a/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py b/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py index f53d7d02d..8abdc38ba 100644 --- a/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py +++ b/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py @@ -55,8 +55,10 @@ def instrument(app, host, port): ) ) ) - tracer_provider = trace.get_tracer_provider() - tracer_provider.get_tracer(__name__) + # tracer_provider = trace.get_tracer_provider() + # tracer_provider.get_tracer(__name__) + tracer_provider = trace.get_tracer(__name__) + FlaskInstrumentor().instrument_app(app=app.app) RequestsInstrumentor().instrument() AioHttpClientInstrumentor().instrument()
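A closing note on PATCH 21: in the OpenTelemetry Python API, trace.get_tracer(__name__) fetches a tracer from the globally registered provider, so the commented-out provider lookup and the new one-liner behave the same (though the variable named tracer_provider now actually holds a Tracer, not a provider). A minimal sketch of the idiomatic pattern, assuming the standard opentelemetry-sdk package rather than the ARAX server setup:

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider

    # Register a provider globally once at startup...
    trace.set_tracer_provider(TracerProvider())

    # ...then ask the API for a tracer; this resolves against the global
    # provider, which is what the one-line replacement in __main__.py relies on.
    tracer = trace.get_tracer(__name__)

    with tracer.start_as_current_span("example-span"):
        pass  # instrumented work would happen here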