From 1420aaefddc8f7ad081766da6d43354e50b2ddb1 Mon Sep 17 00:00:00 2001 From: Kevin Vizhalil Date: Mon, 9 Sep 2024 15:46:39 -0400 Subject: [PATCH 01/21] #2352 Improve quality of xCRG Paths --- code/ARAX/ARAXQuery/ARAX_infer.py | 24 +++++- .../ARAXQuery/Infer/scripts/creativeCRG.py | 84 +++++++++++++++---- code/ARAX/BiolinkHelper/biolink_helper.py | 48 +++++++++-- 3 files changed, 129 insertions(+), 27 deletions(-) diff --git a/code/ARAX/ARAXQuery/ARAX_infer.py b/code/ARAX/ARAXQuery/ARAX_infer.py index b2be0e7a9..73d358624 100644 --- a/code/ARAX/ARAXQuery/ARAX_infer.py +++ b/code/ARAX/ARAXQuery/ARAX_infer.py @@ -27,6 +27,9 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) from openapi_server.models.edge import Edge from openapi_server.models.attribute import Attribute as EdgeAttribute from openapi_server.models.node import Node +from openapi_server.models.qualifier import Qualifier +from openapi_server.models.qualifier_constraint import QualifierConstraint as QConstraint + sys.path.append(os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code', 'ARAX', 'NodeSynonymizer'])) from node_synonymizer import NodeSynonymizer @@ -36,6 +39,7 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) # from creativeDTD import creativeDTD from creativeCRG import creativeCRG from ExplianableDTD_db import ExplainableDTD + # from ExplianableCRG import ExplianableCRG # sys.path.append(os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code'])) @@ -615,7 +619,7 @@ def __chemical_gene_regulation_graph_expansion(self, describe=False): f"The `n_result_curies` value must be a positive integer. The provided value was {self.parameters['n_result_curies']}.", error_code="ValueError") else: - self.parameters['n_result_curies'] = 10 + self.parameters['n_result_curies'] = 30 if 'n_paths' in self.parameters: if isinstance(self.parameters['n_paths'], str): @@ -678,9 +682,26 @@ def __chemical_gene_regulation_graph_expansion(self, describe=False): if not preferred_subject_curie and not preferred_object_curie: self.response.error(f"Both parameters 'subject_curie' and 'object_curie' are not provided. Please provide the curie for either one of them") return self.response + qedges = message.query_graph.edges + else: self.response.error(f"The 'query_graph' is detected. 
One of 'subject_qnode_id' or 'object_qnode_id' should be specified.") + + if self.parameters['regulation_type'] == 'increase': + edge_qualifier_direction = 'increased' + else: + edge_qualifier_direction = 'decreased' + edge_qualifier_list = [ + Qualifier(qualifier_type_id='biolink:object_aspect_qualifier', qualifier_value='activity_or_abundance'), + Qualifier(qualifier_type_id='biolink:object_direction_qualifier', qualifier_value=edge_qualifier_direction)] + + for qedge in qedges: + edge = message.query_graph.edges[qedge] + edge.knowledge_type = "inferred" + edge.predicates = ["biolink:affects"] + edge.qualifier_constraints = [QConstraint(qualifier_set=edge_qualifier_list)] + else: if 'subject_curie' in parameters or 'object_curie' in parameters: @@ -763,6 +784,7 @@ def __chemical_gene_regulation_graph_expansion(self, describe=False): iu = InferUtilities() qedge_id = self.parameters.get('qedge_id') + self.response, self.kedge_global_iter, self.qedge_global_iter, self.qnode_global_iter, self.option_global_iter = iu.genrete_regulate_subgraphs(self.response, None, normalized_object_curie, top_predictions, top_paths, qedge_id, self.parameters['regulation_type'], self.kedge_global_iter, self.qedge_global_iter, self.qnode_global_iter, self.option_global_iter) return self.response diff --git a/code/ARAX/ARAXQuery/Infer/scripts/creativeCRG.py b/code/ARAX/ARAXQuery/Infer/scripts/creativeCRG.py index d94f7d3e0..75c40d043 100644 --- a/code/ARAX/ARAXQuery/Infer/scripts/creativeCRG.py +++ b/code/ARAX/ARAXQuery/Infer/scripts/creativeCRG.py @@ -8,7 +8,6 @@ import requests # import graph_tool.all as gt from tqdm import tqdm, trange - pathlist = os.getcwd().split(os.path.sep) RTXindex = pathlist.index("RTX") sys.path.append(os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code', 'ARAX', 'ARAXQuery'])) @@ -23,7 +22,9 @@ from RTXConfiguration import RTXConfiguration RTXConfig = RTXConfiguration() sys.path.append(os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code', 'ARAX', 'ARAXQuery',''])) - +sys.path.append(os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code', 'ARAX', 'BiolinkHelper',''])) +from biolink_helper import BiolinkHelper +def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) def call_plover(curies: List, respect_predicate_symmetry: bool=False): json = {} plover_url = RTXConfig.plover_url @@ -164,6 +165,16 @@ def __init__(self, response: ARAXResponse, data_path: str): ## set up parameters self.response = response + self.bh = BiolinkHelper() + self.predicate_depth_map = self.bh.get_predicate_depth_map() + self.relevant_node_categories = ['biolink:Drug', 'biolink:PathologicalProcess', 'biolink:GeneOrGeneProduct', 'biolink:ChemicalEntity', + 'biolink:SmallMolecule', 'biolink:Gene', 'biolink:BiologicalProcess', 'biolink:Pathway', 'biolink:Disease', + 'biolink:Transcript', 'biolink:Cell', 'biolink:GeneFamily', 'biolink:GeneProduct', 'biolink:Exon', + 'biolink:DiseaseOrPhenotypicFeature', 'biolink:PhenotypicFeature', 'biolink:MolecularActivity', 'biolink:GeneGroupingMixin', + 'biolink:CellularComponent', 'biolink:RNAProduct', 'biolink:Protein', 'biolink:BiologicalProcessOrActivity', 'biolink:PhysiologicalProcess', + 'biolink:NoncodingRNAProduct', 'biolink:ProteinFamily', 'biolink:ProteinDomain'] + self.relevant_node_categories = self.bh.get_descendants(self.relevant_node_categories) + self.data_path = data_path self.chemical_type = ['biolink:ChemicalEntity', 'biolink:ChemicalMixture','biolink:SmallMolecule'] self.gene_type = ['biolink:Gene','biolink:Protein'] @@ -212,6 +223,7 @@ def 
get_tf_neighbors(self): for edge in edges.keys(): c1 = edges[edge][0] c2 = edges[edge][1] + depth = self.predicate_depth_map[edges[edge][2]] if 'subclass' in edges[edge][2]: continue if c1 == c2: @@ -219,11 +231,11 @@ def get_tf_neighbors(self): if c1 in self.tf_list: curie = c2 tf = c1 - answer_tf_neigbor_data.append({"edge_id": edge, "transcription_factor":tf, "neighbour": curie}) + answer_tf_neigbor_data.append({"edge_id": edge, "transcription_factor":tf, "neighbour": curie, "depth": depth}) if c2 in self.tf_list: curie = c1 tf = c2 - query_tf_neighbor_data.append({"edge_id": edge, "transcription_factor":tf, "neighbour": curie}) + query_tf_neighbor_data.append({"edge_id": edge, "transcription_factor":tf, "neighbour": curie, "depth": depth}) return query_tf_neighbor_data,answer_tf_neigbor_data, edges def add_node_ids_to_path(self, paths, tf_edges,chemical_edges, gene_edges): @@ -507,62 +519,90 @@ def _check_params(query_chemical: Optional[str], query_gene: Optional[str], mode else: top_paths = dict() + gene_neighbors = call_plover([preferred_query_gene]) answers = res['chemical_id'].tolist() self.preferred_curies = self.get_preferred_curies(answers) valid_chemicals = [item for item in self.preferred_curies.values() if item] chemical_neighbors = call_plover(valid_chemicals) query_tf_neighbors, answer_tf_neigbors, tf_edges = self.get_tf_neighbors() - paths = self.get_paths(preferred_query_gene, res['chemical_id'].tolist(), gene_neighbors, chemical_neighbors, query_tf_neighbors, answer_tf_neigbors,self.tf_list, M) final_paths = self.add_node_ids_to_path(paths, tf_edges, chemical_neighbors, gene_neighbors) return final_paths def get_paths(self, query_curie, answer_curies, query_neighbors, answer_neighbors, query_tf_neighbors, answer_tf_neighbors, tf_list,n_paths): - query_neighbors_curies = list(query_neighbors['nodes']['n01'].keys()) query_tf_neighbors_dict = {} answer_tf_neighbors_dict = {} query_path = {} answer_path = {} combined_path = dict() - one_hop_from_query = set(tf_list).intersection(query_neighbors_curies) + valid_query_tf_list = [] + valid_answer_tf_list = {} + for answer in answer_curies: + valid_answer_tf_list[answer] = [] + for record in query_tf_neighbors: - query_tf_neighbors_dict[record['neighbour']] = query_tf_neighbors_dict.get(record['neighbour'],[]) + [(record['edge_id'],record['transcription_factor'])] + query_tf_neighbors_dict[record['neighbour']] = query_tf_neighbors_dict.get(record['neighbour'],[]) + [(record['edge_id'],record['transcription_factor'], record['depth'])] for record in answer_tf_neighbors: - answer_tf_neighbors_dict[record['neighbour']] = answer_tf_neighbors_dict.get(record['neighbour'],[]) + [(record['edge_id'],record['transcription_factor'])] + answer_tf_neighbors_dict[record['neighbour']] = answer_tf_neighbors_dict.get(record['neighbour'],[]) + [(record['edge_id'],record['transcription_factor'], record['depth'])] # one hop from query for edge_id, edge in query_neighbors['edges']['e00'].items(): if edge[1] in tf_list and edge[1] not in query_path: - query_path[edge[1]] = [edge_id] - + valid_query_tf_list.append(edge[1]) + query_path[edge[1]] = [edge_id,self.predicate_depth_map[edge[2]]] + elif edge[1] in tf_list and edge[1] in query_path: + if query_path[edge[1]][-1] < self.predicate_depth_map[edge[2]]: + query_path[edge[1]] = [edge_id,self.predicate_depth_map[edge[2]]] # two hop from query for edge_id, edge in query_neighbors['edges']['e00'].items(): if edge[0] != query_curie: continue + relevant_node = False neighbor = edge[1] + neighbor_category 
= query_neighbors['nodes']['n01'][neighbor][1] + if neighbor_category in self.relevant_node_categories: + relevant_node = True + for item in query_tf_neighbors_dict.get(neighbor,[]): + if item[1] not in valid_query_tf_list and relevant_node: + valid_query_tf_list.append(item[1]) if item[1] not in query_path: - query_path[item[1]] = [edge_id,item[0]] + query_path[item[1]] = [edge_id,item[0], item[2]] + elif query_path[item[1]][-1] < min(item[2],self.predicate_depth_map[edge[2]]) and ((item[1] not in valid_query_tf_list) or relevant_node) : + query_path[item[1]] = [edge_id,item[0], min(item[2],self.predicate_depth_map[edge[2]])] + for edge_id, edge in answer_neighbors['edges']['e00'].items(): if edge[1] not in self.preferred_curies.values(): continue + relevant_node = False answer = edge[1] neighbor = edge[0] + neighbor_category = answer_neighbors['nodes']['n01'][neighbor][1] + if neighbor_category in self.relevant_node_categories: + relevant_node = True # one hop from answer if answer not in answer_path: answer_path[answer] = dict() - if neighbor in tf_list: - answer_path[answer][neighbor] = [edge_id] + if neighbor in tf_list and neighbor not in answer_path[answer]: + valid_answer_tf_list[answer].append(neighbor) + answer_path[answer][neighbor] = [edge_id, self.predicate_depth_map[edge[2]]] + elif neighbor in tf_list and neighbor in answer_path[answer]: + if answer_path[answer][neighbor][-1] < self.predicate_depth_map[edge[2]]: + answer_path[answer][neighbor] = [edge_id, self.predicate_depth_map[edge[2]]] # two hop from answer for item in answer_tf_neighbors_dict.get(neighbor,[]): + neighbor_category = answer_neighbors['nodes']['n01'][neighbor][1] + if relevant_node and item[1] not in valid_answer_tf_list[answer]: + valid_answer_tf_list[answer].append(item[1]) if item[1] not in answer_path[answer]: - answer_path[answer][item[1]] = [item[0],edge_id] - + answer_path[answer][item[1]] = [item[0],edge_id, item[2]] + elif answer_path[answer][item[1]][-1] < item[2] and ((item[1] not in valid_answer_tf_list[answer]) or relevant_node): + answer_path[answer][item[1]] = [item[0], edge_id, min(item[2],self.predicate_depth_map[edge[2]])] # joining paths for answer in answer_curies: combined_path[(query_curie,answer)] = list() @@ -573,13 +613,21 @@ def get_paths(self, query_curie, answer_curies, query_neighbors, answer_neighbor continue path_counter = 0 - for tf in tf_list: + relevant_tf = list(set(valid_query_tf_list).intersection(valid_answer_tf_list[answer])) + irrelevant_tf = [tf for tf in tf_list if tf not in relevant_tf] + for tf in relevant_tf: if path_counter > n_paths: break if tf in query_path and tf in answer_path[key]: - combined_path[(query_curie,answer)].append(query_path[tf] + answer_path[key][tf]) + combined_path[(query_curie,answer)].append(query_path[tf][:-1] + answer_path[key][tf][:-1]) path_counter += 1 + for tf in irrelevant_tf: + if path_counter > n_paths: + break + if tf in query_path and tf in answer_path[key]: + combined_path[(query_curie,answer)].append(query_path[tf][:-1] + answer_path[key][tf][:-1]) + path_counter += 1 return combined_path diff --git a/code/ARAX/BiolinkHelper/biolink_helper.py b/code/ARAX/BiolinkHelper/biolink_helper.py index f0b001767..bbb693be5 100644 --- a/code/ARAX/BiolinkHelper/biolink_helper.py +++ b/code/ARAX/BiolinkHelper/biolink_helper.py @@ -125,7 +125,18 @@ def get_canonical_predicates(self, predicates: Union[str, List[str], Set[str]]) for predicate in valid_predicates} canonical_predicates.update(invalid_predicates) # Go ahead and include those we 
don't have canonical info for return list(canonical_predicates) - + + def get_predicate_depth_map(self)->Dict[str,int]: + response = self._download_biolink_model() + if response.status_code == 200: + biolink_model = yaml.safe_load(response.text) + predicate_dag = self._build_predicate_dag(biolink_model) + + else: + raise RuntimeError(f"ERROR: Request to get Biolink {self.biolink_version} YAML file returned " + f"{response.status_code} response. Cannot load BiolinkHelper.") + return self._get_depths_from_root(predicate_dag) + def is_symmetric(self, predicate: str) -> Optional[bool]: if predicate in self.biolink_lookup_map["predicates"]: return self.biolink_lookup_map["predicates"][predicate]["is_symmetric"] @@ -198,7 +209,15 @@ def _load_biolink_lookup_map(self, is_test: bool = False): with open(self.biolink_lookup_map_path, "rb") as biolink_map_file: biolink_lookup_map = pickle.load(biolink_map_file) return biolink_lookup_map - + + def _download_biolink_model(self): + response = requests.get(f"https://raw.githubusercontent.com/biolink/biolink-model/{self.biolink_version}/biolink-model.yaml", + timeout=10) + if response.status_code != 200: # Sometimes Biolink's tags start with 'v', so try that + response = requests.get(f"https://raw.githubusercontent.com/biolink/biolink-model/v{self.biolink_version}/biolink-model.yaml", + timeout=10) + return response + def _create_biolink_lookup_map(self) -> Dict[str, Dict[str, Dict[str, Union[str, List[str], bool]]]]: timestamp = str(datetime.datetime.now().isoformat()) eprint(f"{timestamp}: INFO: Building local Biolink {self.biolink_version} ancestor/descendant lookup map " @@ -206,17 +225,14 @@ def _create_biolink_lookup_map(self) -> Dict[str, Dict[str, Dict[str, Union[str, biolink_lookup_map = {"predicates": dict(), "categories": dict(), "aspects": dict(), "directions": dict()} # Grab the relevant Biolink yaml file - response = requests.get(f"https://raw.githubusercontent.com/biolink/biolink-model/{self.biolink_version}/biolink-model.yaml", - timeout=10) - if response.status_code != 200: # Sometimes Biolink's tags start with 'v', so try that - response = requests.get(f"https://raw.githubusercontent.com/biolink/biolink-model/v{self.biolink_version}/biolink-model.yaml", - timeout=10) + response = self._download_biolink_model() if response.status_code == 200: biolink_model = yaml.safe_load(response.text) # -------------------------------- PREDICATES --------------------------------- # predicate_dag = self._build_predicate_dag(biolink_model) + import pdb;pdb.set_trace() # Build our map of predicate ancestors/descendants for easy lookup, first WITH mixins for node_id in list(predicate_dag.nodes): node_info = predicate_dag.nodes[node_id] @@ -382,7 +398,23 @@ def _build_direction_dag(self, biolink_model: dict) -> nx.DiGraph: direction_dag.add_edge(parent_name_trapi, direction_name_trapi) return direction_dag - + + def _get_depths_from_root(self, dag)-> Dict[str,int]: + node_depths = {} + for node in nx.topological_sort(dag): + # Skip if the node is the start node + + # Get all predecessors of the current node + predecessors = list(dag.predecessors(node)) + + # If the node has predecessors, calculate its depth as max(depth of predecessors) + 1 + if predecessors: + node_depths[node] = max(node_depths[pred] for pred in predecessors) + 1 + else: + node_depths[node] = 0 # Handle nodes that have no predecessors (if any) + + return node_depths + @staticmethod def _get_ancestors_nx(nx_graph: nx.DiGraph, node_id: str) -> List[str]: return list(nx.ancestors(nx_graph, 
node_id).union({node_id})) From fd5a0a0f57c9a1136f450f545196a7af8f25af39 Mon Sep 17 00:00:00 2001 From: amykglen Date: Mon, 9 Sep 2024 12:47:25 -0700 Subject: [PATCH 02/21] Ignore nodes/edges with empty ID/subject/object --- code/ARAX/ARAXQuery/Expand/trapi_querier.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/code/ARAX/ARAXQuery/Expand/trapi_querier.py b/code/ARAX/ARAXQuery/Expand/trapi_querier.py index 9c256e0df..c02f82447 100644 --- a/code/ARAX/ARAXQuery/Expand/trapi_querier.py +++ b/code/ARAX/ARAXQuery/Expand/trapi_querier.py @@ -368,9 +368,20 @@ def _load_kp_json_response(self, json_response: dict, qg: QueryGraph) -> QGOrgan # Build a map that indicates which qnodes/qedges a given node/edge fulfills kg_to_qg_mappings, query_curie_mappings = self._get_kg_to_qg_mappings_from_results(kp_message.results, qg) - # Populate our final KG with the returned nodes and edges + # Populate our final KG with the returned edges returned_edge_keys_missing_qg_bindings = set() + nodes_dict = kp_message.knowledge_graph.nodes for returned_edge_key, returned_edge in kp_message.knowledge_graph.edges.items(): + # Catch invalid subject/object + if not returned_edge.subject or not returned_edge.object: + self.log.warning(f"{self.kp_infores_curie}: Edge has empty subject/object, skipping. " + f"subject: '{returned_edge.subject}', object: '{returned_edge.object}'") + continue + if returned_edge.subject not in nodes_dict or returned_edge.object not in nodes_dict: + self.log.warning(f"{self.kp_infores_curie}: Edge is an orphan, skipping. " + f"subject: '{returned_edge.subject}', object: '{returned_edge.object}'") + continue + arax_edge_key = self._get_arax_edge_key(returned_edge) # Convert to an ID that's unique for us # Put in a placeholder for missing required attribute fields to try to keep our answer TRAPI-compliant @@ -399,9 +410,13 @@ def _load_kp_json_response(self, json_response: dict, qg: QueryGraph) -> QGOrgan self.log.warning(f"{self.kp_infores_curie}: {len(returned_edge_keys_missing_qg_bindings)} edges in the KP's answer " f"KG have no bindings to the QG: {returned_edge_keys_missing_qg_bindings}") + # Populate our final KG with the returned nodes returned_node_keys_missing_qg_bindings = set() for returned_node_key, returned_node in kp_message.knowledge_graph.nodes.items(): - if returned_node_key not in kg_to_qg_mappings['nodes']: + if not returned_node_key: + self.log.warning(f"{self.kp_infores_curie}: Node has empty ID, skipping. 
Node key is: " + f"'{returned_node_key}'") + elif returned_node_key not in kg_to_qg_mappings['nodes']: returned_node_keys_missing_qg_bindings.add(returned_node_key) else: for qnode_key in kg_to_qg_mappings['nodes'][returned_node_key]: From d37f15a73bfb8fea1826634d23254b787cc9445e Mon Sep 17 00:00:00 2001 From: Kevin Vizhalil Date: Mon, 9 Sep 2024 16:05:10 -0400 Subject: [PATCH 03/21] removing pdb --- code/ARAX/BiolinkHelper/biolink_helper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/code/ARAX/BiolinkHelper/biolink_helper.py b/code/ARAX/BiolinkHelper/biolink_helper.py index bbb693be5..cb0e4162b 100644 --- a/code/ARAX/BiolinkHelper/biolink_helper.py +++ b/code/ARAX/BiolinkHelper/biolink_helper.py @@ -232,7 +232,6 @@ def _create_biolink_lookup_map(self) -> Dict[str, Dict[str, Dict[str, Union[str, # -------------------------------- PREDICATES --------------------------------- # predicate_dag = self._build_predicate_dag(biolink_model) - import pdb;pdb.set_trace() # Build our map of predicate ancestors/descendants for easy lookup, first WITH mixins for node_id in list(predicate_dag.nodes): node_info = predicate_dag.nodes[node_id] From 9a0307466b2d3dca00c8902dc48329e4b394ed30 Mon Sep 17 00:00:00 2001 From: Eric Deutsch Date: Thu, 12 Sep 2024 08:46:16 -0700 Subject: [PATCH 04/21] Update Python version in the template script. Anticipating no actual change to deployed code in arax.ncats.io or any ITRB instances. #2348 --- code/UI/OpenAPI/python-flask-server/RTX_OpenAPI.start | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/UI/OpenAPI/python-flask-server/RTX_OpenAPI.start b/code/UI/OpenAPI/python-flask-server/RTX_OpenAPI.start index 283fd67a7..d833c5d5c 100755 --- a/code/UI/OpenAPI/python-flask-server/RTX_OpenAPI.start +++ b/code/UI/OpenAPI/python-flask-server/RTX_OpenAPI.start @@ -17,7 +17,7 @@ fi cd /mnt/data/orangeboard/$DEVAREA/RTX/code/UI/OpenAPI/python-flask-server -export PATH=/mnt/data/python/Python-3.9.16/bin:$PATH +export PATH=/mnt/data/python/Python-3.9.18/bin:$PATH exec python3 -u -m openapi_server 1>$LOGFILE 2>$ELOGFILE From a1b38ed75b009803c256ac8ed1e6c700f0cab64e Mon Sep 17 00:00:00 2001 From: Eric Deutsch Date: Thu, 12 Sep 2024 09:08:33 -0700 Subject: [PATCH 05/21] Remove the STDERR emission of an error message during a non-error condition #2141 --- code/ARAX/ARAXQuery/ARAX_query.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/code/ARAX/ARAXQuery/ARAX_query.py b/code/ARAX/ARAXQuery/ARAX_query.py index d7934ea34..44cd84b48 100644 --- a/code/ARAX/ARAXQuery/ARAX_query.py +++ b/code/ARAX/ARAXQuery/ARAX_query.py @@ -283,7 +283,8 @@ def track_query_finish(self): if hasattr(self.response, 'job_id'): query_tracker.update_tracker_entry(self.response.job_id, attributes) else: - eprint("*******ERROR: self.response has no job_id attr! 
E275") + # Sometimes we finish without a job_id having been created, and that's okay + pass From 641d570da2acbabcf6e80ffcd34f9b608cbf380f Mon Sep 17 00:00:00 2001 From: isbluis Date: Sat, 14 Sep 2024 05:57:38 +0000 Subject: [PATCH 06/21] - Rename quick_defs.json to .js since it is a Javascript file (#2365) - Add quick-links to external URLs for ClinicalTrials and DrugApprovals KPs in Settings (#2346) - Update some of those URLs to TRAPI 1.5 endpoints - Update ARAXi DSL helper JSON - Add "Total" row to Testing summary view --- code/UI/interactive/araxi.js | 14 +++++++------- code/UI/interactive/dsl2json.py | 1 + code/UI/interactive/index.html | 10 ++++++---- code/UI/interactive/rtx.js | 8 ++++++++ code/UI/interactive/rtx.version | 2 +- code/autocomplete/data/quick_def.json | 1 - 6 files changed, 23 insertions(+), 13 deletions(-) delete mode 100644 code/autocomplete/data/quick_def.json diff --git a/code/UI/interactive/araxi.js b/code/UI/interactive/araxi.js index 178da2f24..75d1bff36 100644 --- a/code/UI/interactive/araxi.js +++ b/code/UI/interactive/araxi.js @@ -1,5 +1,5 @@ // WARNING: -// This file was auto-generated by /mnt/data/orangeboard/devLM/RTX/code/UI/interactive/./dsl2json.py on: 2024-07-25 01:50:08 +// This file was auto-generated by /mnt/data/orangeboard/devLM/RTX/code/UI/interactive/./dsl2json.py on: 2024-09-11 06:23:47 // var araxi_commands = { "add_qedge()": { @@ -208,7 +208,7 @@ var araxi_commands = { "description": "Whether to omit supporting data on nodes/edges in the results (e.g., publications, description, etc.)." } }, - "description": "This command will expand (aka, answer/fill) your query graph in an edge-by-edge fashion, intelligently selecting which KPs to use for each edge. It selects KPs from the SmartAPI Registry based on the meta information provided by their TRAPI APIs, whether they have an endpoint running a matching TRAPI version, and whether they have an endpoint with matching maturity. For each QEdge, it queries the selected KPs concurrently; it will timeout for a particular KP if it decides it's taking too long to respond (this KP timeout can be controlled by the user). You may also optionally specify a particular KP to use via the 'kp' parameter (described below).\n\nCurrent candidate KPs include (for TRAPI 1.5, maturity 'development'): \ninfores:answer-coalesce, infores:automat-binding-db, infores:automat-cam-kp, infores:automat-ctd, infores:automat-drug-central, infores:automat-genome-alliance, infores:automat-gtex, infores:automat-gtopdb, infores:automat-gwas-catalog, infores:automat-hetionet, infores:automat-hgnc, infores:automat-hmdb, infores:automat-human-goa, infores:automat-icees-kg, infores:automat-intact, infores:automat-monarchinitiative, infores:automat-panther, infores:automat-pharos, infores:automat-reactome, infores:automat-robokop, infores:automat-string-db, infores:automat-ubergraph, infores:automat-viral-proteome, infores:cohd, infores:connections-hypothesis, infores:gelinea, infores:genetics-data-provider, infores:knowledge-collaboratory, infores:molepro, infores:openpredict, infores:rtx-kg2, infores:service-provider-trapi, infores:spoke. \n\n(Note that this list of KPs may change unexpectedly based on the SmartAPI registry.)" + "description": "This command will expand (aka, answer/fill) your query graph in an edge-by-edge fashion, intelligently selecting which KPs to use for each edge. 
It selects KPs from the SmartAPI Registry based on the meta information provided by their TRAPI APIs, whether they have an endpoint running a matching TRAPI version, and whether they have an endpoint with matching maturity. For each QEdge, it queries the selected KPs concurrently; it will timeout for a particular KP if it decides it's taking too long to respond (this KP timeout can be controlled by the user). You may also optionally specify a particular KP to use via the 'kp' parameter (described below).\n\nCurrent candidate KPs include (for TRAPI 1.5, maturity 'development'): \ninfores:answer-coalesce, infores:automat-binding-db, infores:automat-cam-kp, infores:automat-ctd, infores:automat-drug-central, infores:automat-genome-alliance, infores:automat-gtex, infores:automat-gtopdb, infores:automat-gwas-catalog, infores:automat-hetionet, infores:automat-hgnc, infores:automat-hmdb, infores:automat-human-goa, infores:automat-icees-kg, infores:automat-intact, infores:automat-monarchinitiative, infores:automat-panther, infores:automat-pharos, infores:automat-reactome, infores:automat-robokop, infores:automat-string-db, infores:automat-ubergraph, infores:automat-viral-proteome, infores:cohd, infores:connections-hypothesis, infores:gelinea, infores:genetics-data-provider, infores:knowledge-collaboratory, infores:molepro, infores:multiomics-clinicaltrials, infores:multiomics-drugapprovals, infores:multiomics-microbiome, infores:multiomics-multiomics, infores:openpredict, infores:rtx-kg2, infores:service-provider-trapi, infores:spoke. \n\n(Note that this list of KPs may change unexpectedly based on the SmartAPI registry.)" }, "overlay(action=add_node_pmids)": { "parameters": { @@ -1374,15 +1374,15 @@ var araxi_commands = { "type": "list", "description": "List with just two qnode keys to connect. example: [n1, n2]" }, - "result_as": { + "node_category_constraint": { "is_required": false, "examples": [ - "betweenness_centrality", - "all_in_one", - "one_by_one" + "biolink:Disease", + "biolink:Gene", + "biolink:ChemicalEntity" ], "type": "string", - "description": "It determines how to receive the results. For instance, one_by_one means that it will return each path in one subgraph. The default value is betweenness_centrality" + "description": "This constraint will display paths that only pass through the user-specified category." } }, "description": "\nconnect_nodes adds paths between two nodes specified in the query.\n " diff --git a/code/UI/interactive/dsl2json.py b/code/UI/interactive/dsl2json.py index ecca50a5f..771880765 100755 --- a/code/UI/interactive/dsl2json.py +++ b/code/UI/interactive/dsl2json.py @@ -1,3 +1,4 @@ +#!/mnt/data/python/Python-3.9.18/bin/python3 import datetime import importlib import json diff --git a/code/UI/interactive/index.html b/code/UI/interactive/index.html index 93ffa494d..3dc4534a3 100644 --- a/code/UI/interactive/index.html +++ b/code/UI/interactive/index.html @@ -14,7 +14,7 @@ - + @@ -552,13 +552,15 @@

 EXTERNAL API (post query):
 [hunk body garbled in extraction: the HTML link markup was stripped, leaving only bare diff markers; per the commit message, this hunk updates the external API quick-links to TRAPI 1.5 endpoints and adds links for the ClinicalTrials and DrugApprovals KPs]
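The Total-row bookkeeping added to displayARSResults() in the rtx.js diff below seeds a stats[agent]['TOTAL'] bucket and then accumulates every non-TOTAL status count into it. A minimal Python sketch of the same tallying logic (the agent names and counts here are hypothetical, not taken from the UI code):

    # Hypothetical per-agent status counts, mirroring the stats object in rtx.js
    stats = {
        "ara-arax": {"PASSED": 12, "FAILED": 3},
        "ara-bte": {"PASSED": 9, "ERROR": 1},
    }

    for agent, counts in stats.items():
        counts["TOTAL"] = 0  # seed the Total row for this agent
        for status, count in list(counts.items()):
            # skip the TOTAL bucket itself and any missing counts
            if status != "TOTAL" and count is not None:
                counts["TOTAL"] += count

    print(stats["ara-arax"]["TOTAL"])  # 15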

diff --git a/code/UI/interactive/rtx.js b/code/UI/interactive/rtx.js index eaf4e6363..fe1b493c4 100644 --- a/code/UI/interactive/rtx.js +++ b/code/UI/interactive/rtx.js @@ -7025,6 +7025,7 @@ function displayARSResults(parentnode,arsdata) { for (var agent of arsdata['ara_list']) { stats[agent] = {}; stats[agent]['PASSED'] = 0; + stats[agent]['TOTAL'] = 0; } var tdiv = document.createElement("div"); @@ -7212,6 +7213,7 @@ function displayARSResults(parentnode,arsdata) { } sumtable.appendChild(tr); + stats.status_list['TOTAL'] = 1; for (var status in stats.status_list) { tr = document.createElement("tr"); tr.className = 'hoverable'; @@ -7238,6 +7240,9 @@ function displayARSResults(parentnode,arsdata) { span.innerHTML = status.split(":")[1]; span.className = 'explevel p3'; } + else if (status == 'TOTAL') { + tr.style.borderTop = "2px solid black"; + } td.appendChild(span); tr.appendChild(td); @@ -7267,6 +7272,9 @@ function displayARSResults(parentnode,arsdata) { td.appendChild(span); } tr.appendChild(td); + + if (status != 'TOTAL' && stats[agent][status]!=null) + stats[agent]['TOTAL'] += stats[agent][status]; } sumtable.appendChild(tr); } diff --git a/code/UI/interactive/rtx.version b/code/UI/interactive/rtx.version index 5112da1a8..bb7a2cd1b 100644 --- a/code/UI/interactive/rtx.version +++ b/code/UI/interactive/rtx.version @@ -1 +1 @@ -Demanding Praying mantis \ No newline at end of file +Urban Otter \ No newline at end of file diff --git a/code/autocomplete/data/quick_def.json b/code/autocomplete/data/quick_def.json deleted file mode 100644 index 4374bf3ff..000000000 --- a/code/autocomplete/data/quick_def.json +++ /dev/null @@ -1 +0,0 @@ -var quick_def = {"hemoglobin": "Hemoglobin or haemoglobin, abbreviated Hb or Hgb, is the iron-containing oxygen-transport metalloprotein in the red blood cells of all vertebrates (with the exception of the fish family Channichthyidae) as well as the tissues of some invertebrates.", "cancer": "Cancer is a group of diseases involving abnormal cell growth with the potential to invade or spread to other parts of the body.", "lovastatin": "Lovastatin is a statin drug, used for lowering cholesterol in those with hypercholesterolemia to reduce risk of cardiovascular disease."} \ No newline at end of file From 051c4af8d1030769cc093e591b031dbba9163080 Mon Sep 17 00:00:00 2001 From: isbluis Date: Sat, 14 Sep 2024 06:02:40 +0000 Subject: [PATCH 07/21] Oops, forgot to add this to the previous commit... 
--- code/autocomplete/data/quick_def.js | 1 + 1 file changed, 1 insertion(+) create mode 100644 code/autocomplete/data/quick_def.js diff --git a/code/autocomplete/data/quick_def.js b/code/autocomplete/data/quick_def.js new file mode 100644 index 000000000..4374bf3ff --- /dev/null +++ b/code/autocomplete/data/quick_def.js @@ -0,0 +1 @@ +var quick_def = {"hemoglobin": "Hemoglobin or haemoglobin, abbreviated Hb or Hgb, is the iron-containing oxygen-transport metalloprotein in the red blood cells of all vertebrates (with the exception of the fish family Channichthyidae) as well as the tissues of some invertebrates.", "cancer": "Cancer is a group of diseases involving abnormal cell growth with the potential to invade or spread to other parts of the body.", "lovastatin": "Lovastatin is a statin drug, used for lowering cholesterol in those with hypercholesterolemia to reduce risk of cardiovascular disease."} \ No newline at end of file From 28429383554889e2bafbdb600b184514c1361581 Mon Sep 17 00:00:00 2001 From: isbluis Date: Sat, 14 Sep 2024 06:18:48 +0000 Subject: [PATCH 08/21] Revert back helper link to RTX/KG2 from 1.5 to 1.4... --- code/UI/interactive/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/UI/interactive/index.html b/code/UI/interactive/index.html index 3dc4534a3..33dbb1f9a 100644 --- a/code/UI/interactive/index.html +++ b/code/UI/interactive/index.html @@ -556,7 +556,7 @@

 EXTERNAL API (post query):
 [hunk body garbled in extraction: the HTML link markup was stripped; the one changed line, which reverts the RTX/KG2 helper link from the TRAPI 1.5 back to the 1.4 endpoint, survives only as the bare markers below]
- + From 97d2915de799580229e4ff8cee0723f6ceeb1bdb Mon Sep 17 00:00:00 2001 From: amykglen Date: Mon, 16 Sep 2024 10:01:33 -0700 Subject: [PATCH 09/21] Don't override categories now that synonymizer is clean --- code/ARAX/ARAXQuery/ARAX_expander.py | 31 ++--------------------- code/ARAX/ARAXQuery/Expand/kp_selector.py | 5 +--- 2 files changed, 3 insertions(+), 33 deletions(-) diff --git a/code/ARAX/ARAXQuery/ARAX_expander.py b/code/ARAX/ARAXQuery/ARAX_expander.py index 197831546..4ecc59b97 100644 --- a/code/ARAX/ARAXQuery/ARAX_expander.py +++ b/code/ARAX/ARAXQuery/ARAX_expander.py @@ -353,7 +353,8 @@ def apply(self, response, input_parameters, mode: str = "ARAX"): if inferred_qedge_keys and len(query_graph.edges) == 1: for edge in query_sub_graph.edges.keys(): query_sub_graph.edges[edge].knowledge_type = 'lookup' - # Expand the query graph edge-by-edge + + # Expand the query graph edge-by-edge (in regular 'lookup' fashion) for qedge_key in ordered_qedge_keys_to_expand: log.debug(f"Expanding qedge {qedge_key}") response.update_query_plan(qedge_key, 'edge_properties', 'status', 'Expanding') @@ -583,9 +584,6 @@ def apply(self, response, input_parameters, mode: str = "ARAX"): decorator = ARAXDecorator() decorator.decorate_nodes(response) decorator.decorate_edges(response, kind="RTX-KG2") - - # Override node types to only include descendants of what was asked for in the QG (where applicable) #1360 - self._override_node_categories(message.knowledge_graph, message.query_graph, log) elif mode == "RTXKG2": decorator = ARAXDecorator() decorator.decorate_edges(response, kind="SEMMEDDB") @@ -1400,31 +1398,6 @@ def _load_fda_approved_drug_ids() -> Set[str]: fda_approved_drug_ids = pickle.load(fda_pickle) return fda_approved_drug_ids - def _override_node_categories(self, kg: KnowledgeGraph, qg: QueryGraph, log: ARAXResponse): - # Clean up what we list as the TRAPI node.categories; list descendants of what was asked for in the QG - log.debug(f"Overriding node categories to better align with what's in the QG") - qnode_descendant_categories_map = {qnode_key: set(self.bh.get_descendants(qnode.categories)) - for qnode_key, qnode in qg.nodes.items() if qnode.categories} - for node_key, node in kg.nodes.items(): - final_categories = set() - for qnode_key in node.qnode_keys: - # If qnode has categories specified, use node's all_categories that are descendants of qnode categories - if qnode_key in qnode_descendant_categories_map: - all_categories_attributes = [attribute for attribute in eu.convert_to_list(node.attributes) - if attribute.attribute_type_id == "biolink:category"] - node_categories = all_categories_attributes[0].value if all_categories_attributes else node.categories - relevant_categories = set(node_categories).intersection(qnode_descendant_categories_map[qnode_key]) - # Otherwise just use what's already in the node's categories (for KG2 this is the 'preferred' category) - else: - relevant_categories = set(node.categories) - final_categories = final_categories.union(relevant_categories) - if final_categories: - node.categories = list(final_categories) - else: - # Leave categories as they are but issue a warning - log.warning(f"None of the categories KPs gave node {node_key} ({node.categories}) are descendants of " - f"those asked for in the QG (for qnode {node.qnode_keys})") - @staticmethod def _map_back_to_input_curies(kg: KnowledgeGraph, qg: QueryGraph, log: ARAXResponse): """ diff --git a/code/ARAX/ARAXQuery/Expand/kp_selector.py b/code/ARAX/ARAXQuery/Expand/kp_selector.py index 
8c556d086..692520716 100644 --- a/code/ARAX/ARAXQuery/Expand/kp_selector.py +++ b/code/ARAX/ARAXQuery/Expand/kp_selector.py @@ -42,10 +42,7 @@ def _load_cached_kp_info(self) -> tuple: self.log.error(f"Failed to load KP info caches due to {e}", error_code="LoadKPCachesFailed") return None, None, None, None - # Record None URLs for our local KPs - allowed_kp_urls = smart_api_info["allowed_kp_urls"] - - return (meta_map, allowed_kp_urls, smart_api_info["kps_excluded_by_version"], + return (meta_map, smart_api_info["allowed_kp_urls"], smart_api_info["kps_excluded_by_version"], smart_api_info["kps_excluded_by_maturity"]) def get_kps_for_single_hop_qg(self, qg: QueryGraph) -> Optional[Set[str]]: From 3549a71c4d97fcad7028ae94c46b9f415405a471 Mon Sep 17 00:00:00 2001 From: amykglen Date: Mon, 16 Sep 2024 10:57:27 -0700 Subject: [PATCH 10/21] Get rid of no-longer-relevant test --- code/ARAX/test/test_ARAX_expand.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/code/ARAX/test/test_ARAX_expand.py b/code/ARAX/test/test_ARAX_expand.py index 02207a0e4..17acba75a 100644 --- a/code/ARAX/test/test_ARAX_expand.py +++ b/code/ARAX/test/test_ARAX_expand.py @@ -337,18 +337,6 @@ def test_873_consider_both_gene_and_protein(): assert set(nodes_by_qg_id_protein['n01']) == set(nodes_by_qg_id_gene['n01']) -def test_987_override_node_categories(): - actions_list = [ - "add_qnode(name=DOID:8398, key=n00)", - "add_qnode(categories=biolink:PhenotypicFeature, key=n01)", - "add_qedge(subject=n00, object=n01, predicates=biolink:has_phenotype, key=e00)", - "expand(edge_key=e00, kp=infores:rtx-kg2)", - "return(message=true, store=false)" - ] - nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(actions_list) - assert all('biolink:PhenotypicFeature' in node.categories for node in nodes_by_qg_id['n01'].values()) - - @pytest.mark.external def test_cohd_expand(): actions_list = [ From eb61a669abcdf3e6e92ff04b636e58bf15900a00 Mon Sep 17 00:00:00 2001 From: Chunyu Ma Date: Mon, 16 Sep 2024 14:44:23 -0400 Subject: [PATCH 11/21] update xDTD edge description to KG2.10.0c --- code/ARAX/ARAXQuery/Infer/scripts/infer_utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/ARAX/ARAXQuery/Infer/scripts/infer_utilities.py b/code/ARAX/ARAXQuery/Infer/scripts/infer_utilities.py index fe92d4376..19056adc4 100644 --- a/code/ARAX/ARAXQuery/Infer/scripts/infer_utilities.py +++ b/code/ARAX/ARAXQuery/Infer/scripts/infer_utilities.py @@ -350,7 +350,7 @@ def genrete_treat_subgraphs(self, response: ARAXResponse, top_drugs: pd.DataFram ] else: edge_attribute_list += [ - Attribute(original_attribute_name=None, value=True, attribute_type_id="EDAM-DATA:1772", attribute_source="infores:arax", value_type_id="metatype:Boolean", value_url=None, description="This edge was extracted from RTX-KG2.8.4c by ARAXInfer."), + Attribute(original_attribute_name=None, value=True, attribute_type_id="EDAM-DATA:1772", attribute_source="infores:arax", value_type_id="metatype:Boolean", value_url=None, description="This edge was extracted from RTX-KG2.10.0c by ARAXInfer."), ] retrieval_source = [ RetrievalSource(resource_id=primary_knowledge_source, resource_role="primary_knowledge_source"), From 5f92cc812055fabecf90ea5755f54918bec85d46 Mon Sep 17 00:00:00 2001 From: isbluis Date: Mon, 16 Sep 2024 22:18:47 +0000 Subject: [PATCH 12/21] Verify that we indeed connected to a valid autocomplete SQLite db file #2365 --- code/autocomplete/rtxcomplete.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/code/autocomplete/rtxcomplete.py b/code/autocomplete/rtxcomplete.py index 384665e97..3e9f13e2d 100644 --- a/code/autocomplete/rtxcomplete.py +++ b/code/autocomplete/rtxcomplete.py @@ -28,7 +28,11 @@ def load(): database_name = f"{autocomplete_filepath}{os.path.sep}{RTXConfig.autocomplete_path.split('/')[-1]}" conn = sqlite3.connect(database_name) cursor = conn.cursor() - print(f"INFO: Connected to {database_name}",file=sys.stderr) + try: + conn.execute(f"SELECT term FROM terms LIMIT 1") + print(f"INFO: Connected to {database_name}",file=sys.stderr) + except: + print(f"WARN: Could NOT connect to {database_name}. Please check that file and database exist!",file=sys.stderr) cache_database_name = os.path.dirname(os.path.abspath(__file__)) + '/rtxcomplete_cache.sqlite' cache_conn = sqlite3.connect(cache_database_name) From 3589d123333013199960733f512b949eba205cae Mon Sep 17 00:00:00 2001 From: Chunyu Ma Date: Mon, 16 Sep 2024 19:40:38 -0400 Subject: [PATCH 13/21] update ranking algorithm to exclude xDTD/xCRG support graph edges and the max flows --- code/ARAX/ARAXQuery/ARAX_ranker.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/code/ARAX/ARAXQuery/ARAX_ranker.py b/code/ARAX/ARAXQuery/ARAX_ranker.py index 63f7f99f7..31c843739 100644 --- a/code/ARAX/ARAXQuery/ARAX_ranker.py +++ b/code/ARAX/ARAXQuery/ARAX_ranker.py @@ -33,8 +33,8 @@ def _get_nx_edges_by_attr(G: Union[nx.MultiDiGraph, nx.MultiGraph], key: str, va def _get_query_graph_networkx_from_query_graph(query_graph: QueryGraph) -> nx.MultiDiGraph: query_graph_nx = nx.MultiDiGraph() - query_graph_nx.add_nodes_from([key for key,node in query_graph.nodes.items()]) - edge_list = [[edge.subject, edge.object, key, {'weight': 0.0}] for key,edge in query_graph.edges.items()] + query_graph_nx.add_nodes_from([key for key, node in query_graph.nodes.items() if 'creative_DTD_qnode' not in key and 'creative_CRG_qnode' not in key]) + edge_list = [[edge.subject, edge.object, key, {'weight': 0.0}] for key,edge in query_graph.edges.items() if 'creative_DTD_qedge' not in key and 'creative_CRG_qedge' not in key] query_graph_nx.add_edges_from(edge_list) return query_graph_nx @@ -124,8 +124,9 @@ def _get_weighted_graph_networkx_from_result_graph(kg_edge_id_to_edge: Dict[str, qg_edge_key_to_edge_tuple = {edge_tuple[2]: edge_tuple for edge_tuple in qg_edge_tuples} for analysis in result.analyses: # For now we only ever have one Analysis per Result for qedge_key, edge_binding_list in analysis.edge_bindings.items(): - qedge_tuple = qg_edge_key_to_edge_tuple[qedge_key] - res_graph[qedge_tuple[0]][qedge_tuple[1]][qedge_tuple[2]]['weight'] = _calculate_final_result_score(kg_edge_id_to_edge, edge_binding_list) + if 'creative_DTD_qedge' not in qedge_key and 'creative_CRG_qedge' not in qedge_key: + qedge_tuple = qg_edge_key_to_edge_tuple[qedge_key] + res_graph[qedge_tuple[0]][qedge_tuple[1]][qedge_tuple[2]]['weight'] = _calculate_final_result_score(kg_edge_id_to_edge, edge_binding_list) return res_graph @@ -187,13 +188,13 @@ def _score_networkx_graphs_by_max_flow(result_graphs_nx: List[Union[nx.MultiDiGr capacity="weight")) max_flow_value = 0.0 if len(max_flow_values_for_node_pairs) > 0: - max_flow_value = sum(max_flow_values_for_node_pairs)/float(len(max_flow_values_for_node_pairs)) + max_flow_value = _calculate_final_individual_edge_confidence(0, max_flow_values_for_node_pairs) else: max_flow_value = 1.0 max_flow_values.append(max_flow_value) return max_flow_values - +_calculate_final_individual_edge_confidence def 
_score_networkx_graphs_by_longest_path(result_graphs_nx: List[Union[nx.MultiDiGraph, nx.MultiGraph]]) -> List[float]: result_scores = [] @@ -209,7 +210,7 @@ def _score_networkx_graphs_by_longest_path(result_graphs_nx: List[Union[nx.Multi adj_matrix_power = np.linalg.matrix_power(adj_matrix, max_path_len)/math.factorial(max_path_len) score_list = [adj_matrix_power[map_node_name_to_index[node_i], map_node_name_to_index[node_j]] for node_i, node_j in pairs_with_max_path_len] - result_score = np.mean(score_list) + result_score = _calculate_final_individual_edge_confidence(0, score_list) result_scores.append(result_score) return result_scores @@ -365,7 +366,7 @@ def edge_attribute_score_combiner(self, edge_key, edge): elif 'infores' in edge_key.split('--')[-1]: # default score for other data sources base = edge_default_base else: # virtual edges or inferred edges - base = 0 # no base score for these edges. Its core is based on + base = 0 # no base score for these edges. Its score is based on its attribute scores. if edge.attributes is not None: for edge_attribute in edge.attributes: From d4cc37051a800efb3dd57c6392776ab5174a23d4 Mon Sep 17 00:00:00 2001 From: mohamad mohsen Date: Mon, 16 Sep 2024 20:42:19 -0400 Subject: [PATCH 14/21] test_connect_resveratrol_glyoxalase Failure --- code/ARAX/test/test_ARAX_connect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/ARAX/test/test_ARAX_connect.py b/code/ARAX/test/test_ARAX_connect.py index cb1536e1e..de4d6eef5 100644 --- a/code/ARAX/test/test_ARAX_connect.py +++ b/code/ARAX/test/test_ARAX_connect.py @@ -98,7 +98,7 @@ def test_connect_resveratrol_glyoxalase(): "create_message", "add_qnode(ids=PUBCHEM.COMPOUND:445154, key=n00)", "add_qnode(ids=NCBIGene:2739, key=n01)", - "connect(action=connect_nodes, max_path_length=3)", + "connect(action=connect_nodes, max_path_length=4)", "return(message=true, store=false)" ]}} [response, message] = _do_arax_query(query) From 9a316d7ff32188da8f5b4513c6ae67ffa6082458 Mon Sep 17 00:00:00 2001 From: amykglen Date: Mon, 16 Sep 2024 18:24:56 -0700 Subject: [PATCH 15/21] Adjust KG2 treats patch now that BiolinkHelper is fixed #2328 --- code/ARAX/ARAXQuery/ARAX_expander.py | 8 +++----- code/ARAX/ARAXQuery/Expand/trapi_querier.py | 6 ++---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/code/ARAX/ARAXQuery/ARAX_expander.py b/code/ARAX/ARAXQuery/ARAX_expander.py index 4ecc59b97..040246be7 100644 --- a/code/ARAX/ARAXQuery/ARAX_expander.py +++ b/code/ARAX/ARAXQuery/ARAX_expander.py @@ -60,9 +60,7 @@ def __init__(self): "aggregator_knowledge_source": {"==": "*"}} self.supported_qedge_qualifier_constraints = {"biolink:qualified_predicate", "biolink:object_direction_qualifier", "biolink:object_aspect_qualifier"} - self.higher_level_treats_predicates = {"biolink:treats_or_applied_or_studied_to_treat", - "biolink:applied_to_treat", - "biolink:studied_to_treat"} + self.treats_like_predicates = set(self.bh.get_descendants("biolink:treats_or_applied_or_studied_to_treat")).difference({"biolink:treats"}) def describe_me(self): """ @@ -512,7 +510,7 @@ def apply(self, response, input_parameters, mode: str = "ARAX"): # Remove KG2 SemMedDB treats_or_applied-type edges if this is an inferred treats query if alter_kg2_treats_edges: edge_keys_to_remove = {edge_key for edge_key, edge in overarching_kg.edges_by_qg_id[qedge_key].items() - if edge.predicate in self.higher_level_treats_predicates and + if edge.predicate in self.treats_like_predicates and any(source.resource_id == "infores:rtx-kg2" for source 
in edge.sources) and any(source.resource_id == "infores:semmeddb" for source in edge.sources)} log.debug(f"Removing {len(edge_keys_to_remove)} KG2 semmeddb treats_or_applied-type edges " @@ -593,7 +591,7 @@ def apply(self, response, input_parameters, mode: str = "ARAX"): num_edges_altered = 0 for edge in message.knowledge_graph.edges.values(): is_kg2_edge = any(source.resource_id == "infores:rtx-kg2" for source in edge.sources) - if is_kg2_edge and edge.predicate in self.higher_level_treats_predicates: + if is_kg2_edge and edge.predicate in self.treats_like_predicates: # Record the original KG2 predicate in an attribute edge.attributes.append(Attribute(attribute_type_id="biolink:original_predicate", value=edge.predicate, diff --git a/code/ARAX/ARAXQuery/Expand/trapi_querier.py b/code/ARAX/ARAXQuery/Expand/trapi_querier.py index c02f82447..a0ee1c477 100644 --- a/code/ARAX/ARAXQuery/Expand/trapi_querier.py +++ b/code/ARAX/ARAXQuery/Expand/trapi_querier.py @@ -93,10 +93,8 @@ async def answer_one_hop_query_async(self, query_graph: QueryGraph, # Patch to address lack of answers from KG2 for treats queries after treats refactor #2328 if alter_kg2_treats_edges and self.kp_infores_curie == "infores:rtx-kg2": for qedge in qg_copy.edges.values(): # Note there's only ever one qedge per QG here - qedge.predicates = list(set(qedge.predicates).union({"biolink:treats_or_applied_or_studied_to_treat", - "biolink:applied_to_treat", - "biolink:studied_to_treat"})) - log.info(f"For querying infores:rtx-kg2, edited {qedge_key} to use higher treats-type predicates: " + qedge.predicates = list(set(qedge.predicates).union({"biolink:treats_or_applied_or_studied_to_treat"})) + log.info(f"For querying infores:rtx-kg2, edited {qedge_key} to use higher treats-type predicate: " f"{qedge.predicates}") # Answer the query using the KP and load its answers into our object model From 0b8f2fb1c7f19599178201fdd65113247c23306d Mon Sep 17 00:00:00 2001 From: Chunyu Ma Date: Tue, 17 Sep 2024 13:38:34 -0500 Subject: [PATCH 16/21] Clear some lines --- code/ARAX/ARAXQuery/ARAX_ranker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/ARAX/ARAXQuery/ARAX_ranker.py b/code/ARAX/ARAXQuery/ARAX_ranker.py index 31c843739..85ed4f91b 100644 --- a/code/ARAX/ARAXQuery/ARAX_ranker.py +++ b/code/ARAX/ARAXQuery/ARAX_ranker.py @@ -194,7 +194,7 @@ def _score_networkx_graphs_by_max_flow(result_graphs_nx: List[Union[nx.MultiDiGr max_flow_values.append(max_flow_value) return max_flow_values -_calculate_final_individual_edge_confidence + def _score_networkx_graphs_by_longest_path(result_graphs_nx: List[Union[nx.MultiDiGraph, nx.MultiGraph]]) -> List[float]: result_scores = [] From 0adb0f943860a2ac710ccb7aff56235d6349cdf4 Mon Sep 17 00:00:00 2001 From: amykglen Date: Tue, 17 Sep 2024 18:40:20 -0700 Subject: [PATCH 17/21] List kg2cplover3 as dev KG2 endpoint (has new pathfinder endpoints) --- code/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml b/code/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml index 2a1a3ed2d..5af6ac810 100644 --- a/code/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml +++ b/code/UI/OpenAPI/specifications/export/KG2/1.5.0/openapi.yaml @@ -51,7 +51,7 @@ servers: url: https://kg2.ci.transltr.io/api/rtxkg2/v1.4 x-maturity: staging - description: RTX-KG2 TRAPI 1.5 endpoint - development - url: https://kg2cplover.rtx.ai:9990 + url: https://kg2cplover3.rtx.ai:9990 
x-maturity: development tags: - description: Retrieve the meta knowledge graph representation of this TRAPI web From 59b201ef3417e9033dc635f73b76aa7706cefc54 Mon Sep 17 00:00:00 2001 From: Chunyu Ma Date: Tue, 17 Sep 2024 22:05:16 -0400 Subject: [PATCH 18/21] notch the confidence down a bit for the manual_agent edges --- code/ARAX/ARAXQuery/ARAX_ranker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/code/ARAX/ARAXQuery/ARAX_ranker.py b/code/ARAX/ARAXQuery/ARAX_ranker.py index 31c843739..1a8c7d6a3 100644 --- a/code/ARAX/ARAXQuery/ARAX_ranker.py +++ b/code/ARAX/ARAXQuery/ARAX_ranker.py @@ -20,7 +20,7 @@ from openapi_server.models.edge import Edge from openapi_server.models.attribute import Attribute -edge_confidence_manual_agent = 0.999 +edge_confidence_manual_agent = 0.99 def _get_nx_edges_by_attr(G: Union[nx.MultiDiGraph, nx.MultiGraph], key: str, val: str) -> Set[tuple]: res_set = set() @@ -194,7 +194,7 @@ def _score_networkx_graphs_by_max_flow(result_graphs_nx: List[Union[nx.MultiDiGr max_flow_values.append(max_flow_value) return max_flow_values -_calculate_final_individual_edge_confidence + def _score_networkx_graphs_by_longest_path(result_graphs_nx: List[Union[nx.MultiDiGraph, nx.MultiGraph]]) -> List[float]: result_scores = [] @@ -455,7 +455,7 @@ def edge_attribute_publication_normalizer(self, attribute_type_id: str, edge_att pub_value = np.log(n_publications) max_value = 1.0 curve_steepness = 3.16993 - logistic_midpoint = 1.38629 + logistic_midpoint = 1.60943 # log(5) = 1.60943 meaning having 5 publications is a mid point normalized_value = max_value / float(1 + np.exp(-curve_steepness * (pub_value - logistic_midpoint))) return normalized_value From f9317f413727875d715d479e0eabcfbe776060cc Mon Sep 17 00:00:00 2001 From: Amy Glen <49423686+amykglen@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:43:39 -0700 Subject: [PATCH 19/21] Update README.md; fix subbullet issue --- code/kg2c/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/code/kg2c/README.md b/code/kg2c/README.md index 80e8783a6..0a1391650 100644 --- a/code/kg2c/README.md +++ b/code/kg2c/README.md @@ -142,12 +142,12 @@ flags/options. 1. once the build finishes, run the regression test suite: 1. `pytest -vs test_synonymizer.py --synonymizername node_synonymizer_v1.0_KG2.X.Y.sqlite` 1. **Do a test KG2c build**: If you're satisfied with the synonymizer, proceed with a test KG2c build: - 2. `screen -S kg2c` - 3. `pyenv activate rtx` if you're using buildkg2c.rtx.ai; otherwise activate your python environment however necessary + 1. `screen -S kg2c` + 1. `pyenv activate rtx` if you're using buildkg2c.rtx.ai; otherwise activate your python environment however necessary 1. `cd RTX/code/kg2c` - 4. `python build_kg2c.py 2.10.0 v1.0 4.2.0 --uploadartifacts --test` + 1. `python build_kg2c.py 2.10.0 v1.0 4.2.0 --uploadartifacts --test` 1. **Do the full KG2c build**: Then, if everything went smoothly, do the full build (we're assuming you're in the same `screen` session): - 4. `python build_kg2c.py 2.10.0 v1.0 4.2.0 --uploadartifacts` + 1. `python build_kg2c.py 2.10.0 v1.0 4.2.0 --uploadartifacts` The synonymizer build should take around 5 hours and the KG2c build should take around 10 hours. 
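As a sanity check on the ranker change in PATCH 18 above: the publication normalizer is a logistic curve over the log of the publication count, and the constants in the diff place the curve's midpoint at five publications. A small standalone sketch (not the ARAX code itself) reproducing the arithmetic:

    import numpy as np

    def normalized_publication_score(n_publications: int) -> float:
        # Constants from ARAX_ranker.py: steepness 3.16993, midpoint log(5)
        curve_steepness = 3.16993
        logistic_midpoint = 1.60943  # log(5), so 5 publications scores 0.5
        pub_value = np.log(n_publications)
        return 1.0 / (1 + np.exp(-curve_steepness * (pub_value - logistic_midpoint)))

    for n in (1, 5, 50):
        print(n, round(float(normalized_publication_score(n)), 3))
    # 1 0.006, 5 0.5, 50 0.999: scores stay low until the count clears the midpoint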
From b28db5c4a388df7579805ce3db711dd3e6e172a1 Mon Sep 17 00:00:00 2001 From: Abhilash Reddy Date: Mon, 23 Sep 2024 09:50:52 -0400 Subject: [PATCH 20/21] update resource limits for arax and kg2 --- deploy/arax/values.yaml | 4 ++-- deploy/kg2/values.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deploy/arax/values.yaml b/deploy/arax/values.yaml index 2bb081879..e40850f34 100644 --- a/deploy/arax/values.yaml +++ b/deploy/arax/values.yaml @@ -74,8 +74,8 @@ affinity: resources: requests: memory: 50Gi - cpu: 10000m + cpu: 8000m limits: memory: 58Gi - cpu: 14000m + cpu: 11000m diff --git a/deploy/kg2/values.yaml b/deploy/kg2/values.yaml index 7fc012048..8c45e3d6e 100644 --- a/deploy/kg2/values.yaml +++ b/deploy/kg2/values.yaml @@ -81,7 +81,7 @@ storage: resources: requests: memory: 50Gi - cpu: 10000m + cpu: 6000m limits: memory: 58Gi - cpu: 14000m + cpu: 8000m From 00dc4b77795d950753356fe6b39664cff212cf47 Mon Sep 17 00:00:00 2001 From: Kevin Vizhalil Date: Mon, 23 Sep 2024 13:08:47 -0400 Subject: [PATCH 21/21] Fixing #2378 --- .../python-flask-server/KG2/openapi_server/__main__.py | 3 ++- .../OpenAPI/python-flask-server/openapi_server/__main__.py | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py b/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py index 0dd657848..a3cb8c1c1 100644 --- a/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py +++ b/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py @@ -55,7 +55,8 @@ def instrument(app, host, port): ) ) ) - trace.get_tracer_provider().get_tracer(__name__) + # trace.get_tracer_provider().get_tracer(__name__) + tracer_provider = trace.get_tracer(__name__) FlaskInstrumentor().instrument_app(app=app.app) RequestsInstrumentor().instrument() AioHttpClientInstrumentor().instrument() diff --git a/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py b/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py index f53d7d02d..8abdc38ba 100644 --- a/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py +++ b/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py @@ -55,8 +55,10 @@ def instrument(app, host, port): ) ) ) - tracer_provider = trace.get_tracer_provider() - tracer_provider.get_tracer(__name__) + # tracer_provider = trace.get_tracer_provider() + # tracer_provider.get_tracer(__name__) + tracer_provider = trace.get_tracer(__name__) + FlaskInstrumentor().instrument_app(app=app.app) RequestsInstrumentor().instrument() AioHttpClientInstrumentor().instrument()
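A closing note on PATCH 21: in the OpenTelemetry Python API, trace.get_tracer(__name__) fetches a tracer from the globally registered provider, so the commented-out provider lookup and the new one-liner behave the same (though the variable named tracer_provider now actually holds a Tracer, not a provider). A minimal sketch of the idiomatic pattern, assuming the standard opentelemetry-sdk package rather than the ARAX server setup:

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider

    # Register a provider globally once at startup...
    trace.set_tracer_provider(TracerProvider())

    # ...then ask the API for a tracer; this resolves against the global
    # provider, which is what the one-line replacement in __main__.py relies on.
    tracer = trace.get_tracer(__name__)

    with tracer.start_as_current_span("example-span"):
        pass  # instrumented work would happen here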