From 40d2aad70e197fc26c63d68c58dc44dd29530927 Mon Sep 17 00:00:00 2001 From: richardbruskiewich Date: Mon, 15 Jan 2024 16:57:12 -0800 Subject: [PATCH 1/5] Added 'supporting_data_source' parsing to _construct_sources_tree(), plus other tweaks and a unit test to validate --- PLATER/requirements.txt | 1 + PLATER/services/util/question.py | 77 ++++++++++--- PLATER/tests/test_question.py | 184 +++++++++++++++++++++++++++++-- 3 files changed, 238 insertions(+), 24 deletions(-) diff --git a/PLATER/requirements.txt b/PLATER/requirements.txt index b662099..c27880c 100644 --- a/PLATER/requirements.txt +++ b/PLATER/requirements.txt @@ -2,6 +2,7 @@ fastapi==0.85.0 pyaml==20.4.0 pytest==7.4.3 pytest-asyncio==0.21.1 +deepdiff==6.7.1 uvicorn==0.24.0 reasoner-transpiler==2.0.5 reasoner-pydantic==4.1.6 diff --git a/PLATER/services/util/question.py b/PLATER/services/util/question.py index 563f97c..217c0bf 100644 --- a/PLATER/services/util/question.py +++ b/PLATER/services/util/question.py @@ -1,3 +1,4 @@ +from typing import List, Dict import copy import orjson import time @@ -62,41 +63,83 @@ def compile_cypher(self, **kwargs): return get_query(query_graph, **kwargs) - def _construct_sources_tree(self, sources): - # if primary source and aggregator source are specified in the graph, upstream_resource_ids of all aggregator_ks - # be that source + def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]: + """ + Method to fill out the full annotation for edge "sources" + entries including "upstream_resource_ids" tree. + :param sources: List[Dict], edge 'sources' property entries + :return: enhanced "sources" including top-level "Monarch TRAPI" source entry. + """ + if not sources: + # empty sources.. pretty strange, but then just send back + # an instance of the top-level "Monarch TRAPI" source entry + return [ + { + "resource_id": self.provenance, + "resource_role": "aggregator_knowledge_source", + "source_record_urls": None, + "upstream_resource_ids": None + } + ] - # if aggregator ks are coming from db, plater would add itself as aggregator and use other aggregator ids + # if primary source and aggregator source are specified in the graph, + # upstream_resource_ids of all aggregator_ks be that source + + # if aggregator ks are coming from db, mta would add itself as aggregator and use other aggregator ids # as upstream resources, if no aggregators are found and only primary ks is provided that would be added - # as upstream for the plater entry + # as upstream for the mta entry formatted_sources = [] # filter out source entries that actually have values - temp = {} + resource_ids_with_resource_role = {} + source_record_urls_to_resource_id = {} for source in sources: + if not source['resource_id']: continue - temp[source['resource_role']] = temp.get(source['resource_role'], set()) + + resource_ids_with_resource_role[source['resource_role']] = \ + resource_ids_with_resource_role.get(source['resource_role'], set()) + + source_record_urls_to_resource_id[source['resource_id']] = \ + source['source_record_urls'] if 'source_record_urls' in source else None + if isinstance(source["resource_id"], str): - temp[source["resource_role"]].add(source["resource_id"]) + resource_ids_with_resource_role[source["resource_role"]].add(source["resource_id"]) elif isinstance(source["resource_id"], list): for resource_id in source["resource_id"]: - temp[source["resource_role"]].add(resource_id) + resource_ids_with_resource_role[source["resource_role"]].add(resource_id) + + for resource_role in resource_ids_with_resource_role: - for resource_role in temp: upstreams = None - if resource_role == "biolink:aggregator_knowledge_source": - upstreams = temp.get("biolink:primary_knowledge_source", None) + + if resource_role == "aggregator_knowledge_source": + upstreams = resource_ids_with_resource_role.get("primary_knowledge_source", None) + elif resource_role == "primary_knowledge_source": + upstreams = resource_ids_with_resource_role.get("supporting_data_source", None) formatted_sources += [ - {"resource_id": resource_id, "resource_role": resource_role.lstrip('biolink:'), "upstream_resource_ids": upstreams} - for resource_id in temp[resource_role] + { + "resource_id": resource_id, + "resource_role": resource_role, + "source_record_urls": source_record_urls_to_resource_id[resource_id], + "upstream_resource_ids": list(upstreams) if upstreams else None + } + for resource_id in resource_ids_with_resource_role[resource_role] ] - upstreams_for_plater_entry = temp.get("biolink:aggregator_knowledge_source") or temp.get("biolink:primary_knowledge_source") + + upstreams_for_mta_entry = \ + resource_ids_with_resource_role.get("aggregator_knowledge_source") or \ + resource_ids_with_resource_role.get("primary_knowledge_source") or \ + resource_ids_with_resource_role.get("supporting_data_source") + formatted_sources.append({ - "resource_id":self.provenance, + "resource_id": self.provenance, "resource_role": "aggregator_knowledge_source", - "upstream_resource_ids": upstreams_for_plater_entry + "source_record_urls": None, + "upstream_resource_ids": list(upstreams_for_mta_entry) if upstreams_for_mta_entry else None }) + return formatted_sources diff --git a/PLATER/tests/test_question.py b/PLATER/tests/test_question.py index d35df4b..661b843 100644 --- a/PLATER/tests/test_question.py +++ b/PLATER/tests/test_question.py @@ -1,19 +1,28 @@ from unittest.mock import patch - +from typing import List, Dict +import json +import os +import copy +import asyncio import pytest +from deepdiff.diff import DeepDiff -from PLATER.services.util.question import Question from bmt import Toolkit -import asyncio, json -import os -import copy + +from PLATER.services.config import config +from PLATER.services.util.question import Question + + +DEFAULT_PROVENANCE = config.get("PROVENANCE_TAG", "infores:automat.notspecified") + @pytest.fixture def message(): - with open(os.path.join(os.path.dirname(__file__), 'data','trapi1.4.json')) as stream: + with open(os.path.join(os.path.dirname(__file__), 'data', 'trapi1.4.json')) as stream: message = json.load(stream) return message + def test_init(): reasoner_dict = { "query_graph": { @@ -26,6 +35,163 @@ def test_init(): assert question._question_json == reasoner_dict +@pytest.mark.parametrize( + "sources,output", + [ + ( # Query 0 - Empty sources, return instance of top level system source + [], + [ + { + "resource_id": DEFAULT_PROVENANCE, + "resource_role": "aggregator_knowledge_source", + "source_record_urls": None, + "upstream_resource_ids": None + } + ] + ), + ( # Query 1 - Add primary knowledge source + [ + { + "resource_id": "infores:my-kp", + "resource_role": "primary_knowledge_source" + } + ], + [ + { + "resource_id": "infores:my-kp", + "resource_role": "primary_knowledge_source", + "source_record_urls": None, + "upstream_resource_ids": None + }, + { + "resource_id": DEFAULT_PROVENANCE, + "resource_role": "aggregator_knowledge_source", + "source_record_urls": None, + "upstream_resource_ids": ["infores:my-kp"] + } + ] + ), + ( # Query 2 - Add a supporting data source, below the primary knowledge source + [ + { + "resource_id": "infores:my-kp", + "resource_role": "primary_knowledge_source" + }, + { + "resource_id": "infores:hpo-annotations", + "resource_role": "supporting_data_source" + } + ], + [ + { + "resource_id": "infores:my-kp", + "resource_role": "primary_knowledge_source", + "source_record_urls": None, + "upstream_resource_ids": ["infores:hpo-annotations"] + }, + { + "resource_id": "infores:hpo-annotations", + "resource_role": "supporting_data_source", + "source_record_urls": None, + "upstream_resource_ids": None + }, + { + "resource_id": DEFAULT_PROVENANCE, + "resource_role": "aggregator_knowledge_source", + "source_record_urls": None, + "upstream_resource_ids": ["infores:my-kp"] + } + ] + ), + ( # Query 3 - Add a supporting data source, below the main application + # aggregator (lacking primary knowledge source) + [ + { + "resource_id": "infores:hpo-annotations", + "resource_role": "supporting_data_source" + } + ], + [ + { + "resource_id": "infores:hpo-annotations", + "resource_role": "supporting_data_source", + "source_record_urls": None, + "upstream_resource_ids": None + }, + { + "resource_id": DEFAULT_PROVENANCE, + "resource_role": "aggregator_knowledge_source", + "source_record_urls": None, + "upstream_resource_ids": ["infores:hpo-annotations"] + } + ] + ), + ( # Query 4 - Same query as 3 above except adding some + # source_record_urls for the supporting data source + [ + { + "resource_id": "infores:hpo-annotations", + "resource_role": "supporting_data_source", + "source_record_urls": ["https://hpo.jax.org/app/"] + } + ], + [ + { + "resource_id": "infores:hpo-annotations", + "resource_role": "supporting_data_source", + "source_record_urls": ["https://hpo.jax.org/app/"], + "upstream_resource_ids": None + }, + { + "resource_id": DEFAULT_PROVENANCE, + "resource_role": "aggregator_knowledge_source", + "source_record_urls": None, + "upstream_resource_ids": ["infores:hpo-annotations"] + } + ] + ), + ( # Query 5 - Same query as 3 above except adding a second "supporting_data_source" + [ + { + "resource_id": "infores:hpo-annotations", + "resource_role": "supporting_data_source" + }, + { + "resource_id": "infores:upheno", + "resource_role": "supporting_data_source" + } + ], + [ + { + "resource_id": "infores:hpo-annotations", + "resource_role": "supporting_data_source", + "source_record_urls": None, + "upstream_resource_ids": None + }, + { + "resource_id": "infores:upheno", + "resource_role": "supporting_data_source", + "source_record_urls": None, + "upstream_resource_ids": None + }, + { + "resource_id": DEFAULT_PROVENANCE, + "resource_role": "aggregator_knowledge_source", + "source_record_urls": None, + "upstream_resource_ids": ["infores:hpo-annotations", "infores:upheno"] + } + ] + ) + ] +) +def test_source_construct_sources_tree(sources: List[Dict], output: List[Dict]): + # dummy Question - don't care about input question JSON for this test... + question: Question = Question(question_json={}) + # ... 'cuz comparing sources tree directly + formatted_sources = question._construct_sources_tree(sources) + assert not DeepDiff(output, formatted_sources, ignore_order=True, report_repetition=True) + + def test_format_attribute(): # note that this test does not run through the reasoner code that does the attribute mapping. # so the values in the expected results must account for that @@ -102,6 +268,7 @@ def test_format_attribute(): # test if value_type is preserved if in response from neo4j assert transformed == t2_expected_trapi + def test_format_edge_qualifiers(): # note that this test does not run through the reasoner code that does the attribute mapping. # so the values in the expected results must account for that @@ -165,6 +332,7 @@ def test_format_edge_qualifiers(): # test if value_type is added to default "biolink:Attribute" assert transformed == expected_trapi + class MOCK_GRAPH_ADAPTER(): called = False toolkit = Toolkit() @@ -197,6 +365,7 @@ def test_attribute_constraint_basic(message): result = Question.apply_attribute_constraints(message) assert result == expected + def test_attribute_constraint_filter_node(message): # this node doesnt exist, and is the main node, so everything should vanish node_constraints = [ @@ -217,6 +386,7 @@ def test_attribute_constraint_filter_node(message): assert len(result['knowledge_graph']['edges']) == 0 # no edges assert len(result['results']) == 0 # no bindings + def test_attribute_constraint_filter_edge(message): edge_constraints = [ {"id": "biolink:relation", "name": "eq_id_filter", "value": "CTD:marker_mechanism", "operator": "=="} @@ -247,4 +417,4 @@ def test_attribute_constraint_filter_edge(message): assert len(result['knowledge_graph']['nodes']) == 3 assert len(result['knowledge_graph']['edges']) == 1 assert len(result['results']) == 1 - assert len(result['results'][0]['analyses'][0]['edge_bindings']['e0']) == 1 \ No newline at end of file + assert len(result['results'][0]['analyses'][0]['edge_bindings']['e0']) == 1 From 8839d002cccdb8b354c1828f9f6f4db23aa9ea4a Mon Sep 17 00:00:00 2001 From: richardbruskiewich Date: Mon, 15 Jan 2024 17:09:09 -0800 Subject: [PATCH 2/5] Cleaned up format_attribute_trapi() adding a few checks for missing (or previously set) key fields --- PLATER/services/util/question.py | 38 ++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/PLATER/services/util/question.py b/PLATER/services/util/question.py index 217c0bf..1974957 100644 --- a/PLATER/services/util/question.py +++ b/PLATER/services/util/question.py @@ -27,6 +27,7 @@ config.get('logging_format'), ) + class Question: # SPEC VARS QUERY_GRAPH_KEY = 'query_graph' @@ -62,7 +63,6 @@ def compile_cypher(self, **kwargs): item['qualifier_type_id'] = item['qualifier_type_id'].replace('biolink:', '') return get_query(query_graph, **kwargs) - def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]: """ Method to fill out the full annotation for edge "sources" @@ -142,7 +142,6 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]: return formatted_sources - def format_attribute_trapi(self, kg_items, node=False): for identifier in kg_items: # get the properties for the record @@ -153,8 +152,10 @@ def format_attribute_trapi(self, kg_items, node=False): # separate the qualifiers from attributes for edges and format them if not node: - qualifier_results = [attrib for attrib in attributes - if 'qualifie' in attrib['original_attribute_name']] + qualifier_results = [ + attrib for attrib in attributes + if 'original_attribute_name' in attrib and 'qualifie' in attrib['original_attribute_name'] + ] if qualifier_results: formatted_qualifiers = [] for qualifier in qualifier_results: @@ -166,20 +167,26 @@ def format_attribute_trapi(self, kg_items, node=False): }) props['qualifiers'] = formatted_qualifiers - # create a new list that doesnt have the core properties or qualifiers - new_attribs = [attrib for attrib in attributes - if attrib['original_attribute_name'] not in props and - attrib['original_attribute_name'] not in skip_list and - 'qualifie' not in attrib['original_attribute_name'] - ] + # create a new list that doesn't have the core properties or qualifiers + new_attribs: List = list() + for attrib in attributes: + if 'original_attribute_name' not in attrib or ( + attrib['original_attribute_name'] not in props and + attrib['original_attribute_name'] not in skip_list and + 'qualifie' not in attrib['original_attribute_name'] + ): + new_attribs.append(attrib) # for the non-core properties for attr in new_attribs: # make sure the original_attribute_name has something other than none - attr['original_attribute_name'] = attr['original_attribute_name'] or '' + attr['original_attribute_name'] = \ + ('original_attribute_name' in attr and attr['original_attribute_name']) or '' # map the attribute type to the list above, otherwise generic default - attr["value_type_id"] = VALUE_TYPES.get(attr["original_attribute_name"], "EDAM:data_0006") + attr["value_type_id"] = \ + ("value_type_id" in attr and attr["value_type_id"]) or \ + VALUE_TYPES.get(attr["original_attribute_name"], "EDAM:data_0006") # uses generic data as attribute type id if not defined if not ("attribute_type_id" in attr and attr["attribute_type_id"] != 'NA'): @@ -187,9 +194,12 @@ def format_attribute_trapi(self, kg_items, node=False): if attribute_data: attr.update(attribute_data) - # update edge provenance with automat infores, filter empty ones, expand list type resource ids + # update edge provenance with infores, + # filter empty ones, expand list type resource ids if not node: - kg_items[identifier]["sources"] = self._construct_sources_tree(kg_items[identifier].get("sources", [])) + kg_items[identifier]["sources"] = \ + self._construct_sources_tree(kg_items[identifier].get("sources", [])) + # assign these attribs back to the original attrib list without the core properties props['attributes'] = new_attribs From 57125eaab352c99f9c3509b1988432f939646360 Mon Sep 17 00:00:00 2001 From: richardbruskiewich Date: Mon, 15 Jan 2024 17:12:54 -0800 Subject: [PATCH 3/5] Removed unused parameter "graph_interface: GraphInterface" in Question.transform_attributes() --- PLATER/services/util/question.py | 4 ++-- PLATER/tests/test_question.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/PLATER/services/util/question.py b/PLATER/services/util/question.py index 1974957..56145bf 100644 --- a/PLATER/services/util/question.py +++ b/PLATER/services/util/question.py @@ -205,7 +205,7 @@ def format_attribute_trapi(self, kg_items, node=False): return kg_items - def transform_attributes(self, trapi_message, graph_interface: GraphInterface): + def transform_attributes(self, trapi_message): self.format_attribute_trapi(trapi_message.get('knowledge_graph', {}).get('nodes', {}), node=True) self.format_attribute_trapi(trapi_message.get('knowledge_graph', {}).get('edges', {})) for r in trapi_message.get("results", []): @@ -250,7 +250,7 @@ async def answer(self, graph_interface: GraphInterface): } ) results_dict = graph_interface.convert_to_dict(results) - self._question_json.update(self.transform_attributes(results_dict[0], graph_interface)) + self._question_json.update(self.transform_attributes(results_dict[0])) self._question_json = Question.apply_attribute_constraints(self._question_json) return self._question_json diff --git a/PLATER/tests/test_question.py b/PLATER/tests/test_question.py index 661b843..802435c 100644 --- a/PLATER/tests/test_question.py +++ b/PLATER/tests/test_question.py @@ -235,7 +235,7 @@ def test_format_attribute(): } q = Question(question_json={}) graph_interface = MOCK_GRAPH_ADAPTER() - transformed = q.transform_attributes(trapi_kg_response, graph_interface=MOCK_GRAPH_ADAPTER) + transformed = q.transform_attributes(trapi_kg_response) # test attribute_id if provided from neo4j response is preserved # test if value_type is added to default 'biolink:Attribute' @@ -262,7 +262,7 @@ def test_format_attribute(): q = Question(question_json={}) - transformed = q.transform_attributes(t2_trapi_kg_response, graph_interface=MOCK_GRAPH_ADAPTER) + transformed = q.transform_attributes(t2_trapi_kg_response) # test default attribute to be EDAM:data_0006 # test if value_type is preserved if in response from neo4j @@ -283,17 +283,17 @@ def test_format_edge_qualifiers(): { "attribute_type_id":"NA", "original_attribute_name":"qualified_predicate", - "value":"biolink:causes" + "value": "biolink:causes" }, { "attribute_type_id":"NA", "original_attribute_name":"object_aspect_qualifier", - "value":"activity" + "value": "activity" }, { "attribute_type_id":"NA", "original_attribute_name":"object_direction_qualifier", - "value":"decreased" + "value": "decreased" }], } } @@ -326,7 +326,7 @@ def test_format_edge_qualifiers(): q = Question(question_json={}) graph_interface = MOCK_GRAPH_ADAPTER() - transformed = q.transform_attributes(trapi_kg_response, graph_interface=MOCK_GRAPH_ADAPTER) + transformed = q.transform_attributes(trapi_kg_response) # test attribute_id if provided from neo4j response is preserved # test if value_type is added to default "biolink:Attribute" From 267175e21b26e2abfada1e527e4f5697107c6002 Mon Sep 17 00:00:00 2001 From: richardbruskiewich Date: Mon, 15 Jan 2024 18:55:25 -0800 Subject: [PATCH 4/5] Added back in code to remove 'biolink:' prefix in resource_role values; fixed unit tests and reformatted some test data (for readability) --- PLATER/services/util/question.py | 14 +- PLATER/tests/test_question.py | 212 +++++++++++++++++++------------ 2 files changed, 141 insertions(+), 85 deletions(-) diff --git a/PLATER/services/util/question.py b/PLATER/services/util/question.py index 56145bf..41dca50 100644 --- a/PLATER/services/util/question.py +++ b/PLATER/services/util/question.py @@ -94,9 +94,19 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]: source_record_urls_to_resource_id = {} for source in sources: - if not source['resource_id']: + if not ( + 'resource_id' in source and + source['resource_id'] and + 'resource_role' in source and + source['resource_role'] + ): + # silently pruning TRAPI non-compliant source records + logger.warning(f"Invalid edge 'source' entry: '{str(source)}'? Skipped!") continue + # 'resource_role' values are now ResourceRoleEnum without a biolink: CURIE prefix + source['resource_role'] = source['resource_role'].lstrip("biolink:") + resource_ids_with_resource_role[source['resource_role']] = \ resource_ids_with_resource_role.get(source['resource_role'], set()) @@ -121,7 +131,7 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]: formatted_sources += [ { "resource_id": resource_id, - "resource_role": resource_role, + "resource_role": resource_role.lstrip("biolink:"), "source_record_urls": source_record_urls_to_resource_id[resource_id], "upstream_resource_ids": list(upstreams) if upstreams else None } diff --git a/PLATER/tests/test_question.py b/PLATER/tests/test_question.py index 802435c..395d6ed 100644 --- a/PLATER/tests/test_question.py +++ b/PLATER/tests/test_question.py @@ -196,42 +196,78 @@ def test_format_attribute(): # note that this test does not run through the reasoner code that does the attribute mapping. # so the values in the expected results must account for that - trapi_kg_response = {"knowledge_graph": - {"nodes": - {"CURIE:1": - {"attributes": [{"original_attribute_name": "pub", "attribute_type_id": "CURIE:x"}]} + trapi_kg_response = { + "knowledge_graph": { + "nodes": { + "CURIE:1": { + "attributes": [ + { + "original_attribute_name": "pub", + "attribute_type_id": "CURIE:x" + } + ] + } }, - "edges": - {"123123": - { - "attributes": [{"original_attribute_name": "some_attr", "value": "some_value"}], - "sources": [{"resource_role": "biolink:primary_knowledge_source", "resource_id":"infores:primary"}] + "edges": { + "123123": { + "attributes": [ + { + "original_attribute_name": "some_attr", + "value": "some_value" + } + ], + "sources": [ + { + "resource_role": "biolink:primary_knowledge_source", + "resource_id": "infores:primary" + } + ] } } } } - expected_trapi = {"knowledge_graph": - {"nodes": - {"CURIE:1": - {"attributes": [{"original_attribute_name": "pub", "attribute_type_id": "CURIE:x", "value_type_id": "EDAM:data_0006"}]} + expected_trapi = { + "knowledge_graph": { + "nodes": { + "CURIE:1": { + "attributes": [ + { + "original_attribute_name": "pub", + "attribute_type_id": "CURIE:x", + "value_type_id": "EDAM:data_0006" + } + ] + } }, - "edges": - {"123123": - {"attributes": [{"original_attribute_name": "some_attr", "value": "some_value", - "attribute_type_id": "biolink:Attribute", - "value_type_id": "EDAM:data_0006"}, - ], - - "sources": [ - {"resource_role": "primary_knowledge_source", - "resource_id": "infores:primary", - "upstream_resource_ids": None}, - {"resource_role": "aggregator_knowledge_source", - "resource_id": "infores:automat.notspecified", - "upstream_resource_ids": {"infores:primary"}}, - ]} - } - } + "edges": { + "123123": { + "attributes": [ + { + "original_attribute_name": "some_attr", + "value": "some_value", + "attribute_type_id": "biolink:Attribute", + "value_type_id": "EDAM:data_0006" + } + ], + "sources": [ + { + "resource_role": "primary_knowledge_source", + "resource_id": "infores:primary", + "source_record_urls": None, + "upstream_resource_ids": None + }, + { + "resource_role": "aggregator_knowledge_source", + "resource_id": DEFAULT_PROVENANCE, + "source_record_urls": None, + "upstream_resource_ids": [ + "infores:primary" + ] + }, + ] + } + } + } } q = Question(question_json={}) graph_interface = MOCK_GRAPH_ADAPTER() @@ -239,7 +275,7 @@ def test_format_attribute(): # test attribute_id if provided from neo4j response is preserved # test if value_type is added to default 'biolink:Attribute' - assert transformed == expected_trapi + assert not DeepDiff(transformed, expected_trapi) t2_trapi_kg_response = {"knowledge_graph": {"nodes": {"CURIE:1": {"attributes": [ {"original_attribute_name": "pub", "value": "x", "value_type_id": "oo", "attribute_type_id": "preserved_attrib"}, @@ -266,64 +302,74 @@ def test_format_attribute(): # test default attribute to be EDAM:data_0006 # test if value_type is preserved if in response from neo4j - assert transformed == t2_expected_trapi + assert DeepDiff(transformed, t2_expected_trapi) def test_format_edge_qualifiers(): # note that this test does not run through the reasoner code that does the attribute mapping. # so the values in the expected results must account for that - trapi_kg_response ={ "knowledge_graph": { - "edges":{ - "some_id":{ - "object": "NCBIGene:283871", - "predicate": "biolink:affects", - "subject": "PUBCHEM.COMPOUND:5311062", - "attributes": [ - { - "attribute_type_id":"NA", - "original_attribute_name":"qualified_predicate", - "value": "biolink:causes" - }, - { - "attribute_type_id":"NA", - "original_attribute_name":"object_aspect_qualifier", - "value": "activity" - }, - { - "attribute_type_id":"NA", - "original_attribute_name":"object_direction_qualifier", - "value": "decreased" - }], - } - } - }} - expected_trapi = {"knowledge_graph": {"edges": {'some_id': { - 'object': 'NCBIGene:283871', - 'predicate': 'biolink:affects', - 'subject': 'PUBCHEM.COMPOUND:5311062', - 'attributes': [], - 'sources': [{'resource_id': 'infores:automat.notspecified', - 'resource_role': 'aggregator_knowledge_source', - 'upstream_resource_ids': None - }], - "qualifiers": [ - { - "qualifier_type_id": "biolink:qualified_predicate", - "qualifier_value": "biolink:causes" - }, - { - "qualifier_type_id": "biolink:object_aspect_qualifier", - "qualifier_value": "activity" - }, - { - "qualifier_type_id": "biolink:object_direction_qualifier", - "qualifier_value": "decreased" - }, - ], - }} - }} - + trapi_kg_response = { + "knowledge_graph": { + "edges": { + "some_id": { + "object": "NCBIGene:283871", + "predicate": "biolink:affects", + "subject": "PUBCHEM.COMPOUND:5311062", + "attributes": [ + { + "attribute_type_id":"NA", + "original_attribute_name":"qualified_predicate", + "value": "biolink:causes" + }, + { + "attribute_type_id":"NA", + "original_attribute_name":"object_aspect_qualifier", + "value": "activity" + }, + { + "attribute_type_id":"NA", + "original_attribute_name":"object_direction_qualifier", + "value": "decreased" + }], + } + } + } + } + expected_trapi = { + "knowledge_graph": { + "edges": { + 'some_id': { + 'object': 'NCBIGene:283871', + 'predicate': 'biolink:affects', + 'subject': 'PUBCHEM.COMPOUND:5311062', + 'attributes': [], + 'sources': [ + { + 'resource_id': 'infores:automat.notspecified', + 'resource_role': 'aggregator_knowledge_source', + "source_record_urls": None, + 'upstream_resource_ids': None + } + ], + "qualifiers": [ + { + "qualifier_type_id": "biolink:qualified_predicate", + "qualifier_value": "biolink:causes" + }, + { + "qualifier_type_id": "biolink:object_aspect_qualifier", + "qualifier_value": "activity" + }, + { + "qualifier_type_id": "biolink:object_direction_qualifier", + "qualifier_value": "decreased" + }, + ], + } + } + } + } q = Question(question_json={}) graph_interface = MOCK_GRAPH_ADAPTER() transformed = q.transform_attributes(trapi_kg_response) From 2819445014b4009dd144e33824ba90646c9f72b4 Mon Sep 17 00:00:00 2001 From: richardbruskiewich Date: Mon, 15 Jan 2024 19:23:06 -0800 Subject: [PATCH 5/5] variable naming and readability --- PLATER/services/util/question.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/PLATER/services/util/question.py b/PLATER/services/util/question.py index 41dca50..7fdf075 100644 --- a/PLATER/services/util/question.py +++ b/PLATER/services/util/question.py @@ -89,9 +89,10 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]: # as upstream resources, if no aggregators are found and only primary ks is provided that would be added # as upstream for the mta entry formatted_sources = [] - # filter out source entries that actually have values resource_ids_with_resource_role = {} source_record_urls_to_resource_id = {} + + # filter out source entries that actually have values for source in sources: if not ( @@ -138,7 +139,7 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]: for resource_id in resource_ids_with_resource_role[resource_role] ] - upstreams_for_mta_entry = \ + upstreams_for_top_level_entry = \ resource_ids_with_resource_role.get("aggregator_knowledge_source") or \ resource_ids_with_resource_role.get("primary_knowledge_source") or \ resource_ids_with_resource_role.get("supporting_data_source") @@ -147,7 +148,7 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]: "resource_id": self.provenance, "resource_role": "aggregator_knowledge_source", "source_record_urls": None, - "upstream_resource_ids": list(upstreams_for_mta_entry) if upstreams_for_mta_entry else None + "upstream_resource_ids": list(upstreams_for_top_level_entry) if upstreams_for_top_level_entry else None }) return formatted_sources