From 40d2aad70e197fc26c63d68c58dc44dd29530927 Mon Sep 17 00:00:00 2001
From: richardbruskiewich <richard.bruskiewich@delphinai.com>
Date: Mon, 15 Jan 2024 16:57:12 -0800
Subject: [PATCH 1/5] Added 'supporting_data_source' parsing to
 _construct_sources_tree(), plus other tweaks and a unit test to validate

---
 PLATER/requirements.txt          |   1 +
 PLATER/services/util/question.py |  77 ++++++++++---
 PLATER/tests/test_question.py    | 184 +++++++++++++++++++++++++++++--
 3 files changed, 238 insertions(+), 24 deletions(-)

diff --git a/PLATER/requirements.txt b/PLATER/requirements.txt
index b662099..c27880c 100644
--- a/PLATER/requirements.txt
+++ b/PLATER/requirements.txt
@@ -2,6 +2,7 @@ fastapi==0.85.0
 pyaml==20.4.0
 pytest==7.4.3
 pytest-asyncio==0.21.1
+deepdiff==6.7.1
 uvicorn==0.24.0
 reasoner-transpiler==2.0.5
 reasoner-pydantic==4.1.6
diff --git a/PLATER/services/util/question.py b/PLATER/services/util/question.py
index 563f97c..217c0bf 100644
--- a/PLATER/services/util/question.py
+++ b/PLATER/services/util/question.py
@@ -1,3 +1,4 @@
+from typing import List, Dict
 import copy
 import orjson
 import time
@@ -62,41 +63,83 @@ def compile_cypher(self, **kwargs):
         return get_query(query_graph, **kwargs)
 
 
-    def _construct_sources_tree(self, sources):
-        # if primary source and aggregator source are specified in the graph, upstream_resource_ids of all aggregator_ks
-        # be that source
+    def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]:
+        """
+        Method to fill out the full annotation for edge "sources"
+        entries, including the "upstream_resource_ids" tree.
+        :param sources: edge 'sources' entries; 'resource_id' may be a str or a list of str
+        :return: formatted sources, plus an aggregator entry for self.provenance
+        """
+        if not sources:
+            # empty sources.. pretty strange, but then just send back
+            # an instance of the top-level "Monarch TRAPI" source entry
+            return [
+                {
+                    "resource_id": self.provenance,
+                    "resource_role": "aggregator_knowledge_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids":  None
+                }
+            ]
 
-        # if aggregator ks are coming from db, plater would add itself as aggregator and use other aggregator ids
+        # if primary source and aggregator source are specified in the graph,
+        # upstream_resource_ids of all aggregator_ks will be that source
+
+        # if aggregator ks are coming from db, mta would add itself as aggregator and use other aggregator ids
         # as upstream resources, if no aggregators are found and only primary ks is provided that would be added
-        # as upstream for the plater entry
+        # as upstream for the mta entry
         formatted_sources = []
         # filter out source entries that actually have values
-        temp = {}
+        resource_ids_with_resource_role = {}
+        source_record_urls_to_resource_id = {}
         for source in sources:
+
             if not source['resource_id']:
                 continue
-            temp[source['resource_role']] = temp.get(source['resource_role'], set())
+
+            resource_ids_with_resource_role[source['resource_role']] = \
+                resource_ids_with_resource_role.get(source['resource_role'], set())
+
+            # a list-valued 'resource_id' is unhashable, so key the URLs by each individual id
+            ids = source['resource_id'] if isinstance(source['resource_id'], list) else [source['resource_id']]
+            source_record_urls_to_resource_id.update(dict.fromkeys(ids, source.get('source_record_urls')))
             if isinstance(source["resource_id"], str):
-                temp[source["resource_role"]].add(source["resource_id"])
+                resource_ids_with_resource_role[source["resource_role"]].add(source["resource_id"])
             elif isinstance(source["resource_id"], list):
                 for resource_id in source["resource_id"]:
-                    temp[source["resource_role"]].add(resource_id)
+                    resource_ids_with_resource_role[source["resource_role"]].add(resource_id)
+
+        for resource_role in resource_ids_with_resource_role:
 
-        for resource_role in temp:
             upstreams = None
-            if resource_role == "biolink:aggregator_knowledge_source":
-                upstreams = temp.get("biolink:primary_knowledge_source", None)
+
+            if resource_role == "aggregator_knowledge_source":
+                upstreams = resource_ids_with_resource_role.get("primary_knowledge_source", None)
+            elif resource_role == "primary_knowledge_source":
+                upstreams = resource_ids_with_resource_role.get("supporting_data_source", None)
 
             formatted_sources += [
-                {"resource_id": resource_id, "resource_role": resource_role.lstrip('biolink:'), "upstream_resource_ids": upstreams}
-                for resource_id in temp[resource_role]
+                {
+                    "resource_id": resource_id,
+                    "resource_role": resource_role,
+                    "source_record_urls": source_record_urls_to_resource_id[resource_id],
+                    "upstream_resource_ids": list(upstreams) if upstreams else None
+                }
+                for resource_id in resource_ids_with_resource_role[resource_role]
             ]
-        upstreams_for_plater_entry = temp.get("biolink:aggregator_knowledge_source") or temp.get("biolink:primary_knowledge_source")
+
+        upstreams_for_mta_entry = \
+            resource_ids_with_resource_role.get("aggregator_knowledge_source") or \
+            resource_ids_with_resource_role.get("primary_knowledge_source") or \
+            resource_ids_with_resource_role.get("supporting_data_source")
+
         formatted_sources.append({
-            "resource_id":self.provenance,
+            "resource_id": self.provenance,
             "resource_role": "aggregator_knowledge_source",
-            "upstream_resource_ids": upstreams_for_plater_entry
+            "source_record_urls": None,
+            "upstream_resource_ids": list(upstreams_for_mta_entry) if upstreams_for_mta_entry else None
         })
+
         return formatted_sources
 
 
diff --git a/PLATER/tests/test_question.py b/PLATER/tests/test_question.py
index d35df4b..661b843 100644
--- a/PLATER/tests/test_question.py
+++ b/PLATER/tests/test_question.py
@@ -1,19 +1,28 @@
 from unittest.mock import patch
-
+from typing import List, Dict
+import json
+import os
+import copy
+import asyncio
 import pytest
+from deepdiff.diff import DeepDiff
 
-from PLATER.services.util.question import Question
 from bmt import Toolkit
-import asyncio, json
-import os
-import copy
+
+from PLATER.services.config import config
+from PLATER.services.util.question import Question
+
+
+DEFAULT_PROVENANCE = config.get("PROVENANCE_TAG", "infores:automat.notspecified")
+
 
 @pytest.fixture
 def message():
-    with open(os.path.join(os.path.dirname(__file__), 'data','trapi1.4.json')) as stream:
+    with open(os.path.join(os.path.dirname(__file__), 'data', 'trapi1.4.json')) as stream:
         message = json.load(stream)
     return message
 
+
 def test_init():
     reasoner_dict = {
             "query_graph": {
@@ -26,6 +35,163 @@ def test_init():
     assert question._question_json == reasoner_dict
 
 
+@pytest.mark.parametrize(
+    "sources,output",
+    [
+        (   # Query 0 - Empty sources, return instance of top level system source
+            [],
+            [
+                {
+                    "resource_id": DEFAULT_PROVENANCE,
+                    "resource_role": "aggregator_knowledge_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids": None
+                }
+            ]
+        ),
+        (   # Query 1 - Add primary knowledge source
+            [
+                {
+                    "resource_id": "infores:my-kp",
+                    "resource_role": "primary_knowledge_source"
+                }
+            ],
+            [
+                {
+                    "resource_id": "infores:my-kp",
+                    "resource_role": "primary_knowledge_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids": None
+                },
+                {
+                    "resource_id": DEFAULT_PROVENANCE,
+                    "resource_role": "aggregator_knowledge_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids":  ["infores:my-kp"]
+                }
+            ]
+        ),
+        (   # Query 2 - Add a supporting data source, below the primary knowledge source
+            [
+                {
+                    "resource_id": "infores:my-kp",
+                    "resource_role": "primary_knowledge_source"
+                },
+                {
+                    "resource_id": "infores:hpo-annotations",
+                    "resource_role": "supporting_data_source"
+                }
+            ],
+            [
+                {
+                    "resource_id": "infores:my-kp",
+                    "resource_role": "primary_knowledge_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids": ["infores:hpo-annotations"]
+                },
+                {
+                    "resource_id": "infores:hpo-annotations",
+                    "resource_role": "supporting_data_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids": None
+                },
+                {
+                    "resource_id": DEFAULT_PROVENANCE,
+                    "resource_role": "aggregator_knowledge_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids":  ["infores:my-kp"]
+                }
+            ]
+        ),
+        (   # Query 3 - Add a supporting data source, below the main application
+            #           aggregator (lacking primary knowledge source)
+            [
+                {
+                    "resource_id": "infores:hpo-annotations",
+                    "resource_role": "supporting_data_source"
+                }
+            ],
+            [
+                {
+                    "resource_id": "infores:hpo-annotations",
+                    "resource_role": "supporting_data_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids": None
+                },
+                {
+                    "resource_id": DEFAULT_PROVENANCE,
+                    "resource_role": "aggregator_knowledge_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids":  ["infores:hpo-annotations"]
+                }
+            ]
+        ),
+        (   # Query 4 - Same query as 3 above except adding some
+            #           source_record_urls for the supporting data source
+            [
+                {
+                    "resource_id": "infores:hpo-annotations",
+                    "resource_role": "supporting_data_source",
+                    "source_record_urls": ["https://hpo.jax.org/app/"]
+                }
+            ],
+            [
+                {
+                    "resource_id": "infores:hpo-annotations",
+                    "resource_role": "supporting_data_source",
+                    "source_record_urls": ["https://hpo.jax.org/app/"],
+                    "upstream_resource_ids": None
+                },
+                {
+                    "resource_id": DEFAULT_PROVENANCE,
+                    "resource_role": "aggregator_knowledge_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids":  ["infores:hpo-annotations"]
+                }
+            ]
+        ),
+        (   # Query 5 - Same query as 3 above except adding a second "supporting_data_source"
+            [
+                {
+                    "resource_id": "infores:hpo-annotations",
+                    "resource_role": "supporting_data_source"
+                },
+                {
+                    "resource_id": "infores:upheno",
+                    "resource_role": "supporting_data_source"
+                }
+            ],
+            [
+                {
+                    "resource_id": "infores:hpo-annotations",
+                    "resource_role": "supporting_data_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids": None
+                },
+                {
+                    "resource_id": "infores:upheno",
+                    "resource_role": "supporting_data_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids": None
+                },
+                {
+                    "resource_id": DEFAULT_PROVENANCE,
+                    "resource_role": "aggregator_knowledge_source",
+                    "source_record_urls": None,
+                    "upstream_resource_ids":  ["infores:hpo-annotations", "infores:upheno"]
+                }
+            ]
+        )
+    ]
+)
+def test_source_construct_sources_tree(sources: List[Dict], output: List[Dict]):
+    # Dummy Question: the input question JSON is irrelevant for this test ...
+    question: Question = Question(question_json={})
+    # ... because the sources tree is compared directly, ignoring list order (ignore_order=True)
+    formatted_sources = question._construct_sources_tree(sources)
+    assert not DeepDiff(output, formatted_sources, ignore_order=True, report_repetition=True)
+
+
 def test_format_attribute():
     # note that this test does not run through the reasoner code that does the attribute mapping.
     # so the values in the expected results must account for that
@@ -102,6 +268,7 @@ def test_format_attribute():
     # test if value_type is preserved if in response from neo4j
     assert transformed == t2_expected_trapi
 
+
 def test_format_edge_qualifiers():
     # note that this test does not run through the reasoner code that does the attribute mapping.
     # so the values in the expected results must account for that
@@ -165,6 +332,7 @@ def test_format_edge_qualifiers():
     # test if value_type is added to default "biolink:Attribute"
     assert transformed == expected_trapi
 
+
 class MOCK_GRAPH_ADAPTER():
     called = False
     toolkit = Toolkit()
@@ -197,6 +365,7 @@ def test_attribute_constraint_basic(message):
     result = Question.apply_attribute_constraints(message)
     assert result == expected
 
+
 def test_attribute_constraint_filter_node(message):
     # this node doesnt exist, and is the main node, so everything should vanish
     node_constraints = [
@@ -217,6 +386,7 @@ def test_attribute_constraint_filter_node(message):
     assert len(result['knowledge_graph']['edges']) == 0  # no edges
     assert len(result['results']) == 0  # no bindings
 
+
 def test_attribute_constraint_filter_edge(message):
     edge_constraints = [
         {"id": "biolink:relation", "name": "eq_id_filter", "value": "CTD:marker_mechanism", "operator": "=="}
@@ -247,4 +417,4 @@ def test_attribute_constraint_filter_edge(message):
     assert len(result['knowledge_graph']['nodes']) == 3
     assert len(result['knowledge_graph']['edges']) == 1
     assert len(result['results']) == 1
-    assert len(result['results'][0]['analyses'][0]['edge_bindings']['e0']) == 1
\ No newline at end of file
+    assert len(result['results'][0]['analyses'][0]['edge_bindings']['e0']) == 1

From 8839d002cccdb8b354c1828f9f6f4db23aa9ea4a Mon Sep 17 00:00:00 2001
From: richardbruskiewich <richard.bruskiewich@delphinai.com>
Date: Mon, 15 Jan 2024 17:09:09 -0800
Subject: [PATCH 2/5] Cleaned up format_attribute_trapi() adding a few checks
 for missing (or previously set) key fields

---
 PLATER/services/util/question.py | 38 ++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/PLATER/services/util/question.py b/PLATER/services/util/question.py
index 217c0bf..1974957 100644
--- a/PLATER/services/util/question.py
+++ b/PLATER/services/util/question.py
@@ -27,6 +27,7 @@
     config.get('logging_format'),
 )
 
+
 class Question:
     # SPEC VARS
     QUERY_GRAPH_KEY = 'query_graph'
@@ -62,7 +63,6 @@ def compile_cypher(self, **kwargs):
                         item['qualifier_type_id'] = item['qualifier_type_id'].replace('biolink:', '')
         return get_query(query_graph, **kwargs)
 
-
     def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]:
         """
         Method to fill out the full annotation for edge "sources"
@@ -142,7 +142,6 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]:
 
         return formatted_sources
 
-
     def format_attribute_trapi(self, kg_items, node=False):
         for identifier in kg_items:
             # get the properties for the record
@@ -153,8 +152,10 @@ def format_attribute_trapi(self, kg_items, node=False):
 
             # separate the qualifiers from attributes for edges and format them
             if not node:
-                qualifier_results = [attrib for attrib in attributes
-                                     if 'qualifie' in attrib['original_attribute_name']]
+                qualifier_results = [
+                    attrib for attrib in attributes
+                    if 'original_attribute_name' in attrib and 'qualifie' in attrib['original_attribute_name']
+                ]
                 if qualifier_results:
                     formatted_qualifiers = []
                     for qualifier in qualifier_results:
@@ -166,20 +167,26 @@ def format_attribute_trapi(self, kg_items, node=False):
                         })
                     props['qualifiers'] = formatted_qualifiers
 
-            # create a new list that doesnt have the core properties or qualifiers
-            new_attribs = [attrib for attrib in attributes
-                           if attrib['original_attribute_name'] not in props and
-                           attrib['original_attribute_name'] not in skip_list and
-                           'qualifie' not in attrib['original_attribute_name']
-                           ]
+            # create a new list that doesn't have the core properties or qualifiers
+            new_attribs: List = list()
+            for attrib in attributes:
+                if 'original_attribute_name' not in attrib or (
+                        attrib['original_attribute_name'] not in props and
+                        attrib['original_attribute_name'] not in skip_list and
+                        'qualifie' not in attrib['original_attribute_name']
+                ):
+                    new_attribs.append(attrib)
 
             # for the non-core properties
             for attr in new_attribs:
                 # make sure the original_attribute_name has something other than none
-                attr['original_attribute_name'] = attr['original_attribute_name'] or ''
+                attr['original_attribute_name'] = \
+                    ('original_attribute_name' in attr and attr['original_attribute_name']) or ''
 
                 # map the attribute type to the list above, otherwise generic default
-                attr["value_type_id"] = VALUE_TYPES.get(attr["original_attribute_name"], "EDAM:data_0006")
+                attr["value_type_id"] = \
+                    ("value_type_id" in attr and attr["value_type_id"]) or \
+                    VALUE_TYPES.get(attr["original_attribute_name"], "EDAM:data_0006")
 
                 # uses generic data as attribute type id if not defined
                 if not ("attribute_type_id" in attr and attr["attribute_type_id"] != 'NA'):
@@ -187,9 +194,12 @@ def format_attribute_trapi(self, kg_items, node=False):
                     if attribute_data:
                         attr.update(attribute_data)
 
-            # update edge provenance with automat infores, filter empty ones, expand list type resource ids
+            # update edge provenance with infores,
+            # filter empty ones, expand list type resource ids
             if not node:
-                kg_items[identifier]["sources"] = self._construct_sources_tree(kg_items[identifier].get("sources", []))
+                kg_items[identifier]["sources"] = \
+                    self._construct_sources_tree(kg_items[identifier].get("sources", []))
+
             # assign these attribs back to the original attrib list without the core properties
             props['attributes'] = new_attribs
 

From 57125eaab352c99f9c3509b1988432f939646360 Mon Sep 17 00:00:00 2001
From: richardbruskiewich <richard.bruskiewich@delphinai.com>
Date: Mon, 15 Jan 2024 17:12:54 -0800
Subject: [PATCH 3/5] Removed unused parameter "graph_interface:
 GraphInterface" in Question.transform_attributes()

---
 PLATER/services/util/question.py |  4 ++--
 PLATER/tests/test_question.py    | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/PLATER/services/util/question.py b/PLATER/services/util/question.py
index 1974957..56145bf 100644
--- a/PLATER/services/util/question.py
+++ b/PLATER/services/util/question.py
@@ -205,7 +205,7 @@ def format_attribute_trapi(self, kg_items, node=False):
 
         return kg_items
 
-    def transform_attributes(self, trapi_message, graph_interface: GraphInterface):
+    def transform_attributes(self, trapi_message):
         self.format_attribute_trapi(trapi_message.get('knowledge_graph', {}).get('nodes', {}), node=True)
         self.format_attribute_trapi(trapi_message.get('knowledge_graph', {}).get('edges', {}))
         for r in trapi_message.get("results", []):
@@ -250,7 +250,7 @@ async def answer(self, graph_interface: GraphInterface):
                 }
             )
         results_dict = graph_interface.convert_to_dict(results)
-        self._question_json.update(self.transform_attributes(results_dict[0], graph_interface))
+        self._question_json.update(self.transform_attributes(results_dict[0]))
         self._question_json = Question.apply_attribute_constraints(self._question_json)
         return self._question_json
 
diff --git a/PLATER/tests/test_question.py b/PLATER/tests/test_question.py
index 661b843..802435c 100644
--- a/PLATER/tests/test_question.py
+++ b/PLATER/tests/test_question.py
@@ -235,7 +235,7 @@ def test_format_attribute():
     }
     q = Question(question_json={})
     graph_interface = MOCK_GRAPH_ADAPTER()
-    transformed = q.transform_attributes(trapi_kg_response, graph_interface=MOCK_GRAPH_ADAPTER)
+    transformed = q.transform_attributes(trapi_kg_response)
 
     # test attribute_id if provided from neo4j response is preserved
     # test if value_type is added to default 'biolink:Attribute'
@@ -262,7 +262,7 @@ def test_format_attribute():
 
     q = Question(question_json={})
 
-    transformed = q.transform_attributes(t2_trapi_kg_response, graph_interface=MOCK_GRAPH_ADAPTER)
+    transformed = q.transform_attributes(t2_trapi_kg_response)
 
     # test default attribute to be EDAM:data_0006
     # test if value_type is preserved if in response from neo4j
@@ -283,17 +283,17 @@ def test_format_edge_qualifiers():
                 {
                    "attribute_type_id":"NA",
                    "original_attribute_name":"qualified_predicate",
-                   "value":"biolink:causes"
+                   "value": "biolink:causes"
                 },
                 {
                    "attribute_type_id":"NA",
                    "original_attribute_name":"object_aspect_qualifier",
-                   "value":"activity"
+                   "value": "activity"
                 },
                 {
                    "attribute_type_id":"NA",
                    "original_attribute_name":"object_direction_qualifier",
-                   "value":"decreased"
+                   "value": "decreased"
                 }],
           }
        }
@@ -326,7 +326,7 @@ def test_format_edge_qualifiers():
 
     q = Question(question_json={})
     graph_interface = MOCK_GRAPH_ADAPTER()
-    transformed = q.transform_attributes(trapi_kg_response, graph_interface=MOCK_GRAPH_ADAPTER)
+    transformed = q.transform_attributes(trapi_kg_response)
 
     # test attribute_id if provided from neo4j response is preserved
     # test if value_type is added to default "biolink:Attribute"

From 267175e21b26e2abfada1e527e4f5697107c6002 Mon Sep 17 00:00:00 2001
From: richardbruskiewich <richard.bruskiewich@delphinai.com>
Date: Mon, 15 Jan 2024 18:55:25 -0800
Subject: [PATCH 4/5] Added back in code to remove 'biolink:' prefix in
 resource_role values; fixed unit tests and reformatted some test data (for
 readability)

---
 PLATER/services/util/question.py |  14 +-
 PLATER/tests/test_question.py    | 212 +++++++++++++++++++------------
 2 files changed, 141 insertions(+), 85 deletions(-)

diff --git a/PLATER/services/util/question.py b/PLATER/services/util/question.py
index 56145bf..41dca50 100644
--- a/PLATER/services/util/question.py
+++ b/PLATER/services/util/question.py
@@ -94,9 +94,19 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]:
         source_record_urls_to_resource_id = {}
         for source in sources:
 
-            if not source['resource_id']:
+            if not (
+                    'resource_id' in source and
+                    source['resource_id'] and
+                    'resource_role' in source and
+                    source['resource_role']
+            ):
+                # prune TRAPI non-compliant source records (a warning is logged for each)
+                logger.warning(f"Invalid edge 'source' entry: '{str(source)}'? Skipped!")
                 continue
 
+            # ResourceRoleEnum values carry no 'biolink:' prefix (NB: str.lstrip() strips a char set, not a prefix)
+            source['resource_role'] = source['resource_role'].replace("biolink:", "", 1)
+
             resource_ids_with_resource_role[source['resource_role']] = \
                 resource_ids_with_resource_role.get(source['resource_role'], set())
 
@@ -121,7 +131,7 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]:
             formatted_sources += [
                 {
                     "resource_id": resource_id,
-                    "resource_role": resource_role,
+                    "resource_role": resource_role.replace("biolink:", "", 1),
                     "source_record_urls": source_record_urls_to_resource_id[resource_id],
                     "upstream_resource_ids": list(upstreams) if upstreams else None
                 }
diff --git a/PLATER/tests/test_question.py b/PLATER/tests/test_question.py
index 802435c..395d6ed 100644
--- a/PLATER/tests/test_question.py
+++ b/PLATER/tests/test_question.py
@@ -196,42 +196,78 @@ def test_format_attribute():
     # note that this test does not run through the reasoner code that does the attribute mapping.
     # so the values in the expected results must account for that
 
-    trapi_kg_response = {"knowledge_graph":
-        {"nodes":
-            {"CURIE:1":
-                {"attributes": [{"original_attribute_name": "pub", "attribute_type_id": "CURIE:x"}]}
+    trapi_kg_response = {
+        "knowledge_graph": {
+            "nodes": {
+                "CURIE:1": {
+                    "attributes": [
+                        {
+                            "original_attribute_name": "pub",
+                            "attribute_type_id": "CURIE:x"
+                        }
+                    ]
+                }
              },
-         "edges":
-             {"123123":
-                  {
-                      "attributes": [{"original_attribute_name": "some_attr", "value": "some_value"}],
-                      "sources": [{"resource_role": "biolink:primary_knowledge_source", "resource_id":"infores:primary"}]
+            "edges": {
+                "123123": {
+                      "attributes": [
+                          {
+                              "original_attribute_name": "some_attr",
+                              "value": "some_value"
+                          }
+                      ],
+                      "sources": [
+                          {
+                              "resource_role": "biolink:primary_knowledge_source",
+                              "resource_id": "infores:primary"
+                          }
+                      ]
                   }
               }
          }
     }
-    expected_trapi = {"knowledge_graph":
-        {"nodes":
-            {"CURIE:1":
-                {"attributes": [{"original_attribute_name": "pub", "attribute_type_id": "CURIE:x", "value_type_id": "EDAM:data_0006"}]}
+    expected_trapi = {
+        "knowledge_graph": {
+            "nodes": {
+                "CURIE:1": {
+                    "attributes": [
+                        {
+                            "original_attribute_name": "pub",
+                            "attribute_type_id": "CURIE:x",
+                            "value_type_id": "EDAM:data_0006"
+                        }
+                    ]
+                }
              },
-         "edges":
-             {"123123":
-                  {"attributes": [{"original_attribute_name": "some_attr", "value": "some_value",
-                                   "attribute_type_id": "biolink:Attribute",
-                                   "value_type_id": "EDAM:data_0006"},
-                                  ],
-
-                   "sources": [
-                       {"resource_role": "primary_knowledge_source",
-                        "resource_id": "infores:primary",
-                        "upstream_resource_ids": None},
-                       {"resource_role": "aggregator_knowledge_source",
-                        "resource_id": "infores:automat.notspecified",
-                        "upstream_resource_ids": {"infores:primary"}},
-                   ]}
-              }
-         }
+            "edges": {
+                "123123": {
+                    "attributes": [
+                        {
+                            "original_attribute_name": "some_attr",
+                            "value": "some_value",
+                            "attribute_type_id": "biolink:Attribute",
+                            "value_type_id": "EDAM:data_0006"
+                        }
+                    ],
+                    "sources": [
+                       {
+                           "resource_role": "primary_knowledge_source",
+                           "resource_id": "infores:primary",
+                           "source_record_urls": None,
+                           "upstream_resource_ids": None
+                       },
+                       {
+                           "resource_role": "aggregator_knowledge_source",
+                           "resource_id": DEFAULT_PROVENANCE,
+                           "source_record_urls": None,
+                           "upstream_resource_ids": [
+                               "infores:primary"
+                           ]
+                       },
+                    ]
+                }
+            }
+        }
     }
     q = Question(question_json={})
     graph_interface = MOCK_GRAPH_ADAPTER()
@@ -239,7 +275,7 @@ def test_format_attribute():
 
     # test attribute_id if provided from neo4j response is preserved
     # test if value_type is added to default 'biolink:Attribute'
-    assert transformed == expected_trapi
+    assert not DeepDiff(transformed, expected_trapi)
 
     t2_trapi_kg_response = {"knowledge_graph": {"nodes": {"CURIE:1": {"attributes": [
         {"original_attribute_name": "pub", "value": "x", "value_type_id": "oo", "attribute_type_id": "preserved_attrib"},
@@ -266,64 +302,74 @@ def test_format_attribute():
 
     # test default attribute to be EDAM:data_0006
     # test if value_type is preserved if in response from neo4j
-    assert transformed == t2_expected_trapi
+    assert not DeepDiff(transformed, t2_expected_trapi)  # an empty (falsy) DeepDiff means the structures match
 
 
 def test_format_edge_qualifiers():
     # note that this test does not run through the reasoner code that does the attribute mapping.
     # so the values in the expected results must account for that
 
-    trapi_kg_response ={ "knowledge_graph": {
-       "edges":{
-          "some_id":{
-              "object": "NCBIGene:283871",
-              "predicate": "biolink:affects",
-              "subject": "PUBCHEM.COMPOUND:5311062",
-              "attributes": [
-                {
-                   "attribute_type_id":"NA",
-                   "original_attribute_name":"qualified_predicate",
-                   "value": "biolink:causes"
-                },
-                {
-                   "attribute_type_id":"NA",
-                   "original_attribute_name":"object_aspect_qualifier",
-                   "value": "activity"
-                },
-                {
-                   "attribute_type_id":"NA",
-                   "original_attribute_name":"object_direction_qualifier",
-                   "value": "decreased"
-                }],
-          }
-       }
-    }}
-    expected_trapi = {"knowledge_graph": {"edges": {'some_id': {
-        'object': 'NCBIGene:283871',
-        'predicate': 'biolink:affects',
-        'subject': 'PUBCHEM.COMPOUND:5311062',
-        'attributes': [],
-        'sources': [{'resource_id': 'infores:automat.notspecified',
-                     'resource_role': 'aggregator_knowledge_source',
-                     'upstream_resource_ids': None
-                     }],
-        "qualifiers": [
-            {
-                "qualifier_type_id": "biolink:qualified_predicate",
-                "qualifier_value": "biolink:causes"
-            },
-            {
-                "qualifier_type_id": "biolink:object_aspect_qualifier",
-                "qualifier_value": "activity"
-            },
-            {
-                "qualifier_type_id": "biolink:object_direction_qualifier",
-                "qualifier_value": "decreased"
-            },
-        ],
-        }}
-    }}
-
+    trapi_kg_response = {
+        "knowledge_graph": {
+            "edges": {
+                  "some_id": {
+                      "object": "NCBIGene:283871",
+                      "predicate": "biolink:affects",
+                      "subject": "PUBCHEM.COMPOUND:5311062",
+                      "attributes": [
+                        {
+                           "attribute_type_id":"NA",
+                           "original_attribute_name":"qualified_predicate",
+                           "value": "biolink:causes"
+                        },
+                        {
+                           "attribute_type_id":"NA",
+                           "original_attribute_name":"object_aspect_qualifier",
+                           "value": "activity"
+                        },
+                        {
+                           "attribute_type_id":"NA",
+                           "original_attribute_name":"object_direction_qualifier",
+                           "value": "decreased"
+                        }],
+                  }
+            }
+        }
+    }
+    expected_trapi = {
+        "knowledge_graph": {
+            "edges": {
+                'some_id': {
+                    'object': 'NCBIGene:283871',
+                    'predicate': 'biolink:affects',
+                    'subject': 'PUBCHEM.COMPOUND:5311062',
+                    'attributes': [],
+                    'sources': [
+                        {
+                            'resource_id': 'infores:automat.notspecified',
+                            'resource_role': 'aggregator_knowledge_source',
+                            "source_record_urls": None,
+                            'upstream_resource_ids': None
+                        }
+                    ],
+                    "qualifiers": [
+                        {
+                            "qualifier_type_id": "biolink:qualified_predicate",
+                            "qualifier_value": "biolink:causes"
+                        },
+                        {
+                            "qualifier_type_id": "biolink:object_aspect_qualifier",
+                            "qualifier_value": "activity"
+                        },
+                        {
+                            "qualifier_type_id": "biolink:object_direction_qualifier",
+                            "qualifier_value": "decreased"
+                        },
+                    ],
+                }
+            }
+        }
+    }
     q = Question(question_json={})
     graph_interface = MOCK_GRAPH_ADAPTER()
     transformed = q.transform_attributes(trapi_kg_response)

From 2819445014b4009dd144e33824ba90646c9f72b4 Mon Sep 17 00:00:00 2001
From: richardbruskiewich <richard.bruskiewich@delphinai.com>
Date: Mon, 15 Jan 2024 19:23:06 -0800
Subject: [PATCH 5/5] variable naming and readability

---
 PLATER/services/util/question.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/PLATER/services/util/question.py b/PLATER/services/util/question.py
index 41dca50..7fdf075 100644
--- a/PLATER/services/util/question.py
+++ b/PLATER/services/util/question.py
@@ -89,9 +89,10 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]:
         # as upstream resources, if no aggregators are found and only primary ks is provided that would be added
         # as upstream for the mta entry
         formatted_sources = []
-        # filter out source entries that actually have values
         resource_ids_with_resource_role = {}
         source_record_urls_to_resource_id = {}
+
+        # filter out source entries that actually have values
         for source in sources:
 
             if not (
@@ -138,7 +139,7 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]:
                 for resource_id in resource_ids_with_resource_role[resource_role]
             ]
 
-        upstreams_for_mta_entry = \
+        upstreams_for_top_level_entry = \
             resource_ids_with_resource_role.get("aggregator_knowledge_source") or \
             resource_ids_with_resource_role.get("primary_knowledge_source") or \
             resource_ids_with_resource_role.get("supporting_data_source")
@@ -147,7 +148,7 @@ def _construct_sources_tree(self, sources: List[Dict]) -> List[Dict]:
             "resource_id": self.provenance,
             "resource_role": "aggregator_knowledge_source",
             "source_record_urls": None,
-            "upstream_resource_ids": list(upstreams_for_mta_entry) if upstreams_for_mta_entry else None
+            "upstream_resource_ids": list(upstreams_for_top_level_entry) if upstreams_for_top_level_entry else None
         })
 
         return formatted_sources