Skip to content

Commit

Permalink
Revert "Implementing #2045"
Browse files Browse the repository at this point in the history
This reverts commit ee1f61b.
  • Loading branch information
kvnthomas98 committed Aug 4, 2023
1 parent ee1f61b commit 86d289c
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 60 deletions.
3 changes: 1 addition & 2 deletions code/ARAX/ARAXQuery/ARAX_expander.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,8 +697,7 @@ def apply(self, response, input_parameters, mode: str = "ARAX"):

# Map canonical curies back to the input curies in the QG (where applicable) #1622
self._map_back_to_input_curies(message.knowledge_graph, query_graph, log)
eu.remove_semmeddb_edges_and_nodes_with_low_publications(message.knowledge_graph, response)
overarching_kg = eu.convert_standard_kg_to_qg_organized_kg(message.knowledge_graph)

# Return the response and done
kg = message.knowledge_graph
log.info(f"After Expand, the KG has {len(kg.nodes)} nodes and {len(kg.edges)} edges "
Expand Down
47 changes: 0 additions & 47 deletions code/ARAX/ARAXQuery/Expand/expand_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,53 +555,6 @@ def remove_edges_with_qedge_key(kg: KnowledgeGraph, qedge_key: str):
if qedge_key in edge.qedge_keys:
del kg.edges[edge_key]

def _is_semmeddb_primary_edge(edge) -> bool:
    """Return True if any of the edge's sources lists SemMedDB as primary knowledge source."""
    return bool(edge.sources) and any(
        retrieval_source.resource_id == 'infores:semmeddb'
        and retrieval_source.resource_role == "primary_knowledge_source"
        for retrieval_source in edge.sources
    )


def _count_edge_publications(edge) -> int:
    """Return the number of publications recorded on the edge (0 if none are found).

    Only 'biolink:publications' attributes whose value is a list are counted; if several
    such attributes are present, the last one determines the count.
    """
    n_publications = 0
    for attribute in edge.attributes or []:
        if attribute.attribute_type_id == 'biolink:publications' and isinstance(attribute.value, list):
            n_publications = len(attribute.value)
    return n_publications


def remove_semmeddb_edges_and_nodes_with_low_publications(kg: KnowledgeGraph, log: ARAXResponse,
                                                          publication_threshold: int = 4):
    """Remove weakly supported SemMedDB edges (and the nodes they orphan) from the KG, in place.

    An edge is removed when SemMedDB is its primary knowledge source and it carries fewer than
    `publication_threshold` publications (edges without attributes count as zero). Nodes that
    were touched only by removed edges are then deleted from `kg.nodes`.

    :param kg: Knowledge graph whose `edges` and `nodes` mappings are modified in place.
    :param log: Response object used for logging; receives an info message on success or
                error messages (including the traceback) on failure.
    :param publication_threshold: Minimum number of publications a SemMedDB edge needs in
                                  order to be kept.
    """
    edges_removed_counter = 0
    removed_nodes = set()
    connected_nodes = set()
    try:
        # Iterate over a snapshot of the keys because edges are deleted inside the loop
        for edge_key in list(kg.edges):
            edge = kg.edges[edge_key]
            endpoints = (edge.subject, edge.object)
            if (not _is_semmeddb_primary_edge(edge)
                    or _count_edge_publications(edge) >= publication_threshold):
                # Edge survives, so both of its nodes must be retained
                connected_nodes.update(endpoints)
                continue
            removed_nodes.update(endpoints)
            del kg.edges[edge_key]
            edges_removed_counter += 1
        # Only delete nodes that no surviving edge refers to. pop() with a default tolerates
        # edges that reference a node missing from kg.nodes, so the cleanup always completes.
        for node_key in removed_nodes - connected_nodes:
            kg.nodes.pop(node_key, None)
    except Exception as error:
        # Best-effort filter: report the problem through the log rather than failing Expand
        log.error(traceback.format_exc(), error_code=type(error).__name__)
        log.error("Something went wrong removing semmeddb edges from the knowledge graph")
    else:
        log.info(f"{edges_removed_counter} Semmeddb Edges with low publications successfully removed")




def is_expand_created_subclass_qedge_key(qedge_key: str, qg: QueryGraph) -> bool:
"""
Expand Down
4 changes: 2 additions & 2 deletions code/ARAX/test/test_ARAX_expand.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,8 +793,7 @@ def test_qualified_regulates_query():
query = {
"nodes": {
"n0": {
"ids": ["NCBIGene:7157"]
# "ids": ["NCBIGene:375"]
"ids": ["NCBIGene:375"]
},
"n1": {
"categories": ["biolink:Gene"]
Expand Down Expand Up @@ -830,6 +829,7 @@ def test_qualified_regulates_query():
}
nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)


def test_1516_single_quotes_in_ids():
actions = [
"add_qnode(key=n0,ids=UniProtKB:P00491)",
Expand Down
18 changes: 9 additions & 9 deletions code/ARAX/test/test_ARAX_filter_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ def test_n_results():
"expand(edge_key=e00, kp=infores:rtx-kg2)",
"overlay(action=add_node_pmids, max_num=15)",
"resultify(ignore_edge_direction=true)",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=5)",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=20)",
"return(message=true, store=false)"
]}}
[response, message] = _do_arax_query(query)
assert response.status == 'OK'
assert message.n_results == len(message.results) == 5
assert message.n_results == len(message.results) == 20

def test_no_results():
query = {"operations": {"actions": [
Expand Down Expand Up @@ -128,13 +128,13 @@ def test_warning():
"expand(edge_key=e00, kp=infores:rtx-kg2)",
"overlay(action=add_node_pmids, max_num=15)",
"resultify(ignore_edge_direction=true)",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=5)",
"filter_results(action=sort_by_node_attribute, node_attribute=asdfghjkl, direction=a, max_results=5)",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=20)",
"filter_results(action=sort_by_node_attribute, node_attribute=asdfghjkl, direction=a, max_results=20)",
"return(message=true, store=false)"
]}}
[response, message] = _do_arax_query(query)
assert response.status == 'OK'
assert len(message.results) == 5
assert len(message.results) == 20

@pytest.mark.slow
def test_sort_by_edge_attribute():
Expand Down Expand Up @@ -166,12 +166,12 @@ def test_sort_by_node_attribute():
"expand(edge_key=e00, kp=infores:rtx-kg2)",
"overlay(action=add_node_pmids, max_num=15)",
"resultify(ignore_edge_direction=true)",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=5, qnode_keys=[n01])",
"filter_results(action=sort_by_node_attribute, node_attribute=pubmed_ids, direction=a, max_results=20, qnode_keys=[n01])",
"return(message=true, store=false)"
]}}
[response, message] = _do_arax_query(query)
assert response.status == 'OK'
assert len(message.results) == 5
assert len(message.results) == 20
# add something to test that the results are ascending and have the correct numbers

def test_sort_by_score():
Expand All @@ -182,12 +182,12 @@ def test_sort_by_score():
"add_qedge(subject=n00, object=n01, key=e00)",
"expand(edge_key=e00, kp=infores:rtx-kg2)",
"resultify(ignore_edge_direction=true)",
"filter_results(action=sort_by_score, direction=a, max_results=8)",
"filter_results(action=sort_by_score, direction=a, max_results=20)",
"return(message=true, store=false)"
]}}
[response, message] = _do_arax_query(query)
assert response.status == 'OK'
assert len(message.results) == 8
assert len(message.results) == 20
result_scores = [x.analyses[0].score for x in message.results]
assert result_scores == sorted(result_scores)
assert max(result_scores) < 1
Expand Down

0 comments on commit 86d289c

Please sign in to comment.