Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue2388 #2393

Merged
merged 3 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 40 additions & 35 deletions code/ARAX/ARAXQuery/Filter_KG/remove_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,28 @@ def remove_orphaned_nodes(self):

return self.response

def _is_general_concept(self, node):
    """Return True when ``node`` matches the general-concept block list.

    ``node`` is a dict with an ``'attributes'`` entry (iterable of attribute
    dicts, or a falsy value) and a ``'name'`` entry.  The values of
    ``biolink:xref`` attributes are collected as CURIEs and the values of
    ``biolink:synonym`` attributes, plus the node name, as synonyms; all
    candidates are lowercased before comparison.

    The node is a general concept when any CURIE is in
    ``self.block_list_curies``, any synonym is in
    ``self.block_list_synonyms``, or any synonym matches (from its start)
    one of the compiled regexes in ``self.block_list_patterns``.

    Returns:
        bool: True if the node should be treated as a general concept.
    """
    attributes = node['attributes']
    if not attributes:
        # Nodes without attributes are never flagged; the name alone is not
        # checked in that case.
        return False

    curies = set()
    synonyms = set()
    collectors = {'biolink:xref': curies, 'biolink:synonym': synonyms}

    for attribute in attributes:
        collector = collectors.get(attribute['attribute_type_id'])
        if collector is None:
            continue
        values = attribute.get('value')
        if isinstance(values, str):
            # A bare string is one value, not an iterable of characters.
            values = [values]
        elif not isinstance(values, (list, tuple, set, frozenset)):
            # Missing/None or a scalar of another type: nothing to compare.
            continue
        # Skip non-string entries (e.g. None) instead of raising TypeError.
        collector.update(v.lower() for v in values if isinstance(v, str))

    name = node['name']
    if name and isinstance(name, str):
        synonyms.add(name.lower())

    # NOTE(review): candidates are lowercased but the block-list sets are
    # used as-is, so their entries must already be lowercase to match --
    # TODO confirm the code that loads the block list normalizes them.
    if not self.block_list_curies.isdisjoint(curies):
        return True
    if not self.block_list_synonyms.isdisjoint(synonyms):
        return True

    return any(
        pattern.match(synonym)
        for synonym in synonyms
        for pattern in self.block_list_patterns
    )

def remove_general_concept_nodes(self):
node_params = self.node_parameters
if 'perform_action' not in node_params:
Expand All @@ -177,44 +199,27 @@ def remove_general_concept_nodes(self):
file_name = 'general_concepts.json'
with open(blocklist_file_path) as fp:
block_list_dict = json.loads(fp.read())
block_list_synonyms = set(block_list_dict["synonyms"])
block_list_curies = set(block_list_dict["curies"])
nodes_to_remove = set()
names = []
for key, node in self.message.knowledge_graph.nodes.items():
node_dict = node.to_dict()
synonyms = []
curies = []
if not node_dict['attributes']:
self.block_list_synonyms = set(block_list_dict["synonyms"])
self.block_list_curies = set(block_list_dict["curies"])
node_to_remove = set()
self.block_list_patterns = [re.compile(pattern,re.IGNORECASE) for pattern in block_list_dict["patterns"]]
# iterate over edges find edges connected to the nodes
for key, edge in self.message.knowledge_graph.edges.items():
if set({edge.subject, edge.object}).intersection(node_to_remove):
del self.message.knowledge_graph.edges[key]
continue
for attribute in node_dict['attributes']:
if attribute['attribute_type_id'] == 'biolink:xref':
curies += attribute.get('value',[])
if attribute['attribute_type_id'] == 'biolink:synonym':
synonyms += attribute.get('value',[])
if node_dict['name']:
synonyms.append(node_dict['name'].lower())
if block_list_curies.intersection([curie.lower() for curie in curies if curie]):
nodes_to_remove.add(key)
subject_node = self.message.knowledge_graph.nodes[edge.subject].to_dict()
object_node = self.message.knowledge_graph.nodes[edge.object].to_dict()

if self._is_general_concept(subject_node):
node_to_remove.add(edge.subject)
del self.message.knowledge_graph.edges[key]
continue
for synonym in synonyms:
for block_list_synonym in block_list_synonyms:
if isinstance(synonym,str) and isinstance(block_list_synonym,str) and re.match(block_list_synonym, synonym,re.IGNORECASE):
nodes_to_remove.add(key)


for key in nodes_to_remove:
del self.message.knowledge_graph.nodes[key]
self.response.info(f"Removed {len(nodes_to_remove)} nodes from the knowledge graph which are general concepts")
edges_to_remove = set()
# iterate over edges find edges connected to the nodes
for key, edge in self.message.knowledge_graph.edges.items():
if edge.subject in nodes_to_remove or edge.object in nodes_to_remove:
edges_to_remove.add(key)
# remove edges
#self.message.knowledge_graph.edges = [val for idx, val in enumerate(self.message.knowledge_graph.edges) if idx not in edges_to_remove]
for key in edges_to_remove:
del self.message.knowledge_graph.edges[key]
if self._is_general_concept(object_node):
node_to_remove.add(edge.object)
del self.message.knowledge_graph.edges[key]
continue
self.remove_orphaned_nodes()
except:
tb = traceback.format_exc()
Expand Down
8 changes: 4 additions & 4 deletions code/ARAX/KnowledgeSources/general_concepts.json
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,6 @@
"umls:c0003209",
"umls:c4045974",
"umls:c0005522"

],
"synonyms": [
"used in nicotine dependence",
Expand Down Expand Up @@ -684,7 +683,6 @@
"secondary",
"uterotonics",
"radiotherapy",
"pharmacolog.*",
"medicament",
"Anesthetics",
"vaccines",
Expand All @@ -708,6 +706,8 @@
"Introns",
"antioxidant",
"Oils"

],
"patterns": [
"pharmacolog.*"
]
}
}
Loading