Skip to content

Commit

Permalink
split patterns and synonyms #2388
Browse files Browse the repository at this point in the history
  • Loading branch information
kvnthomas98 committed Oct 7, 2024
1 parent fda0efc commit 6bfe0b4
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 13 deletions.
21 changes: 12 additions & 9 deletions code/ARAX/ARAXQuery/Filter_KG/remove_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,23 +157,25 @@ def remove_orphaned_nodes(self):
return self.response

def _is_general_concept(self, node):
    """Return True when ``node`` matches the general-concept block list.

    A node is considered a general concept when any of the following holds:

    * one of its ``biolink:xref`` values (lowercased) is in
      ``self.block_list_curies``;
    * its name or one of its ``biolink:synonym`` values equals an entry of
      ``self.block_list_synonyms``, ignoring case;
    * its name or one of its synonyms matches (from the start of the
      string) one of the compiled regexes in ``self.block_list_patterns``.

    Nodes whose ``attributes`` entry is empty/None are never flagged, even
    if their name alone would match.

    Args:
        node: mapping with at least the keys ``'attributes'`` (iterable of
            attribute dicts carrying ``'attribute_type_id'`` and optionally
            ``'value'``) and ``'name'``.

    Returns:
        bool: True if the node should be treated as a general concept.
    """
    if not node['attributes']:
        return False

    def _lowered_strings(value):
        # Defensive normalisation: tolerate a missing/None value, a bare
        # string (which must not be iterated character by character) and
        # non-string items inside the list, instead of raising TypeError.
        if isinstance(value, str):
            value = (value,)
        elif not isinstance(value, (list, tuple, set, frozenset)):
            return set()
        return {item.lower() for item in value if isinstance(item, str)}

    curies = set()
    synonyms = set()
    for attribute in node['attributes']:
        type_id = attribute['attribute_type_id']
        if type_id == 'biolink:xref':
            curies |= _lowered_strings(attribute.get('value'))
        elif type_id == 'biolink:synonym':
            synonyms |= _lowered_strings(attribute.get('value'))
    if node['name']:
        synonyms.add(node['name'].lower())

    # Block-list CURIEs are compared as stored against the lowercased xrefs.
    if self.block_list_curies.intersection(curies):
        return True

    # Exact synonym match, case-insensitive on BOTH sides: the node-side
    # strings are already lowercased, but block-list entries may contain
    # capitals (e.g. "Anesthetics"), so a plain set intersection would
    # silently never match them.
    if synonyms and any(
        isinstance(entry, str) and entry.lower() in synonyms
        for entry in self.block_list_synonyms
    ):
        return True

    # Regex patterns are pre-compiled by the caller; ``match`` anchors at
    # the start of the synonym.
    return any(
        pattern.match(synonym)
        for synonym in synonyms
        for pattern in self.block_list_patterns
    )

def remove_general_concept_nodes(self):
Expand All @@ -200,6 +202,7 @@ def remove_general_concept_nodes(self):
self.block_list_synonyms = set(block_list_dict["synonyms"])
self.block_list_curies = set(block_list_dict["curies"])
node_to_remove = set()
self.block_list_patterns = [re.compile(pattern,re.IGNORECASE) for pattern in block_list_dict["patterns"]]
# iterate over edges find edges connected to the nodes
for key, edge in self.message.knowledge_graph.edges.items():
if set({edge.subject, edge.object}).intersection(node_to_remove):
Expand Down
8 changes: 4 additions & 4 deletions code/ARAX/KnowledgeSources/general_concepts.json
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,6 @@
"umls:c0003209",
"umls:c4045974",
"umls:c0005522"

],
"synonyms": [
"used in nicotine dependence",
Expand Down Expand Up @@ -684,7 +683,6 @@
"secondary",
"uterotonics",
"radiotherapy",
"pharmacolog.*",
"medicament",
"Anesthetics",
"vaccines",
Expand All @@ -708,6 +706,8 @@
"Introns",
"antioxidant",
"Oils"

],
"patterns": [
"pharmacolog.*"
]
}
}

0 comments on commit 6bfe0b4

Please sign in to comment.