Skip to content

Commit

Permalink
Added a highlighting file to turn on/off highlighting.
Browse files Browse the repository at this point in the history
  • Loading branch information
gaurav committed Aug 26, 2024
1 parent b741a8d commit 2329a3a
Showing 1 changed file with 34 additions and 20 deletions.
54 changes: 34 additions & 20 deletions api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,7 @@ async def reverse_lookup(curies) -> Dict[str, Dict]:
class LookupResult(BaseModel):
curie:str
label: str
preferred_matches: List[str]
synonym_matches: List[str]
highlighting: Dict[str, List[str]]
synonyms: List[str]
taxa: List[str]
types: List[str]
Expand All @@ -182,6 +181,9 @@ async def lookup_curies_get(
autocomplete: Annotated[bool, Query(
description="Is the input string incomplete (autocomplete=true) or a complete phrase (autocomplete=false)?"
)] = True,
highlighting: Annotated[bool, Query(
description="Return information on which labels and synonyms matched the search query?"
)] = False,
offset: Annotated[int, Query(
description="The number of results to skip. Can be used to page through the results of a query.",
# Offset should be greater than or equal to zero.
Expand Down Expand Up @@ -218,7 +220,7 @@ async def lookup_curies_get(
"""
Returns cliques with a name or synonym that contains a specified string.
"""
return await lookup(string, autocomplete, offset, limit, biolink_type, only_prefixes, exclude_prefixes, only_taxa)
return await lookup(string, autocomplete, highlighting, offset, limit, biolink_type, only_prefixes, exclude_prefixes, only_taxa)


@app.post("/lookup",
Expand All @@ -234,6 +236,9 @@ async def lookup_curies_post(
autocomplete: Annotated[bool, Query(
description="Is the input string incomplete (autocomplete=true) or a complete phrase (autocomplete=false)?"
)] = True,
highlighting: Annotated[bool, Query(
description="Return information on which labels and synonyms matched the search query?"
)] = False,
offset: Annotated[int, Query(
description="The number of results to skip. Can be used to page through the results of a query.",
# Offset should be greater than or equal to zero.
Expand Down Expand Up @@ -270,11 +275,12 @@ async def lookup_curies_post(
"""
Returns cliques with a name or synonym that contains a specified string.
"""
return await lookup(string, autocomplete, offset, limit, biolink_type, only_prefixes, exclude_prefixes, only_taxa)
return await lookup(string, autocomplete, highlighting, offset, limit, biolink_type, only_prefixes, exclude_prefixes, only_taxa)


async def lookup(string: str,
autocomplete: bool = False,
highlighting: bool = False,
offset: int = 0,
limit: conint(le=1000) = 10,
biolink_type: str = None,
Expand All @@ -287,6 +293,7 @@ async def lookup(string: str,
:param autocomplete: Should we do the lookup in autocomplete mode (in which we expect the final word to be
incomplete) or not (in which the entire phrase is expected to be complete, i.e. as an entity linker)?
:param highlighting: Return information on which labels and synonyms matched the search query.
"""

# First, we lowercase the query since all our indexes are case-insensitive.
Expand Down Expand Up @@ -338,6 +345,20 @@ async def lookup(string: str,
taxa_filters.append(f'taxa:"{taxon}"')
filters.append(" OR ".join(taxa_filters))

# Turn on highlighting if requested.
inner_params = {}
if highlighting:
inner_params.update({
# Highlighting
"hl": "true",
"hl.method": "unified",
"hl.encoder": "html",
"hl.tag.pre": "<strong>",
"hl.tag.post": "</strong>",
# "hl.usePhraseHighlighter": "true",
# "hl.highlightMultiTerm": "true",
})

params = {
"query": {
"edismax": {
Expand All @@ -362,16 +383,7 @@ async def lookup(string: str,
"offset": offset,
"filter": filters,
"fields": "*, score",
"params": {
# Highlighting
"hl": "true",
"hl.method": "unified",
"hl.encoder": "html",
"hl.tag.pre": "<strong>",
"hl.tag.post": "</strong>",
# "hl.usePhraseHighlighter": "true",
# "hl.highlightMultiTerm": "true",
},
"params": inner_params,
}
logging.debug(f"Query: {json.dumps(params, indent=2)}")

Expand All @@ -382,18 +394,18 @@ async def lookup(string: str,
LOGGER.error("Solr REST error: %s", response.text)
response.raise_for_status()
response = response.json()
print(f"Solr response: {json.dumps(response, indent=2)}")
logging.debug(f"Solr response: {json.dumps(response, indent=2)}")

# Associate highlighting information with search results.
highlighting = response["highlighting"]
highlighting_response = response.get("highlighting", {})

outputs = []
for doc in response['response']['docs']:
preferred_matches = []
synonym_matches = []

if doc['id'] in highlighting:
matches = highlighting[doc['id']]
if doc['id'] in highlighting_response:
matches = highlighting_response[doc['id']]

# We order exactish matches before token matches.
if 'preferred_name_exactish' in matches:
Expand All @@ -415,8 +427,10 @@ async def lookup(string: str,

outputs.append(LookupResult(curie=doc.get("curie", ""),
label=doc.get("preferred_name", ""),
preferred_matches=preferred_matches,
synonym_matches=synonym_matches,
highlighting={
'labels': preferred_matches,
'synonyms': synonym_matches,
} if highlighting else {},
synonyms=doc.get("names", []),
score=doc.get("score", ""),
taxa=doc.get("taxa", []),
Expand Down

0 comments on commit 2329a3a

Please sign in to comment.