Skip to content

Commit

Permalink
Merge pull request #150 from spraakbanken/136-error-on-non-existent-word
Browse files Browse the repository at this point in the history
136 error on non existent word
  • Loading branch information
kod-kristoff authored Jun 12, 2024
2 parents 05935b1 + 42ee6d6 commit 22ef6e9
Show file tree
Hide file tree
Showing 13 changed files with 142 additions and 59 deletions.
2 changes: 1 addition & 1 deletion ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ unfixable = ["B"]

# Ignore `E402` (import violations) in all `__init__.py` files, and in `path/to/file.py`.
[lint.per-file-ignores]
"tests/*" = ["D100", "D101", "D102", "D103", "D104", "S101"]
"tests/**/*.py" = ["D100", "D101", "D102", "D103", "D104", "S101"]
"bases/sblex/webapp/tests/**/*" = [
"D100",
"D101",
Expand Down
4 changes: 3 additions & 1 deletion src/sblex/application/services/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ async def lookup_ff(self, segment: str) -> list[dict[str, Any]]:
with trace.get_tracer(__name__).start_as_current_span(
sys._getframe().f_code.co_name
) as _process_api_span:
return jsonlib.loads(await self._morphology.lookup(segment))["a"]
if json_data := await self._morphology.lookup(segment):
return jsonlib.loads(json_data)["a"]
return []

async def lookup_lid(self, lid: str) -> dict[str, Any]:
with trace.get_tracer(__name__).start_as_current_span(
Expand Down
17 changes: 8 additions & 9 deletions src/sblex/fm/morphology.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""FM morphology."""

import abc
import contextlib
import logging
import sys

Expand All @@ -14,10 +15,10 @@

class Morphology(abc.ABC):
@abc.abstractmethod
async def lookup(self, word: str, n: int = 0) -> bytes: ...
async def lookup(self, word: str, n: int = 0) -> bytes | None: ...

@abc.abstractmethod
async def lookup_from_bytes(self, s: bytes) -> bytes: ...
async def lookup_from_bytes(self, s: bytes) -> bytes | None: ...


class MemMorphology(Morphology):
Expand All @@ -36,16 +37,14 @@ def from_path(cls, fname: str) -> "Morphology":
)
)

async def lookup(self, word: str, n: int = 0) -> bytes:
return r if (r := self._trie.lookup(word, n)) else b'{"id":"0","a":[],"c":""}'
async def lookup(self, word: str, n: int = 0) -> bytes | None:
return self._trie.lookup(word, n)

async def lookup_from_bytes(self, s: bytes) -> bytes:
try:
async def lookup_from_bytes(self, s: bytes) -> bytes | None:
with contextlib.suppress(Exception):
res = s.decode("UTF-8").split(" ", 1)
n = int(res[0])
word = res[1]
if r := self._trie.lookup(word, n):
return r
except: # noqa: E722, S110
pass
return b'{"id":"0","a":[],"c":""}'
return None
12 changes: 9 additions & 3 deletions src/sblex/fm_server/api/morph.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from fastapi import APIRouter, Depends, Response
from fastapi import APIRouter, Depends, Response, status
from fastapi.responses import JSONResponse
from sblex.fm.morphology import Morphology
from sblex.fm_server import deps

Expand All @@ -11,5 +12,10 @@ async def get_morph(
n: int = 0,
morphology: Morphology = Depends(deps.get_morphology), # noqa: B008
):
json_data = await morphology.lookup(fragment, n)
return Response(json_data, media_type="application/json")
if json_data := await morphology.lookup(fragment, n):
return Response(json_data, media_type="application/json")
return JSONResponse(
{"msg": f"fragment '{fragment}' not found"},
status_code=status.HTTP_404_NOT_FOUND,
media_type="application/json",
)
16 changes: 14 additions & 2 deletions src/sblex/infrastructure/queries/http_morpology.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,33 @@
import logging
import sys

import httpx
from opentelemetry import trace
from sblex.fm import Morphology

logger = logging.getLogger(__name__)


class HttpMorphology(Morphology):
    """Morphology implementation that asks an HTTP endpoint for analyses.

    Every query is a GET against ``/morph/{word}/{n}`` on the injected
    ``httpx.AsyncClient``.
    """

    def __init__(self, http_client: httpx.AsyncClient) -> None:
        """Store the async HTTP client used for all lookups."""
        self._http_client = http_client

    async def lookup(self, word: str, n: int = 0) -> bytes | None:
        """Look up ``word`` starting from state ``n``.

        Returns:
            The raw response body on success, or ``None`` when the server
            answers 404 or any other status >= 400.
        """
        # The span is named after this method ("lookup").
        with trace.get_tracer(__name__).start_as_current_span(
            sys._getframe().f_code.co_name
        ) as _process_api_span:
            response = await self._http_client.get(f"/morph/{word}/{n}")
            return self._content_or_none(response)

    async def lookup_from_bytes(self, s: bytes) -> bytes | None:
        """Look up the UTF-8 encoded word ``s`` with start state 0.

        Returns:
            The raw response body on success, or ``None`` when the server
            answers 404 or any other status >= 400.

        Raises:
            UnicodeDecodeError: If ``s`` is not valid UTF-8.
        """
        response = await self._http_client.get(f"/morph/{s.decode('utf-8')}/0")
        # Apply the same status handling as ``lookup`` so an error body is
        # never handed back to the caller as if it were morphology data.
        return self._content_or_none(response)

    @staticmethod
    def _content_or_none(response: httpx.Response) -> bytes | None:
        """Map an HTTP response to its body, or ``None`` for error statuses."""
        if response.status_code == 404:
            # Not found is an expected outcome; no logging needed.
            return None
        if response.status_code >= 400:
            logger.error(
                "Http lookup failed with status=%d",
                response.status_code,
                extra={"content": response.content},
            )
            return None
        return response.content
5 changes: 5 additions & 0 deletions src/sblex/saldo_ws/routes/fullform_lex.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,14 @@ async def fullform_lex_html(
if segment
else "Fullform med semantisk koppling"
)
if segment == "" or len(json_data) > 0:
status_code = status.HTTP_200_OK
else:
status_code = status.HTTP_404_NOT_FOUND
return templates.TemplateResponse(
request=request,
name="saldo_fullform_lex.html",
status_code=status_code,
context=templating.build_context(
request=request,
title=title,
Expand Down
6 changes: 3 additions & 3 deletions src/sblex/trie/trie.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ def from_iter(cls, dicts: Iterable[dict[str, Any]]) -> "Trie":
logger.info("building morphology took %d s", elapsed)
return trie

def lookup(self, word: str, start_state=0) -> bytes:
def lookup(self, word: str, start_state=0) -> bytes | None:
st = start_state # traversal state
for c in word:
try:
st = self._trie[st][0][c]
except: # noqa: E722
return b""
except KeyError:
return None
return self._trie[st][1]


Expand Down
78 changes: 39 additions & 39 deletions tests/e2e/saldo_ws/__snapshots__/test_fullform_lex_api.ambr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# serializer version: 1
# name: TestFullformLexRoutes.test_html_valid_input_returns_200[ ]
# name: TestFullformLexRoutes.test_html_missing_input_returns_404[l\xf6parsko]
'''
<!DOCTYPE html>
<html>
Expand All @@ -8,7 +8,7 @@
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
<link rel="shortcut icon" href="https://svn.spraakdata.gu.se/sb-arkiv/pub/images/saldo_icon.png">
<link rel="stylesheet" type="text/css" href="http://testserver/static/saldo.css" />
<title>Fullform med semantisk koppling</title>
<title>Fullform med semantisk koppling | löparsko</title>

<script src="https://unpkg.com/htmx.org@1.9.8" crossorigin="anonymous"></script>
</head>
Expand All @@ -25,11 +25,21 @@

<center>
<form action="http://testserver/fl/html" method="get" class="inputclass">
<input type="search" id="search" class="inputclass" value="" name="segment" size="30"
<input type="search" id="search" class="inputclass" value="löparsko" name="segment" size="30"
placeholder="Skriv in en ordform" results="10" />
<input type="submit" value="Sök" />
</form>

<h1>segment: löparsko</h1>
<p><a href="http://testserver/sms/html/löparsko">sammansättningsanalys (FUNGERAR INTE)</a></p>
<table border="1">

<tr>
<td>ordet saknas i lexikonet</td>
</tr>

</table>

</center>

</div>
Expand All @@ -45,7 +55,7 @@
</html>
'''
# ---
# name: TestFullformLexRoutes.test_html_valid_input_returns_200[]
# name: TestFullformLexRoutes.test_html_valid_input_returns_200[ ]
'''
<!DOCTYPE html>
<html>
Expand Down Expand Up @@ -91,7 +101,7 @@
</html>
'''
# ---
# name: TestFullformLexRoutes.test_html_valid_input_returns_200[dv\xe4ljas]
# name: TestFullformLexRoutes.test_html_valid_input_returns_200[]
'''
<!DOCTYPE html>
<html>
Expand All @@ -100,7 +110,7 @@
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
<link rel="shortcut icon" href="https://svn.spraakdata.gu.se/sb-arkiv/pub/images/saldo_icon.png">
<link rel="stylesheet" type="text/css" href="http://testserver/static/saldo.css" />
<title>Fullform med semantisk koppling | dväljas</title>
<title>Fullform med semantisk koppling</title>

<script src="https://unpkg.com/htmx.org@1.9.8" crossorigin="anonymous"></script>
</head>
Expand All @@ -117,37 +127,11 @@

<center>
<form action="http://testserver/fl/html" method="get" class="inputclass">
<input type="search" id="search" class="inputclass" value="dväljas" name="segment" size="30"
<input type="search" id="search" class="inputclass" value="" name="segment" size="30"
placeholder="Skriv in en ordform" results="10" />
<input type="submit" value="Sök" />
</form>

<h1>segment: dväljas</h1>
<p><a href="http://testserver/sms/html/dväljas">sammansättningsanalys (FUNGERAR INTE)</a></p>
<table border="1">


<tr>
<td>

<a href="http://testserver/lid/html/dväljas..1">dväljas..1</a>

</td>
<td>

<a href="http://testserver/lid/html/bo..1">bo..1</a>


</td>
<td><a href="http://testserver/gen/html/vb_vs_dväljas/dväljas">dväljas..vb.1</a>

</td>
<td><a href="https://spraakbanken.gu.se/korp/#?search=lemgram|dväljas..vb.1">korpus</a></td>
</tr>


</table>

</center>

</div>
Expand All @@ -163,7 +147,7 @@
</html>
'''
# ---
# name: TestFullformLexRoutes.test_html_valid_input_returns_200[dv\xe4ljsxdf]
# name: TestFullformLexRoutes.test_html_valid_input_returns_200[dv\xe4ljas]
'''
<!DOCTYPE html>
<html>
Expand All @@ -172,7 +156,7 @@
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
<link rel="shortcut icon" href="https://svn.spraakdata.gu.se/sb-arkiv/pub/images/saldo_icon.png">
<link rel="stylesheet" type="text/css" href="http://testserver/static/saldo.css" />
<title>Fullform med semantisk koppling | dväljsxdf</title>
<title>Fullform med semantisk koppling | dväljas</title>

<script src="https://unpkg.com/htmx.org@1.9.8" crossorigin="anonymous"></script>
</head>
Expand All @@ -189,19 +173,35 @@

<center>
<form action="http://testserver/fl/html" method="get" class="inputclass">
<input type="search" id="search" class="inputclass" value="dväljsxdf" name="segment" size="30"
<input type="search" id="search" class="inputclass" value="dväljas" name="segment" size="30"
placeholder="Skriv in en ordform" results="10" />
<input type="submit" value="Sök" />
</form>

<h1>segment: dväljsxdf</h1>
<p><a href="http://testserver/sms/html/dväljsxdf">sammansättningsanalys (FUNGERAR INTE)</a></p>
<h1>segment: dväljas</h1>
<p><a href="http://testserver/sms/html/dväljas">sammansättningsanalys (FUNGERAR INTE)</a></p>
<table border="1">


<tr>
<td>ordet saknas i lexikonet</td>
<td>

<a href="http://testserver/lid/html/dväljas..1">dväljas..1</a>

</td>
<td>

<a href="http://testserver/lid/html/bo..1">bo..1</a>


</td>
<td><a href="http://testserver/gen/html/vb_vs_dväljas/dväljas">dväljas..vb.1</a>

</td>
<td><a href="https://spraakbanken.gu.se/korp/#?search=lemgram|dväljas..vb.1">korpus</a></td>
</tr>


</table>

</center>
Expand Down
12 changes: 11 additions & 1 deletion tests/e2e/saldo_ws/test_fullform_lex_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ async def test_xml_valid_input_returns_200(
assert res.headers["content-type"] == "application/xml"
assert res.text == snapshot

@pytest.mark.parametrize("segment", ["", " ", "dväljas", "dväljsxdf"])
@pytest.mark.parametrize("segment", ["", " ", "dväljas"])
@pytest.mark.asyncio
async def test_html_valid_input_returns_200(
self, client: AsyncClient, segment: str, snapshot
Expand All @@ -42,6 +42,16 @@ async def test_html_valid_input_returns_200(
assert res.headers["content-type"] == "text/html; charset=utf-8"
assert res.text == snapshot

@pytest.mark.parametrize("segment", ["löparsko"])
@pytest.mark.asyncio
async def test_html_missing_input_returns_404(
self, client: AsyncClient, segment: str, snapshot
) -> None:
res = await client.get(f"/fl/html?segment={segment}")
assert res.status_code == status.HTTP_404_NOT_FOUND
assert res.headers["content-type"] == "text/html; charset=utf-8"
assert res.text == snapshot

@pytest.mark.parametrize("segment", ["", " ", "dväljas", "dväljsxdf"])
@pytest.mark.asyncio
async def test_html_orig_valid_input_returns_307(
Expand Down
4 changes: 4 additions & 0 deletions tests/e2e/test_infrastructure/__snapshots__/test_queries.ambr
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# serializer version: 1
# name: TestHttpMorphology.test_lookup_from_bytes
b'{"a":[{"gf":"dv\xc3\xa4ljas","id":"dv\xc3\xa4ljas..vb.1","pos":"vb","is":[],"msd":"pres ind s-form","p":"vb_vs_dv\xc3\xa4ljas"},{"gf":"dv\xc3\xa4ljas","id":"dv\xc3\xa4ljas..vb.1","pos":"vb","is":[],"msd":"imper","p":"vb_vs_dv\xc3\xa4ljas"}],"c":""}'
# ---
17 changes: 17 additions & 0 deletions tests/e2e/test_infrastructure/test_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pytest
import pytest_asyncio
from httpx import AsyncClient
from sblex.infrastructure.queries.http_morpology import HttpMorphology


@pytest_asyncio.fixture(name="http_morphology")
async def fixture_http_morphology(fm_client: AsyncClient) -> HttpMorphology:
    """Provide an ``HttpMorphology`` wired to the ``fm_client`` fixture."""
    morphology = HttpMorphology(http_client=fm_client)
    return morphology


class TestHttpMorphology:
    """End-to-end checks for ``HttpMorphology`` via the ``http_morphology`` fixture."""

    @pytest.mark.asyncio
    async def test_lookup_from_bytes(self, http_morphology: HttpMorphology, snapshot) -> None:
        """Compare the lookup result for the UTF-8 bytes of "dväljes" with the snapshot."""
        word_bytes = b"dv\xc3\xa4ljes"
        payload = await http_morphology.lookup_from_bytes(word_bytes)

        assert payload == snapshot
17 changes: 17 additions & 0 deletions tests/unit/test_queries/test_http_morphology.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from unittest.mock import AsyncMock

import pytest
from httpx import AsyncClient, Response
from sblex.infrastructure.queries.http_morpology import HttpMorphology


@pytest.mark.asyncio
async def test_lookup_from_bytes() -> None:
    """Check that a 400 response from the client yields ``None``.

    NOTE(review): despite its name, this test exercises
    ``HttpMorphology.lookup``, not ``lookup_from_bytes``.
    """
    # Stub client whose ``get`` resolves to a canned error response.
    failing_client = AsyncMock(spec=AsyncClient)
    failing_client.get.return_value = Response(400, content="error")

    outcome = await HttpMorphology(failing_client).lookup("dväljes")

    assert outcome is None
11 changes: 11 additions & 0 deletions tests/unit/test_trie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from sblex.trie.trie import TrieBuilder


def test_trie():
    """Build a small trie from two entries and print its internal state.

    NOTE(review): this test makes no assertions; it can only fail if
    inserting or building raises.
    """
    trie_builder = TrieBuilder()
    trie_builder.insert("ösja", b"{head:osja,pos:vb}")
    trie_builder.insert("örliga", b"{head:orliga,pos:vb}")
    # Debug output of the builder's state before ``build()`` is called.
    print(f"{trie_builder.trie=}")
    trie = trie_builder.build()
    # Debug output of the built trie's private ``_trie`` attribute.
    print(f"{trie._trie=}")
    # assert False  # NOTE(review): presumably toggled to make pytest show the prints

0 comments on commit 22ef6e9

Please sign in to comment.