From c3bcee544fdab016f309ff0df381f9b61cf5f8be Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Wed, 17 Apr 2024 14:44:01 +0200 Subject: [PATCH] feat: port para json api --- src/sblex/application/queries/__init__.py | 3 + src/sblex/application/queries/paradigms.py | 26 +++ src/sblex/fm/fm_runner.py | 33 ++- .../queries/fm_runner_paradigms.py | 22 ++ src/sblex/saldo_ws/deps.py | 6 + src/sblex/saldo_ws/routes/__init__.py | 2 + src/sblex/saldo_ws/routes/paradigms.py | 18 ++ tests/adapters/mem_fm_runner.py | 14 +- tests/e2e/conftest.py | 193 +++++++++++++++++- ...utes.test_json_valid_input_return_200.json | 189 +++++++++++++++++ tests/e2e/saldo_ws/test_para_api.py | 11 + 11 files changed, 509 insertions(+), 8 deletions(-) create mode 100644 src/sblex/application/queries/paradigms.py create mode 100644 src/sblex/infrastructure/queries/fm_runner_paradigms.py create mode 100644 src/sblex/saldo_ws/routes/paradigms.py create mode 100644 tests/e2e/saldo_ws/__snapshots__/test_para_api/TestParaRoutes.test_json_valid_input_return_200.json create mode 100644 tests/e2e/saldo_ws/test_para_api.py diff --git a/src/sblex/application/queries/__init__.py b/src/sblex/application/queries/__init__.py index e936474..c6cfd25 100644 --- a/src/sblex/application/queries/__init__.py +++ b/src/sblex/application/queries/__init__.py @@ -2,6 +2,7 @@ from sblex.application.queries.inflection import InflectionTableQuery, InflectionTableRow from sblex.application.queries.lex_fullforms import FullformLex, FullformLexQuery from sblex.application.queries.lookup_lid import LookupLid +from sblex.application.queries.paradigms import NoPartOfSpeechOnBaseform, Paradigms __all__ = [ "FullformLex", @@ -10,4 +11,6 @@ "FullformQuery", "InflectionTableQuery", "InflectionTableRow", + "Paradigms", + "NoPartOfSpeechOnBaseform", ] diff --git a/src/sblex/application/queries/paradigms.py b/src/sblex/application/queries/paradigms.py new file mode 100644 index 0000000..14d7e77 --- /dev/null +++ b/src/sblex/application/queries/paradigms.py @@ -0,0 +1,26 @@ +import abc +import typing + + +class Paradigms(abc.ABC): + @abc.abstractmethod + def query(self, s: str) -> typing.Tuple[str, list[str]]: ... + + def prepare_args(self, s: str) -> typing.Tuple[str, list[str]]: + xs = [x.strip() for x in s.split(",") if len(x) > 0] + baseform, pos = split_word_and_pos(xs[0]) + xs[0] = f"{baseform}:{pos}" + return baseform, xs + + +def split_word_and_pos(s: str) -> typing.Tuple[str, str]: + n = s.find(":") + if n == -1: + raise NoPartOfSpeechOnBaseform() + word = s[:n].strip() + pos = s[n + 1 :].strip() + return word, pos + + +class NoPartOfSpeechOnBaseform(Exception): + """The baseform must contain a Part-of-Speech tag.""" diff --git a/src/sblex/fm/fm_runner.py b/src/sblex/fm/fm_runner.py index 821d6d3..cae71e3 100644 --- a/src/sblex/fm/fm_runner.py +++ b/src/sblex/fm/fm_runner.py @@ -1,7 +1,9 @@ import logging import subprocess +import sys from pathlib import Path from typing import TypedDict +import typing from json_arrays import jsonlib from opentelemetry import trace @@ -26,18 +28,26 @@ def __init__(self, binary_path: Path, *, locale: str | None = None) -> None: self.locale = locale or 'LC_ALL="sv_SE.UTF-8"' def inflection(self, paradigm: str, word: str) -> list[InflectionRow]: - with tracer.start_as_current_span("call_fm_binary") as call_span: - args = f'{paradigm} "{word}";' - program = [ + with trace.get_tracer(__name__).start_as_current_span( + sys._getframe().f_code.co_name + ) as _call_span: + program: list[typing.Union[str, Path]] = [ self.binary_path, "-i", ] args = f'{paradigm} "{word}";' + return self._call_fm_binary(program=program, args=args) + + def _call_fm_binary(self, program: list[typing.Union[str, Path]], args: str): + with trace.get_tracer(__name__).start_as_current_span( + sys._getframe().f_code.co_name + ) as call_span: call_span.set_attribute("program", str(program)) call_span.set_attribute("args", args) process = subprocess.run( program, # type: ignore # noqa: S603 input=args.encode("utf-8"), + check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) @@ -47,10 +57,25 @@ def inflection(self, paradigm: str, word: str) -> list[InflectionRow]: extra={ "stderr": process.stderr.decode("utf-8"), "binary_path": self.binary_path, + "program": str(program), "args": args, }, ) raise RuntimeError("Call to `fm-sblex` failed") - raw_data = process.stdout.strip() + raw_data = process.stdout return jsonlib.loads(raw_data) if len(raw_data) > 0 else [] + + def paradigms(self, words: list[str]) -> list[str]: + with trace.get_tracer(__name__).start_as_current_span( + sys._getframe().f_code.co_name + ) as _call_span: + program: list[typing.Union[str, Path]] = [ + self.binary_path, + "-f", + ] + args = ",".join(words) + return self._call_fm_binary( + program=program, + args=args, + ) diff --git a/src/sblex/infrastructure/queries/fm_runner_paradigms.py b/src/sblex/infrastructure/queries/fm_runner_paradigms.py new file mode 100644 index 0000000..0c1d7b1 --- /dev/null +++ b/src/sblex/infrastructure/queries/fm_runner_paradigms.py @@ -0,0 +1,22 @@ +import sys +import typing + +from opentelemetry import trace +from sblex.application.queries import Paradigms +from sblex.fm import FmRunner + + +class FmRunnerParadigms(Paradigms): + def __init__(self, *, fm_runner: FmRunner) -> None: + super().__init__() + self.fm_runner = fm_runner + + def query(self, s: str) -> typing.Tuple[str, list]: + with trace.get_tracer(__name__).start_as_current_span( + sys._getframe().f_code.co_name + ) as _process_api_span: + baseform, words = self.prepare_args(s) + print(f"{baseform=}, {words=}") + print(f"{type(self.fm_runner)=}") + result = self.fm_runner.paradigms(words) + return baseform, result diff --git a/src/sblex/saldo_ws/deps.py b/src/sblex/saldo_ws/deps.py index 3a5aa66..ec3b5d0 100644 --- a/src/sblex/saldo_ws/deps.py +++ b/src/sblex/saldo_ws/deps.py @@ -2,11 +2,13 @@ from fastapi import Depends, Request from sblex.application.queries import FullformLexQuery, LookupLid from sblex.application.queries.inflection import InflectionTableQuery +from sblex.application.queries.paradigms import Paradigms from sblex.application.services import LookupService from sblex.fm import Morphology from sblex.fm.fm_runner import FmRunner from sblex.infrastructure.queries import LookupFullformLexQuery from sblex.infrastructure.queries.fm_runner_inflection import FmRunnerInflectionTable +from sblex.infrastructure.queries.fm_runner_paradigms import FmRunnerParadigms from sblex.infrastructure.queries.http_morpology import HttpMorphology @@ -48,3 +50,7 @@ def get_fullform_lex_query( lookup_service: LookupService = Depends(get_lookup_service), # noqa: B008 ) -> FullformLexQuery: return LookupFullformLexQuery(lookup_service=lookup_service) + + +def get_paradigms(fm_runner: FmRunner = Depends(get_fm_runner)) -> Paradigms: # noqa: B008 + return FmRunnerParadigms(fm_runner=fm_runner) diff --git a/src/sblex/saldo_ws/routes/__init__.py b/src/sblex/saldo_ws/routes/__init__.py index 8fcc1c3..5665e38 100644 --- a/src/sblex/saldo_ws/routes/__init__.py +++ b/src/sblex/saldo_ws/routes/__init__.py @@ -5,6 +5,7 @@ fullform_lex, inflection, lids, + paradigms, system_info, ) @@ -18,4 +19,5 @@ compounds.router, prefix="/sms", tags=["sms", "sammansättning", "compound"] ) router.include_router(inflection.router, prefix="/gen", tags=["inflection"]) +router.include_router(paradigms.router, prefix="/para", tags=["paradigms"]) router.include_router(system_info.router, tags=["system-info"]) diff --git a/src/sblex/saldo_ws/routes/paradigms.py b/src/sblex/saldo_ws/routes/paradigms.py new file mode 100644 index 0000000..1d21e46 --- /dev/null +++ b/src/sblex/saldo_ws/routes/paradigms.py @@ -0,0 +1,18 @@ +import sys + +from fastapi import APIRouter, Depends +from opentelemetry import trace +from sblex.application.queries import Paradigms +from sblex.saldo_ws import deps + +router = APIRouter() + + +@router.get("/json/{words}", response_model=list[str]) +async def get_para_json(words: str, paradigms: Paradigms = Depends(deps.get_paradigms)): # noqa: B008 + with trace.get_tracer(__name__).start_as_current_span( + sys._getframe().f_code.co_name + ) as _process_api_span: + print(f"{words=}") + _baseform, result = paradigms.query(words) + return result diff --git a/tests/adapters/mem_fm_runner.py b/tests/adapters/mem_fm_runner.py index 5346de6..ca9232c 100644 --- a/tests/adapters/mem_fm_runner.py +++ b/tests/adapters/mem_fm_runner.py @@ -2,8 +2,16 @@ class MemFmRunner(FmRunner): - def __init__(self, paradigms: dict[str, dict[str, list[InflectionRow]]]) -> None: - self.paradigms = paradigms + def __init__( + self, + paradigms: dict[str, dict[str, list[InflectionRow]]], + word_to_paradigm: dict[str, list[str]], + ) -> None: + self._paradigms = paradigms + self._word_to_paradigm = word_to_paradigm def inflection(self, paradigm: str, word: str) -> list[InflectionRow]: - return self.paradigms.get(paradigm, {}).get(word) or [] + return self._paradigms.get(paradigm, {}).get(word) or [] + + def paradigms(self, words: list[str]): + return self._word_to_paradigm.get(words[0], []) diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index 40dec52..4e99f0e 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -144,7 +144,198 @@ def override_fm_runner() -> FmRunner: }, ] } - } + }, + word_to_paradigm={ + "dväljes:vb": [ + "vb_0d_lyss", + "vb_0d_lyster", + "vb_0d_nåde", + "vb_0d_vederböra", + "vb_0d_värdes", + "vb_1a_beundra", + "vb_1a_hitta", + "vb_1a_häda", + "vb_1a_klaga", + "vb_1a_laga", + "vb_1a_skapa", + "vb_1a_spara", + "vb_1a_ugnsbaka", + "vb_1a_unna", + "vb_1a_vissla", + "vb_1a_vänta", + "vb_1m_existera", + "vb_1m_hisna", + "vb_1m_kackla", + "vb_1m_svira", + "vb_1m_vånna", + "vb_1s_andas", + "vb_1s_gillas", + "vb_2a_ansöka", + "vb_2a_genmäla", + "vb_2a_göra", + "vb_2a_hyra", + "vb_2a_känna", + "vb_2a_leda", + "vb_2a_leva", + "vb_2a_lyfta", + "vb_2a_lägga", + "vb_2a_mista", + "vb_2a_motsäga", + "vb_2a_spörja", + "vb_2a_städja", + "vb_2a_stödja", + "vb_2a_säga", + "vb_2a_sälja", + "vb_2a_sända", + "vb_2a_sätta", + "vb_2a_tämja", + "vb_2a_viga", + "vb_2a_välja", + "vb_2d_må", + "vb_2d_rädas", + "vb_2d_torde", + "vb_2m_böra", + "vb_2m_gitta", + "vb_2m_glädja", + "vb_2m_ha", + "vb_2m_hända", + "vb_2m_höta", + "vb_2m_mysa", + "vb_2m_väga", + "vb_2s_blygas", + "vb_2s_giftas", + "vb_2s_glädjas", + "vb_2s_hövas", + "vb_2s_idas", + "vb_2s_minnas", + "vb_2s_nöjas", + "vb_2s_rymmas", + "vb_2s_skiljas", + "vb_2s_synas", + "vb_2s_trivas", + "vb_2s_töras", + "vb_2s_vämjas", + "vb_3a_sy", + "vb_3s_brås", + "vb_4a_be", + "vb_4a_bli", + "vb_4a_bottenfrysa", + "vb_4a_bära", + "vb_4a_dricka", + "vb_4a_emotstå", + "vb_4a_falla", + "vb_4a_fara", + "vb_4a_flyga", + "vb_4a_förgäta", + "vb_4a_ge", + "vb_4a_gå", + "vb_4a_hålla", + "vb_4a_komma", + "vb_4a_missförstå", + "vb_4a_rida", + "vb_4a_se", + "vb_4a_skjuta", + "vb_4a_slå", + "vb_4a_stinga", + "vb_4a_stjäla", + "vb_4a_svära", + "vb_4a_ta", + "vb_4a_tillåta", + "vb_4a_äta", + "vb_4d_vederfås", + "vb_4m_angå", + "vb_4m_bekomma", + "vb_4m_erfara", + "vb_4m_förevara", + "vb_4m_förslå", + "vb_4m_gråta", + "vb_4m_innebära", + "vb_4m_le", + "vb_4m_ligga", + "vb_4m_ljuda", + "vb_4m_ryta", + "vb_4m_sitta", + "vb_4m_skåpäta", + "vb_4m_småsvära", + "vb_4m_sova", + "vb_4m_stå", + "vb_4m_svälta_1", + "vb_4m_vara", + "vb_4m_vina", + "vb_4s_bitas", + "vb_4s_finnas", + "vb_4s_hållas", + "vb_4s_munhuggas", + "vb_4s_slåss", + "vb_4s_tas", + "vb_4s_umgås", + "vb_4s_vederfaras", + "vb_id_månde", + "vb_ik_bevare", + "vb_oa_varda", + "vb_om_heta", + "vb_om_kunna", + "vb_om_måste", + "vb_om_skola", + "vb_om_veta", + "vb_om_vilja", + "vb_va_begrava", + "vb_va_besluta", + "vb_va_bestrida", + "vb_va_besvärja", + "vb_va_bringa", + "vb_va_framtvinga", + "vb_va_frysa", + "vb_va_förlöpa", + "vb_va_förmäla", + "vb_va_förse", + "vb_va_gälda", + "vb_va_gälla_kastrera", + "vb_va_klyva", + "vb_va_klä", + "vb_va_koka", + "vb_va_kväda", + "vb_va_lyda", + "vb_va_löpa", + "vb_va_mala", + "vb_va_nypa", + "vb_va_nästa", + "vb_va_simma", + "vb_va_skvätta", + "vb_va_smälta", + "vb_va_snusmala", + "vb_va_sprida", + "vb_va_strypa", + "vb_va_stupa", + "vb_va_svälta_2", + "vb_va_tala", + "vb_va_träda", + "vb_va_tvinga", + "vb_va_två", + "vb_va_tälja", + "vb_va_utlöpa", + "vb_va_vika", + "vb_va_växa", + "vb_vm_avvara", + "vb_vm_drypa", + "vb_vm_drösa", + "vb_vm_duga", + "vb_vm_fnysa", + "vb_vm_gala", + "vb_vm_klinga", + "vb_vm_kvida", + "vb_vm_nysa", + "vb_vm_ryka", + "vb_vm_samvara", + "vb_vm_sluta", + "vb_vm_smälla", + "vb_vm_snika", + "vb_vm_strida", + "vb_vm_undvara", + "vb_vm_upphäva", + "vb_vs_dväljas", + ] + }, ) webapp.dependency_overrides[get_fm_client] = override_fm_client diff --git a/tests/e2e/saldo_ws/__snapshots__/test_para_api/TestParaRoutes.test_json_valid_input_return_200.json b/tests/e2e/saldo_ws/__snapshots__/test_para_api/TestParaRoutes.test_json_valid_input_return_200.json new file mode 100644 index 0000000..b6e4f64 --- /dev/null +++ b/tests/e2e/saldo_ws/__snapshots__/test_para_api/TestParaRoutes.test_json_valid_input_return_200.json @@ -0,0 +1,189 @@ +[ + "vb_0d_lyss", + "vb_0d_lyster", + "vb_0d_nåde", + "vb_0d_vederböra", + "vb_0d_värdes", + "vb_1a_beundra", + "vb_1a_hitta", + "vb_1a_häda", + "vb_1a_klaga", + "vb_1a_laga", + "vb_1a_skapa", + "vb_1a_spara", + "vb_1a_ugnsbaka", + "vb_1a_unna", + "vb_1a_vissla", + "vb_1a_vänta", + "vb_1m_existera", + "vb_1m_hisna", + "vb_1m_kackla", + "vb_1m_svira", + "vb_1m_vånna", + "vb_1s_andas", + "vb_1s_gillas", + "vb_2a_ansöka", + "vb_2a_genmäla", + "vb_2a_göra", + "vb_2a_hyra", + "vb_2a_känna", + "vb_2a_leda", + "vb_2a_leva", + "vb_2a_lyfta", + "vb_2a_lägga", + "vb_2a_mista", + "vb_2a_motsäga", + "vb_2a_spörja", + "vb_2a_städja", + "vb_2a_stödja", + "vb_2a_säga", + "vb_2a_sälja", + "vb_2a_sända", + "vb_2a_sätta", + "vb_2a_tämja", + "vb_2a_viga", + "vb_2a_välja", + "vb_2d_må", + "vb_2d_rädas", + "vb_2d_torde", + "vb_2m_böra", + "vb_2m_gitta", + "vb_2m_glädja", + "vb_2m_ha", + "vb_2m_hända", + "vb_2m_höta", + "vb_2m_mysa", + "vb_2m_väga", + "vb_2s_blygas", + "vb_2s_giftas", + "vb_2s_glädjas", + "vb_2s_hövas", + "vb_2s_idas", + "vb_2s_minnas", + "vb_2s_nöjas", + "vb_2s_rymmas", + "vb_2s_skiljas", + "vb_2s_synas", + "vb_2s_trivas", + "vb_2s_töras", + "vb_2s_vämjas", + "vb_3a_sy", + "vb_3s_brås", + "vb_4a_be", + "vb_4a_bli", + "vb_4a_bottenfrysa", + "vb_4a_bära", + "vb_4a_dricka", + "vb_4a_emotstå", + "vb_4a_falla", + "vb_4a_fara", + "vb_4a_flyga", + "vb_4a_förgäta", + "vb_4a_ge", + "vb_4a_gå", + "vb_4a_hålla", + "vb_4a_komma", + "vb_4a_missförstå", + "vb_4a_rida", + "vb_4a_se", + "vb_4a_skjuta", + "vb_4a_slå", + "vb_4a_stinga", + "vb_4a_stjäla", + "vb_4a_svära", + "vb_4a_ta", + "vb_4a_tillåta", + "vb_4a_äta", + "vb_4d_vederfås", + "vb_4m_angå", + "vb_4m_bekomma", + "vb_4m_erfara", + "vb_4m_förevara", + "vb_4m_förslå", + "vb_4m_gråta", + "vb_4m_innebära", + "vb_4m_le", + "vb_4m_ligga", + "vb_4m_ljuda", + "vb_4m_ryta", + "vb_4m_sitta", + "vb_4m_skåpäta", + "vb_4m_småsvära", + "vb_4m_sova", + "vb_4m_stå", + "vb_4m_svälta_1", + "vb_4m_vara", + "vb_4m_vina", + "vb_4s_bitas", + "vb_4s_finnas", + "vb_4s_hållas", + "vb_4s_munhuggas", + "vb_4s_slåss", + "vb_4s_tas", + "vb_4s_umgås", + "vb_4s_vederfaras", + "vb_id_månde", + "vb_ik_bevare", + "vb_oa_varda", + "vb_om_heta", + "vb_om_kunna", + "vb_om_måste", + "vb_om_skola", + "vb_om_veta", + "vb_om_vilja", + "vb_va_begrava", + "vb_va_besluta", + "vb_va_bestrida", + "vb_va_besvärja", + "vb_va_bringa", + "vb_va_framtvinga", + "vb_va_frysa", + "vb_va_förlöpa", + "vb_va_förmäla", + "vb_va_förse", + "vb_va_gälda", + "vb_va_gälla_kastrera", + "vb_va_klyva", + "vb_va_klä", + "vb_va_koka", + "vb_va_kväda", + "vb_va_lyda", + "vb_va_löpa", + "vb_va_mala", + "vb_va_nypa", + "vb_va_nästa", + "vb_va_simma", + "vb_va_skvätta", + "vb_va_smälta", + "vb_va_snusmala", + "vb_va_sprida", + "vb_va_strypa", + "vb_va_stupa", + "vb_va_svälta_2", + "vb_va_tala", + "vb_va_träda", + "vb_va_tvinga", + "vb_va_två", + "vb_va_tälja", + "vb_va_utlöpa", + "vb_va_vika", + "vb_va_växa", + "vb_vm_avvara", + "vb_vm_drypa", + "vb_vm_drösa", + "vb_vm_duga", + "vb_vm_fnysa", + "vb_vm_gala", + "vb_vm_klinga", + "vb_vm_kvida", + "vb_vm_nysa", + "vb_vm_ryka", + "vb_vm_samvara", + "vb_vm_sluta", + "vb_vm_smälla", + "vb_vm_snika", + "vb_vm_strida", + "vb_vm_undvara", + "vb_vm_upphäva", + "vb_vs_dväljas" +] diff --git a/tests/e2e/saldo_ws/test_para_api.py b/tests/e2e/saldo_ws/test_para_api.py new file mode 100644 index 0000000..c6e9665 --- /dev/null +++ b/tests/e2e/saldo_ws/test_para_api.py @@ -0,0 +1,11 @@ +import pytest +from fastapi import status +from httpx import AsyncClient + + +class TestParaRoutes: + @pytest.mark.asyncio + async def test_json_valid_input_return_200(self, client: AsyncClient, snapshot_json) -> None: + res = await client.get("/para/json/dväljes:vb") + assert res.status_code == status.HTTP_200_OK + assert res.json() == snapshot_json