diff --git a/data/deps.json b/data/deps.json index 46c34a6..116c6c6 100644 --- a/data/deps.json +++ b/data/deps.json @@ -5,7 +5,9 @@ "spacy": "3.7.5", "spacy_cpu_model": "3.7.0", "spacy_trf_model": "3.7.2", + "en_spacy_cpu_model": "3.7.1", + "en_spacy_trf_model": "3.7.3", "thinc-apple-ops": "0.1.5", - "torch": "2.3.1", + "torch": "2.4.0", "typing-extensions": "4.12.2" } diff --git a/deps.py b/deps.py index 269e41b..a3c2917 100644 --- a/deps.py +++ b/deps.py @@ -11,6 +11,7 @@ PROFICIENCY_RELEASE_URL, Prefs, get_plugin_path, + get_spacy_model_version, get_wiktionary_klld_path, kindle_db_path, load_plugin_json, @@ -44,9 +45,7 @@ def install_deps(pkg: str, notif: Any) -> None: if pkg == "": pip_install("spacy", dep_versions["spacy"], notif=notif) else: - model_version = dep_versions[ - "spacy_trf_model" if pkg.endswith("_trf") else "spacy_cpu_model" - ] + model_version = get_spacy_model_version(pkg, dep_versions) url = ( "https://github.com/explosion/spacy-models/releases/download/" f"{pkg}-{model_version}/{pkg}-{model_version}-py3-none-any.whl" diff --git a/dump_lemmas.py b/dump_lemmas.py index 3b5bac7..c1e3508 100644 --- a/dump_lemmas.py +++ b/dump_lemmas.py @@ -6,6 +6,7 @@ from .utils import ( Prefs, custom_lemmas_folder, + get_spacy_model_version, insert_installed_libs, load_plugin_json, use_kindle_ww_db, @@ -14,6 +15,7 @@ from utils import ( Prefs, custom_lemmas_folder, + get_spacy_model_version, insert_installed_libs, load_plugin_json, use_kindle_ww_db, @@ -74,9 +76,7 @@ def dump_spacy_docs( save_spacy_docs( nlp, spacy_model, - pkg_versions[ - "spacy_trf_model" if spacy_model.endswith("_trf") else "spacy_cpu_model" - ], + get_spacy_model_version(spacy_model, pkg_versions), lemma_lang, is_kindle, lemmas_conn, diff --git a/parse_job.py b/parse_job.py index 3947668..5f879ca 100644 --- a/parse_job.py +++ b/parse_job.py @@ -32,6 +32,7 @@ Prefs, dump_prefs, get_plugin_path, + get_spacy_model_version, get_user_agent, get_wiktionary_klld_path, insert_installed_libs, @@ 
def get_spacy_model_version(
    model_name: str, dependency_versions: dict[str, str]
) -> str:
    """Return the pinned version string for a spaCy model package.

    A language-specific entry (for example ``en_spacy_cpu_model``) takes
    precedence over the generic ``spacy_cpu_model`` / ``spacy_trf_model``
    entry.

    Args:
        model_name: spaCy model package name such as ``en_core_web_md``.
            Names ending in ``_trf`` select the ``spacy_trf_model`` keys,
            every other name selects the ``spacy_cpu_model`` keys.
        dependency_versions: Mapping of dependency keys to version strings.

    Returns:
        The language-specific version if present, otherwise the generic
        version, otherwise ``""`` when neither key exists.
    """
    key = "spacy_trf_model" if model_name.endswith("_trf") else "spacy_cpu_model"
    # Use the whole prefix before the first underscore instead of a fixed
    # two-character slice: a longer language code must match its own override
    # and must not pick up the override of a language that merely shares its
    # first two letters.
    lang_code = model_name.partition("_")[0]
    lang_key = f"{lang_code}_{key}"
    if lang_key in dependency_versions:
        return dependency_versions[lang_key]
    return dependency_versions.get(key, "")