diff --git a/README.md b/README.md index fe51663..bd74887 100644 --- a/README.md +++ b/README.md @@ -44,9 +44,15 @@ To install the package: pip install poetry poetry install --with=dev +# Download the spaCy pipeline used for tokenization +poetry run python -m spacy download en_core_web_sm + # To install with all additional NER dependencies (e.g. Flair, Stanza), run: # poetry install --with='ner,dev' +# To use the default Presidio configuration, a spaCy model is required: +poetry run python -m spacy download en_core_web_lg + # Verify installation pytest ``` diff --git a/pyproject.toml b/pyproject.toml index f6f44b7..3faf974 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,8 +22,6 @@ xmltodict = "^0.12.0" python-dotenv = "^1.0.0" plotly= "^5.24.0" azure-ai-textanalytics = "^5.3.0" -en_core_web_sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz"} -en_core_web_lg = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz"} # optional dependencies for the different NLP approaches [tool.poetry.group.ner]