From 36715e7355e0cf53176bcb14fcb1e2980183a707 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Sun, 10 Sep 2023 22:50:28 +0900 Subject: [PATCH 01/13] use official black pre-commit mirror --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e5378b18..997fc85c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,8 +8,8 @@ repos: - id: check-yaml - id: check-toml - id: check-added-large-files - - repo: https://github.com/psf/black - rev: 23.7.0 + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 23.9.0 hooks: - id: black - repo: https://github.com/PyCQA/flake8 From 78235fbefb13db43029e62b56757f70767dc3c7d Mon Sep 17 00:00:00 2001 From: nobu-g Date: Fri, 15 Sep 2023 11:32:42 +0900 Subject: [PATCH 02/13] change dependabot upgrade interval to monthly --- .github/dependabot.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index d4abccfa..6ab23893 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -8,7 +8,7 @@ updates: - package-ecosystem: "pip" directory: "/" schedule: - interval: "weekly" # TODO: Change to "monthly" + interval: "monthly" timezone: "Asia/Tokyo" target-branch: "develop" ignore: From 8d784b895f3db8a5050e2350c5c95885a669b144 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Sep 2023 02:36:06 +0000 Subject: [PATCH 03/13] Bump actions/checkout from 3 to 4 Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/build.yml | 2 +- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/lint.yml | 2 +- .github/workflows/publish.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/test-example.yml | 2 +- .github/workflows/test.yml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c53c93f3..4d168d4d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -14,7 +14,7 @@ jobs: python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index a9a16ec2..ca6b8064 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -38,7 +38,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 383794cc..d2eee160 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python 3.7 uses: actions/setup-python@v4 with: diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 1043a44a..eafc9074 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -8,7 +8,7 @@ jobs: build-publish: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.9 uses: actions/setup-python@v4 with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 46547937..e1921707 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Create Release id: create_release uses: actions/create-release@v1 diff --git a/.github/workflows/test-example.yml b/.github/workflows/test-example.yml index e4e77a60..77d15432 100644 --- a/.github/workflows/test-example.yml +++ b/.github/workflows/test-example.yml @@ -16,7 +16,7 @@ jobs: python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install required apt packages run: | apt update -y diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e94be0d5..c9b49cfb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,7 @@ jobs: python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install required apt packages run: | apt update -y From 
5869c1d972abe113de97213ab676fdcbfcff036d Mon Sep 17 00:00:00 2001 From: nobu-g Date: Fri, 15 Sep 2023 12:36:21 +0900 Subject: [PATCH 04/13] update sphinx-prompt --- poetry.lock | 18 +++++++++++++++++- pyproject.toml | 4 ++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7a546ad9..cc9d2086 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1659,6 +1659,22 @@ docutils = "*" pygments = "*" Sphinx = ">=7.0.0,<8.0.0" +[[package]] +name = "sphinx-prompt" +version = "1.8.0" +description = "Sphinx directive to add unselectable prompt" +optional = false +python-versions = ">=3.9,<4.0" +files = [ + {file = "sphinx_prompt-1.8.0-py3-none-any.whl", hash = "sha256:369ecc633f0711886f9b3a078c83264245be1adf46abeeb9b88b5519e4b51007"}, + {file = "sphinx_prompt-1.8.0.tar.gz", hash = "sha256:47482f86fcec29662fdfd23e7c04ef03582714195d01f5d565403320084372ed"}, +] + +[package.dependencies] +docutils = "*" +pygments = "*" +Sphinx = ">=7.0.0,<8.0.0" + [[package]] name = "sphinxcontrib-applehelp" version = "1.0.2" @@ -1980,4 +1996,4 @@ cli = ["PyYAML", "fastapi", "jinja2", "rich", "typer", "typer", "uvicorn", "uvic [metadata] lock-version = "2.0" python-versions = "^3.7" -content-hash = "7658ad408f5e2933532295a299be7eff9b5ee546660cac80b016b084c10cb4ca" +content-hash = "28559da1c2153842f7eafd184e07038d840b4013900dfa0effc1575e8076cbd0" diff --git a/pyproject.toml b/pyproject.toml index f10b146b..de629e17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,10 +57,10 @@ Sphinx = [ { version = "^5.3", python = "<3.8" }, { version = "^7.0", python = ">=3.8" }, ] -# sphinx-prompt 1.7.0 requires python < 3.11, which will be fixed in the next version sphinx-prompt = [ { version = "~1.5.0", python = "<3.8" }, - { version = "^1.6", python = ">=3.8,<3.11" }, + { version = "^1.6", python = ">=3.8,<3.9" }, + { version = "^1.8", python = ">=3.9" }, ] sphinx-copybutton = "^0.5.0" myst-parser = [ From 11374f8e98772bedf1f078922191587c3743df77 Mon Sep 17 
00:00:00 2001 From: nobu-g Date: Fri, 15 Sep 2023 12:37:47 +0900 Subject: [PATCH 05/13] add Python 3.12 to actions workflows --- .github/workflows/build.yml | 4 ++-- .github/workflows/test-example.yml | 3 ++- .github/workflows/test.yml | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4d168d4d..71798ef0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,11 +7,11 @@ jobs: name: Build the project runs-on: ${{ matrix.os }} strategy: - max-parallel: 15 + max-parallel: 18 fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/.github/workflows/test-example.yml b/.github/workflows/test-example.yml index 77d15432..803ae033 100644 --- a/.github/workflows/test-example.yml +++ b/.github/workflows/test-example.yml @@ -11,9 +11,10 @@ jobs: container: kunlp/jumanpp-knp:ubuntu runs-on: ubuntu-20.04 # to meet the ubuntu version in the kunlp/jumanpp-knp:ubuntu container strategy: + max-parallel: 5 fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c9b49cfb..ab2ee473 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,9 +8,10 @@ jobs: container: kunlp/jumanpp-knp:ubuntu runs-on: ubuntu-20.04 # to meet the ubuntu version in the kunlp/jumanpp-knp:ubuntu container strategy: + max-parallel: 6 fail-fast: false matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout repository uses: actions/checkout@v4 From 
95254baa40676193488b43f3f548e197b15cb67a Mon Sep 17 00:00:00 2001 From: nobu-g Date: Fri, 15 Sep 2023 12:46:34 +0900 Subject: [PATCH 06/13] add py312 to black target-version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index de629e17..5da0293c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,7 @@ rhoknp = "rhoknp.cli.cli:app" [tool.black] line-length = 120 -target-version = ["py37", "py38", "py39", "py310", "py311"] +target-version = ["py37", "py38", "py39", "py310", "py311", "py312"] [tool.isort] line_length = 120 From ea1ad54e8f116dab055335b0b02df97be3a72197 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Fri, 15 Sep 2023 13:07:58 +0900 Subject: [PATCH 07/13] allow using prerelease python --- .github/workflows/build.yml | 1 + .github/workflows/test-example.yml | 1 + .github/workflows/test.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 71798ef0..2a4bee0d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,6 +19,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + allow-prereleases: true # TODO: Remove this line when Python 3.12 is released - name: Install Poetry run: | pipx install poetry diff --git a/.github/workflows/test-example.yml b/.github/workflows/test-example.yml index 803ae033..15e73991 100644 --- a/.github/workflows/test-example.yml +++ b/.github/workflows/test-example.yml @@ -26,6 +26,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + allow-prereleases: true # TODO: Remove this line when Python 3.12 is released - name: Install Poetry run: | python3 -m pip install --user pipx diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ab2ee473..326baf14 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,6 +23,7 @@ jobs: uses: 
actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + allow-prereleases: true # TODO: Remove this line when Python 3.12 is released - name: Install Poetry run: | python3 -m pip install --user pipx From fdb4189464416342fcb679a0d11ceef934d44643 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Fri, 15 Sep 2023 13:27:53 +0900 Subject: [PATCH 08/13] fix test workflow --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 326baf14..1e28bf86 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -36,8 +36,8 @@ jobs: run: | poetry install --no-interaction --without dev,docs --extras=cli - name: Install KWJA - # KWJA does not support Python 3.7 - if: ${{ matrix.python-version != 3.7 }} + # KWJA does not support Python 3.7 or 3.12 + if: ${{ matrix.python-version != 3.7 && matrix.python-version != 3.12 }} run: | pipx install kwja kwja --model-size tiny --text "テスト" From dd64b1ed97d062e88d8f292f88105d5f0edc2655 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 18 Sep 2023 17:29:59 +0900 Subject: [PATCH 09/13] refactor --- src/rhoknp/cli/static/css/style.css | 4 ++-- src/rhoknp/units/base_phrase.py | 13 +++++++------ src/rhoknp/units/morpheme.py | 6 ++---- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/rhoknp/cli/static/css/style.css b/src/rhoknp/cli/static/css/style.css index efa92765..4e9223ab 100644 --- a/src/rhoknp/cli/static/css/style.css +++ b/src/rhoknp/cli/static/css/style.css @@ -1,12 +1,12 @@ /* templates/components/raw_input.jinja2 */ .input-text { white-space: pre-wrap; - margin: 0em 0.5em 1em; + margin: 0 0.5em 1em; padding: 0.5em; } .result { - margin: 0em 0.5em 1em; + margin: 0 0.5em 1em; padding: 0.5em; } diff --git a/src/rhoknp/units/base_phrase.py b/src/rhoknp/units/base_phrase.py index 203fb144..b0acd57f 100644 --- a/src/rhoknp/units/base_phrase.py +++ b/src/rhoknp/units/base_phrase.py @@ 
-247,12 +247,13 @@ def from_knp(cls, knp_text: str) -> "BasePhrase": match = cls.PAT.match(first_line) if match is None: raise ValueError(f"malformed base phrase line: {first_line}") - parent_index = int(match["pid"]) if match["pid"] is not None else None - dep_type = DepType(match["dtype"]) if match["dtype"] is not None else None - features = FeatureDict.from_fstring(match["feats"] or "") - rel_tags = RelTagList.from_fstring(match["feats"] or "") - memo_tag = MemoTag.from_fstring(match["feats"] or "") - base_phrase = cls(parent_index, dep_type, features, rel_tags, memo_tag) + base_phrase = cls( + parent_index=int(match["pid"]) if match["pid"] is not None else None, + dep_type=DepType(match["dtype"]) if match["dtype"] is not None else None, + features=FeatureDict.from_fstring(match["feats"] or ""), + rel_tags=RelTagList.from_fstring(match["feats"] or ""), + memo_tag=MemoTag.from_fstring(match["feats"] or ""), + ) morphemes: List[Morpheme] = [] for line in lines: diff --git a/src/rhoknp/units/morpheme.py b/src/rhoknp/units/morpheme.py index 333392db..199181b3 100644 --- a/src/rhoknp/units/morpheme.py +++ b/src/rhoknp/units/morpheme.py @@ -301,8 +301,6 @@ def _from_jumanpp_line(cls, jumanpp_line: str, homograph: bool = False) -> "Morp match_attr = cls._ATTRIBUTE_PAT.match(match["attrs"]) or cls._ATTRIBUTE_PAT_REPEATED.match(match["attrs"]) assert match_attr is not None attributes = match_attr.groups() - semantics = SemanticsDict.from_sstring(match["sems"] or "") - features = FeatureDict.from_fstring(match["feats"] or "") return cls( surf, attributes[0], @@ -315,8 +313,8 @@ def _from_jumanpp_line(cls, jumanpp_line: str, homograph: bool = False) -> "Morp int(attributes[7]), attributes[8], int(attributes[9]), - semantics, - features, + semantics=SemanticsDict.from_sstring(match["sems"] or ""), + features=FeatureDict.from_fstring(match["feats"] or ""), homograph=homograph, ) From 60cce3f42b75397a09371151c91aab152301104d Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 18 
Sep 2023 17:36:44 +0900 Subject: [PATCH 10/13] fix a bug with kwja v2.2.0 --- src/rhoknp/processors/kwja.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rhoknp/processors/kwja.py b/src/rhoknp/processors/kwja.py index bb686e21..6a94599c 100644 --- a/src/rhoknp/processors/kwja.py +++ b/src/rhoknp/processors/kwja.py @@ -45,10 +45,10 @@ def __init__( tasks: List[str] = self.options[self.options.index("--tasks") + 1].split(",") if "word" in tasks: self._output_format = "knp" - elif "char" in tasks: - self._output_format = "words" elif "seq2seq" in tasks: self._output_format = "jumanpp" + elif "char" in tasks: + self._output_format = "words" elif "senter" in tasks: self._output_format = "line_by_line" elif "typo" in tasks: From 4eaf85e51c4cf9d93c77a7cfb03d775a14e5f65f Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 18 Sep 2023 17:45:39 +0900 Subject: [PATCH 11/13] update deps --- poetry.lock | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index cc9d2086..395cc2a5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1436,13 +1436,13 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "rich" -version = "13.5.2" +version = "13.5.3" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = true python-versions = ">=3.7.0" files = [ - {file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"}, - {file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"}, + {file = "rich-13.5.3-py3-none-any.whl", hash = "sha256:9257b468badc3d347e146a4faa268ff229039d4c2d176ab0cffb4c4fbc73d5d9"}, + {file = "rich-13.5.3.tar.gz", hash = "sha256:87b43e0543149efa1253f485cd845bb7ee54df16c9617b8a893650ab84b4acb6"}, ] [package.dependencies] @@ -1857,6 +1857,21 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", 
"sphinx"] test = ["argcomplete (>=2.0)", "pre-commit", "pytest", "pytest-mock"] +[[package]] +name = "traitlets" +version = "5.10.0" +description = "Traitlets Python configuration system" +optional = false +python-versions = ">=3.8" +files = [ + {file = "traitlets-5.10.0-py3-none-any.whl", hash = "sha256:417745a96681fbb358e723d5346a547521f36e9bd0d50ba7ab368fff5d67aa54"}, + {file = "traitlets-5.10.0.tar.gz", hash = "sha256:f584ea209240466e66e91f3c81aa7d004ba4cf794990b0c775938a1544217cd1"}, +] + +[package.extras] +docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] +test = ["argcomplete (>=3.0.3)", "mypy (>=1.5.1)", "pre-commit", "pytest (>=7.0,<7.5)", "pytest-mock", "pytest-mypy-testing"] + [[package]] name = "typer" version = "0.8.0" From 9aa5dcc0b014161f8c18ecaff6e07ea463a00930 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 18 Sep 2023 17:46:55 +0900 Subject: [PATCH 12/13] bump version to 1.5.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5da0293c..2a3a2268 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "rhoknp" -version = "1.5.0" +version = "1.5.1" description = "Yet another Python binding for Juman++/KNP/KWJA" license = "MIT" authors = [ From f89221b4873c82fa1606a9d7af2fd1173fb86e69 Mon Sep 17 00:00:00 2001 From: nobu-g Date: Mon, 18 Sep 2023 17:50:49 +0900 Subject: [PATCH 13/13] fix the example of applying kwja --- examples/apply_kwja.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/apply_kwja.py b/examples/apply_kwja.py index d74e3e87..4ed2e555 100644 --- a/examples/apply_kwja.py +++ b/examples/apply_kwja.py @@ -10,11 +10,11 @@ # Create a KWJA instance. kwja = KWJA() -# Apply KNP to a sentence. -sent = kwja.apply_to_sentence(sys.argv[1]) +# Apply KWJA to a document. +doc = kwja.apply_to_document(sys.argv[1]) # Get information. 
-for mrph in sent.morphemes: +for mrph in doc.morphemes: print(f"Text: {mrph.text}") print(f"Reading: {mrph.reading}") print(f"Lemma: {mrph.lemma}")