[pt-PT] Added compound words to AO45 and AO90 (#48) #238
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Build

# Triggers:
#   - pushes that touch any of the listed paths (the last entry is presumably
#     this workflow file itself — confirm the file name),
#   - pull requests targeting "main",
#   - manual runs via workflow_dispatch.
on:
  push:
    paths:
      - "data/**"
      - "pos_tagger_scripts/build-lt.sh"
      - "lt-changes/**"
      - ".github/workflows/build.yml"
  pull_request:
    branches:
      - "main"
  workflow_dispatch: {}
jobs:
  # Fast sanity checks on the repository files; `build` only starts once
  # these pass (see `needs: check_files` below).
  check_files:
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v4
      - name: Check Hunspell encoding
        run: bash ./scripts/check_encoding.sh
      - name: Check tagger file newlines
        run: bash ./scripts/check_newlines.sh

  # Builds the POS tagging and spelling dictionaries against a LanguageTool
  # checkout, runs the LT tests, and on `v*` tag pushes uploads the generated
  # sources as the `src` artifact consumed by `deploy`.
  build:
    runs-on: ubuntu-latest
    needs: check_files
    strategy:
      matrix:
        python-version: ["3.11"]
    env:
      LT_BRANCH: "master"
    steps:
      # Everything lives under <workspace>/lt; the paths are exported through
      # GITHUB_ENV so that later steps can read them from the `env` context.
      - name: Set paths
        run: |
          lt_dir="${{ github.workspace }}/lt"
          echo "LT_HOME=${lt_dir}/languagetool" >> "$GITHUB_ENV"
          echo "PT_DICT_HOME=${lt_dir}/portuguese-pos-dict" >> "$GITHUB_ENV"
          echo "LT_TMP_DIR=${lt_dir}/tmp" >> "$GITHUB_ENV"
          echo "HUNSPELL_DIR=${lt_dir}/hunspell" >> "$GITHUB_ENV"
      - uses: actions/checkout@v4
        with:
          path: ${{ env.PT_DICT_HOME }}
          # The `github` context carries the repository that owns this
          # workflow run; nothing in this workflow sets a
          # GITHUB_ACTION_REPOSITORY variable for the `env` context to read.
          repository: ${{ github.repository }}
          fetch-depth: 0
          submodules: recursive
      - name: Get number of CPU cores
        uses: SimenB/github-actions-cpu-cores@v1
        id: cpu-cores
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Set up Poetry
        uses: Gr1N/setup-poetry@v8
      - name: Check Poetry config
        working-directory: ${{ env.PT_DICT_HOME }}
        run: poetry check
      - name: Install Python dependencies
        working-directory: ${{ env.PT_DICT_HOME }}
        run: poetry install
        env:
          POETRY_HTTP_BASIC_GITLAB_USERNAME: "lt-ai-package-registry"
          POETRY_HTTP_BASIC_GITLAB_PASSWORD: ${{ secrets.POETRY_HTTP_BASIC_GITLAB_PASSWORD }}
      # Throw-away version string: "test-<date>-<random number>".
      - name: Set dictionary version
        working-directory: ${{ env.PT_DICT_HOME }}
        run: echo "PT_DICT_VERSION=test-$(date "+%Y-%m-%d")-$RANDOM" >> "$GITHUB_ENV"
      - name: Set up Perl
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: "5.38"
      - name: Install Perl dependencies
        run: cpan install "Switch" "Text::Unaccent::PurePerl"
      - name: Checkout Hunspell
        uses: actions/checkout@v4
        with:
          repository: hunspell/hunspell
          path: ${{ env.HUNSPELL_DIR }}
      # NOTE(review): actions/cache documents `~` expansion in `path`, but not
      # shell variables such as $HOME — confirm these two binaries are really
      # being saved and restored before relying on the cache-hit shortcut.
      - name: Cache Hunspell
        uses: actions/cache@v4
        with:
          path: |
            $HOME/local/bin/unmunch
            $HOME/local/bin/hunspell
          key: hunspell-${{ hashFiles('lt/hunspell/src', 'lt/hunspell/po', 'lt/hunspell/m4', 'lt/hunspell/Makefile.am', 'lt/hunspell/configure.ac') }}
          save-always: true
        id: hunspell-cache
      - name: Compile Hunspell
        if: steps.hunspell-cache.outputs.cache-hit != 'true'
        working-directory: ${{ env.HUNSPELL_DIR }}
        run: |
          # -y keeps the install non-interactive on the runner.
          sudo apt-get install -y autoconf automake autopoint libtool
          autoreconf -vfi
          ./configure --prefix $HOME/local
          make
          sudo make install
          sudo ldconfig
          export PATH=$HOME/local/bin:$PATH
          which unmunch
      - name: Set up JDK 11 for x64
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          architecture: x64
      - name: Checkout LT
        uses: actions/checkout@v4
        with:
          repository: languagetool-org/languagetool
          path: ${{ env.LT_HOME }}
          token: ${{ secrets.LT_OS_TOKEN }}
          ref: ${{ env.LT_BRANCH }}
      # The key is derived from the LT pom.xml and Java sources, so both LT
      # build steps below are skipped on a cache hit.
      - name: Cache LT Build
        uses: actions/cache@v4
        with:
          path: |
            ${{ env.LT_HOME }}/languagetool-standalone/target
            ${{ env.LT_HOME }}/languagetool-dev/target
            ~/.m2
          key: lt-build-${{ hashFiles('lt/languagetool/pom.xml', 'lt/languagetool/**/*.java') }}
          save-always: true
        id: lt-cache
      - name: Build LT
        if: steps.lt-cache.outputs.cache-hit != 'true'
        run: mvn clean install -DskipTests
        working-directory: ${{ env.LT_HOME }}
      - name: Build LT Dev
        if: steps.lt-cache.outputs.cache-hit != 'true'
        run: mvn clean compile assembly:single
        working-directory: ${{ env.LT_HOME }}/languagetool-dev
      - name: Build POS tagging dictionary
        working-directory: ${{ env.PT_DICT_HOME }}
        run: |
          # Make the Hunspell tools installed under $HOME/local visible.
          export PATH=$HOME/local/bin:$PATH
          poetry run python "dict_tools/scripts/build_tagger_dicts.py" \
            --language pt \
            --no-force-compile \
            --install-version "${{ env.PT_DICT_VERSION }}" \
            --force-install
      - name: Update dictionary version in LT pom.xml
        working-directory: ${{ env.PT_DICT_HOME }}
        run: |
          poetry run python "dict_tools/scripts/update_pom.py" \
            --new-version "${{ env.PT_DICT_VERSION }}" \
            --package-name "portuguese-pos-dict"
      # This step will also re-compile LT with the new dictionary version in pom.xml
      - name: Build spelling dictionaries
        working-directory: ${{ env.PT_DICT_HOME }}
        run: |
          mkdir -p "${{ env.LT_TMP_DIR }}/compounds"
          export PATH=$HOME/local/bin:$PATH
          poetry run python "dict_tools/scripts/build_spelling_dicts.py" \
            --language pt \
            --tmp-dir "${{ env.LT_TMP_DIR }}" \
            --force-install \
            --install-version "${{ env.PT_DICT_VERSION }}" \
            --max-threads "${{ steps.cpu-cores.outputs.count }}"
      # Skippable through a "[skip lt test]" commit-message marker or a
      # "skip_lt_test" pull-request label.
      # NOTE(review): confirm that the pull_request payload actually exposes
      # `head.commit.message`; if it does not, only the label has any effect.
      - name: Test LT
        working-directory: ${{ env.LT_HOME }}
        if: >
          (contains(github.event.pull_request.head.commit.message, '[skip lt test]') == false) &&
          (contains(github.event.pull_request.labels.*.name, 'skip_lt_test') == false)
        run: |
          mvn clean install -DskipTests
          ./build.sh pt test
      # Only `v*` tag pushes publish the generated sources for `deploy`.
      - name: Archive binaries
        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
        uses: actions/upload-artifact@v4
        with:
          name: src
          path: ${{ env.PT_DICT_HOME }}/results/java-lt/src

  # Runs only for `v*` tag pushes: takes the `src` artifact produced by
  # `build`, stamps the tag-derived version into the package pom, and runs
  # the Maven `release` profile deploy.
  deploy:
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
    runs-on: ubuntu-latest
    needs: build
    # Mirrors the `build` matrix: without it `matrix.python-version` is empty
    # in this job and setup-python receives no version at all.
    strategy:
      matrix:
        python-version: ["3.11"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: recursive
      # Necessary since downloading artefacts with an 'overwrite' option is not supported
      - name: Empty out src directory
        run: rm -rf results/java-lt/src/*
      - name: Download artefact from upstream workflow
        uses: actions/download-artifact@v4
        with:
          name: src
          path: results/java-lt/src/
      # Version = most recent tag with its leading "v" removed.
      - name: Set dictionary version
        run: echo "PT_DICT_VERSION=$(git describe --tags --abbrev=0 | sed 's/^v//g')" >> "$GITHUB_ENV"
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Set up Poetry
        uses: Gr1N/setup-poetry@v8
      - name: Check Poetry config
        run: poetry check
      - name: Install Python dependencies
        run: poetry install
        env:
          POETRY_HTTP_BASIC_GITLAB_USERNAME: "lt-ai-package-registry"
          POETRY_HTTP_BASIC_GITLAB_PASSWORD: ${{ secrets.POETRY_HTTP_BASIC_GITLAB_PASSWORD }}
      - name: Add dictionary version to package pom
        run: |
          poetry run python "dict_tools/scripts/update_pom.py" \
            --new-version "${{ env.PT_DICT_VERSION }}" \
            --package-name "portuguese-pos-dict" \
            --verbosity debug
      - name: Set up JDK 11 for x64
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          architecture: x64
      # The key is handed over through the environment rather than being
      # templated into the script text, so its contents are never parsed as
      # shell source.
      - name: Import GPG Key
        env:
          GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }}
        run: echo "$GPG_PRIVATE_KEY" | gpg --batch --import
      - name: Deploy to SonaType
        env:
          GPG_KEYNAME: ${{ secrets.GPG_KEYNAME }}
          GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
          SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }}
          SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }}
        working-directory: results/java-lt
        run: mvn clean deploy -P release -s ${{ github.workspace }}/results/java-lt/settings.xml