Skip to content

feat: add chunking_strategy kwarg #959

feat: add chunking_strategy kwarg

feat: add chunking_strategy kwarg #959

Workflow file for this run

name: CI
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
PYTHON_VERSION: "3.10"
PIPELINE_FAMILY: "general"
jobs:
setup:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/cache@v3
id: virtualenv-cache
with:
path: |
.venv
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/test.txt') }}
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Setup virtual environment (no cache hit)
if: steps.virtualenv-cache.outputs.cache-hit != 'true'
run: |
python${{ env.PYTHON_VERSION }} -m venv .venv
source .venv/bin/activate
make install-ci
lint:
runs-on: ubuntu-latest
needs: setup
steps:
- uses: actions/checkout@v4
- uses: actions/cache@v3
id: virtualenv-cache
with:
path: |
.venv
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/test.txt') }}
- name: Lint
run: |
source .venv/bin/activate
make check
shellcheck:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: ShellCheck
uses: ludeeus/action-shellcheck@master
test:
runs-on: ubuntu-latest
needs: [setup, lint]
steps:
- uses: actions/checkout@v4
- uses: actions/cache@v3
id: virtualenv-cache
with:
path: |
.venv
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/test.txt') }}
- name: Run core tests
run: |
source .venv/bin/activate
sudo apt-get update && sudo apt-get install --yes poppler-utils libreoffice
make install-pandoc
sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5
sudo apt-get install -y tesseract-ocr tesseract-ocr-kor
tesseract --version
make install-nltk-models
make test
make check-coverage
changelog:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- if: github.ref != 'refs/heads/main'
uses: dorny/paths-filter@v2
id: changes
with:
filters: |
src:
- 'doc_recipe/**'
- 'recipe-notebooks/**'
- if: steps.changes.outputs.src == 'true' && github.ref != 'refs/heads/main'
uses: dangoslen/changelog-enforcer@v3
# TODO - figure out best practice for caching docker images
# (Using the virtualenv to get pytest)
test_dockerfile:
runs-on: ubuntu-latest
needs: [setup, lint]
steps:
- uses: actions/checkout@v4
- uses: actions/cache@v3
id: virtualenv-cache
with:
path: |
.venv
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/test.txt') }}
- name: Test Dockerfile
run: |
source .venv/bin/activate
make docker-build
make docker-test