Skip to content

Commit

Permalink
Merge pull request #50 from kaylode/dev
Browse files Browse the repository at this point in the history
Another big update
  • Loading branch information
kaylode committed Apr 2, 2023
2 parents c4623d9 + 65f3cd0 commit 9cbc62b
Show file tree
Hide file tree
Showing 125 changed files with 1,739 additions and 1,982 deletions.
3 changes: 3 additions & 0 deletions .dvc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/config.local
/tmp
/cache
4 changes: 4 additions & 0 deletions .dvc/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# DVC repository configuration.
[core]
# Remote selected under [core]; the name refers to the 'remote "gdrive"' section below.
remote = gdrive
['remote "gdrive"']
# Storage location of the remote (gdrive:// URL scheme, i.e. a Google Drive folder ID).
url = gdrive://155tBftKDG8VSAWojOWT3exax3hz0Xuwg
3 changes: 3 additions & 0 deletions .dvcignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
38 changes: 33 additions & 5 deletions .github/workflows/clf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ on:
branches:
- master
- 'V**'
pull_request:
branches:
- master
- 'V**'
- dev
env:
GDRIVE_CREDENTIALS_DATA: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}

jobs:
build:
runs-on: ubuntu-latest
Expand All @@ -18,13 +19,40 @@ jobs:
with:
python-version: '3.8'
architecture: 'x64'
- uses: actions/setup-node@v3
with:
node-version: '16'
- uses: iterative/setup-cml@v1
- name: Display Python version
run: python -c "import sys; print(sys.version)"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu
pip install -e '.[cv,cv_classification]'
pip install -e '.[dev,cv,cv_classification]'
pip install dvc dvc-gdrive
npm install -g markdown-table-cli
- name: Download data
run: |
echo "$GDRIVE_CREDENTIALS_DATA" > credentials.json
dvc remote modify gdrive --local gdrive_user_credentials_file ./credentials.json
dvc pull
ls -la samples
- name: Test with pytest
run: |
pytest tests/classification --capture=no
- name: Display report
run: |
echo "# Classification Report" > report.md
echo "#### Metrics" >> report.md
cat runs/pytest_clf/Validation/metrics.json | md-table >> report.md
echo "#### Confusion Matrix" >> report.md
echo "![Confusion Matrix](runs/pytest_clf/Validation/cfm.png)" >> report.md
echo "#### Errorcases" >> report.md
echo "![Confusion Matrix](runs/pytest_clf/Validation/errorcases.png)" >> report.md
echo "------------------" >> report.md
echo "#### Hyperparameters Tuning" >> report.md
echo "Leaderboard" >> report.md
cat runs/optuna/clf/overview/leaderboard.json | md-table >> report.md
echo >> report.md
cml comment create report.md
69 changes: 69 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Builds the project's Docker image on pull requests and runs the test suite
# inside the freshly built container. The image is exported to a tarball and
# loaded locally; nothing is pushed to a registry by this workflow.
name: autobuild_docker
on:
  pull_request:
    branches:
      - master
      - 'V**'
      - dev

env:
  REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  DOCKERHUB_TAG: kaylode/theseus:latest

jobs:
  run:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.8
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'
          architecture: 'x64'
      - uses: iterative/setup-cml@v1
      - uses: iterative/setup-dvc@v1
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Create credentials file
        # The secret is exposed to this step only (not to every action in the
        # job), and the file is written to the runner's temp directory so it
        # stays out of the Docker build context (`context: .` below).
        env:
          GDRIVE_CREDENTIALS_DATA: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}
        run: |
          printf '%s\n' "$GDRIVE_CREDENTIALS_DATA" > "$RUNNER_TEMP/credentials.json"
      - name: Build Dockerfile
        uses: docker/build-push-action@v4
        with:
          context: .
          tags: ${{ env.DOCKERHUB_TAG }}
          # Export the image as a tarball instead of pushing it.
          outputs: type=docker,dest=${{ github.workspace }}/image.tar
          # Mounted in the build as the BuildKit secret with id "credentials".
          secret-files: |
            credentials=${{ runner.temp }}/credentials.json
      - name: Free up space
        run: |
          docker system prune -f
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      - name: Load and inspect image
        run: |
          docker load -i "${{ github.workspace }}/image.tar"
          docker image inspect "${{ env.DOCKERHUB_TAG }}"
      - name: Create outputs folder
        run: |
          mkdir -p outputs
      - name: Test Docker
        uses: addnab/docker-run-action@v3
        with:
          image: ${{ env.DOCKERHUB_TAG }}
          # Bind-mount ./outputs over /workspace/runs so run artifacts are
          # visible on the runner after the container exits.
          options: --rm -v ${{ github.workspace }}/outputs/:/workspace/runs/
          run: |
            pytest tests --capture=no
      - name: List workspace and outputs
        run: |
          pwd
          ls -la
          ls -la outputs
32 changes: 27 additions & 5 deletions .github/workflows/segm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ on:
branches:
- master
- 'V**'
pull_request:
branches:
- master
- 'V**'
- dev
env:
GDRIVE_CREDENTIALS_DATA: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}

jobs:
build:
runs-on: ubuntu-latest
Expand All @@ -18,13 +19,34 @@ jobs:
with:
python-version: '3.8'
architecture: 'x64'
- uses: actions/setup-node@v3
with:
node-version: '16'
- uses: iterative/setup-cml@v1
- name: Display Python version
run: python -c "import sys; print(sys.version)"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu
pip install -e '.[cv,cv_semantic]'
pip install -e '.[dev,cv,cv_semantic]'
pip install dvc dvc-gdrive
npm install -g markdown-table-cli
- name: Download data
run: |
echo "$GDRIVE_CREDENTIALS_DATA" > credentials.json
dvc remote modify gdrive --local gdrive_user_credentials_file ./credentials.json
dvc pull
ls -la samples
- name: Test with pytest
run: |
pytest tests/semantic --capture=no
- name: Display report
run: |
echo "# Semantic Report" > report.md
echo "#### Metrics" >> report.md
cat runs/pytest_segm/Validation/metrics.json | md-table >> report.md
echo >> report.md
echo "#### Prediction" >> report.md
echo "![Prediction](runs/pytest_segm/Validation/prediction.png)" >> report.md
cml comment create report.md
46 changes: 41 additions & 5 deletions .github/workflows/tablr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ on:
branches:
- master
- 'V**'
pull_request:
branches:
- master
- 'V**'
- dev
env:
GDRIVE_CREDENTIALS_DATA: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}

jobs:
build:
runs-on: ubuntu-latest
Expand All @@ -18,13 +19,48 @@ jobs:
with:
python-version: '3.8'
architecture: 'x64'
- uses: actions/setup-node@v3
with:
node-version: '16'
- uses: iterative/setup-cml@v1
- name: Display Python version
run: python -c "import sys; print(sys.version)"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu
pip install -e '.[tabular, tabular_classification]'
pip install -e '.[dev,tabular,tabular_classification]'
pip install dvc dvc-gdrive
npm install -g markdown-table-cli
- name: Download data
run: |
echo "$GDRIVE_CREDENTIALS_DATA" > credentials.json
dvc remote modify gdrive --local gdrive_user_credentials_file ./credentials.json
dvc pull
ls -la samples
- name: Test with pytest
run: |
pytest tests/tabular --capture=no
- name: Display report
run: |
echo "# Tabular Classification Report" > report.md
echo "#### Metrics" >> report.md
cat runs/pytest_tablr/Validation/metrics.json | md-table >> report.md
echo "#### SHAP train" >> report.md
echo "![SHAP](runs/pytest_tablr/Importance/SHAP/train.png)" >> report.md
echo "#### SHAP val" >> report.md
echo "![SHAP](runs/pytest_tablr/Importance/SHAP/val.png)" >> report.md
echo "------------------" >> report.md
echo "#### Hyperparameters Tuning" >> report.md
echo "Leaderboard" >> report.md
cat runs/optuna/tablr/overview/leaderboard.json | md-table >> report.md
echo >> report.md
echo "Figures" >> report.md
echo "![History](runs/optuna/tablr/overview/history.png)" >> report.md
echo "![Contour plot](runs/optuna/tablr/overview/contour.png)" >> report.md
echo "![Parallel](runs/optuna/tablr/overview/parallel_coordinate.png)" >> report.md
echo "![Importance](runs/optuna/tablr/overview/param_importances.png)" >> report.md
echo "![Slice](runs/optuna/tablr/overview/slice.png)" >> report.md
cml comment create report.md
54 changes: 38 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,40 +10,62 @@
ARG BASE_IMAGE=ubuntu:18.04

# Install basic utilities
ENV DEBIAN_FRONTEND noninteractiveee
RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
apt-get update && apt-get install -y --no-install-recommends \
FROM ${BASE_IMAGE} as dev-base
RUN apt-get clean && apt-get update && apt-get upgrade && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
ccache \
cmake \
curl \
git \
gcc \
wget \
libjpeg-dev \
zip \
swig python3-dev \
unzip bzip2 ffmpeg libsm6 libxext6 \
libpng-dev && \
rm -rf /var/lib/apt/lists/*

RUN curl -fsSL -v -o ~/miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
chmod +x ~/miniconda.sh && \
~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
/opt/conda/bin/conda install -c pytorch -c nvidia -y \
python=${PYTHON_VERSION} \
pytorch=${PYTORCH_VERSION} torchvision "pytorch-cuda=${CUDA_VERSION}" && \
/opt/conda/bin/conda clean -ya

RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache

ENV PATH /opt/conda/bin:$PATH

# Install environment
FROM dev-base as conda-installs
ARG PYTHON_VERSION=3.9
ARG CUDA_VERSION=11.3
ARG PYTORCH_VERSION=1.12.1
ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=pytorch
ENV CONDA_OVERRIDE_CUDA=${CUDA_VERSION}
RUN curl -fsSL -v -o ~/mambaforge.sh -O https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh && \
chmod +x ~/mambaforge.sh && \
~/mambaforge.sh -b -p /opt/mamba && \
rm ~/mambaforge.sh && \
/opt/mamba/bin/mamba install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y \
python=${PYTHON_VERSION} \
pytorch=${PYTORCH_VERSION} torchvision "cudatoolkit=${CUDA_VERSION}" && \
/opt/mamba/bin/mamba clean -ya

ENV PATH /opt/mamba/bin:$PATH
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV PYTORCH_VERSION ${PYTORCH_VERSION}

# Install
# Install dependencies
COPY ./ /workspace/
WORKDIR /workspace/
RUN /opt/conda/bin/python -m pip install -e .
RUN /opt/mamba/bin/python -m pip install --upgrade pip && \
/opt/mamba/bin/python -m pip install -e .[cv,cv_classification,cv_semantic,cv_detection,nlp,nlp_retrieval,tabular,tabular_classification,dev] && \
/opt/mamba/bin/python -m pip install dvc dvc-gdrive && \
/opt/mamba/bin/python -m pip install -U timm

# Pull data from GDrive
RUN --mount=type=secret,id=credentials \
CREDENTIALS=$(cat /run/secrets/credentials) \
&& echo "$CREDENTIALS" > /workspace/credentials.json
RUN dvc remote modify gdrive --local gdrive_user_credentials_file /workspace/credentials.json
RUN dvc pull

ENTRYPOINT ["/bin/bash"]
15 changes: 15 additions & 0 deletions configs/base/globals.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Base settings shared by pipeline configs; pulled in through an `includes:`
# list (e.g. by configs/classification/optuna/pipeline.yaml).
global:
  exp_name: null
  exist_ok: false
  debug: false
  save_dir: runs
  device: 'cuda:0'
  pretrained: null
  resume: null

# Trainer defaults. Including configs may redefine this section
# (the optuna pipeline config declares its own `trainer` block).
trainer:
  name: SupervisedTrainer
  args:
    num_iterations: 10000
    clip_grad: 1.0
    evaluate_interval: 1
    use_fp16: true
16 changes: 16 additions & 0 deletions configs/base/optimizer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Base optimizer and learning-rate scheduler settings, pulled in through an
# `includes:` list (e.g. by configs/classification/optuna/pipeline.yaml).
optimizer:
  name: AdamW
  args:
    lr: 0.001
    weight_decay: 0.0005
    betas: [0.937, 0.999]

scheduler:
  name: SchedulerWrapper
  args:
    scheduler_name: cosine2
    t_initial: 7
    t_mul: 0.9
    eta_mul: 0.9
    eta_min: 1.0e-06
24 changes: 24 additions & 0 deletions configs/classification/optuna/pipeline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Hyperparameter-tuning pipeline for classification: starts from the base and
# classification configs listed under `includes`, then redefines the sections
# below.
includes:
  - configs/base/globals.yaml
  - configs/base/optimizer.yaml
  - configs/classification/transform.yaml
  - configs/classification/pipeline.yaml

# Short trainer settings for this config (10 iterations, no grad clipping,
# fp16 off).
trainer:
  name: SupervisedTrainer
  args:
    num_iterations: 10
    clip_grad: null
    evaluate_interval: 0
    use_fp16: false

callbacks: []

optimizer:
  name: AdamW
  args:
    # Two values instead of a single lr; presumably the [low, high] search
    # range for the tuned parameter - TODO confirm against the optuna loader.
    lr:
      - 0.0001
      - 0.001

# Dotted config paths to tune, grouped by value type.
optuna:
  float:
    - optimizer.args.lr
Loading

0 comments on commit 9cbc62b

Please sign in to comment.