Merge pull request #50 from kaylode/dev

Another big update
kaylode · Apr 2, 2023 · 9cbc62b · 9cbc62b
2 parents c4623d9 + 65f3cd0
commit 9cbc62b
Show file tree

Hide file tree

Showing 125 changed files with 1,739 additions and 1,982 deletions.
diff --git a/.dvc/.gitignore b/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/.dvc/config b/.dvc/config
@@ -0,0 +1,4 @@
+[core]
+    remote = gdrive
+['remote "gdrive"']
+    url = gdrive://155tBftKDG8VSAWojOWT3exax3hz0Xuwg
diff --git a/.dvcignore b/.dvcignore
@@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
diff --git a/.github/workflows/clf.yml b/.github/workflows/clf.yml
@@ -4,10 +4,11 @@ on:
     branches:
       - master
       - 'V**'
-  pull_request:
-    branches:
-      - master
-      - 'V**'
+      - dev
+env:
+  GDRIVE_CREDENTIALS_DATA: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}
+  REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
 jobs:
   build:
     runs-on: ubuntu-latest
@@ -18,13 +19,40 @@ jobs:
         with:
           python-version: '3.8'
           architecture: 'x64'
+      - uses: actions/setup-node@v3
+        with:
+          node-version: '16'
+      - uses: iterative/setup-cml@v1
       - name: Display Python version
         run: python -c "import sys; print(sys.version)"
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu
-          pip install -e '.[cv,cv_classification]'
+          pip install -e '.[dev,cv,cv_classification]'
+          pip install dvc dvc-gdrive
+          npm install -g markdown-table-cli
+      - name: Download data
+        run: |
+          echo "$GDRIVE_CREDENTIALS_DATA" > credentials.json
+          dvc remote modify gdrive --local gdrive_user_credentials_file ./credentials.json
+          dvc pull
+          ls -la samples
       - name: Test with pytest
         run: |
           pytest tests/classification --capture=no
+      # Assemble report.md from the artifacts under runs/ and post it with CML.
+      - name: Display report
+        run: |
+          echo "# Classification Report" > report.md
+          echo "#### Metrics" >> report.md
+          cat runs/pytest_clf/Validation/metrics.json | md-table >> report.md
+          echo "#### Confusion Matrix" >> report.md
+          echo "![Confusion Matrix](runs/pytest_clf/Validation/cfm.png)" >> report.md
+          echo "#### Errorcases" >> report.md
+          # Alt text matches the figure it labels (errorcases.png), not the
+          # confusion matrix embedded above.
+          echo "![Errorcases](runs/pytest_clf/Validation/errorcases.png)" >> report.md
+          echo "------------------" >> report.md
+          echo "#### Hyperparameters Tuning" >> report.md
+          echo "Leaderboard" >> report.md
+          cat runs/optuna/clf/overview/leaderboard.json | md-table >> report.md
+          echo >> report.md
+          cml comment create report.md
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -0,0 +1,69 @@
+name: autobuild_docker
+on:
+  pull_request:
+    branches:
+      - master
+      - 'V**'
+      - dev
+
+env:
+  REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  GDRIVE_CREDENTIALS_DATA: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}
+  DOCKERHUB_TAG: kaylode/theseus:latest
+
+jobs:
+  run:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.8'
+          architecture: 'x64'
+      - uses: iterative/setup-cml@v1
+      - uses: iterative/setup-dvc@v1
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      # Write the credentials outside the checkout: the build step below uses
+      # `context: .`, so any file created in the workspace is sent to the
+      # builder as part of the build context.
+      - name: Create credentials file
+        run: |
+          echo "$GDRIVE_CREDENTIALS_DATA" > "$RUNNER_TEMP/credentials.json"
+      - name: Build Dockerfile
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          tags: ${{ env.DOCKERHUB_TAG }}
+          outputs: type=docker,dest=${{ github.workspace }}/image.tar
+          # Exposed to the build as the secret with id `credentials`.
+          secret-files: |
+            credentials=${{ runner.temp }}/credentials.json
+
+      - name: Free up space
+        run: |
+          docker system prune -f
+          sudo rm -rf "/usr/local/share/boost"
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
+      - name: Docker name
+        run: |
+          docker load -i ${{ github.workspace }}/image.tar
+          docker image inspect ${{ env.DOCKERHUB_TAG }}
+
+      - name: Create outputs folder
+        run: |
+          mkdir -p outputs
+
+      - name: Test Docker
+        uses: addnab/docker-run-action@v3
+        with:
+          image: ${{ env.DOCKERHUB_TAG }}
+          options: --rm -v ${{ github.workspace }}/outputs/:/workspace/runs/
+          run: |
+              pytest tests --capture=no
+
+      - name: where am I
+        run: |
+          pwd
+          ls -la
+          ls -la outputs
diff --git a/.github/workflows/segm.yml b/.github/workflows/segm.yml
@@ -4,10 +4,11 @@ on:
     branches:
       - master
       - 'V**'
-  pull_request:
-    branches:
-      - master
-      - 'V**'
+      - dev
+env:
+  GDRIVE_CREDENTIALS_DATA: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}
+  REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
 jobs:
   build:
     runs-on: ubuntu-latest
@@ -18,13 +19,34 @@ jobs:
         with:
           python-version: '3.8'
           architecture: 'x64'
+      - uses: actions/setup-node@v3
+        with:
+          node-version: '16'
+      - uses: iterative/setup-cml@v1
       - name: Display Python version
         run: python -c "import sys; print(sys.version)"
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu
-          pip install -e '.[cv,cv_semantic]'
+          pip install -e '.[dev,cv,cv_semantic]'
+          pip install dvc dvc-gdrive
+          npm install -g markdown-table-cli
+      - name: Download data
+        run: |
+          echo "$GDRIVE_CREDENTIALS_DATA" > credentials.json
+          dvc remote modify gdrive --local gdrive_user_credentials_file ./credentials.json
+          dvc pull
+          ls -la samples
       - name: Test with pytest
         run: |
           pytest tests/semantic --capture=no
+      - name: Display report
+        run: |
+          echo "# Semantic Report" > report.md
+          echo "#### Metrics" >> report.md
+          cat runs/pytest_segm/Validation/metrics.json | md-table >> report.md
+          echo >> report.md
+          echo "#### Prediction" >> report.md
+          echo "![Prediction](runs/pytest_segm/Validation/prediction.png)" >> report.md
+          cml comment create report.md
diff --git a/.github/workflows/tablr.yml b/.github/workflows/tablr.yml
@@ -4,10 +4,11 @@ on:
     branches:
       - master
       - 'V**'
-  pull_request:
-    branches:
-      - master
-      - 'V**'
+      - dev
+env:
+  GDRIVE_CREDENTIALS_DATA: ${{ secrets.GDRIVE_CREDENTIALS_DATA }}
+  REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
 jobs:
   build:
     runs-on: ubuntu-latest
@@ -18,13 +19,48 @@ jobs:
         with:
           python-version: '3.8'
           architecture: 'x64'
+      - uses: actions/setup-node@v3
+        with:
+          node-version: '16'
+      - uses: iterative/setup-cml@v1
       - name: Display Python version
         run: python -c "import sys; print(sys.version)"
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu
-          pip install -e '.[tabular, tabular_classification]'
+          pip install -e '.[dev,tabular,tabular_classification]'
+          pip install dvc dvc-gdrive
+          npm install -g markdown-table-cli
+      - name: Download data
+        run: |
+          echo "$GDRIVE_CREDENTIALS_DATA" > credentials.json
+          dvc remote modify gdrive --local gdrive_user_credentials_file ./credentials.json
+          dvc pull
+          ls -la samples
       - name: Test with pytest
         run: |
           pytest tests/tabular --capture=no
+      - name: Display report
+        run: |
+          echo "# Tabular Classification Report" > report.md
+          echo "#### Metrics" >> report.md
+          cat runs/pytest_tablr/Validation/metrics.json | md-table >> report.md
+          echo "#### SHAP train" >> report.md
+          echo "![SHAP](runs/pytest_tablr/Importance/SHAP/train.png)" >> report.md
+          echo "#### SHAP val" >> report.md
+          echo "![SHAP](runs/pytest_tablr/Importance/SHAP/val.png)" >> report.md
+          echo "------------------" >> report.md
+
+          echo "#### Hyperparameters Tuning" >> report.md
+          echo "Leaderboard" >> report.md
+          cat runs/optuna/tablr/overview/leaderboard.json | md-table >> report.md
+          echo >> report.md
+          echo "Figures" >> report.md
+          echo "![History](runs/optuna/tablr/overview/history.png)" >> report.md
+          echo "![Contour plot](runs/optuna/tablr/overview/contour.png)" >> report.md
+          echo "![Parallel](runs/optuna/tablr/overview/parallel_coordinate.png)" >> report.md
+          echo "![Importance](runs/optuna/tablr/overview/param_importances.png)" >> report.md
+          echo "![Slice](runs/optuna/tablr/overview/slice.png)" >> report.md
+
+          cml comment create report.md
diff --git a/Dockerfile b/Dockerfile
@@ -10,40 +10,62 @@
 ARG BASE_IMAGE=ubuntu:18.04
 
 # Instal basic utilities
-ENV DEBIAN_FRONTEND noninteractiveee
-RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
-    apt-get update && apt-get install -y --no-install-recommends \
+FROM ${BASE_IMAGE} as dev-base
+# `upgrade` needs `-y` as well as `install`: the build has no terminal to
+# answer apt's confirmation prompt, so without it the step aborts whenever
+# upgrades are pending.
+RUN apt-get clean && apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
     build-essential \
     ca-certificates \
     ccache \
     cmake \
     curl \
     git \
     gcc \
+    wget \
     libjpeg-dev \
+    zip \
+    swig python3-dev \
     unzip bzip2 ffmpeg libsm6 libxext6 \
     libpng-dev && \
     rm -rf /var/lib/apt/lists/*
-
-RUN curl -fsSL -v -o ~/miniconda.sh -O  https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh  && \
-    chmod +x ~/miniconda.sh && \
-    ~/miniconda.sh -b -p /opt/conda && \
-    rm ~/miniconda.sh && \
-    /opt/conda/bin/conda install -c pytorch -c nvidia -y \
-    python=${PYTHON_VERSION} \
-    pytorch=${PYTORCH_VERSION} torchvision "pytorch-cuda=${CUDA_VERSION}" && \
-    /opt/conda/bin/conda clean -ya
-
 RUN /usr/sbin/update-ccache-symlinks
 RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
-
 ENV PATH /opt/conda/bin:$PATH
+
+# Install environment
+FROM dev-base as conda-installs
+ARG PYTHON_VERSION=3.9
+ARG CUDA_VERSION=11.3
+ARG PYTORCH_VERSION=1.12.1
+ARG CUDA_CHANNEL=nvidia
+ARG INSTALL_CHANNEL=pytorch
+ENV CONDA_OVERRIDE_CUDA=${CUDA_VERSION}
+RUN curl -fsSL -v -o ~/mambaforge.sh -O https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh && \
+    chmod +x ~/mambaforge.sh && \
+    ~/mambaforge.sh -b -p /opt/mamba && \
+    rm ~/mambaforge.sh && \
+    /opt/mamba/bin/mamba install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y \
+    python=${PYTHON_VERSION} \
+    pytorch=${PYTORCH_VERSION} torchvision "cudatoolkit=${CUDA_VERSION}" && \
+    /opt/mamba/bin/mamba clean -ya
+
+ENV PATH /opt/mamba/bin:$PATH
 ENV NVIDIA_VISIBLE_DEVICES all
 ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
 ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
 ENV PYTORCH_VERSION ${PYTORCH_VERSION}
 
-# Install
+# Install dependencies
 COPY ./ /workspace/
 WORKDIR /workspace/
-RUN /opt/conda/bin/python -m pip install -e .
+RUN /opt/mamba/bin/python -m pip install --upgrade pip && \
+    /opt/mamba/bin/python -m pip install -e .[cv,cv_classification,cv_semantic,cv_detection,nlp,nlp_retrieval,tabular,tabular_classification,dev] && \
+    /opt/mamba/bin/python -m pip install dvc dvc-gdrive && \
+    /opt/mamba/bin/python -m pip install -U timm
+
+# Pull data from GDrive.
+# The secret mount is only present while this single RUN executes. The
+# credentials are staged in a temporary file, used by `dvc pull`, and removed
+# before the layer is committed, so this step does not store them in the image.
+# NOTE(review): the copy is made because the secret mount is read-only and DVC
+# may need to rewrite the credentials file; confirm against the dvc-gdrive docs.
+RUN --mount=type=secret,id=credentials \
+    cp /run/secrets/credentials /tmp/credentials.json && \
+    dvc remote modify gdrive --local gdrive_user_credentials_file /tmp/credentials.json && \
+    dvc pull && \
+    dvc remote modify gdrive --local --unset gdrive_user_credentials_file && \
+    rm -f /tmp/credentials.json
+
+ENTRYPOINT ["/bin/bash"]
diff --git a/configs/base/globals.yaml b/configs/base/globals.yaml
@@ -0,0 +1,15 @@
+global:
+  exp_name: null
+  exist_ok: false
+  debug: false
+  save_dir: runs
+  device: cuda:0
+  pretrained: null
+  resume: null
+trainer:
+  name: SupervisedTrainer
+  args:
+    num_iterations: 10000
+    clip_grad: 1.0
+    evaluate_interval: 1
+    use_fp16: true
diff --git a/configs/base/optimizer.yaml b/configs/base/optimizer.yaml
@@ -0,0 +1,16 @@
+optimizer:
+  name: AdamW
+  args:
+    lr: 0.001
+    weight_decay: 0.0005
+    betas:
+      - 0.937
+      - 0.999
+scheduler:
+  name: SchedulerWrapper
+  args:
+    scheduler_name: cosine2
+    t_initial: 7
+    t_mul: 0.9
+    eta_mul: 0.9
+    eta_min: 1.0e-06
diff --git a/configs/classification/optuna/pipeline.yaml b/configs/classification/optuna/pipeline.yaml
@@ -0,0 +1,24 @@
+includes:
+  - configs/base/globals.yaml
+  - configs/base/optimizer.yaml
+  - configs/classification/transform.yaml
+  - configs/classification/pipeline.yaml
+
+trainer:
+  name: SupervisedTrainer
+  args:
+    num_iterations: 10
+    clip_grad: null
+    evaluate_interval: 0
+    use_fp16: false
+
+callbacks: []
+
+optimizer:
+  name: AdamW
+  args:
+    lr: [0.0001, 0.001]
+
+optuna:
+  float:
+    - optimizer.args.lr