Skip to content

Commit

Permalink
Workflow for benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
scotts committed Nov 12, 2024
1 parent 8ad2e1b commit 8a6257a
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 14 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/benchmarks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Run the decoder benchmarks on every push to main and on PRs that touch
# benchmark-relevant paths, so regressions in the benchmark tooling are
# caught before merge.
name: Benchmarks

on:
  push:
    branches: [ main ]
  pull_request:
    paths:
      # NOTE: in GitHub Actions path filters '*' does not match '/', so the
      # recursive '**' glob is required to match files in nested directories
      # (e.g. benchmarks/decoders/benchmark_decoders.py).
      - src/torchcodec/**
      - benchmarks/**
      - .github/workflows/benchmarks.yaml

defaults:
  run:
    # Login shell (-l) so the conda environment activation works; -eo pipefail
    # makes any failing command or broken pipe fail the step.
    shell: bash -l -eo pipefail {0}

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
    steps:
      - name: Check out repo
        uses: actions/checkout@v3
      - name: Setup conda env
        uses: conda-incubator/setup-miniconda@v2
        with:
          auto-update-conda: true
          miniconda-version: "latest"
          activate-environment: test
          python-version: '3.12'
      - name: Update pip
        run: python -m pip install --upgrade pip
      - name: Install dependencies and FFmpeg
        run: |
          # TODO: torchvision and torchaudio shouldn't be needed. They were only added
          # to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
          python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
          conda install "ffmpeg=7.0.1" pkg-config -c conda-forge
          ffmpeg -version
      - name: Build and install torchcodec
        run: |
          python -m pip install -e ".[dev]" --no-build-isolation -vvv
      - name: Test generic decoder benchmark
        run: |
          python benchmarks/decoders/benchmark_decoders.py --bm_video_speed_min_run_seconds 1
      - name: Test README data generation benchmark
        run: |
          # --test_run uses small parameters and skips overwriting the
          # checked-in benchmark data file.
          python benchmarks/decoders/generate_readme_data.py --test_run
58 changes: 44 additions & 14 deletions benchmarks/decoders/generate_readme_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import json
import os
import platform
Expand All @@ -26,18 +27,48 @@
def main() -> None:
"""Benchmarks the performance of a few video decoders on synthetic videos"""

parser = argparse.ArgumentParser()
parser.add_argument(
"--test_run",
help="Test run only; use small values for experiments to ensure everything works. Does not overwrite the data file.",
action="store_true",
)
args = parser.parse_args()

# The logic is clearer internally if we invert the boolean. However, we want to
# maintain the external default that a test run is off by default.
data_generation_run = not args.test_run

if data_generation_run:
resolutions = ["1280x720"]
encodings = ["libx264"]
patterns = ["mandelbrot"]
fpses = [60]
gop_sizes = [600]
durations = [120]
pix_fmts = ["yuv420p"]
ffmpeg_path = "ffmpeg"
min_runtime_seconds = 30

# These are the number of uniform seeks we do in the seek+decode benchmark.
num_samples = 10
else:
resolutions = ["640x480"]
encodings = ["libx264"]
patterns = ["mandelbrot"]
fpses = [30]
gop_sizes = [20]
durations = [10] # if this goes too low, we hit EOF errors in some decoders
pix_fmts = ["yuv420p"]
ffmpeg_path = "ffmpeg"
min_runtime_seconds = 1

num_samples = 4

videos_dir_path = "/tmp/torchcodec_benchmarking_videos"
shutil.rmtree(videos_dir_path, ignore_errors=True)
os.makedirs(videos_dir_path)

resolutions = ["1280x720"]
encodings = ["libx264"]
patterns = ["mandelbrot"]
fpses = [60]
gop_sizes = [600]
durations = [120]
pix_fmts = ["yuv420p"]
ffmpeg_path = "ffmpeg"
generate_videos(
resolutions,
encodings,
Expand All @@ -61,15 +92,13 @@ def main() -> None:
decoder_dict["TorchAudio"] = TorchAudioDecoder()
decoder_dict["Decord"] = DecordAccurateBatch()

# These are the number of uniform seeks we do in the seek+decode benchmark.
num_samples = 10
video_files_paths = list(Path(videos_dir_path).glob("*.mp4"))
df_data = run_benchmarks(
decoder_dict,
video_files_paths,
num_samples,
num_sequential_frames_from_start=[100],
min_runtime_seconds=30,
min_runtime_seconds=min_runtime_seconds,
benchmark_video_creation=False,
)
df_data.append(
Expand All @@ -82,9 +111,10 @@ def main() -> None:
}
)

data_json = Path(__file__).parent / "benchmark_readme_data.json"
with open(data_json, "w") as write_file:
json.dump(df_data, write_file, sort_keys=True, indent=4)
if data_generation_run:
data_json = Path(__file__).parent / "benchmark_readme_data.json"
with open(data_json, "w") as write_file:
json.dump(df_data, write_file, sort_keys=True, indent=4)


if __name__ == "__main__":
Expand Down

0 comments on commit 8a6257a

Please sign in to comment.