Add Llama3.1 1B HTP to benchmark

pytorch · Dec 19, 2024 · 32a44a8
1 parent 62016d6
commit 32a44a8
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 4 deletions.
diff --git a/.ci/scripts/gather_benchmark_configs.py b/.ci/scripts/gather_benchmark_configs.py
@@ -33,8 +33,7 @@
     ],
     "android": [
         "qnn_q8",
-        # TODO: Add support for llama3 htp
-        # "llama3_qnn_htp",
+        "llama3_qnn_htp",
     ],
     "ios": [
         "coreml_fp16",

diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
@@ -132,7 +132,7 @@ jobs:
       matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
       fail-fast: false
     with:
-      runner: linux.2xlarge.memory
+      runner: linux.4xlarge.memory
       docker-image: executorch-ubuntu-22.04-qnn-sdk
       submodules: 'true'
       timeout: 60
@@ -243,10 +243,11 @@ jobs:
                     export PYTHONPATH=$(pwd)/..
 
                     DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
-                    python -m examples.qualcomm.oss_scripts.llama3_2.llama -- \
+                    python -m examples.qualcomm.oss_scripts.llama3_2.llama \
                       --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
                       --params "${DOWNLOADED_PATH}/params.json" \
                       --tokenizer_model "${DOWNLOADED_PATH}/tokenizer.model" \
+                      -b "cmake-android-out" \
                       --compile_only \
                       --ptq 16a4w \
                       -m SM8650 \