Skip to content

Commit

Permalink
Add Llama3.1 1B HTP to benchmark
Browse files: browse the repository at this point in the history
  • Loading branch information
Github Executorch committed Dec 19, 2024
1 parent 62016d6 commit 3287b18
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 6 deletions.
3 changes: 1 addition & 2 deletions .ci/scripts/gather_benchmark_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@
],
"android": [
"qnn_q8",
# TODO: Add support for llama3 htp
# "llama3_qnn_htp",
"llama3_qnn_htp",
],
"ios": [
"coreml_fp16",
Expand Down
29 changes: 25 additions & 4 deletions .github/workflows/android-perf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ jobs:
runner: linux.2xlarge.memory
docker-image: executorch-ubuntu-22.04-qnn-sdk
submodules: 'true'
timeout: 60
timeout: 120
upload-artifact: android-models
upload-artifact-to-s3: true
secrets-env: EXECUTORCH_HF_TOKEN
Expand Down Expand Up @@ -237,13 +237,33 @@ jobs:
--metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
--output_name="${OUT_ET_MODEL_NAME}.pte"
ls -lh "${OUT_ET_MODEL_NAME}.pte"
elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
echo "QNN_SDK_ROOT=${QNN_SDK_ROOT}"
export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}"
export PYTHONPATH=$(pwd)/..
echo "PYTHONPATH=${PYTHONPATH}"
python -m examples.qualcomm.oss_scripts.llama3_2.llama \
--checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
--params "${DOWNLOADED_PATH}/params.json" \
--tokenizer_model "${DOWNLOADED_PATH}/tokenizer.model" \
--compile_only \
--ptq 16a4w \
-m SM8650 \
--model_size 1B \
--model_mode kv \
-b "cmake-out" \
--prompt "Once"
elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
python -m examples.qualcomm.oss_scripts.llama3_2.llama -- \
export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
echo "QNN_SDK_ROOT=${QNN_SDK_ROOT}"
export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}"
export PYTHONPATH=$(pwd)/..
echo "PYTHONPATH=${PYTHONPATH}"
python -m examples.qualcomm.oss_scripts.llama3_2.llama \
--checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
--params "${DOWNLOADED_PATH}/params.json" \
--tokenizer_model "${DOWNLOADED_PATH}/tokenizer.model" \
Expand All @@ -252,6 +272,7 @@ jobs:
-m SM8650 \
--model_size 1B \
--model_mode kv \
-b "cmake-out" \
--prompt "Once"
OUT_ET_MODEL_NAME="llama3_2_qnn" # Qualcomm hard-coded it in their script
Expand Down

0 comments on commit 3287b18

Please sign in to comment.