[ExecuTorch] Allow setting dtype to bf16 in export_llama #3055
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Android | |
on: | |
push: | |
branches: | |
- main | |
- release/* | |
tags: | |
- ciflow/android/* | |
pull_request: | |
paths: | |
- .ci/docker/** | |
- .github/workflows/android.yml | |
- build/*android*.sh | |
- install_requirements.sh | |
- examples/demo-apps/android/** | |
- extension/android/** | |
- extension/module/** | |
workflow_dispatch: | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} | |
cancel-in-progress: true | |
jobs: | |
build-llm-demo: | |
name: build-llm-demo | |
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main | |
with: | |
runner: linux.2xlarge | |
docker-image: executorch-ubuntu-22.04-clang12-android | |
submodules: 'true' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 90 | |
upload-artifact: android-apps | |
script: | | |
set -eux | |
# The generic Linux job chooses to use base env, not the one setup by the image | |
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") | |
conda activate "${CONDA_ENV}" | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2 | |
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded | |
# Build LLM Demo for Android | |
bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME} | |
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat | |
upload-artifacts: | |
needs: build-llm-demo | |
runs-on: linux.2xlarge | |
steps: | |
- name: Download the artifacts from GitHub | |
uses: actions/download-artifact@v3 | |
with: | |
# The name here needs to match the name of the upload-artifact parameter | |
name: android-apps | |
path: ${{ runner.temp }}/artifacts/ | |
- name: Verify the artifacts | |
shell: bash | |
working-directory: ${{ runner.temp }}/artifacts/ | |
run: | | |
ls -lah ./ | |
- name: Upload the artifacts to S3 | |
uses: seemethere/upload-artifact-s3@v5 | |
with: | |
s3-bucket: gha-artifacts | |
s3-prefix: | | |
${{ github.repository }}/${{ github.run_id }}/artifact | |
# NOTE: Consume stale artifacts won't make sense for benchmarking as the goal is always to | |
# benchmark models as fresh as possible. I'm okay to keep the 14 retention-days for now | |
# for TorchChat until we have a periodic job can publish it more often. Ideally I want to | |
# reduce it to <= 2 day, meaning the benchmark job will run daily. | |
retention-days: 14 | |
if-no-files-found: ignore | |
path: ${{ runner.temp }}/artifacts/ | |
# Running Android emulator directly on the runner and not using Docker | |
run-emulator: | |
needs: build-llm-demo | |
runs-on: amz2023.linux.4xlarge | |
env: | |
ANDROID_NDK_VERSION: r26c | |
API_LEVEL: 34 | |
steps: | |
- name: Setup SSH (Click me for login details) | |
uses: pytorch/test-infra/.github/actions/setup-ssh@main | |
with: | |
github-secret: ${{ secrets.GITHUB_TOKEN }} | |
instructions: | | |
This is used to run Android emulators, ANDROID_HOME is installed at /opt/android/sdk | |
- uses: actions/checkout@v3 | |
with: | |
submodules: false | |
- name: Setup conda | |
uses: pytorch/test-infra/.github/actions/setup-miniconda@main | |
with: | |
python-version: '3.10' | |
- name: Install Android dependencies | |
shell: bash | |
run: | | |
set -eux | |
# Reuse the script that install Android on ET Docker image | |
sudo -E bash .ci/docker/common/install_android.sh | |
- name: Gradle cache | |
uses: gradle/actions/setup-gradle@v3 | |
- name: AVD cache | |
uses: actions/cache@v4 | |
id: avd-cache | |
with: | |
path: | | |
~/.android/avd/* | |
~/.android/adb* | |
key: avd-${{ env.API_LEVEL }} | |
# NB: It takes about 10m to cold boot the emulator here | |
- name: Run Android emulator | |
env: | |
ANDROID_HOME: /opt/android/sdk | |
uses: reactivecircus/android-emulator-runner@v2 | |
with: | |
api-level: ${{ env.API_LEVEL }} | |
# NB: x86_64 emulator is slow because the lack of KVM support on AWS, it | |
# seems that we can use metal instance for that but it hasn't been tried | |
# out yet. Also arm64-v8a arch requires an ARM runner | |
arch: x86_64 | |
script: ./build/run_android_emulator.sh | |
# NB: This is to boot the emulator faster following the instructions on | |
# https://github.com/ReactiveCircus/android-emulator-runner. The max number | |
# of cores we can set is 6, any higher number will be reduced to 6. | |
cores: 6 | |
ram-size: 12288M | |
force-avd-creation: false | |
disable-animations: true | |
emulator-options: -no-snapshot-save -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -camera-back none | |
# This is to make sure that the job doesn't fail flakily | |
emulator-boot-timeout: 900 | |
# Let's see how expensive this job is, we might want to tone it down by running it periodically | |
test-llama-app: | |
# Only PR from ExecuTorch itself has permission to access AWS, forked PRs will fail to | |
# authenticate with the cloud service | |
if: ${{ !github.event.pull_request.head.repo.fork }} | |
needs: upload-artifacts | |
permissions: | |
id-token: write | |
contents: read | |
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main | |
with: | |
device-type: android | |
runner: linux.2xlarge | |
test-infra-ref: '' | |
# This is the ARN of ExecuTorch project on AWS | |
project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6 | |
# This is the custom Android device pool that only includes Samsung Galaxy S2x | |
device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa | |
# Uploaded to S3 from the previous job, the name of the app comes from the project itself | |
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug.apk | |
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug-androidTest.apk | |
test-spec: https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml | |
# Among the input, this is the biggest file, so it is cached on AWS to make the test faster. Note that the file is deleted by AWS after 30 | |
# days and the job will automatically re-upload the file when that happens. | |
extra-data: https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip |