[ExecuTorch] Allow setting dtype to bf16 in export_llama #3055

Workflow file for this run

.github/workflows/android.yml at 1511fc1

	name: Android

	on:
	push:
	branches:
	- main
	- release/*
	tags:
	- ciflow/android/*
	pull_request:
	paths:
	- .ci/docker/**
	- .github/workflows/android.yml
	- build/android.sh
	- install_requirements.sh
	- examples/demo-apps/android/**
	- extension/android/**
	- extension/module/**
	workflow_dispatch:

	concurrency:
	group: ${{ github.workflow }}-${{ github.event.pull_request.number \|\| github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
	cancel-in-progress: true

	jobs:
	build-llm-demo:
	name: build-llm-demo
	uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
	with:
	runner: linux.2xlarge
	docker-image: executorch-ubuntu-22.04-clang12-android
	submodules: 'true'
	ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha \|\| github.sha }}
	timeout: 90
	upload-artifact: android-apps
	script: \|
	set -eux

	# The generic Linux job chooses to use base env, not the one setup by the image
	CONDA_ENV=$(conda env list --json \| jq -r ".envs \| .[-1]")
	conda activate "${CONDA_ENV}"
	PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
	export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded

	# Build LLM Demo for Android
	bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}

	# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
	upload-artifacts:
	needs: build-llm-demo
	runs-on: linux.2xlarge
	steps:
	- name: Download the artifacts from GitHub
	uses: actions/download-artifact@v3
	with:
	# The name here needs to match the name of the upload-artifact parameter
	name: android-apps
	path: ${{ runner.temp }}/artifacts/

	- name: Verify the artifacts
	shell: bash
	working-directory: ${{ runner.temp }}/artifacts/
	run: \|
	ls -lah ./

	- name: Upload the artifacts to S3
	uses: seemethere/upload-artifact-s3@v5
	with:
	s3-bucket: gha-artifacts
	s3-prefix: \|
	${{ github.repository }}/${{ github.run_id }}/artifact
	# NOTE: Consume stale artifacts won't make sense for benchmarking as the goal is always to
	# benchmark models as fresh as possible. I'm okay to keep the 14 retention-days for now
	# for TorchChat until we have a periodic job can publish it more often. Ideally I want to
	# reduce it to <= 2 day, meaning the benchmark job will run daily.
	retention-days: 14
	if-no-files-found: ignore
	path: ${{ runner.temp }}/artifacts/

	# Running Android emulator directly on the runner and not using Docker
	run-emulator:
	needs: build-llm-demo
	runs-on: amz2023.linux.4xlarge
	env:
	ANDROID_NDK_VERSION: r26c
	API_LEVEL: 34
	steps:
	- name: Setup SSH (Click me for login details)
	uses: pytorch/test-infra/.github/actions/setup-ssh@main
	with:
	github-secret: ${{ secrets.GITHUB_TOKEN }}
	instructions: \|
	This is used to run Android emulators, ANDROID_HOME is installed at /opt/android/sdk

	- uses: actions/checkout@v3
	with:
	submodules: false

	- name: Setup conda
	uses: pytorch/test-infra/.github/actions/setup-miniconda@main
	with:
	python-version: '3.10'

	- name: Install Android dependencies
	shell: bash
	run: \|
	set -eux

	# Reuse the script that install Android on ET Docker image
	sudo -E bash .ci/docker/common/install_android.sh

	- name: Gradle cache
	uses: gradle/actions/setup-gradle@v3

	- name: AVD cache
	uses: actions/cache@v4
	id: avd-cache
	with:
	path: \|
	~/.android/avd/*
	~/.android/adb*
	key: avd-${{ env.API_LEVEL }}

	# NB: It takes about 10m to cold boot the emulator here
	- name: Run Android emulator
	env:
	ANDROID_HOME: /opt/android/sdk
	uses: reactivecircus/android-emulator-runner@v2
	with:
	api-level: ${{ env.API_LEVEL }}
	# NB: x86_64 emulator is slow because the lack of KVM support on AWS, it
	# seems that we can use metal instance for that but it hasn't been tried
	# out yet. Also arm64-v8a arch requires an ARM runner
	arch: x86_64
	script: ./build/run_android_emulator.sh
	# NB: This is to boot the emulator faster following the instructions on
	# https://github.com/ReactiveCircus/android-emulator-runner. The max number
	# of cores we can set is 6, any higher number will be reduced to 6.
	cores: 6
	ram-size: 12288M
	force-avd-creation: false
	disable-animations: true
	emulator-options: -no-snapshot-save -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -camera-back none
	# This is to make sure that the job doesn't fail flakily
	emulator-boot-timeout: 900

	# Let's see how expensive this job is, we might want to tone it down by running it periodically
	test-llama-app:
	# Only PR from ExecuTorch itself has permission to access AWS, forked PRs will fail to
	# authenticate with the cloud service
	if: ${{ !github.event.pull_request.head.repo.fork }}
	needs: upload-artifacts
	permissions:
	id-token: write
	contents: read
	uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
	with:
	device-type: android
	runner: linux.2xlarge
	test-infra-ref: ''
	# This is the ARN of ExecuTorch project on AWS
	project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
	# This is the custom Android device pool that only includes Samsung Galaxy S2x
	device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
	# Uploaded to S3 from the previous job, the name of the app comes from the project itself
	android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug.apk
	android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug-androidTest.apk
	test-spec: https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml
	# Among the input, this is the biggest file, so it is cached on AWS to make the test faster. Note that the file is deleted by AWS after 30
	# days and the job will automatically re-upload the file when that happens.
	extra-data: https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[ExecuTorch] Allow setting dtype to bf16 in export_llama #3055

Workflow file

[ExecuTorch] Allow setting dtype to bf16 in export_llama #3055

Jobs

Run details

Workflow file for this run