Skip to content

Commit

Permalink
rebase stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
kirklandsign committed Aug 2, 2024
2 parents 6ad6514 + 76f0b61 commit 15518b7
Show file tree
Hide file tree
Showing 275 changed files with 8,015 additions and 2,631 deletions.
6 changes: 5 additions & 1 deletion .ci/docker/requirements-ci.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
mpmath==1.3.0
numpy==1.25.2
numpy==1.21.3; python_version == '3.10'
numpy==1.23.2; python_version == '3.11'
numpy; python_version >= '3.12'
PyYAML==6.0.1
ruamel.yaml==0.17.32
sympy==1.12
Expand All @@ -8,6 +10,8 @@ tomli==2.0.1
torchsr==1.0.4
transformers==4.38.0
zstd==1.5.5.1
pandas==2.0.3; python_version == '3.10'
pandas; python_version >= '3.11'
pytest==7.2.0
pytest-cov==4.1.0
expecttest==0.1.6
Expand Down
9 changes: 8 additions & 1 deletion .ci/scripts/gather_test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
# This one causes timeout on smaller runner, the root cause is unclear (T161064121)
"dl3": "linux.12xlarge",
"emformer_join": "linux.12xlarge",
"emformer_predict": "linux.12xlarge",
}
}

Expand All @@ -35,9 +36,11 @@
# Just some examples on how custom timeout can be set
"linux": {
"mobilebert": 90,
"emformer_predict": 360,
},
"macos": {
"mobilebert": 90,
"emformer_predict": 360,
},
}

Expand Down Expand Up @@ -84,7 +87,11 @@ def model_should_run_on_event(model: str, event: str) -> bool:
"""
if event == "pull_request":
return model in ["mv3", "vit"]
return True
elif event == "push":
# 'emformer_predict' is running super slow. Only run it periodically
return model not in ["emformer_predict"]
else:
return True


def model_should_run_on_target_os(model: str, target_os: str) -> bool:
Expand Down
11 changes: 11 additions & 0 deletions .ci/scripts/test_llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ MODEL_NAME=$1 # stories110M.pt
BUILD_TOOL=$2 # buck2 or cmake
DTYPE=$3 # fp16 or fp32
MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
UPLOAD_DIR=${5:-}
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
echo "Expecting atleast 4 positional arguments"
echo "Usage: [...]"
Expand Down Expand Up @@ -126,6 +127,15 @@ cleanup_files() {
rm params.json
}

# Stage the generated model for upload by the calling CI job.
# Does nothing unless UPLOAD_DIR (the optional 5th positional argument) is
# non-empty. Otherwise bundles ${MODEL_NAME} and tokenizer.bin into model.zip
# in the current directory (-j drops directory components from the stored
# names) and copies the archive into ${UPLOAD_DIR}, creating it if needed.
prepare_artifacts_upload() {
  if [ -n "$UPLOAD_DIR" ]; then
    echo "Preparing for uploading generated artifacts"
    mkdir -p "${UPLOAD_DIR}"
    zip -j "model.zip" "${MODEL_NAME}" tokenizer.bin
    cp "model.zip" "${UPLOAD_DIR}"
  fi
}

# Download and create artifacts.
PARAMS="params.json"
touch "${PARAMS}"
Expand Down Expand Up @@ -205,6 +215,7 @@ if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
echo "Actual result: ${RESULT}"
echo "Success"

prepare_artifacts_upload
cleanup_files
else
echo "Expected result prefix: ${EXPECTED_PREFIX}"
Expand Down
224 changes: 224 additions & 0 deletions .github/workflows/android-perf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
name: android-perf

on:
schedule:
- cron: 0 0 * * *
# Note: GitHub has an upper limit of 10 inputs
workflow_dispatch:
inputs:
models:
description: Models to be benchmarked
required: false
type: string
default: stories110M
devices:
description: Target devices to run benchmark
required: false
type: string
default: samsung_galaxy_s2x
delegates:
description: Backend delegates
required: false
type: string
default: xnnpack
threadpool:
description: Run with threadpool?
required: false
type: boolean
default: false
benchmark_configs:
description: The list of configs used the benchmark
required: false
type: string

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true

permissions: read-all

jobs:
# Normalizes the workflow inputs into JSON arrays that the downstream jobs
# consume through fromJson() to build their matrices.
set-parameters:
  runs-on: linux.2xlarge
  outputs:
    models: ${{ steps.set-parameters.outputs.models }}
    devices: ${{ steps.set-parameters.outputs.devices }}
    delegates: ${{ steps.set-parameters.outputs.delegates }}
  steps:
    - name: Set parameters
      id: set-parameters
      shell: bash
      run: |
        set -ex
        MODELS="${{ inputs.models }}"
        DEVICES="${{ inputs.devices }}"
        DELEGATES="${{ inputs.delegates }}"
        # The input defaults declared above only apply to workflow_dispatch.
        # On scheduled runs the expressions expand to empty strings, so fall
        # back to the same defaults here to keep the matrices non-empty.
        MODELS="${MODELS:-stories110M}"
        DEVICES="${DEVICES:-samsung_galaxy_s2x}"
        DELEGATES="${DELEGATES:-xnnpack}"
        # Mapping devices to their corresponding device-pool-arn
        declare -A DEVICE_POOL_ARNS
        DEVICE_POOL_ARNS[samsung_galaxy_s2x]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
        # Resolve device names with their corresponding ARNs.
        # Accept either a JSON array or a comma-separated list. The check must
        # look at jq's exit status: its output is discarded, so testing the
        # captured output would always take the split branch.
        if ! echo "$DEVICES" | jq -e 'type == "array"' >/dev/null 2>&1; then
          DEVICES=$(echo "$DEVICES" | jq -Rc 'split(",")')
        fi
        declare -a MAPPED_ARNS=()
        for DEVICE in $(echo "$DEVICES" | jq -r '.[]'); do
          if [[ -z "${DEVICE_POOL_ARNS[$DEVICE]}" ]]; then
            echo "Error: No ARN found for device '$DEVICE'. Abort." >&2
            exit 1
          fi
          MAPPED_ARNS+=("${DEVICE_POOL_ARNS[$DEVICE]}")
        done
        echo "models=$(echo "$MODELS" | jq -Rc 'split(",")')" >> "$GITHUB_OUTPUT"
        MAPPED_ARNS_JSON=$(printf '%s\n' "${MAPPED_ARNS[@]}" | jq -R . | jq -s .)
        echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> "$GITHUB_OUTPUT"
        echo "delegates=$(echo "$DELEGATES" | jq -Rc 'split(",")')" >> "$GITHUB_OUTPUT"
# Runs once per (model, delegate) pair produced by set-parameters. Each run
# passes a per-pair directory under artifacts-to-be-uploaded/ to test_llama.sh
# as the location for the generated artifacts.
export-models:
  name: export-models
  uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  needs: set-parameters
  strategy:
    matrix:
      model: ${{ fromJson(needs.set-parameters.outputs.models) }}
      delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
    fail-fast: false
  with:
    runner: linux.2xlarge
    docker-image: executorch-ubuntu-22.04-clang12
    submodules: 'true'
    timeout: 60
    upload-artifact: android-models
    script: |
      # The generic Linux job chooses to use base env, not the one setup by the image
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"
      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
      echo "Exporting model: ${{ matrix.model }}"
      export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
      # TODO(T197546696): Note that the following scripts/steps only work for llama. It's expected to fail for other models+delegates.
      # Install requirements for export_llama
      PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
      # Test llama2; the 5th argument is the upload directory read by test_llama.sh
      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "cmake" "fp32" "xnnpack+custom+qe" "${ARTIFACTS_DIR_NAME}"
# Upload models to S3. The artifacts are needed not only by the device farm but also TorchChat
upload-models:
needs: export-models
runs-on: linux.2xlarge
steps:
- name: Download the models from GitHub
uses: actions/download-artifact@v3
with:
# The name here needs to match the name of the upload-artifact parameter
name: android-models
path: ${{ runner.temp }}/artifacts/

- name: Verify the models
shell: bash
working-directory: ${{ runner.temp }}/artifacts/
run: |
ls -lah ./
- name: Upload the models to S3
uses: seemethere/upload-artifact-s3@v5
with:
s3-bucket: gha-artifacts
s3-prefix: |
${{ github.repository }}/${{ github.run_id }}/artifact
retention-days: 1
if-no-files-found: ignore
path: ${{ runner.temp }}/artifacts/

build-llm-demo:
name: build-llm-demo
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
needs: set-parameters
strategy:
matrix:
tokenizer: [bpe]
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-clang12-android
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
upload-artifact: android-apps
script: |
set -eux
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
# TODO: This needs to be replaced with a generic loader .apk
# Build LLM Demo for Android
bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
upload-android-apps:
needs: build-llm-demo
runs-on: linux.2xlarge
steps:
- name: Download the apps from GitHub
uses: actions/download-artifact@v3
with:
# The name here needs to match the name of the upload-artifact parameter
name: android-apps
path: ${{ runner.temp }}/artifacts/

- name: Verify the apps
shell: bash
working-directory: ${{ runner.temp }}/artifacts/
run: |
ls -lah ./
- name: Upload the apps to S3
uses: seemethere/upload-artifact-s3@v5
with:
s3-bucket: gha-artifacts
s3-prefix: |
${{ github.repository }}/${{ github.run_id }}/artifact
retention-days: 14
if-no-files-found: ignore
path: ${{ runner.temp }}/artifacts/

# Let's see how expensive this job is, we might want to tone it down by running it periodically
benchmark-on-device:
permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
needs:
- set-parameters
- upload-models
- upload-android-apps
strategy:
matrix:
model: ${{ fromJson(needs.set-parameters.outputs.models) }}
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
device: ${{ fromJson(needs.set-parameters.outputs.devices) }}
with:
device-type: android
runner: linux.2xlarge
test-infra-ref: ''
# This is the ARN of ExecuTorch project on AWS
project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
device-pool-arn: ${{ matrix.device }}
# Uploaded to S3 from the previous job, the name of the app comes from the project itself.
# Unlike models there are limited numbers of build flavor for apps, and the model controls whether it should build with bpe/tiktoken tokenizer.
# It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only
# one app+flavor that could load and run the model.
# TODO: Hard code llm_demo_bpe for now in this job.
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug.apk
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug-androidTest.apk
# The test spec can be downloaded from https://ossci-assets.s3.amazonaws.com/android-llama2-device-farm-test-spec.yml
test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/abd86868-fa63-467e-a5c7-218194665a77
# Uploaded to S3 from the previous job
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
16 changes: 12 additions & 4 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ jobs:
strategy:
fail-fast: false
with:
runner: linux.12xlarge
runner: linux.24xlarge
docker-image: executorch-ubuntu-22.04-clang12
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
Expand All @@ -210,11 +210,19 @@ jobs:
bash examples/models/llava/install_requirements.sh
# run export_llava.py
python examples/models/llava/export_llava.py
python examples/models/llava/export_llava.py --use-sdpa-with-kv-cache --pte-name llava_custom_sdpa.pte
# verify file exists
if [ ! -f "llava_combined_xnnpack.pte" ]; then
echo "llava_combined_xnnpack.pte not found!"
if [ ! -f "llava_custom_sdpa.pte" ]; then
echo "llava_custom_sdpa.pte not found!"
exit 1
fi
python examples/models/llava/export_llava.py --no-use-sdpa-with-kv-cache --pte-name llava.pte
# verify file exists
if [ ! -f "llava.pte" ]; then
echo "llava.pte not found!"
exit 1
fi
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ endif()
if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
# TODO: move all custom kernels to ${CMAKE_CURRENT_SOURCE_DIR}/kernels/custom
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/examples/models/llama2/custom_ops
${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/custom_ops
)
endif()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ - (void)prewarmRecentlyUsedAssetsWithMaxCount:(NSUInteger)maxCount {

NSError *prewarmError = nil;
if (![asset prewarmAndReturnError:&prewarmError]) {
ETCoreMLLogError(localError,
ETCoreMLLogError(prewarmError,
"%@: Failed to prewarm asset with identifier = %@",
NSStringFromClass(strongSelf.assetManager.class),
asset.identifier);
Expand Down
13 changes: 9 additions & 4 deletions backends/apple/coreml/runtime/delegate/backend_delegate.mm
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ - (BOOL)_loadAndReturnError:(NSError * _Nullable __autoreleasing *)error {
if (self.config.should_prewarm_asset) {
[modelManager prewarmRecentlyUsedAssetsWithMaxCount:1];
}

return YES;
}

Expand Down Expand Up @@ -188,9 +188,14 @@ - (ModelHandle*)loadModelFromAOTData:(NSData*)data
return nil;
}

return [self.impl loadModelFromAOTData:data
configuration:configuration
error:error];
auto handle = [self.impl loadModelFromAOTData:data
configuration:configuration
error:error];
if ((handle != NULL) && self.config.should_prewarm_model) {
[self.impl prewarmModelWithHandle:handle error:nil];
}

return handle;
}

- (BOOL)executeModelWithHandle:(ModelHandle*)handle
Expand Down
Loading

0 comments on commit 15518b7

Please sign in to comment.