2024-08-31 nightly release (cd8aed6)
pytorchbot committed Aug 31, 2024
1 parent ac2248f commit 3c93579
Showing 40 changed files with 608 additions and 165 deletions.
1 change: 1 addition & 0 deletions .ci/scripts/build-qnn-sdk.sh
@@ -6,6 +6,7 @@
# LICENSE file in the root directory of this source tree.

set -eux
+set -o xtrace

build_qnn_backend() {
echo "Start building qnn backend."
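A note on the change above: "set -o xtrace" is the long-option spelling of "set -x", which makes the shell print each command before running it. Here it is effectively redundant, since the "set -eux" above already enables -x; in backends/qualcomm/scripts/build.sh further down, which previously had only "set -e", the same one-liner newly enables tracing. A minimal sketch of the equivalence (illustrative, not part of the commit):

#!/bin/bash
set -eux        # -e exit on error, -u error on unset vars, -x trace commands
set -o xtrace   # long form of -x; a no-op when -x is already in effect
echo "hello"    # the trace prints '+ echo hello' before the output appears
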
12 changes: 7 additions & 5 deletions .github/workflows/android-perf.yml
@@ -84,7 +84,7 @@ jobs:
# Separate default values from the workflow dispatch. To ensure defaults are accessible
# during scheduled runs and to provide flexibility for different defaults between
# on-demand and periodic benchmarking.
CRON_DEFAULT_MODELS: "dl3,mv3,mv2,ic4,ic3"
CRON_DEFAULT_MODELS: "stories110M,dl3,mv3,mv2,ic4,ic3"
CRON_DEFAULT_DEVICES: "samsung_galaxy_s2x"
CRON_DEFAULT_DELEGATES: "xnnpack,qnn"
run: |
@@ -162,6 +162,11 @@ jobs:
# Test llama2
if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
DELEGATE_CONFIG="xnnpack+custom+qe"
+elif [[ ${{ matrix.delegate }} == "qnn" ]]; then
+DELEGATE_CONFIG="qnn"
+else
+echo "Unsupported delegate ${{ matrix.delegate }}"
+exit 1
fi
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
else
@@ -201,9 +206,6 @@ jobs:
name: build-llm-demo
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
needs: set-parameters
-strategy:
-matrix:
-tokenizer: [bpe]
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-clang12-android
@@ -222,7 +224,7 @@
# TODO: This needs to be replaced with a generic loader .apk
# Build LLM Demo for Android
-bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
+bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
upload-android-apps:
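The new branch above maps the job's matrix.delegate value onto the DELEGATE_CONFIG string that .ci/scripts/test_llama.sh consumes, failing fast on anything unrecognized. The same selection, sketched as a standalone script (the DELEGATE value is an illustrative stand-in for the CI matrix input):

#!/bin/bash
DELEGATE="qnn"   # in CI this comes from matrix.delegate
if [[ "$DELEGATE" == "xnnpack" ]]; then
  DELEGATE_CONFIG="xnnpack+custom+qe"
elif [[ "$DELEGATE" == "qnn" ]]; then
  DELEGATE_CONFIG="qnn"
else
  echo "Unsupported delegate $DELEGATE"
  exit 1
fi
echo "DELEGATE_CONFIG=$DELEGATE_CONFIG"
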
16 changes: 3 additions & 13 deletions .github/workflows/android.yml
@@ -24,9 +24,6 @@ jobs:
build-llm-demo:
name: build-llm-demo
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-strategy:
-matrix:
-tokenizer: [bpe, tiktoken]
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-clang12-android
@@ -44,7 +41,7 @@
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
# Build LLM Demo for Android
-bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
+bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
upload-artifacts:
@@ -155,13 +152,6 @@ jobs:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
-strategy:
-matrix:
-# https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/LlamaDemo/README.md#alternative-2-build-from-local-machine
-# mentions that tiktoken is only for Llama3. So, we can export it later in another archive
-# like https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip when this is
-# updated to run Llama3
-tokenizer: [bpe]
with:
device-type: android
runner: linux.2xlarge
@@ -171,8 +161,8 @@
# This is the custom Android device pool that only includes Samsung Galaxy S2x
device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
# Uploaded to S3 from the previous job, the name of the app comes from the project itself
-android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug.apk
-android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug-androidTest.apk
+android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug.apk
+android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug-androidTest.apk
test-spec: https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml
# Among the input, this is the biggest file, so it is cached on AWS to make the test faster. Note that the file is deleted by AWS after 30
# days and the job will automatically re-upload the file when that happens.
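With the tokenizer matrix removed, uploaded artifacts live under a single llm_demo prefix rather than llm_demo_bpe or llm_demo_tiktoken. A sketch of fetching the relocated APK (the repository and run id are placeholders, not real values):

#!/bin/bash
REPO="pytorch/executorch"   # placeholder
RUN_ID="1234567890"         # placeholder
# old: .../artifact/llm_demo_${TOKENIZER}/app-debug.apk
# new: .../artifact/llm_demo/app-debug.apk
curl -fLO "https://gha-artifacts.s3.amazonaws.com/${REPO}/${RUN_ID}/artifact/llm_demo/app-debug.apk"
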
7 changes: 7 additions & 0 deletions CMakeLists.txt
@@ -637,6 +637,13 @@ if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/runner_util)
endif()

+if(EXECUTORCH_BUILD_PTHREADPOOL
+AND EXECUTORCH_BUILD_CPUINFO
+AND CMAKE_CXX_STANDARD GREATER_EQUAL 14
+)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
+endif()

if(EXECUTORCH_BUILD_PYBIND)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/pybind11)

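The new guard builds extension/threadpool only when EXECUTORCH_BUILD_PTHREADPOOL and EXECUTORCH_BUILD_CPUINFO are both enabled and CMAKE_CXX_STANDARD is at least 14. A hedged configure sketch satisfying all three conditions (paths and the chosen standard are illustrative; the target name is taken from the library lists elsewhere in this commit):

#!/bin/bash
cmake -S . -B cmake-out \
  -DCMAKE_CXX_STANDARD=17 \
  -DEXECUTORCH_BUILD_PTHREADPOOL=ON \
  -DEXECUTORCH_BUILD_CPUINFO=ON
cmake --build cmake-out --target extension_threadpool
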
3 changes: 3 additions & 0 deletions backends/qualcomm/runtime/QnnManager.cpp
@@ -8,6 +8,7 @@
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
#include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
#include <executorch/backends/qualcomm/runtime/Utils.h>
+#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
#include <algorithm>
#include <cstdlib>
@@ -281,6 +282,8 @@ Error QnnManager::Init() {
options_->backend_options()->backend_type());
backend_params_ptr_ = QnnBackendFactory().Create(
qnn_loaded_backend_, logger_.get(), qnn_context_blob_, options_);
+ET_CHECK_OR_RETURN_ERROR(
+backend_params_ptr_ != nullptr, Internal, "Failed to load Qnn backend.")
ET_CHECK_OR_RETURN_ERROR(
backend_params_ptr_->qnn_backend_ptr_->Configure() == Error::Ok,
Internal,
79 changes: 79 additions & 0 deletions backends/qualcomm/runtime/backends/QnnBackendCommon.cpp
@@ -53,6 +53,85 @@ Error QnnBackend::Configure() {
}
return Error::Ok;
}
+
+Error QnnBackend::VerifyQNNSDKVersion(
+const QnnExecuTorchBackendType backend_id) {
+const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
+
+Qnn_ApiVersion_t qnn_version = {QNN_VERSION_INIT};
+Qnn_ErrorHandle_t error =
+qnn_interface.qnn_backend_get_api_version(&qnn_version);
+if (error != QNN_SUCCESS) {
+QNN_EXECUTORCH_LOG_ERROR("Failed to get Qnn API version.");
+return Error::Internal;
+}
+
+Qnn_ApiVersion_t expected_version = {QNN_VERSION_INIT};
+expected_version.coreApiVersion.major = QNN_API_VERSION_MAJOR;
+expected_version.coreApiVersion.minor = QNN_API_VERSION_MINOR;
+expected_version.coreApiVersion.patch = QNN_API_VERSION_PATCH;
+expected_version.backendApiVersion = GetExpectedBackendVersion();
+const char* backend_type = EnumNameQnnExecuTorchBackendType(backend_id);
+
+Error status = VersionChecker(
+qnn_version.coreApiVersion, expected_version.coreApiVersion, "Qnn API");
+if (status == Error::Ok) {
+status = VersionChecker(
+qnn_version.backendApiVersion,
+expected_version.backendApiVersion,
+backend_type);
+}
+
+return status;
+}
+
+Error QnnBackend::VersionChecker(
+const Qnn_Version_t& qnn_version,
+const Qnn_Version_t& expected,
+const std::string& prefix) {
+if (qnn_version.major != expected.major) {
+QNN_EXECUTORCH_LOG_ERROR(
+"%s version %u.%u.%u is not supported. "
+"The minimum supported version is %u.%u.%u. Please make "
+"sure you have the correct backend library version.",
+prefix.c_str(),
+qnn_version.major,
+qnn_version.minor,
+qnn_version.patch,
+expected.major,
+expected.minor,
+expected.patch);
+return Error::Internal;
+}
+if (qnn_version.major == QNN_API_VERSION_MAJOR &&
+qnn_version.minor < expected.minor) {
+QNN_EXECUTORCH_LOG_WARN(
+"%s version %u.%u.%u is mismatched. "
+"The minimum supported version is %u.%u.%u. Please make "
+"sure you have the correct backend library version.",
+prefix.c_str(),
+qnn_version.major,
+qnn_version.minor,
+qnn_version.patch,
+expected.major,
+expected.minor,
+expected.patch);
+}
+if ((qnn_version.major == QNN_API_VERSION_MAJOR &&
+qnn_version.minor > expected.minor)) {
+QNN_EXECUTORCH_LOG_WARN(
+"%s version %u.%u.%u is used. "
+"The version is tested against %u.%u.%u.",
+prefix.c_str(),
+qnn_version.major,
+qnn_version.minor,
+qnn_version.patch,
+expected.major,
+expected.minor,
+expected.patch);
+}
+return Error::Ok;
+}
} // namespace qnn
} // namespace executor
} // namespace torch
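The policy above: a major-version mismatch against the expected SDK version is a hard error (Error::Internal), while a minor version older or newer than the tested one only logs a warning. Roughly the same decision tree in shell, as an illustrative approximation (not part of the commit):

#!/bin/bash
check_version() {
  local got_major=$1 got_minor=$2 want_major=$3 want_minor=$4
  if [[ $got_major -ne $want_major ]]; then
    echo "error: major version $got_major unsupported (want $want_major)" >&2
    return 1    # corresponds to Error::Internal
  elif [[ $got_minor -lt $want_minor ]]; then
    echo "warning: minor version $got_minor older than tested $want_minor" >&2
  elif [[ $got_minor -gt $want_minor ]]; then
    echo "warning: minor version $got_minor newer than tested $want_minor" >&2
  fi
  return 0      # corresponds to Error::Ok
}
check_version 2 14 2 12   # newer minor: warning only, still Ok
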
9 changes: 9 additions & 0 deletions backends/qualcomm/runtime/backends/QnnBackendCommon.h
@@ -13,8 +13,10 @@

#include <vector>

#include "HTP/QnnHtpCommon.h"
#include "QnnBackend.h"
#include "QnnCommon.h"
#include "QnnTypes.h"
namespace torch {
namespace executor {
namespace qnn {
@@ -43,7 +45,10 @@ class QnnBackend {
return handle_;
}

+Error VerifyQNNSDKVersion(const QnnExecuTorchBackendType backend_id);
+
protected:
+virtual Qnn_Version_t GetExpectedBackendVersion() const = 0;
virtual Error MakeConfig(std::vector<const QnnBackend_Config_t*>& config) {
return Error::Ok;
};
@@ -52,6 +57,10 @@ class QnnBackend {
Qnn_BackendHandle_t handle_;
const QnnImplementation& implementation_;
QnnLogger* logger_;
+Error VersionChecker(
+const Qnn_Version_t& qnn_version,
+const Qnn_Version_t& expected,
+const std::string& prefix);
};
} // namespace qnn
} // namespace executor
9 changes: 7 additions & 2 deletions backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
@@ -16,6 +16,7 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
const QnnExecuTorchContextBinary& qnn_context_blob,
const QnnExecuTorchOptions* options) {
auto backend_params = std::make_unique<BackendConfigParameters>();
+
switch (options->backend_options()->backend_type()) {
case QnnExecuTorchBackendType::kHtpBackend: {
auto htp_options = options->backend_options()->htp_options();
@@ -51,6 +52,7 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
}
backend_params->qnn_backend_ptr_ =
std::make_unique<HtpBackend>(implementation, logger);
+
backend_params->qnn_device_ptr_ = std::make_unique<HtpDevice>(
implementation, logger, options->soc_info(), htp_options);

@@ -72,7 +74,6 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
backend_params->qnn_mem_manager_ptr_ = std::make_unique<QnnMemManager>(
implementation, backend_params->qnn_context_ptr_.get());
backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED;
-return backend_params;
} break;
case QnnExecuTorchBackendType::kGpuBackend:
case QnnExecuTorchBackendType::kDspBackend:
@@ -81,7 +82,11 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
return nullptr;
}

-// should not reach here
+if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion(
+options->backend_options()->backend_type()) == Error::Ok) {
+return backend_params;
+}

return nullptr;
}
} // namespace qnn
10 changes: 10 additions & 0 deletions backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h
@@ -8,7 +8,9 @@
#pragma once

#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
+#include "HTP/QnnHtpCommon.h"
#include "HTP/QnnHtpProfile.h"
+#include "QnnTypes.h"
namespace torch {
namespace executor {
namespace qnn {
@@ -24,6 +26,14 @@ class HtpBackend : public QnnBackend {
event_type == QNN_HTP_PROFILE_EVENTTYPE_GRAPH_EXECUTE_ACCEL_TIME_CYCLE);
}

+Qnn_Version_t GetExpectedBackendVersion() const override {
+Qnn_Version_t backend_version;
+backend_version.major = QNN_HTP_API_VERSION_MAJOR;
+backend_version.minor = QNN_HTP_API_VERSION_MINOR;
+backend_version.patch = QNN_HTP_API_VERSION_PATCH;
+return backend_version;
+}

protected:
Error MakeConfig(std::vector<const QnnBackend_Config_t*>& config) override {
return Error::Ok;
1 change: 1 addition & 0 deletions backends/qualcomm/scripts/build.sh
@@ -4,6 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set -e
+set -o xtrace

if [[ -z ${QNN_SDK_ROOT} ]]; then
echo "Please export QNN_SDK_ROOT=/path/to/qnn_sdk"
1 change: 1 addition & 0 deletions backends/xnnpack/test/CMakeLists.txt
@@ -34,6 +34,7 @@ et_cxx_test(
SOURCES
${_test_srcs}
EXTRA_LIBS
+extension_threadpool
xnnpack_backend
XNNPACK
pthreadpool
2 changes: 2 additions & 0 deletions build/Test.cmake
@@ -25,9 +25,11 @@ find_package(executorch CONFIG REQUIRED)
enable_testing()
find_package(GTest CONFIG REQUIRED)

+target_link_options_shared_lib(cpuinfo)
target_link_options_shared_lib(extension_data_loader)
target_link_options_shared_lib(portable_kernels)
target_link_options_shared_lib(portable_ops_lib)
+target_link_options_shared_lib(pthreadpool)
target_link_options_shared_lib(quantized_ops_lib)

# Add code coverage flags to supported compilers
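For context, target_link_options_shared_lib is an ExecuTorch CMake helper; judging by its name and its use here, it applies whole-archive-style link options so that objects carrying static registrations (kernels, pthreadpool/cpuinfo hooks) are not dropped by the linker. That reading is an inference, since the diff does not show the helper's definition. Roughly the command-line effect for GNU ld (all names illustrative):

#!/bin/bash
g++ -o xnnpack_test test_main.o \
  -Wl,--whole-archive libpthreadpool.a libcpuinfo.a -Wl,--no-whole-archive \
  -lexecutorch
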
20 changes: 5 additions & 15 deletions build/build_android_llm_demo.sh
@@ -17,14 +17,8 @@ build_jar() {

build_android_native_library() {
ANDROID_ABI="$1"
TOKENIZER="$2"
ANDROID_NDK="${ANDROID_NDK:-/opt/ndk}"
CMAKE_OUT="cmake-out-android-${ANDROID_ABI}"
-if [[ $TOKENIZER == "tiktoken" ]]; then
-EXECUTORCH_USE_TIKTOKEN=ON
-else
-EXECUTORCH_USE_TIKTOKEN=OFF
-fi

cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
@@ -54,7 +48,6 @@ build_android_native_library() {
-DANDROID_ABI="$ANDROID_ABI" \
-DANDROID_PLATFORM=android-23 \
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
--DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -72,7 +65,6 @@ build_android_native_library() {
-DEXECUTORCH_ENABLE_LOGGING=ON \
-DEXECUTORCH_LOG_LEVEL=Info \
-DEXECUTORCH_BUILD_LLAMA_JNI=ON \
--DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DCMAKE_BUILD_TYPE=Release \
-B"${CMAKE_OUT}"/extension/android

@@ -107,9 +99,8 @@ build_android_llm_demo_app() {
}

collect_artifacts_to_be_uploaded() {
TOKENIZER="$1"
ARTIFACTS_DIR_NAME="$2"
DEMO_APP_DIR="${ARTIFACTS_DIR_NAME}/llm_demo_${TOKENIZER}"
ARTIFACTS_DIR_NAME="$1"
DEMO_APP_DIR="${ARTIFACTS_DIR_NAME}/llm_demo"
# The app directory is named using its build flavor as a suffix.
mkdir -p "${DEMO_APP_DIR}"
# Collect the app and its test suite
@@ -131,13 +122,12 @@ export BUILD_AAR_DIR
ANDROID_ABIS=("arm64-v8a" "x86_64")
export ANDROID_ABIS

TOKENIZER="${1:-bpe}"
ARTIFACTS_DIR_NAME="$2"
ARTIFACTS_DIR_NAME="$1"

build_jar
for ANDROID_ABI in "${ANDROID_ABIS[@]}"; do
-build_android_native_library ${ANDROID_ABI} ${TOKENIZER}
+build_android_native_library ${ANDROID_ABI}
done
build_aar
build_android_llm_demo_app
-collect_artifacts_to_be_uploaded ${TOKENIZER} ${ARTIFACTS_DIR_NAME}
+collect_artifacts_to_be_uploaded ${ARTIFACTS_DIR_NAME}
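With the tokenizer parameter gone, the script takes the artifacts directory as its only argument. Before and after, using the directory name from the workflows above:

#!/bin/bash
# before this commit: tokenizer flavor first, artifacts dir second
#   bash build/build_android_llm_demo.sh bpe artifacts-to-be-uploaded
# after this commit: a single argument
bash build/build_android_llm_demo.sh artifacts-to-be-uploaded
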
1 change: 1 addition & 0 deletions build/executorch-config.cmake
@@ -46,6 +46,7 @@ set(lib_list
extension_module
extension_module_static
extension_runner_util
+extension_threadpool
xnnpack_backend
XNNPACK
cpuinfo