2024-08-31 nightly release (cd8aed6)
pytorchbot committed Aug 31, 2024
1 parent ac2248f commit 3c93579
Showing 40 changed files with 608 additions and 165 deletions.
1 change: 1 addition & 0 deletions .ci/scripts/build-qnn-sdk.sh
@@ -6,6 +6,7 @@
# LICENSE file in the root directory of this source tree.

set -eux
+set -o xtrace

build_qnn_backend() {
echo "Start building qnn backend."
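A note on the change above: "set -o xtrace" is the long-option spelling of "set -x", which makes the shell print each command before running it. Here it is effectively redundant, since the "set -eux" above already enables -x; in backends/qualcomm/scripts/build.sh further down, which previously had only "set -e", the same one-liner newly enables tracing. A minimal sketch of the equivalence (illustrative, not part of the commit):

#!/bin/bash
set -eux        # -e exit on error, -u error on unset vars, -x trace commands
set -o xtrace   # long form of -x; a no-op when -x is already in effect
echo "hello"    # the trace prints '+ echo hello' before the output appears
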
12 changes: 7 additions & 5 deletions .github/workflows/android-perf.yml
@@ -84,7 +84,7 @@ jobs:
# Separate default values from the workflow dispatch. To ensure defaults are accessible
# during scheduled runs and to provide flexibility for different defaults between
# on-demand and periodic benchmarking.
CRON_DEFAULT_MODELS: "dl3,mv3,mv2,ic4,ic3"
CRON_DEFAULT_MODELS: "stories110M,dl3,mv3,mv2,ic4,ic3"
CRON_DEFAULT_DEVICES: "samsung_galaxy_s2x"
CRON_DEFAULT_DELEGATES: "xnnpack,qnn"
run: |
@@ -162,6 +162,11 @@ jobs:
# Test llama2
if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
DELEGATE_CONFIG="xnnpack+custom+qe"
+elif [[ ${{ matrix.delegate }} == "qnn" ]]; then
+DELEGATE_CONFIG="qnn"
+else
+echo "Unsupported delegate ${{ matrix.delegate }}"
+exit 1
fi
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
else
@@ -201,9 +206,6 @@ jobs:
name: build-llm-demo
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
needs: set-parameters
-strategy:
-matrix:
-tokenizer: [bpe]
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-clang12-android
@@ -222,7 +224,7 @@
# TODO: This needs to be replaced with a generic loader .apk
# Build LLM Demo for Android
-bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
+bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
upload-android-apps:
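The new branch above maps the job's matrix.delegate value onto the DELEGATE_CONFIG string that .ci/scripts/test_llama.sh consumes, failing fast on anything unrecognized. The same selection, sketched as a standalone script (the DELEGATE value is an illustrative stand-in for the CI matrix input):

#!/bin/bash
DELEGATE="qnn"   # in CI this comes from matrix.delegate
if [[ "$DELEGATE" == "xnnpack" ]]; then
  DELEGATE_CONFIG="xnnpack+custom+qe"
elif [[ "$DELEGATE" == "qnn" ]]; then
  DELEGATE_CONFIG="qnn"
else
  echo "Unsupported delegate $DELEGATE"
  exit 1
fi
echo "DELEGATE_CONFIG=$DELEGATE_CONFIG"
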
16 changes: 3 additions & 13 deletions .github/workflows/android.yml
@@ -24,9 +24,6 @@ jobs:
build-llm-demo:
name: build-llm-demo
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-strategy:
-matrix:
-tokenizer: [bpe, tiktoken]
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-clang12-android
@@ -44,7 +41,7 @@
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
# Build LLM Demo for Android
-bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
+bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
upload-artifacts:
@@ -155,13 +152,6 @@ jobs:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
-strategy:
-matrix:
-# https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/LlamaDemo/README.md#alternative-2-build-from-local-machine
-# mentions that tiktoken is only for Llama3. So, we can export it later in another archive
-# like https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip when this is
-# updated to run Llama3
-tokenizer: [bpe]
with:
device-type: android
runner: linux.2xlarge
@@ -171,8 +161,8 @@
# This is the custom Android device pool that only includes Samsung Galaxy S2x
device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
# Uploaded to S3 from the previous job, the name of the app comes from the project itself
-android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug.apk
-android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug-androidTest.apk
+android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug.apk
+android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug-androidTest.apk
test-spec: https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml
# Among the input, this is the biggest file, so it is cached on AWS to make the test faster. Note that the file is deleted by AWS after 30
# days and the job will automatically re-upload the file when that happens.
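With the tokenizer matrix removed, uploaded artifacts live under a single llm_demo prefix rather than llm_demo_bpe or llm_demo_tiktoken. A sketch of fetching the relocated APK (the repository and run id are placeholders, not real values):

#!/bin/bash
REPO="pytorch/executorch"   # placeholder
RUN_ID="1234567890"         # placeholder
# old: .../artifact/llm_demo_${TOKENIZER}/app-debug.apk
# new: .../artifact/llm_demo/app-debug.apk
curl -fLO "https://gha-artifacts.s3.amazonaws.com/${REPO}/${RUN_ID}/artifact/llm_demo/app-debug.apk"
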
7 changes: 7 additions & 0 deletions CMakeLists.txt
@@ -637,6 +637,13 @@ if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/runner_util)
endif()

+if(EXECUTORCH_BUILD_PTHREADPOOL
+AND EXECUTORCH_BUILD_CPUINFO
+AND CMAKE_CXX_STANDARD GREATER_EQUAL 14
+)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
+endif()

if(EXECUTORCH_BUILD_PYBIND)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/pybind11)

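The new guard builds extension/threadpool only when EXECUTORCH_BUILD_PTHREADPOOL and EXECUTORCH_BUILD_CPUINFO are both enabled and CMAKE_CXX_STANDARD is at least 14. A hedged configure sketch satisfying all three conditions (paths and the chosen standard are illustrative; the target name is taken from the library lists elsewhere in this commit):

#!/bin/bash
cmake -S . -B cmake-out \
  -DCMAKE_CXX_STANDARD=17 \
  -DEXECUTORCH_BUILD_PTHREADPOOL=ON \
  -DEXECUTORCH_BUILD_CPUINFO=ON
cmake --build cmake-out --target extension_threadpool
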
3 changes: 3 additions & 0 deletions backends/qualcomm/runtime/QnnManager.cpp
@@ -8,6 +8,7 @@
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
#include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
#include <executorch/backends/qualcomm/runtime/Utils.h>
+#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
#include <algorithm>
#include <cstdlib>
@@ -281,6 +282,8 @@ Error QnnManager::Init() {
options_->backend_options()->backend_type());
backend_params_ptr_ = QnnBackendFactory().Create(
qnn_loaded_backend_, logger_.get(), qnn_context_blob_, options_);
+ET_CHECK_OR_RETURN_ERROR(
+backend_params_ptr_ != nullptr, Internal, "Failed to load Qnn backend.")
ET_CHECK_OR_RETURN_ERROR(
backend_params_ptr_->qnn_backend_ptr_->Configure() == Error::Ok,
Internal,
79 changes: 79 additions & 0 deletions backends/qualcomm/runtime/backends/QnnBackendCommon.cpp
@@ -53,6 +53,85 @@ Error QnnBackend::Configure() {
}
return Error::Ok;
}
+
+Error QnnBackend::VerifyQNNSDKVersion(
+const QnnExecuTorchBackendType backend_id) {
+const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
+
+Qnn_ApiVersion_t qnn_version = {QNN_VERSION_INIT};
+Qnn_ErrorHandle_t error =
+qnn_interface.qnn_backend_get_api_version(&qnn_version);
+if (error != QNN_SUCCESS) {
+QNN_EXECUTORCH_LOG_ERROR("Failed to get Qnn API version.");
+return Error::Internal;
+}
+
+Qnn_ApiVersion_t expected_version = {QNN_VERSION_INIT};
+expected_version.coreApiVersion.major = QNN_API_VERSION_MAJOR;
+expected_version.coreApiVersion.minor = QNN_API_VERSION_MINOR;
+expected_version.coreApiVersion.patch = QNN_API_VERSION_PATCH;
+expected_version.backendApiVersion = GetExpectedBackendVersion();
+const char* backend_type = EnumNameQnnExecuTorchBackendType(backend_id);
+
+Error status = VersionChecker(
+qnn_version.coreApiVersion, expected_version.coreApiVersion, "Qnn API");
+if (status == Error::Ok) {
+status = VersionChecker(
+qnn_version.backendApiVersion,
+expected_version.backendApiVersion,
+backend_type);
+}
+
+return status;
+}
+
+Error QnnBackend::VersionChecker(
+const Qnn_Version_t& qnn_version,
+const Qnn_Version_t& expected,
+const std::string& prefix) {
+if (qnn_version.major != expected.major) {
+QNN_EXECUTORCH_LOG_ERROR(
+"%s version %u.%u.%u is not supported. "
+"The minimum supported version is %u.%u.%u. Please make "
+"sure you have the correct backend library version.",
+prefix.c_str(),
+qnn_version.major,
+qnn_version.minor,
+qnn_version.patch,
+expected.major,
+expected.minor,
+expected.patch);
+return Error::Internal;
+}
+if (qnn_version.major == QNN_API_VERSION_MAJOR &&
+qnn_version.minor < expected.minor) {
+QNN_EXECUTORCH_LOG_WARN(
+"%s version %u.%u.%u is mismatched. "
+"The minimum supported version is %u.%u.%u. Please make "
+"sure you have the correct backend library version.",
+prefix.c_str(),
+qnn_version.major,
+qnn_version.minor,
+qnn_version.patch,
+expected.major,
+expected.minor,
+expected.patch);
+}
+if ((qnn_version.major == QNN_API_VERSION_MAJOR &&
+qnn_version.minor > expected.minor)) {
+QNN_EXECUTORCH_LOG_WARN(
+"%s version %u.%u.%u is used. "
+"The version is tested against %u.%u.%u.",
+prefix.c_str(),
+qnn_version.major,
+qnn_version.minor,
+qnn_version.patch,
+expected.major,
+expected.minor,
+expected.patch);
+}
+return Error::Ok;
+}
} // namespace qnn
} // namespace executor
} // namespace torch
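The policy above: a major-version mismatch against the expected SDK version is a hard error (Error::Internal), while a minor version older or newer than the tested one only logs a warning. Roughly the same decision tree in shell, as an illustrative approximation (not part of the commit):

#!/bin/bash
check_version() {
  local got_major=$1 got_minor=$2 want_major=$3 want_minor=$4
  if [[ $got_major -ne $want_major ]]; then
    echo "error: major version $got_major unsupported (want $want_major)" >&2
    return 1    # corresponds to Error::Internal
  elif [[ $got_minor -lt $want_minor ]]; then
    echo "warning: minor version $got_minor older than tested $want_minor" >&2
  elif [[ $got_minor -gt $want_minor ]]; then
    echo "warning: minor version $got_minor newer than tested $want_minor" >&2
  fi
  return 0      # corresponds to Error::Ok
}
check_version 2 14 2 12   # newer minor: warning only, still Ok
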
9 changes: 9 additions & 0 deletions backends/qualcomm/runtime/backends/QnnBackendCommon.h
@@ -13,8 +13,10 @@

#include <vector>

#include "HTP/QnnHtpCommon.h"
#include "QnnBackend.h"
#include "QnnCommon.h"
#include "QnnTypes.h"
namespace torch {
namespace executor {
namespace qnn {
@@ -43,7 +45,10 @@ class QnnBackend {
return handle_;
}

+Error VerifyQNNSDKVersion(const QnnExecuTorchBackendType backend_id);
+
protected:
+virtual Qnn_Version_t GetExpectedBackendVersion() const = 0;
virtual Error MakeConfig(std::vector<const QnnBackend_Config_t*>& config) {
return Error::Ok;
};
@@ -52,6 +57,10 @@ class QnnBackend {
Qnn_BackendHandle_t handle_;
const QnnImplementation& implementation_;
QnnLogger* logger_;
+Error VersionChecker(
+const Qnn_Version_t& qnn_version,
+const Qnn_Version_t& expected,
+const std::string& prefix);
};
} // namespace qnn
} // namespace executor
9 changes: 7 additions & 2 deletions backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
@@ -16,6 +16,7 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
const QnnExecuTorchContextBinary& qnn_context_blob,
const QnnExecuTorchOptions* options) {
auto backend_params = std::make_unique<BackendConfigParameters>();
+
switch (options->backend_options()->backend_type()) {
case QnnExecuTorchBackendType::kHtpBackend: {
auto htp_options = options->backend_options()->htp_options();
@@ -51,6 +52,7 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
}
backend_params->qnn_backend_ptr_ =
std::make_unique<HtpBackend>(implementation, logger);
+
backend_params->qnn_device_ptr_ = std::make_unique<HtpDevice>(
implementation, logger, options->soc_info(), htp_options);

@@ -72,7 +74,6 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
backend_params->qnn_mem_manager_ptr_ = std::make_unique<QnnMemManager>(
implementation, backend_params->qnn_context_ptr_.get());
backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED;
-return backend_params;
} break;
case QnnExecuTorchBackendType::kGpuBackend:
case QnnExecuTorchBackendType::kDspBackend:
@@ -81,7 +82,11 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
return nullptr;
}

-// should not reach here
+if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion(
+options->backend_options()->backend_type()) == Error::Ok) {
+return backend_params;
+}

return nullptr;
}
} // namespace qnn
10 changes: 10 additions & 0 deletions backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h
@@ -8,7 +8,9 @@
#pragma once

#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
+#include "HTP/QnnHtpCommon.h"
#include "HTP/QnnHtpProfile.h"
+#include "QnnTypes.h"
namespace torch {
namespace executor {
namespace qnn {
@@ -24,6 +26,14 @@ class HtpBackend : public QnnBackend {
event_type == QNN_HTP_PROFILE_EVENTTYPE_GRAPH_EXECUTE_ACCEL_TIME_CYCLE);
}

+Qnn_Version_t GetExpectedBackendVersion() const override {
+Qnn_Version_t backend_version;
+backend_version.major = QNN_HTP_API_VERSION_MAJOR;
+backend_version.minor = QNN_HTP_API_VERSION_MINOR;
+backend_version.patch = QNN_HTP_API_VERSION_PATCH;
+return backend_version;
+}

protected:
Error MakeConfig(std::vector<const QnnBackend_Config_t*>& config) override {
return Error::Ok;
1 change: 1 addition & 0 deletions backends/qualcomm/scripts/build.sh
@@ -4,6 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set -e
+set -o xtrace

if [[ -z ${QNN_SDK_ROOT} ]]; then
echo "Please export QNN_SDK_ROOT=/path/to/qnn_sdk"
1 change: 1 addition & 0 deletions backends/xnnpack/test/CMakeLists.txt
@@ -34,6 +34,7 @@ et_cxx_test(
SOURCES
${_test_srcs}
EXTRA_LIBS
+extension_threadpool
xnnpack_backend
XNNPACK
pthreadpool
2 changes: 2 additions & 0 deletions build/Test.cmake
@@ -25,9 +25,11 @@ find_package(executorch CONFIG REQUIRED)
enable_testing()
find_package(GTest CONFIG REQUIRED)

+target_link_options_shared_lib(cpuinfo)
target_link_options_shared_lib(extension_data_loader)
target_link_options_shared_lib(portable_kernels)
target_link_options_shared_lib(portable_ops_lib)
+target_link_options_shared_lib(pthreadpool)
target_link_options_shared_lib(quantized_ops_lib)

# Add code coverage flags to supported compilers
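For context, target_link_options_shared_lib is an ExecuTorch CMake helper; judging by its name and its use here, it applies whole-archive-style link options so that objects carrying static registrations (kernels, pthreadpool/cpuinfo hooks) are not dropped by the linker. That reading is an inference, since the diff does not show the helper's definition. Roughly the command-line effect for GNU ld (all names illustrative):

#!/bin/bash
g++ -o xnnpack_test test_main.o \
  -Wl,--whole-archive libpthreadpool.a libcpuinfo.a -Wl,--no-whole-archive \
  -lexecutorch
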
20 changes: 5 additions & 15 deletions build/build_android_llm_demo.sh
@@ -17,14 +17,8 @@ build_jar() {

build_android_native_library() {
ANDROID_ABI="$1"
TOKENIZER="$2"
ANDROID_NDK="${ANDROID_NDK:-/opt/ndk}"
CMAKE_OUT="cmake-out-android-${ANDROID_ABI}"
-if [[ $TOKENIZER == "tiktoken" ]]; then
-EXECUTORCH_USE_TIKTOKEN=ON
-else
-EXECUTORCH_USE_TIKTOKEN=OFF
-fi

cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
@@ -54,7 +48,6 @@ build_android_native_library() {
-DANDROID_ABI="$ANDROID_ABI" \
-DANDROID_PLATFORM=android-23 \
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
--DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -72,7 +65,6 @@ build_android_native_library() {
-DEXECUTORCH_ENABLE_LOGGING=ON \
-DEXECUTORCH_LOG_LEVEL=Info \
-DEXECUTORCH_BUILD_LLAMA_JNI=ON \
--DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DCMAKE_BUILD_TYPE=Release \
-B"${CMAKE_OUT}"/extension/android

@@ -107,9 +99,8 @@ build_android_llm_demo_app() {
}

collect_artifacts_to_be_uploaded() {
TOKENIZER="$1"
ARTIFACTS_DIR_NAME="$2"
DEMO_APP_DIR="${ARTIFACTS_DIR_NAME}/llm_demo_${TOKENIZER}"
ARTIFACTS_DIR_NAME="$1"
DEMO_APP_DIR="${ARTIFACTS_DIR_NAME}/llm_demo"
# The app directory is named using its build flavor as a suffix.
mkdir -p "${DEMO_APP_DIR}"
# Collect the app and its test suite
@@ -131,13 +122,12 @@ export BUILD_AAR_DIR
ANDROID_ABIS=("arm64-v8a" "x86_64")
export ANDROID_ABIS

TOKENIZER="${1:-bpe}"
ARTIFACTS_DIR_NAME="$2"
ARTIFACTS_DIR_NAME="$1"

build_jar
for ANDROID_ABI in "${ANDROID_ABIS[@]}"; do
-build_android_native_library ${ANDROID_ABI} ${TOKENIZER}
+build_android_native_library ${ANDROID_ABI}
done
build_aar
build_android_llm_demo_app
-collect_artifacts_to_be_uploaded ${TOKENIZER} ${ARTIFACTS_DIR_NAME}
+collect_artifacts_to_be_uploaded ${ARTIFACTS_DIR_NAME}
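With the tokenizer parameter gone, the script takes the artifacts directory as its only argument. Before and after, using the directory name from the workflows above:

#!/bin/bash
# before this commit: tokenizer flavor first, artifacts dir second
#   bash build/build_android_llm_demo.sh bpe artifacts-to-be-uploaded
# after this commit: a single argument
bash build/build_android_llm_demo.sh artifacts-to-be-uploaded
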
1 change: 1 addition & 0 deletions build/executorch-config.cmake
@@ -46,6 +46,7 @@ set(lib_list
extension_module
extension_module_static
extension_runner_util
+extension_threadpool
xnnpack_backend
XNNPACK
cpuinfo