2024-08-23 nightly release (6c26a87)
pytorchbot committed Aug 23, 2024
1 parent c35ce10 commit 28eef00
Showing 183 changed files with 1,643 additions and 615 deletions.
4 changes: 4 additions & 0 deletions .ci/docker/common/install_linter.sh
@@ -13,3 +13,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
# NB: Install all linter dependencies, the caching of lintrunner init could be
# done after Executorch becomes public
pip_install -r requirements-lintrunner.txt

# Install google-java-format
curl -L --retry 3 https://github.com/google/google-java-format/releases/download/v1.23.0/google-java-format_linux-x86-64 > /opt/google-java-format
chmod +x /opt/google-java-format
17 changes: 17 additions & 0 deletions .github/workflows/lint.yml
@@ -54,3 +54,20 @@ jobs:
lint.json || true
exit $RC
android-java-format:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-linter
fetch-depth: 0
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
FILES_NEEDS_FORMAT=$(/opt/google-java-format -n extension/android/src/main/java/org/pytorch/executorch/*.java \
examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/*.java \
examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/*.java)
if [ -n "$FILES_NEEDS_FORMAT" ]; then
echo "Warning: The following files need formatting. Please use google-java-format."
echo "$FILES_NEEDS_FORMAT"
exit 1
fi
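
The new job fails whenever google-java-format's dry run lists any file. A minimal local sketch of the same check in Python, assuming the binary sits at /opt/google-java-format as installed above (the Java file path is illustrative):

import subprocess

# -n (dry run) prints the paths of files whose formatting would change.
result = subprocess.run(
    [
        "/opt/google-java-format",
        "-n",
        "extension/android/src/main/java/org/pytorch/executorch/Module.java",
    ],
    capture_output=True,
    text=True,
    check=True,
)
if result.stdout.strip():
    print("Warning: the following files need formatting with google-java-format:")
    print(result.stdout)
    raise SystemExit(1)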
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -617,7 +617,7 @@ if(EXECUTORCH_BUILD_SDK)
ON
CACHE BOOL "EXECUTORCH_BUILD_EXTENSION_DATA_LOADER" FORCE
)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/devtools)
endif()

if(EXECUTORCH_BUILD_EXTENSION_APPLE)
@@ -676,7 +676,7 @@ if(EXECUTORCH_BUILD_PYBIND)
endif()

if(NOT EXECUTORCH_BUILD_SDK)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/devtools)
endif()

# find pytorch lib, to allow pybind to take at::Tensor as input/output
2 changes: 1 addition & 1 deletion README.md
@@ -93,7 +93,7 @@ tools.
├── schema # ExecuTorch PTE file format flatbuffer schemas.
├── scripts # Utility scripts for size management, dependency management, etc.
├── sdk # Model profiling, debugging, and introspection.
├── devtools # Model profiling, debugging, and introspection.
├── shim # Compatibility layer between OSS and Internal builds
├── test # Broad scoped end-to-end tests.
├── third-party # Third-party dependencies.
4 changes: 2 additions & 2 deletions backends/apple/mps/TARGETS
@@ -95,8 +95,8 @@ runtime.python_test(
"//executorch/examples/models:models",
"//executorch/exir/tests:models",
"//executorch/extension/export_util:export_util",
"//executorch/sdk:lib",
"//executorch/sdk/bundled_program/serialize:lib",
"//executorch/devtools:lib",
"//executorch/devtools/bundled_program/serialize:lib",
"fbsource//third-party/pypi/pytest:pytest",
],
)
2 changes: 1 addition & 1 deletion backends/apple/mps/targets.bzl
@@ -47,7 +47,7 @@ def define_common_targets(is_xplat = False, platforms = []):
"//executorch/exir/backend:backend_lib",
"//executorch/extension/pybindings/...",
"//executorch/runtime/backend/...",
"//executorch/sdk/runners/...",
"//executorch/devtools/runners/...",
"//executorch/test/...",
"@EXECUTORCH_CLIENTS",
],
10 changes: 5 additions & 5 deletions backends/apple/mps/test/test_mps_utils.py
@@ -12,16 +12,16 @@
import torch
from executorch.backends.apple.mps import MPSBackend
from executorch.backends.apple.mps.partition import MPSPartitioner
from executorch.devtools import BundledProgram
from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite
from executorch.devtools.bundled_program.serialize import (
serialize_from_bundled_program_to_flatbuffer,
)
from executorch.exir import EdgeCompileConfig, ExirExportedProgram, to_edge
from executorch.exir.backend.backend_api import to_backend
from executorch.exir.backend.backend_details import CompileSpec
from executorch.exir.capture._config import ExecutorchBackendConfig
from executorch.extension.export_util.utils import export_to_edge
from executorch.sdk import BundledProgram
from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite
from executorch.sdk.bundled_program.serialize import (
serialize_from_bundled_program_to_flatbuffer,
)
from torch.export import export

# Config for Capturing the weights, will be moved in the future
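
These imports moved from executorch.sdk to executorch.devtools. A minimal bundled-program sketch under the new paths (the AddOne module is illustrative; the API follows the imports above):

import torch
from executorch.devtools import BundledProgram
from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite
from executorch.devtools.bundled_program.serialize import (
    serialize_from_bundled_program_to_flatbuffer,
)
from executorch.exir import to_edge
from torch.export import export


class AddOne(torch.nn.Module):
    def forward(self, x):
        return x + 1


model = AddOne()
example = (torch.randn(2),)
exec_prog = to_edge(export(model, example)).to_executorch()

# One test suite per method; the runtime replays the cases and checks outputs.
suites = [
    MethodTestSuite(
        method_name="forward",
        test_cases=[MethodTestCase(inputs=example, expected_outputs=(model(*example),))],
    )
]
bundled = BundledProgram(exec_prog, suites)
flatbuffer = serialize_from_bundled_program_to_flatbuffer(bundled)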
18 changes: 13 additions & 5 deletions backends/cadence/aot/compiler.py
@@ -18,12 +18,13 @@
ReplaceLogicalNotBooleanWhereWithWherePass,
ReplacePT2DequantWithCadenceDequantPass,
ReplacePT2QuantWithCadenceQuantPass,
ReplaceSafeSoftmaxWithSoftmax,
ReplaceScalarTensorWithFullPass,
ReplaceSqueezeAndUnsqueezeWithViewPass,
)
from executorch.backends.cadence.aot.quantizer.fusion_pass import QuantFusion
from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
from executorch.backends.cadence.aot.utils import model_is_quantized
from executorch.backends.cadence.aot.utils import model_gm_has_SDPA, model_is_quantized
from executorch.backends.transforms.decompose_sdpa import (
DecomposeScaledDotProductAttention,
)
@@ -57,13 +57,20 @@ def convert_pt2(
"""

# Export with dynamo
model_exp = capture_pre_autograd_graph(model, inputs)
model_gm = capture_pre_autograd_graph(model, inputs)

# Decompose SDPA
DecomposeScaledDotProductAttention(False)(model_exp)
if model_gm_has_SDPA(model_gm):
# Decompose SDPA
DecomposeScaledDotProductAttention(False)(model_gm)

# Swap _safe_softmax with _softmax (see https://github.com/pytorch/pytorch/pull/133882
# for details).
result = ReplaceSafeSoftmaxWithSoftmax()(model_gm)
assert result is not None
model_gm = result.graph_module

# Prepare
prepared_model = prepare_pt2e(model_exp, quantizer)
prepared_model = prepare_pt2e(model_gm, quantizer)

# Calibrate
prepared_model(*inputs)
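
A hypothetical invocation sketch of convert_pt2, with the parameter order inferred from the hunk above and a default-constructed CadenceQuantizer assumed; the softmax model is illustrative (capture_pre_autograd_graph lowers softmax to _safe_softmax, which the new pass then swaps back):

import torch
from executorch.backends.cadence.aot.compiler import convert_pt2
from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer


class SoftmaxModel(torch.nn.Module):
    def forward(self, x):
        return torch.nn.functional.softmax(x, dim=-1)


model = SoftmaxModel()
inputs = (torch.randn(1, 8),)
quantizer = CadenceQuantizer()  # assumed default-constructible
converted = convert_pt2(model, inputs, quantizer)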
26 changes: 26 additions & 0 deletions backends/cadence/aot/passes.py
@@ -266,3 +266,29 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
result = SpecPropPass()(graph_module)
assert result is not None
return result


class ReplaceSafeSoftmaxWithSoftmax(ExportPass):
"""
Replace _safe_softmax with _softmax
"""

def call_operator(
self,
op, # pyre-ignore
args: tuple[Argument, ...],
kwargs: dict[str, Argument],
meta: NodeMetadata,
) -> ProxyValue:
if op != torch.ops.aten._safe_softmax.default:
return super().call_operator(op, args, kwargs, meta)

# Add False for the half_to_float argument of softmax
softmax_args = list(args) + [False]

return super().call_operator(
torch.ops.aten._softmax.default,
tuple(softmax_args),
kwargs,
meta,
)
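
The new pass is applied like the other ExportPasses in this file; compiler.py above uses exactly this call pattern:

# model_gm: a torch.fx.GraphModule from capture_pre_autograd_graph.
result = ReplaceSafeSoftmaxWithSoftmax()(model_gm)
assert result is not None
model_gm = result.graph_module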
8 changes: 8 additions & 0 deletions backends/cadence/aot/utils.py
@@ -177,3 +177,11 @@ def print_ops_info(
tablefmt="outline",
)
)


def model_gm_has_SDPA(model_gm: torch.fx.GraphModule) -> bool:
for node in model_gm.graph.nodes:
if node.op == "call_function":
if node.target == torch.ops.aten.scaled_dot_product_attention.default:
return True
return False
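
compiler.py above uses this helper to skip the SDPA decomposition when the captured graph contains no scaled_dot_product_attention node; a sketch of that call site (imports as shown in the compiler.py hunk):

from executorch.backends.cadence.aot.utils import model_gm_has_SDPA
from executorch.backends.transforms.decompose_sdpa import (
    DecomposeScaledDotProductAttention,
)

# model_gm: torch.fx.GraphModule from capture_pre_autograd_graph (illustrative).
if model_gm_has_SDPA(model_gm):
    DecomposeScaledDotProductAttention(False)(model_gm)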
4 changes: 2 additions & 2 deletions backends/cadence/cadence_runner/cadence_runner.cpp
@@ -22,13 +22,13 @@

#include <gflags/gflags.h>

#include <executorch/devtools/bundled_program/bundled_program.h>
#include <executorch/devtools/etdump/etdump_flatcc.h>
#include <executorch/extension/data_loader/buffer_data_loader.h>
#include <executorch/runtime/executor/method.h>
#include <executorch/runtime/executor/program.h>
#include <executorch/runtime/platform/log.h>
#include <executorch/runtime/platform/runtime.h>
#include <executorch/sdk/bundled_program/bundled_program.h>
#include <executorch/sdk/etdump/etdump_flatcc.h>

static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4MB

8 changes: 4 additions & 4 deletions backends/cadence/cadence_runner/targets.bzl
@@ -19,12 +19,12 @@ def define_common_targets():
visibility = ["PUBLIC"],
deps = [
"fbsource//arvr/third-party/gflags:gflags",
"fbsource//xplat/executorch/kernels/portable:generated_lib",
"fbsource//xplat/executorch/runtime/executor:program",
"fbsource//xplat/executorch/devtools/etdump:etdump_flatcc",
"fbsource//xplat/executorch/devtools/bundled_program:runtime",
"fbsource//xplat/executorch/extension/data_loader:file_data_loader",
"fbsource//xplat/executorch/extension/data_loader:buffer_data_loader",
"fbsource//xplat/executorch/kernels/portable:generated_lib",
"fbsource//xplat/executorch/runtime/executor:program",
"fbsource//xplat/executorch/util:util",
"fbsource//xplat/executorch/sdk/etdump:etdump_flatcc",
"fbsource//xplat/executorch/sdk/bundled_program:runtime",
],
)
6 changes: 3 additions & 3 deletions backends/cadence/runtime/TARGETS
@@ -13,9 +13,9 @@ python_library(
typing = True,
deps = [
"//caffe2:torch",
"//executorch/devtools/bundled_program:config",
"//executorch/devtools/bundled_program:core",
"//executorch/devtools/bundled_program/serialize:lib",
"//executorch/exir:lib",
"//executorch/sdk/bundled_program:config",
"//executorch/sdk/bundled_program:core",
"//executorch/sdk/bundled_program/serialize:lib",
],
)
9 changes: 4 additions & 5 deletions backends/cadence/runtime/executor.py
@@ -18,14 +18,13 @@

import torch

from executorch.exir import ExecutorchProgram, ExecutorchProgramManager

from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite
from executorch.sdk.bundled_program.core import BundledProgram
from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite
from executorch.devtools.bundled_program.core import BundledProgram

from executorch.sdk.bundled_program.serialize import (
from executorch.devtools.bundled_program.serialize import (
serialize_from_bundled_program_to_flatbuffer,
)
from executorch.exir import ExecutorchProgram, ExecutorchProgramManager

# If quiet is true, suppress the printing of stdout and stderr output.
quiet = False
Expand Down
2 changes: 1 addition & 1 deletion backends/cadence/runtime/runtime.py
@@ -18,10 +18,10 @@

from executorch.backends.cadence.runtime import utils
from executorch.backends.cadence.runtime.executor import Executor
from executorch.devtools import Inspector
from executorch.exir import ExecutorchProgramManager
from executorch.exir._serialize._program import deserialize_pte_binary
from executorch.exir.schema import DataLocation
from executorch.sdk import Inspector

from numpy import ndarray

4 changes: 2 additions & 2 deletions backends/qualcomm/tests/utils.py
@@ -27,6 +27,8 @@
QcomChipset,
)
from executorch.backends.qualcomm.utils.utils import capture_program
from executorch.devtools import generate_etrecord
from executorch.devtools.inspector import Inspector
from executorch.examples.qualcomm.utils import (
generate_inputs,
make_output_dir,
@@ -40,8 +42,6 @@
from executorch.exir.pass_base import ExportPass
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
from executorch.exir.program._program import ExecutorchProgram
from executorch.sdk import generate_etrecord
from executorch.sdk.inspector import Inspector
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e


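
With the import move, the profiling flow in these tests follows the usual devtools pattern; a minimal sketch, assuming the standard generate_etrecord/Inspector signatures (file names and program variables are illustrative):

from executorch.devtools import generate_etrecord, Inspector

# edge_prog_copy: the edge program captured before delegation;
# exec_prog: the final ExecutorchProgramManager (both illustrative).
generate_etrecord("etrecord.bin", edge_prog_copy, exec_prog)

# After the model runs on device with ETDump collection enabled:
inspector = Inspector(etdump_path="etdump.etdp", etrecord="etrecord.bin")
inspector.print_data_tabular()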
6 changes: 5 additions & 1 deletion backends/vulkan/partitioner/supported_ops.py
@@ -8,7 +8,10 @@

import operator

from executorch.backends.vulkan.passes.custom_ops_defs import grid_priors_op # noqa
from executorch.backends.vulkan.passes.custom_ops_defs import ( # noqa
conv_with_clamp_op,
grid_priors_op,
)

from executorch.exir.dialects._ops import ops as exir_ops

@@ -84,6 +87,7 @@ def __contains__(self, op):

CONVOLUTION_OPS = [
exir_ops.edge.aten.convolution.default,
exir_ops.edge.et_vk.conv_with_clamp.default,
]

REDUCTION_OPS = [
37 changes: 37 additions & 0 deletions backends/vulkan/passes/custom_ops_defs.py
@@ -48,6 +48,43 @@ def conv_with_clamp_impl(
conv_with_clamp_op = getattr(getattr(torch.ops, namespace), name)


def conv_with_clamp_out_impl(
input,
weight,
bias=None,
stride=1,
padding=0,
dilation=1,
transposed=False,
output_padding=0,
groups=1,
output_min=-float("inf"),
output_max=float("inf"),
out=None,
):
out = conv_with_clamp_impl(
input,
weight,
bias,
stride,
padding,
dilation,
transposed,
output_padding,
groups,
output_min,
output_max,
)
return out


name = "conv_with_clamp.out"
lib.define(
f"{name}(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, Scalar? output_min, Scalar? output_max, *, Tensor(a!) out) -> Tensor(a!)"
)
lib.impl(name, conv_with_clamp_out_impl, "CompositeExplicitAutograd")
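
A hypothetical call sketch for the new out variant, assuming the library namespace is et_vk (consistent with exir_ops.edge.et_vk.conv_with_clamp in supported_ops.py above) and a ReLU6-style clamp:

import torch

x = torch.randn(1, 3, 8, 8)
w = torch.randn(4, 3, 3, 3)
out = torch.empty(1, 4, 6, 6)  # 3x3 kernel, stride 1, no padding -> 6x6
# Note: the Python reference impl above returns a fresh tensor rather than
# writing into the preallocated buffer, so capture the return value.
y = torch.ops.et_vk.conv_with_clamp.out(
    x, w, None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1, 0.0, 6.0, out=out
)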


# The dimension of x should be larger than 1
def grid_priors_impl(
x,
(Diff truncated: remaining changed files not shown.)