From 28eef00411dc63e5d0d5b95d26a965c8acf5c408 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Fri, 23 Aug 2024 11:35:05 +0000 Subject: [PATCH] 2024-08-23 nightly release (6c26a872323e13c723e1544282fafb51f880742b) --- .ci/docker/common/install_linter.sh | 4 + .github/workflows/lint.yml | 17 ++ CMakeLists.txt | 4 +- README.md | 2 +- backends/apple/mps/TARGETS | 4 +- backends/apple/mps/targets.bzl | 2 +- backends/apple/mps/test/test_mps_utils.py | 10 +- backends/cadence/aot/compiler.py | 18 +- backends/cadence/aot/passes.py | 26 +++ backends/cadence/aot/utils.py | 8 + .../cadence/cadence_runner/cadence_runner.cpp | 4 +- backends/cadence/cadence_runner/targets.bzl | 8 +- backends/cadence/runtime/TARGETS | 6 +- backends/cadence/runtime/executor.py | 9 +- backends/cadence/runtime/runtime.py | 2 +- backends/qualcomm/tests/utils.py | 4 +- backends/vulkan/partitioner/supported_ops.py | 6 +- backends/vulkan/passes/custom_ops_defs.py | 37 ++++ .../vulkan/runtime/api/containers/Tensor.cpp | 204 ++++++++++++------ .../vulkan/runtime/api/containers/Tensor.h | 88 ++++++-- .../vulkan/runtime/graph/ComputeGraph.cpp | 7 + backends/vulkan/runtime/graph/ComputeGraph.h | 9 +- .../vulkan/runtime/graph/ops/ExecuteNode.cpp | 15 ++ .../vulkan/runtime/graph/ops/ExecuteNode.h | 16 +- .../runtime/graph/ops/impl/Convolution.cpp | 1 + backends/vulkan/runtime/vk_api/Shader.h | 8 +- backends/vulkan/test/test_vulkan_delegate.py | 36 ++++ backends/vulkan/test/utils/test_utils.cpp | 6 + backends/vulkan/test/utils/test_utils.h | 6 + .../vulkan/test/vulkan_compute_api_test.cpp | 105 +++++++-- backends/xnnpack/test/TARGETS | 6 +- backends/xnnpack/test/test_xnnpack_utils.py | 12 +- {sdk => devtools}/CMakeLists.txt | 20 +- {sdk => devtools}/TARGETS | 6 +- {sdk => devtools}/__init__.py | 8 +- {sdk => devtools}/backend_debug/TARGETS | 0 {sdk => devtools}/backend_debug/__init__.py | 2 +- .../backend_debug/delegation_info.py | 0 {sdk => devtools}/backend_debug/tests/TARGETS | 2 +- .../tests/test_delegation_info.py | 2 +- {sdk => devtools}/bundled_program/TARGETS | 4 +- .../bundled_program/bundled_program.cpp | 4 +- .../bundled_program/bundled_program.h | 0 {sdk => devtools}/bundled_program/config.py | 0 {sdk => devtools}/bundled_program/core.py | 10 +- .../bundled_program/schema/README.md | 0 .../bundled_program/schema/TARGETS | 4 +- .../bundled_program/schema/__init__.py | 0 .../schema/bundled_program_schema.fbs | 0 .../schema/bundled_program_schema.py | 0 .../bundled_program/schema/scalar_type.fbs | 0 .../bundled_program/schema/targets.bzl | 6 +- .../bundled_program/schema/test/TARGETS | 0 .../schema/test/test_schema.py | 4 +- .../bundled_program/serialize/TARGETS | 8 +- .../bundled_program/serialize/__init__.py | 4 +- .../bundled_program/serialize/test/TARGETS | 7 +- .../serialize/test/test_serialize.py | 8 +- {sdk => devtools}/bundled_program/targets.bzl | 2 +- .../bundled_program/test/TARGETS | 21 +- .../bundled_program/test/test_bundle_data.py | 10 +- .../bundled_program/test/test_config.py | 6 +- .../bundled_program/test/test_end2end.py | 8 +- .../bundled_program/util/TARGETS | 4 +- .../bundled_program/util/test_util.py | 6 +- {sdk => devtools}/bundled_program/version.py | 0 {sdk => devtools}/debug_format/TARGETS | 0 {sdk => devtools}/debug_format/base_schema.py | 0 {sdk => devtools}/debug_format/et_schema.py | 2 +- {sdk => devtools}/etdump/TARGETS | 6 +- {sdk => devtools}/etdump/emitter.cpp | 2 +- {sdk => devtools}/etdump/emitter.h | 2 +- {sdk => devtools}/etdump/etdump_flatcc.cpp | 8 +- {sdk => devtools}/etdump/etdump_flatcc.h | 0 .../etdump/etdump_schema_flatcc.fbs | 0 {sdk => devtools}/etdump/scalar_type.fbs | 0 {sdk => devtools}/etdump/schema_flatcc.py | 2 +- {sdk => devtools}/etdump/serialize.py | 2 +- {sdk => devtools}/etdump/targets.bzl | 0 {sdk => devtools}/etdump/tests/CMakeLists.txt | 0 {sdk => devtools}/etdump/tests/TARGETS | 4 +- .../etdump/tests/etdump_test.cpp | 6 +- .../etdump/tests/serialize_test.py | 6 +- {sdk => devtools}/etdump/tests/targets.bzl | 4 +- {sdk => devtools}/etrecord/TARGETS | 4 +- {sdk => devtools}/etrecord/__init__.py | 2 +- {sdk => devtools}/etrecord/_etrecord.py | 6 +- {sdk => devtools}/etrecord/tests/TARGETS | 12 +- .../etrecord/tests/etrecord_test.py | 10 +- {sdk => devtools}/inspector/TARGETS | 18 +- {sdk => devtools}/inspector/__init__.py | 9 +- {sdk => devtools}/inspector/_inspector.py | 16 +- .../inspector/_inspector_utils.py | 12 +- {sdk => devtools}/inspector/inspector_cli.py | 4 +- devtools/inspector/tests/TARGETS | 41 ++++ .../inspector/tests/event_blocks_test.py | 8 +- .../inspector/tests/inspector_test.py | 23 +- .../inspector/tests/inspector_utils_test.py | 12 +- {sdk => devtools}/size_analysis_tool/TARGETS | 8 +- .../size_analysis_tool/size_analysis_tool.py | 2 +- .../size_analysis_tool_test.py | 6 +- {sdk => devtools}/targets.bzl | 0 docs/source/extension-module.md | 2 +- docs/source/llm/getting-started.md | 6 +- docs/source/sdk-bundled-io.md | 38 ++-- docs/source/sdk-debugging.md | 4 +- docs/source/sdk-etdump.md | 2 +- docs/source/sdk-etrecord.rst | 2 +- docs/source/sdk-inspector.rst | 18 +- .../sdk-integration-tutorial.py | 18 +- .../website/docs/tutorials/bundled_program.md | 2 +- examples/apple/coreml/executor_runner/main.mm | 2 +- .../coreml/scripts/build_executor_runner.sh | 2 +- examples/apple/coreml/scripts/export.py | 2 +- .../apple/coreml/scripts/inspector_cli.py | 4 +- .../apple/coreml/scripts/inspector_utils.py | 15 +- examples/apple/mps/CMakeLists.txt | 4 +- .../executor_runner/mps_executor_runner.mm | 4 +- .../apple/mps/executor_runner/targets.bzl | 4 +- examples/apple/mps/scripts/mps_example.py | 10 +- .../LLaMA/LLaMA.xcodeproj/project.pbxproj | 4 +- .../cross_attention/cross_attention_mask.cpp | 169 +++++++++++++++ .../cross_attention/cross_attention_mask.h | 71 ++++++ .../cross_attention_mask_test.cpp | 71 ++++++ .../flamingo/cross_attention/targets.bzl | 25 +++ examples/models/llama2/TARGETS | 2 +- examples/models/llama2/eval_llama.py | 2 + examples/models/llama2/export_llama_lib.py | 14 +- examples/models/llama2/llama_transformer.py | 9 + examples/models/llama2/model.py | 2 + examples/models/llava/runner/llava_runner.cpp | 2 + examples/models/llava/runner/llava_runner.h | 3 +- .../executor_runner/qnn_executor_runner.cpp | 2 +- examples/qualcomm/scripts/export_example.py | 2 +- examples/sdk/CMakeLists.txt | 2 +- examples/sdk/README.md | 4 +- .../sdk/scripts/export_bundled_program.py | 12 +- examples/sdk/scripts/gen_sample_etrecord.py | 2 +- .../sdk_example_runner/sdk_example_runner.cpp | 4 +- examples/sdk/sdk_example_runner/targets.bzl | 4 +- examples/xnnpack/aot_compiler.py | 2 +- examples/xnnpack/targets.bzl | 2 +- exir/_serialize/TARGETS | 12 +- exir/emit/_emit_program.py | 27 +++ exir/tests/test_joint_graph.py | 20 ++ extension/llm/custom_ops/op_sdpa.cpp | 4 +- extension/llm/custom_ops/op_sdpa_test.cpp | 15 +- .../custom_ops/op_sdpa_with_kv_cache_test.cpp | 9 +- .../llm/custom_ops/op_tile_crop_test.cpp | 2 +- extension/llm/runner/image.h | 16 +- extension/llm/runner/image_prefiller.h | 24 ++- extension/llm/runner/metadata_util.h | 15 +- extension/llm/runner/multimodal_runner.h | 21 +- extension/llm/runner/stats.h | 26 ++- extension/llm/runner/text_decoder_runner.cpp | 23 +- extension/llm/runner/text_decoder_runner.h | 24 ++- extension/llm/runner/text_prefiller.cpp | 25 ++- extension/llm/runner/text_prefiller.h | 18 +- extension/llm/runner/text_token_generator.h | 26 ++- extension/llm/runner/util.h | 17 +- extension/llm/sampler/sampler.cpp | 10 +- extension/llm/sampler/sampler.h | 15 +- extension/llm/sampler/test/test_sampler.cpp | 17 +- extension/llm/tokenizer/base64.h | 17 +- extension/llm/tokenizer/bpe_tokenizer.cpp | 13 +- extension/llm/tokenizer/bpe_tokenizer.h | 25 ++- .../llm/tokenizer/test/test_bpe_tokenizer.cpp | 11 +- .../llm/tokenizer/test/test_tiktoken.cpp | 11 +- extension/llm/tokenizer/tiktoken.cpp | 13 +- extension/llm/tokenizer/tiktoken.h | 27 ++- extension/llm/tokenizer/tokenizer.h | 32 ++- extension/pybindings/pybindings.cpp | 6 +- .../training/test/training_loop_test.cpp | 2 +- pytest.ini | 3 +- runtime/executor/test/targets.bzl | 4 +- schema/targets.bzl | 2 +- sdk/inspector/tests/TARGETS | 40 ---- setup.py | 12 +- .../extension/pybindings/pybindings.bzl | 12 +- test/end2end/TARGETS | 12 +- .../generate_linear_out_bundled_program.py | 10 +- test/models/targets.bzl | 6 +- test/run_oss_cpp_tests.sh | 2 +- 183 files changed, 1643 insertions(+), 615 deletions(-) rename {sdk => devtools}/CMakeLists.txt (89%) rename {sdk => devtools}/TARGETS (54%) rename {sdk => devtools}/__init__.py (57%) rename {sdk => devtools}/backend_debug/TARGETS (100%) rename {sdk => devtools}/backend_debug/__init__.py (83%) rename {sdk => devtools}/backend_debug/delegation_info.py (100%) rename {sdk => devtools}/backend_debug/tests/TARGETS (86%) rename {sdk => devtools}/backend_debug/tests/test_delegation_info.py (96%) rename {sdk => devtools}/bundled_program/TARGETS (88%) rename {sdk => devtools}/bundled_program/bundled_program.cpp (98%) rename {sdk => devtools}/bundled_program/bundled_program.h (100%) rename {sdk => devtools}/bundled_program/config.py (100%) rename {sdk => devtools}/bundled_program/core.py (98%) rename {sdk => devtools}/bundled_program/schema/README.md (100%) rename {sdk => devtools}/bundled_program/schema/TARGETS (84%) rename {sdk => devtools}/bundled_program/schema/__init__.py (100%) rename {sdk => devtools}/bundled_program/schema/bundled_program_schema.fbs (100%) rename {sdk => devtools}/bundled_program/schema/bundled_program_schema.py (100%) rename {sdk => devtools}/bundled_program/schema/scalar_type.fbs (100%) rename {sdk => devtools}/bundled_program/schema/targets.bzl (93%) rename {sdk => devtools}/bundled_program/schema/test/TARGETS (100%) rename {sdk => devtools}/bundled_program/schema/test/test_schema.py (79%) rename {sdk => devtools}/bundled_program/serialize/TARGETS (76%) rename {sdk => devtools}/bundled_program/serialize/__init__.py (97%) rename {sdk => devtools}/bundled_program/serialize/test/TARGETS (51%) rename {sdk => devtools}/bundled_program/serialize/test/test_serialize.py (82%) rename {sdk => devtools}/bundled_program/targets.bzl (91%) rename {sdk => devtools}/bundled_program/test/TARGETS (68%) rename {sdk => devtools}/bundled_program/test/test_bundle_data.py (93%) rename {sdk => devtools}/bundled_program/test/test_config.py (97%) rename {sdk => devtools}/bundled_program/test/test_end2end.py (88%) rename {sdk => devtools}/bundled_program/util/TARGETS (68%) rename {sdk => devtools}/bundled_program/util/test_util.py (99%) rename {sdk => devtools}/bundled_program/version.py (100%) rename {sdk => devtools}/debug_format/TARGETS (100%) rename {sdk => devtools}/debug_format/base_schema.py (100%) rename {sdk => devtools}/debug_format/et_schema.py (99%) rename {sdk => devtools}/etdump/TARGETS (81%) rename {sdk => devtools}/etdump/emitter.cpp (98%) rename {sdk => devtools}/etdump/emitter.h (92%) rename {sdk => devtools}/etdump/etdump_flatcc.cpp (98%) rename {sdk => devtools}/etdump/etdump_flatcc.h (100%) rename {sdk => devtools}/etdump/etdump_schema_flatcc.fbs (100%) rename {sdk => devtools}/etdump/scalar_type.fbs (100%) rename {sdk => devtools}/etdump/schema_flatcc.py (97%) rename {sdk => devtools}/etdump/serialize.py (98%) rename {sdk => devtools}/etdump/targets.bzl (100%) rename {sdk => devtools}/etdump/tests/CMakeLists.txt (100%) rename {sdk => devtools}/etdump/tests/TARGETS (75%) rename {sdk => devtools}/etdump/tests/etdump_test.cpp (99%) rename {sdk => devtools}/etdump/tests/serialize_test.py (97%) rename {sdk => devtools}/etdump/tests/targets.bzl (82%) rename {sdk => devtools}/etrecord/TARGETS (71%) rename {sdk => devtools}/etrecord/__init__.py (86%) rename {sdk => devtools}/etrecord/_etrecord.py (98%) rename {sdk => devtools}/etrecord/tests/TARGETS (64%) rename {sdk => devtools}/etrecord/tests/etrecord_test.py (96%) rename {sdk => devtools}/inspector/TARGETS (70%) rename {sdk => devtools}/inspector/__init__.py (60%) rename {sdk => devtools}/inspector/_inspector.py (99%) rename {sdk => devtools}/inspector/_inspector_utils.py (97%) rename {sdk => devtools}/inspector/inspector_cli.py (93%) create mode 100644 devtools/inspector/tests/TARGETS rename {sdk => devtools}/inspector/tests/event_blocks_test.py (98%) rename {sdk => devtools}/inspector/tests/inspector_test.py (97%) rename {sdk => devtools}/inspector/tests/inspector_utils_test.py (94%) rename {sdk => devtools}/size_analysis_tool/TARGETS (86%) rename {sdk => devtools}/size_analysis_tool/size_analysis_tool.py (99%) rename {sdk => devtools}/size_analysis_tool/size_analysis_tool_test.py (98%) rename {sdk => devtools}/targets.bzl (100%) create mode 100644 examples/models/flamingo/cross_attention/cross_attention_mask.cpp create mode 100644 examples/models/flamingo/cross_attention/cross_attention_mask.h create mode 100644 examples/models/flamingo/cross_attention/cross_attention_mask_test.cpp create mode 100644 examples/models/flamingo/cross_attention/targets.bzl delete mode 100644 sdk/inspector/tests/TARGETS diff --git a/.ci/docker/common/install_linter.sh b/.ci/docker/common/install_linter.sh index 4a796a72d5..d262176e49 100755 --- a/.ci/docker/common/install_linter.sh +++ b/.ci/docker/common/install_linter.sh @@ -13,3 +13,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" # NB: Install all linter dependencies, the caching of lintrunner init could be # done after Executorch becomes public pip_install -r requirements-lintrunner.txt + +# Install google-java-format +curl -L --retry 3 https://github.com/google/google-java-format/releases/download/v1.23.0/google-java-format_linux-x86-64 > /opt/google-java-format +chmod +x /opt/google-java-format diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 7cb2cf69b8..ea068f65e1 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -54,3 +54,20 @@ jobs: lint.json || true exit $RC + + android-java-format: + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-linter + fetch-depth: 0 + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + script: | + FILES_NEEDS_FORMAT=$(/opt/google-java-format -n extension/android/src/main/java/org/pytorch/executorch/*.java \ + examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/*.java \ + examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/*.java) + if [ -n "$FILES_NEEDS_FORMAT" ]; then + echo "Warning: The following files need formatting. Please use google-java-format." + echo "$FILES_NEEDS_FORMAT" + exit 1 + fi diff --git a/CMakeLists.txt b/CMakeLists.txt index afb0437fae..b5a5b59235 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -617,7 +617,7 @@ if(EXECUTORCH_BUILD_SDK) ON CACHE BOOL "EXECUTORCH_BUILD_EXTENSION_DATA_LOADER" FORCE ) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/devtools) endif() if(EXECUTORCH_BUILD_EXTENSION_APPLE) @@ -676,7 +676,7 @@ if(EXECUTORCH_BUILD_PYBIND) endif() if(NOT EXECUTORCH_BUILD_SDK) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/devtools) endif() # find pytorch lib, to allow pybind to take at::Tensor as input/output diff --git a/README.md b/README.md index c4e6e0caf7..914eab472e 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ tools. ├── schema # ExecuTorch PTE file format flatbuffer schemas. ├── scripts # Utility scripts for size management, dependency management, etc. -├── sdk # Model profiling, debugging, and introspection. +├── devtools # Model profiling, debugging, and introspection. ├── shim # Compatibility layer between OSS and Internal builds ├── test # Broad scoped end-to-end tests. ├── third-party # Third-party dependencies. diff --git a/backends/apple/mps/TARGETS b/backends/apple/mps/TARGETS index b8ab3427a9..1ab92b3fca 100644 --- a/backends/apple/mps/TARGETS +++ b/backends/apple/mps/TARGETS @@ -95,8 +95,8 @@ runtime.python_test( "//executorch/examples/models:models", "//executorch/exir/tests:models", "//executorch/extension/export_util:export_util", - "//executorch/sdk:lib", - "//executorch/sdk/bundled_program/serialize:lib", + "//executorch/devtools:lib", + "//executorch/devtools/bundled_program/serialize:lib", "fbsource//third-party/pypi/pytest:pytest", ], ) diff --git a/backends/apple/mps/targets.bzl b/backends/apple/mps/targets.bzl index 8b9c64e143..74d7944836 100644 --- a/backends/apple/mps/targets.bzl +++ b/backends/apple/mps/targets.bzl @@ -47,7 +47,7 @@ def define_common_targets(is_xplat = False, platforms = []): "//executorch/exir/backend:backend_lib", "//executorch/extension/pybindings/...", "//executorch/runtime/backend/...", - "//executorch/sdk/runners/...", + "//executorch/devtools/runners/...", "//executorch/test/...", "@EXECUTORCH_CLIENTS", ], diff --git a/backends/apple/mps/test/test_mps_utils.py b/backends/apple/mps/test/test_mps_utils.py index d7efe8bde4..77c02f533b 100644 --- a/backends/apple/mps/test/test_mps_utils.py +++ b/backends/apple/mps/test/test_mps_utils.py @@ -12,16 +12,16 @@ import torch from executorch.backends.apple.mps import MPSBackend from executorch.backends.apple.mps.partition import MPSPartitioner +from executorch.devtools import BundledProgram +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite +from executorch.devtools.bundled_program.serialize import ( + serialize_from_bundled_program_to_flatbuffer, +) from executorch.exir import EdgeCompileConfig, ExirExportedProgram, to_edge from executorch.exir.backend.backend_api import to_backend from executorch.exir.backend.backend_details import CompileSpec from executorch.exir.capture._config import ExecutorchBackendConfig from executorch.extension.export_util.utils import export_to_edge -from executorch.sdk import BundledProgram -from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite -from executorch.sdk.bundled_program.serialize import ( - serialize_from_bundled_program_to_flatbuffer, -) from torch.export import export # Config for Capturing the weights, will be moved in the future diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py index 509e254b55..405f8b5db4 100644 --- a/backends/cadence/aot/compiler.py +++ b/backends/cadence/aot/compiler.py @@ -18,12 +18,13 @@ ReplaceLogicalNotBooleanWhereWithWherePass, ReplacePT2DequantWithCadenceDequantPass, ReplacePT2QuantWithCadenceQuantPass, + ReplaceSafeSoftmaxWithSoftmax, ReplaceScalarTensorWithFullPass, ReplaceSqueezeAndUnsqueezeWithViewPass, ) from executorch.backends.cadence.aot.quantizer.fusion_pass import QuantFusion from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer -from executorch.backends.cadence.aot.utils import model_is_quantized +from executorch.backends.cadence.aot.utils import model_gm_has_SDPA, model_is_quantized from executorch.backends.transforms.decompose_sdpa import ( DecomposeScaledDotProductAttention, ) @@ -57,13 +58,20 @@ def convert_pt2( """ # Export with dynamo - model_exp = capture_pre_autograd_graph(model, inputs) + model_gm = capture_pre_autograd_graph(model, inputs) - # Decompose SDPA - DecomposeScaledDotProductAttention(False)(model_exp) + if model_gm_has_SDPA(model_gm): + # Decompose SDPA + DecomposeScaledDotProductAttention(False)(model_gm) + + # Swap _safe_softmax with _softmax (see https://github.com/pytorch/pytorch/pull/133882 + # for details). + result = ReplaceSafeSoftmaxWithSoftmax()(model_gm) + assert result is not None + model_gm = result.graph_module # Prepare - prepared_model = prepare_pt2e(model_exp, quantizer) + prepared_model = prepare_pt2e(model_gm, quantizer) # Calibrate prepared_model(*inputs) diff --git a/backends/cadence/aot/passes.py b/backends/cadence/aot/passes.py index db419bfb5e..83ef43d151 100644 --- a/backends/cadence/aot/passes.py +++ b/backends/cadence/aot/passes.py @@ -266,3 +266,29 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult: result = SpecPropPass()(graph_module) assert result is not None return result + + +class ReplaceSafeSoftmaxWithSoftmax(ExportPass): + """ + Replace _safe_softmax with _softmax + """ + + def call_operator( + self, + op, # pyre-ignore + args: tuple[Argument, ...], + kwargs: dict[str, Argument], + meta: NodeMetadata, + ) -> ProxyValue: + if op != torch.ops.aten._safe_softmax.default: + return super().call_operator(op, args, kwargs, meta) + + # Add False for the half_to_float argument of softmax + softmax_args = list(args) + [False] + + return super().call_operator( + torch.ops.aten._softmax.default, + tuple(softmax_args), + kwargs, + meta, + ) diff --git a/backends/cadence/aot/utils.py b/backends/cadence/aot/utils.py index f0c294260a..b710f7d4e5 100644 --- a/backends/cadence/aot/utils.py +++ b/backends/cadence/aot/utils.py @@ -177,3 +177,11 @@ def print_ops_info( tablefmt="outline", ) ) + + +def model_gm_has_SDPA(model_gm: torch.fx.GraphModule) -> bool: + for node in model_gm.graph.nodes: + if node.op == "call_function": + if node.target == torch.ops.aten.scaled_dot_product_attention.default: + return True + return False diff --git a/backends/cadence/cadence_runner/cadence_runner.cpp b/backends/cadence/cadence_runner/cadence_runner.cpp index d76ba004aa..a269ed5a8e 100644 --- a/backends/cadence/cadence_runner/cadence_runner.cpp +++ b/backends/cadence/cadence_runner/cadence_runner.cpp @@ -22,13 +22,13 @@ #include +#include +#include #include #include #include #include #include -#include -#include static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4MB diff --git a/backends/cadence/cadence_runner/targets.bzl b/backends/cadence/cadence_runner/targets.bzl index 028ff7ad2e..361fe9712e 100644 --- a/backends/cadence/cadence_runner/targets.bzl +++ b/backends/cadence/cadence_runner/targets.bzl @@ -19,12 +19,12 @@ def define_common_targets(): visibility = ["PUBLIC"], deps = [ "fbsource//arvr/third-party/gflags:gflags", - "fbsource//xplat/executorch/kernels/portable:generated_lib", - "fbsource//xplat/executorch/runtime/executor:program", + "fbsource//xplat/executorch/devtools/etdump:etdump_flatcc", + "fbsource//xplat/executorch/devtools/bundled_program:runtime", "fbsource//xplat/executorch/extension/data_loader:file_data_loader", "fbsource//xplat/executorch/extension/data_loader:buffer_data_loader", + "fbsource//xplat/executorch/kernels/portable:generated_lib", + "fbsource//xplat/executorch/runtime/executor:program", "fbsource//xplat/executorch/util:util", - "fbsource//xplat/executorch/sdk/etdump:etdump_flatcc", - "fbsource//xplat/executorch/sdk/bundled_program:runtime", ], ) diff --git a/backends/cadence/runtime/TARGETS b/backends/cadence/runtime/TARGETS index 9f30cadf6f..1b55a7d541 100644 --- a/backends/cadence/runtime/TARGETS +++ b/backends/cadence/runtime/TARGETS @@ -13,9 +13,9 @@ python_library( typing = True, deps = [ "//caffe2:torch", + "//executorch/devtools/bundled_program:config", + "//executorch/devtools/bundled_program:core", + "//executorch/devtools/bundled_program/serialize:lib", "//executorch/exir:lib", - "//executorch/sdk/bundled_program:config", - "//executorch/sdk/bundled_program:core", - "//executorch/sdk/bundled_program/serialize:lib", ], ) diff --git a/backends/cadence/runtime/executor.py b/backends/cadence/runtime/executor.py index 7bcf705c03..d07b1b6a52 100644 --- a/backends/cadence/runtime/executor.py +++ b/backends/cadence/runtime/executor.py @@ -18,14 +18,13 @@ import torch -from executorch.exir import ExecutorchProgram, ExecutorchProgramManager - -from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite -from executorch.sdk.bundled_program.core import BundledProgram +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite +from executorch.devtools.bundled_program.core import BundledProgram -from executorch.sdk.bundled_program.serialize import ( +from executorch.devtools.bundled_program.serialize import ( serialize_from_bundled_program_to_flatbuffer, ) +from executorch.exir import ExecutorchProgram, ExecutorchProgramManager # If quiet is true, suppress the printing of stdout and stderr output. quiet = False diff --git a/backends/cadence/runtime/runtime.py b/backends/cadence/runtime/runtime.py index ec282f8f7b..33bb20719c 100644 --- a/backends/cadence/runtime/runtime.py +++ b/backends/cadence/runtime/runtime.py @@ -18,10 +18,10 @@ from executorch.backends.cadence.runtime import utils from executorch.backends.cadence.runtime.executor import Executor +from executorch.devtools import Inspector from executorch.exir import ExecutorchProgramManager from executorch.exir._serialize._program import deserialize_pte_binary from executorch.exir.schema import DataLocation -from executorch.sdk import Inspector from numpy import ndarray diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index 5fd6d5ad19..b206a7e133 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -27,6 +27,8 @@ QcomChipset, ) from executorch.backends.qualcomm.utils.utils import capture_program +from executorch.devtools import generate_etrecord +from executorch.devtools.inspector import Inspector from executorch.examples.qualcomm.utils import ( generate_inputs, make_output_dir, @@ -40,8 +42,6 @@ from executorch.exir.pass_base import ExportPass from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass from executorch.exir.program._program import ExecutorchProgram -from executorch.sdk import generate_etrecord -from executorch.sdk.inspector import Inspector from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e diff --git a/backends/vulkan/partitioner/supported_ops.py b/backends/vulkan/partitioner/supported_ops.py index 08d7f96a6b..ca7ce72cae 100644 --- a/backends/vulkan/partitioner/supported_ops.py +++ b/backends/vulkan/partitioner/supported_ops.py @@ -8,7 +8,10 @@ import operator -from executorch.backends.vulkan.passes.custom_ops_defs import grid_priors_op # noqa +from executorch.backends.vulkan.passes.custom_ops_defs import ( # noqa + conv_with_clamp_op, + grid_priors_op, +) from executorch.exir.dialects._ops import ops as exir_ops @@ -84,6 +87,7 @@ def __contains__(self, op): CONVOLUTION_OPS = [ exir_ops.edge.aten.convolution.default, + exir_ops.edge.et_vk.conv_with_clamp.default, ] REDUCTION_OPS = [ diff --git a/backends/vulkan/passes/custom_ops_defs.py b/backends/vulkan/passes/custom_ops_defs.py index 62f21bfee6..fd586b665a 100644 --- a/backends/vulkan/passes/custom_ops_defs.py +++ b/backends/vulkan/passes/custom_ops_defs.py @@ -48,6 +48,43 @@ def conv_with_clamp_impl( conv_with_clamp_op = getattr(getattr(torch.ops, namespace), name) +def conv_with_clamp_out_impl( + input, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + transposed=False, + output_padding=0, + groups=1, + output_min=-float("inf"), + output_max=float("inf"), + out=None, +): + out = conv_with_clamp_impl( + input, + weight, + bias, + stride, + padding, + dilation, + transposed, + output_padding, + groups, + output_min, + output_max, + ) + return out + + +name = "conv_with_clamp.out" +lib.define( + f"{name}(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, Scalar? output_min, Scalar? output_max, *, Tensor(a!) out) -> Tensor(a!)" +) +lib.impl(name, conv_with_clamp_out_impl, "CompositeExplicitAutograd") + + # The dimension of x should be larger than 1 def grid_priors_impl( x, diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp index 78aa4796aa..be44679f3b 100644 --- a/backends/vulkan/runtime/api/containers/Tensor.cpp +++ b/backends/vulkan/runtime/api/containers/Tensor.cpp @@ -13,36 +13,15 @@ namespace vkcompute { namespace api { -/* - * Given the strides of a buffer-backed tensor, find the index of the "fastest - * moving" dimension in WHCN dimension order. If multiple dims have the lowest - * stride, then the "earlier" dim is assumed to be the fastest moving (width is - * "earlier" than height). - */ -int32_t find_fastest_whcn_dim(const std::vector& strides) { - if (strides.size() == 0) { - return 0; - } - int32_t fastest_dim = 0; - int64_t min_stride = strides.at(0); - for (int d = strides.size() - 1; d >= 0; --d) { - if (strides.at(d) < min_stride) { - fastest_dim = d; - min_stride = strides.at(d); - } - } - return (strides.size() - 1 - fastest_dim); -} - /* * Given the strides of a buffer-backed tensor, estimate the equivalent memory * layout enum value by identifying the fastest moving dimension. */ utils::GPUMemoryLayout estimate_memory_layout( - const std::vector& strides) { - int32_t fastest_dim = find_fastest_whcn_dim(strides); - if (fastest_dim <= 3) { - return utils::GPUMemoryLayout(fastest_dim); + const std::vector& dim_order) { + int64_t fastest_dim_whcn = dim_order.size() - 1 - dim_order.back(); + if (fastest_dim_whcn >= 0 && fastest_dim_whcn < 3) { + return utils::GPUMemoryLayout(fastest_dim_whcn); } // TODO(ssjia) find a way to gracefully recover from this case by i.e. adding @@ -51,41 +30,70 @@ utils::GPUMemoryLayout estimate_memory_layout( VK_THROW("No compatible GPUMemoryLayout value"); } +std::vector calculate_dim_order( + const size_t ndim, + const utils::GPUMemoryLayout memory_layout) { + // Special case for zero dim tensors + if (ndim == 0) { + return {0}; + } + std::vector dim_order(ndim); + int64_t last_dim = + ndim - utils::to_packed_dim_nchw_offset(memory_layout); + + int64_t cur_dim = 0; + for (int d = 0; d < ndim; ++d) { + if (d == last_dim) { + cur_dim++; + } + dim_order[d] = cur_dim; + cur_dim++; + } + if (last_dim >= 0) { + dim_order[ndim - 1] = last_dim; + } + + return dim_order; +} + std::vector calculate_strides( const std::vector& sizes, - const utils::GPUMemoryLayout memory_layout) { + const std::vector& dim_order) { // For zero dim tensors if (sizes.size() == 0) { return {1}; } - const int64_t dim_offset = - utils::to_packed_dim_nchw_offset(memory_layout); - int64_t last_dim = sizes.size() - dim_offset; - if (last_dim < 0) { - last_dim = sizes.size() - 1; - } - size_t ndim = sizes.size(); std::vector strides(ndim); - const int64_t last_dim_size = sizes.at(last_dim); - - for (int stride_d = ndim - 1; stride_d >= 0; stride_d--) { - strides.at(stride_d) = 1; - if (stride_d == last_dim) { - continue; - } - strides.at(stride_d) = last_dim_size; - for (int size_d = ndim - 1; size_d > stride_d; size_d--) { - if (size_d != last_dim) { - strides.at(stride_d) *= sizes.at(size_d); - } + strides[dim_order[ndim - 1]] = 1; + for (int32_t i = ndim - 2; i >= 0; --i) { + if (sizes[dim_order[i + 1]] == 0) { + strides[dim_order[i]] = strides[dim_order[i + 1]]; + } else { + strides[dim_order[i]] = + strides[dim_order[i + 1]] * sizes[dim_order[i + 1]]; } } + return strides; } +bool dim_order_is_valid(const std::vector& dim_order) { + int64_t sum = 0; + for (size_t i = 0; i < dim_order.size(); ++i) { + if (dim_order[i] < 0 || dim_order[i] >= dim_order.size()) { + return false; + } + sum += dim_order[i]; + } + int64_t n = static_cast(dim_order.size() - 1); + // Sanity check that the sum of the indices in the vector is equal to the sum + // of 0 + 1 + 2 + ... + (ndim - 1) + return sum == n * (n + 1) / 2; +} + std::vector unsqueeze_strides( const std::vector& strides, const int64_t numel) { @@ -170,7 +178,8 @@ vTensor::vTensor( memory_layout_(memory_layout), // Calculate tensor size metadata sizes_(sizes.begin(), sizes.end()), - strides_(calculate_strides(sizes, memory_layout_)), + dim_order_(calculate_dim_order(sizes_.size(), memory_layout_)), + strides_(calculate_strides(sizes, dim_order_)), numel_(utils::multiply_integers(sizes_)), padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)}, unsqueezed_strides_{unsqueeze_strides(strides_, numel_)}, @@ -189,6 +198,9 @@ vTensor::vTensor( padded_sizes_, dtype_, allocate_memory) { + VK_CHECK_COND( + dim_order_is_valid(dim_order_), "computed dim order is invalid"); + if (storage_type != utils::kBuffer) { texture_limits_.limits = utils::ivec3{ utils::safe_downcast(storage_.image_extents_[0]), @@ -204,16 +216,39 @@ vTensor::vTensor( } } +vTensor::vTensor(const vTensor& other) + : dtype_(other.dtype_), + memory_layout_(other.memory_layout_), + // Copy tensor size metadata + sizes_(other.sizes_.begin(), other.sizes_.end()), + dim_order_(other.dim_order_.begin(), other.dim_order_.end()), + strides_(other.strides_.begin(), other.strides_.end()), + numel_(other.numel_), + padded_sizes_{other.padded_sizes_.begin(), other.padded_sizes_.end()}, + unsqueezed_strides_{ + other.unsqueezed_strides_.begin(), + other.unsqueezed_strides_.end()}, + padded_numel_(other.padded_numel_), + texture_limits_{other.texture_limits_}, + // Empty initialize Utility Uniform Buffers + sizes_uniform_(), + strides_uniform_(), + numel_uniform_(), + texture_limits_uniform_(), + // Copy Tensor storage + storage_(other.storage_) {} + vTensor::vTensor( const vTensor& other, const std::vector& sizes, - const std::vector& strides, - const size_t offset_numel) + const std::vector& dim_order, + const int64_t offset_numel) : dtype_(other.dtype_), - memory_layout_(estimate_memory_layout(strides)), + memory_layout_(estimate_memory_layout(dim_order)), // Copy tensor size metadata sizes_(sizes.begin(), sizes.end()), - strides_(strides.begin(), strides.end()), + dim_order_(dim_order.begin(), dim_order.end()), + strides_(calculate_strides(sizes_, dim_order_)), numel_(utils::multiply_integers(sizes_)), padded_sizes_{calculate_padded_sizes(sizes, memory_layout_)}, unsqueezed_strides_{unsqueeze_strides(strides_, numel_)}, @@ -226,6 +261,8 @@ vTensor::vTensor( texture_limits_uniform_(), // Copy Tensor storage storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) { + VK_CHECK_COND( + dim_order_is_valid(dim_order_), "new dim order provided is invalid"); VK_CHECK_COND( offset_numel + numel_ <= other.numel(), "Tensor alias cannot access more elements than available in the original" @@ -339,9 +376,17 @@ void vTensor::bind_allocation(const vkapi::Allocation& allocation) { } } -void vTensor::update_size_metadata(const std::vector& new_sizes) { +void vTensor::update_metadata( + const std::vector& new_sizes, + const std::vector& new_dim_order) { sizes_ = new_sizes; - strides_ = calculate_strides(new_sizes, memory_layout_); + dim_order_ = new_dim_order; + strides_ = calculate_strides(sizes_, dim_order_); + // Only update the memory layout for buffer-backed tensors. Strides are + // meaningless for texture-backed tensors and do not impact the memory layout. + if (storage_type() == utils::kBuffer) { + memory_layout_ = estimate_memory_layout(dim_order_); + } numel_ = utils::multiply_integers(sizes_); padded_sizes_ = calculate_padded_sizes(sizes_, memory_layout_); @@ -373,15 +418,7 @@ void vTensor::update_size_metadata(const std::vector& new_sizes) { } } -void vTensor::reallocate(const std::vector& new_sizes) { - update_size_metadata(new_sizes); - storage_.discard_and_reallocate( - calculate_padded_sizes(new_sizes, memory_layout_), - memory_layout_, - dtype_); -} - -void vTensor::virtual_resize(const std::vector& new_sizes) { +void vTensor::check_sizes(const std::vector& sizes) const { if (storage_type() != utils::kBuffer) { // For texture storage check that the current texture is large enough for // the new sizes of the tensor. @@ -394,10 +431,47 @@ void vTensor::virtual_resize(const std::vector& new_sizes) { VK_CHECK_COND( valid_resize, - "Cannot use virtual resize if new sizes requires a larger texture."); + "tensor sizes requires a larger texture than the current one."); + } else { + // For buffer storage check that the current buffer is large enough for the + // new sizes of the tensor. + int64_t numel = utils::multiply_integers(sizes); + bool valid_resize = + numel + storage_.buffer_offset_ <= storage_.buffer_length_; + VK_CHECK_COND( + valid_resize, + "tensor sizes requires a larger buffer than the current one."); } +} + +void vTensor::virtual_reconfigure( + const std::vector& new_sizes, + const std::vector& new_dim_order) { + VK_CHECK_COND( + storage_type() == utils::kBuffer, + "virtual_reconfigure is only applicable for buffer backed tensors"); + VK_CHECK_COND(new_sizes.size() == new_dim_order.size()); + VK_CHECK_COND(dim_order_is_valid(new_dim_order)); - update_size_metadata(new_sizes); + check_sizes(new_sizes); + update_metadata(new_sizes, new_dim_order); +} + +void vTensor::virtual_resize(const std::vector& new_sizes) { + VK_CHECK_COND( + new_sizes.size() == dim_order_.size(), + "new sizes cannot modify the dimensionality of the tensor "); + + check_sizes(new_sizes); + update_metadata(new_sizes, dim_order_); +} + +void vTensor::reallocate(const std::vector& new_sizes) { + update_metadata(new_sizes, dim_order_); + storage_.discard_and_reallocate( + calculate_padded_sizes(new_sizes, memory_layout_), + memory_layout_, + dtype_); } // @@ -480,6 +554,7 @@ vTensorStorage::vTensorStorage( storage_type_{storage_type}, image_extents_(calculate_image_extents(padded_sizes, gpu_memory_layout)), buffer_length_{utils::multiply_integers(padded_sizes)}, + buffer_offset_{0}, image_(allocate_image( context_, image_extents_, @@ -496,11 +571,12 @@ vTensorStorage::vTensorStorage( vTensorStorage::vTensorStorage( const vTensorStorage& other, - const size_t buffer_offset) + const int64_t buffer_offset) : context_(other.context_), storage_type_{other.storage_type_}, image_extents_(other.image_extents_), buffer_length_{other.buffer_length_}, + buffer_offset_{buffer_offset}, image_(), buffer_(other.buffer_, buffer_offset), last_access_{other.last_access_} { diff --git a/backends/vulkan/runtime/api/containers/Tensor.h b/backends/vulkan/runtime/api/containers/Tensor.h index 5a4598291c..8186ef1bd6 100644 --- a/backends/vulkan/runtime/api/containers/Tensor.h +++ b/backends/vulkan/runtime/api/containers/Tensor.h @@ -20,14 +20,21 @@ namespace vkcompute { namespace api { /* - * Given the sizes of a tensor and the GPU memory layout, calculate the strides - * of the tensor in NCHW dimension order. The GPU memory layout will be used to - * determine which dimension is packed along a texel; that dimension will be - * used as the "fasted moving" dimension with a stride of 1. + * Given a GPUMemoryLayout value, produce a dim order vector that matches the + * given memory layout. The produced dim order vector will be in the NCHW + * dimension order + */ +std::vector calculate_dim_order( + const size_t ndim, + const utils::GPUMemoryLayout memory_layout); + +/* + * Given the sizes of a tensor and the dim order of the tensor (both in NCHW) + * dimension order, calculate the strides of the tensor. */ std::vector calculate_strides( const std::vector& sizes, - const utils::GPUMemoryLayout memory_layout); + const std::vector& dim_order); std::vector unsqueeze_strides( const std::vector& strides, @@ -96,7 +103,7 @@ class vTensorStorage final { * because this behaviour is unsafe, since the original tensor may be * destroyed before the copy is destroyed. */ - vTensorStorage(const vTensorStorage& other, const size_t buffer_offset = 0); + vTensorStorage(const vTensorStorage& other, const int64_t buffer_offset = 0); public: // To discourage creating copies, the assignment operator is still deleted. @@ -118,6 +125,7 @@ class vTensorStorage final { // Resource sizings utils::uvec3 image_extents_{}; int64_t buffer_length_{}; + int64_t buffer_offset_{}; // GPU Storage mutable vkapi::VulkanImage image_; @@ -167,8 +175,16 @@ class vTensor final { const utils::GPUMemoryLayout memory_layout = utils::kChannelsPacked, const bool allocate_memory = true); - vTensor(const vTensor& other) = delete; - vTensor& operator=(const vTensor& other) = delete; + /* + * This constructor allows for the creation of a vTensor that references the + * same buffer resource of another vTensor, with the same sizes and strides + * metadata. The created vTensor will not own the underlying resource. This is + * only applicable for buffer backed tensors at the moment. + * + * Once created, the sizes and strides of the aliased vTensor can be changed + * using the `virtual_reconfigure` member function. + */ + vTensor(const vTensor& other); /* * This constructor allows for the creation of a vTensor that references the @@ -176,6 +192,10 @@ class vTensor final { * strides metatdata. The created vTensor will not own the underlying * resource. This is only applicable for buffer backed tensors at the moment. * + * Note that dim order is used as the source of truth regarding the strides, + * and the new strides are computed from the new sizes and new dim order. + * Thus only the dim order is provided as an argument to this function. + * * The offset_numel argument allows the aliased tensor's memory region to * begin at an offset of N elements from the start of the original tensor's * buffer. @@ -183,8 +203,11 @@ class vTensor final { vTensor( const vTensor& other, const std::vector& sizes, - const std::vector& strides, - const size_t offset_numel = 0); + const std::vector& dim_order, + const int64_t offset_numel = 0); + + // To discourage making copies, the copy assignment operator is still deleted + vTensor& operator=(const vTensor& other) = delete; vTensor(vTensor&& other) = default; vTensor& operator=(vTensor&& other) = default; @@ -195,6 +218,11 @@ class vTensor final { // sizes of the tensor in NCHW dimension order std::vector sizes_; + // dim order of the tensor; dimension indices are in NCHW dimension order + // i.e. 0 is N, 1 is C, 2 is H, 3 is W for a 4D tensor. The dims with larger + // strides precede the dims with smaller strides in the dim order. The last + // dim is always the fastest moving dim with a stride of 1. + std::vector dim_order_; // strides of the tensor in NCHW dimension order std::vector strides_; // Contains the number of elements in the tensor according to the canonical @@ -305,6 +333,10 @@ class vTensor final { return sizes_.size(); } + inline const std::vector& dim_order() const { + return dim_order_; + } + inline const std::vector& strides() const { return strides_; } @@ -386,24 +418,46 @@ class vTensor final { private: /* - * Update the size metadata of the vTensor to be new sizes. Should not be used - * directly, reallocate() or virtual_resize() should be used instead. + * Given new sizes and new strides of the dim order, update the sizes and dim + * order metadata of the vTensor. New strides are computed using the new sizes + * and new dim order. + */ + void update_metadata( + const std::vector& new_sizes, + const std::vector& new_dim_order); + + /* + * Check that tensor sizes are valid given the current storage resource's + * limits. */ - void update_size_metadata(const std::vector& new_sizes); + void check_sizes(const std::vector& sizes) const; public: /* - * Discard the underlying VkImage or VkBuffer and re-allocate based on new - * tensor sizes + * Change how the tensor should be interpreted by compute shaders via updating + * the size and dim order of the tensor. The new sizes and dim order may have + * different dimensionality than the current dimensionality of the tensor. + * + * This function can only be used for buffer-backed tensors, since texture + * backed buffers cannot change dimensionality or memory layout. */ - void reallocate(const std::vector& new_sizes); + void virtual_reconfigure( + const std::vector& new_sizes, + const std::vector& new_dim_order); /* * Perform a virtual resize of the vTensor by modifying the size metadata that * gets used in compute shaders. This allows the shader to treat the - * underlying resource as if it were a different size. + * underlying resource as if it were a different size. The new sizes cannot + * modify the dimensionality of the tensor. */ void virtual_resize(const std::vector& new_sizes); + + /* + * Discard the underlying VkImage or VkBuffer and re-allocate based on new + * tensor sizes + */ + void reallocate(const std::vector& new_sizes); }; } // namespace api diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index 50d927a913..48e1ebf0a8 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -203,6 +203,13 @@ ValueRef ComputeGraph::add_tensor( sizes, dtype, suggested_memory_layout(sizes), shared_object_idx); } +ValueRef ComputeGraph::add_tensor_view(const ValueRef vref) { + const vTensorPtr t = get_tensor(vref); + ValueRef idx(static_cast(values_.size())); + values_.emplace_back(api::vTensor(*t)); + return idx; +} + ValueRef ComputeGraph::add_tensor_view( const ValueRef vref, const std::vector& sizes, diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index b432be8388..faa2f4107e 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -356,10 +356,17 @@ class ComputeGraph final { * `vTensor` value at `vref`. See the copy constructor of `api::vTensor` for * more details. */ + ValueRef add_tensor_view(const ValueRef vref); + + /* + * Use the copy constructor of `api::vTensor` to create a "view" of the + * `vTensor` value at `vref` with different sizes and dim order. See the copy + * constructor of `api::vTensor` for more details. + */ ValueRef add_tensor_view( const ValueRef vref, const std::vector& sizes, - const std::vector& strides, + const std::vector& dim_order, const size_t offset_numel = 0); /* diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp index 3b2a826f87..2cb00ba65a 100644 --- a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp +++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp @@ -35,7 +35,22 @@ ExecuteNode::ExecuteNode( graph.update_descriptor_counts(shader, /*execute = */ true); } +ExecuteNode::ExecuteNode( + const ResizeFunction& resize_fn, + const std::vector& resize_args) + : shader_(), + global_workgroup_size_({0u, 0u, 0u}), + local_workgroup_size_({0u, 0u, 0u}), + args_(), + params_(), + spec_vars_(), + resize_fn_(resize_fn), + resize_args_(resize_args) {} + void ExecuteNode::encode(ComputeGraph* graph) { + if (!shader_) { + return; + } api::Context* const context = graph->context(); vkapi::PipelineBarrier pipeline_barrier{}; diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.h b/backends/vulkan/runtime/graph/ops/ExecuteNode.h index 1fff14e020..dece9ddb50 100644 --- a/backends/vulkan/runtime/graph/ops/ExecuteNode.h +++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.h @@ -48,7 +48,7 @@ class ExecuteNode final { const std::vector&, const std::vector&)>; - ExecuteNode( + explicit ExecuteNode( ComputeGraph& graph, const vkapi::ShaderInfo& shader, const utils::uvec3& global_workgroup_size, @@ -59,6 +59,15 @@ class ExecuteNode final { const ResizeFunction& resize_fn = nullptr, const std::vector& resize_args = {}); + /* + * This overload of the ExecuteNode constructor is used to register ops which + * update a tensor view. No shader is dispatched, but the node still needs to + * update the view's sizes and strides after a resize. + */ + explicit ExecuteNode( + const ResizeFunction& resize_fn = nullptr, + const std::vector& resize_args = {}); + ~ExecuteNode() = default; void encode(ComputeGraph* graph); @@ -83,6 +92,11 @@ class ExecuteNode final { const vkapi::SpecVarList spec_vars_; const ResizeFunction resize_fn_; const std::vector resize_args_; + + public: + operator bool() const { + return shader_; + } }; } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp index 52af0542b6..74113197d4 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp @@ -562,6 +562,7 @@ void conv(ComputeGraph& graph, const std::vector& args) { REGISTER_OPERATORS { VK_REGISTER_OP(aten.convolution.default, conv); VK_REGISTER_OP(conv_with_clamp.default, conv); + VK_REGISTER_OP(et_vk.conv_with_clamp.default, conv); } } // namespace vkcompute diff --git a/backends/vulkan/runtime/vk_api/Shader.h b/backends/vulkan/runtime/vk_api/Shader.h index 34c2d95c93..1e3b2a799f 100644 --- a/backends/vulkan/runtime/vk_api/Shader.h +++ b/backends/vulkan/runtime/vk_api/Shader.h @@ -53,8 +53,8 @@ class ShaderLayout final { struct ShaderInfo final { struct { - const uint32_t* bin; - uint32_t size; + const uint32_t* bin = nullptr; + uint32_t size = 0u; } src_code; std::string kernel_name{""}; @@ -71,6 +71,10 @@ struct ShaderInfo final { const uint32_t, std::vector, const utils::uvec3 tile_size); + + operator bool() const { + return src_code.bin != nullptr; + }; }; bool operator==(const ShaderInfo& _1, const ShaderInfo& _2); diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py index 9f57ec49a8..d80809ec79 100644 --- a/backends/vulkan/test/test_vulkan_delegate.py +++ b/backends/vulkan/test/test_vulkan_delegate.py @@ -1633,6 +1633,42 @@ def forward(self, x): memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED], ) + def test_vulkan_backend_conv_with_clamp(self): + class ConvWithClampModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.weight = torch.randn(6, 8, 3, 3) + self.bias = torch.randn(8) + self.stride = (1, 2) + self.padding = (2, 3) + self.dilation = (1, 1) + self.transposed = True + self.output_padding = (0, 1) + self.groups = 1 + self.output_min = 0 + self.output_max = 10 + + def forward(self, x): + return torch.ops.et_vk.conv_with_clamp( + x, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.transposed, + self.output_padding, + self.groups, + self.output_min, + self.output_max, + ) + + self.lower_module_and_test_output( + ConvWithClampModule(), + (torch.randn(size=(1, 6, 40, 50), dtype=torch.float32),), + memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED], + ) + def test_vulkan_backend_grid_priors(self): class GridPriorsModule(torch.nn.Module): def __init__(self): diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp index ad49687369..6c056cc9d9 100644 --- a/backends/vulkan/test/utils/test_utils.cpp +++ b/backends/vulkan/test/utils/test_utils.cpp @@ -482,3 +482,9 @@ void execute_graph_and_check_output( } } } + +bool check_close(float a, float b, float atol, float rtol) { + float max = std::max(std::abs(a), std::abs(b)); + float diff = std::abs(a - b); + return diff <= (atol + rtol * max); +} diff --git a/backends/vulkan/test/utils/test_utils.h b/backends/vulkan/test/utils/test_utils.h index f9969eddbf..bf54944617 100644 --- a/backends/vulkan/test/utils/test_utils.h +++ b/backends/vulkan/test/utils/test_utils.h @@ -242,3 +242,9 @@ void print_vector( } std::cout << std::endl; } + +// +// Misc. Utilities +// + +bool check_close(float a, float b, float atol = 1e-4, float rtol = 1e-5); diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index 307593d8fd..1ac74e29ef 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -168,7 +168,50 @@ std::vector get_reference_strides( return {}; } +TEST_F(VulkanComputeAPITest, empty_init_shader_info_test) { + vkapi::ShaderInfo empty_shader_info; + EXPECT_FALSE(empty_shader_info); + EXPECT_TRUE(empty_shader_info.src_code.bin == nullptr); + EXPECT_TRUE(empty_shader_info.src_code.size == 0u); +} + +TEST_F(VulkanComputeAPITest, calculate_dim_order_test) { + // ndim, GPUMemoryLayout, expected dim order pairs + std::vector>> + test_cases = { + {1, utils::kWidthPacked, {0}}, + {1, utils::kHeightPacked, {0}}, + {1, utils::kChannelsPacked, {0}}, + {2, utils::kWidthPacked, {0, 1}}, + {2, utils::kHeightPacked, {1, 0}}, + {2, utils::kChannelsPacked, {0, 1}}, + {3, utils::kWidthPacked, {0, 1, 2}}, + {3, utils::kHeightPacked, {0, 2, 1}}, + {3, utils::kChannelsPacked, {1, 2, 0}}, + {4, utils::kWidthPacked, {0, 1, 2, 3}}, + {4, utils::kHeightPacked, {0, 1, 3, 2}}, + {4, utils::kChannelsPacked, {0, 2, 3, 1}}, + }; + + for (const auto& test_case : test_cases) { + const size_t& ndim = std::get<0>(test_case); + const utils::GPUMemoryLayout& layout = std::get<1>(test_case); + const auto& expected_dim_order = std::get<2>(test_case); + std::vector dim_order = calculate_dim_order(ndim, layout); + + ASSERT_TRUE(dim_order == expected_dim_order); + } +} + TEST_F(VulkanComputeAPITest, calculate_tensor_strides_test) { + vTensor v_tensor_to_resize( + context(), + {25, 25, 25, 25}, + vkapi::kFloat, + utils::kBuffer, + utils::kWidthPacked, + /*allocate_memory = */ false); + for (const auto& sizes : standard_sizes_to_test) { if (sizes.size() < 3) { continue; @@ -176,7 +219,9 @@ TEST_F(VulkanComputeAPITest, calculate_tensor_strides_test) { for (const auto& layout : {utils::kWidthPacked, utils::kHeightPacked, utils::kChannelsPacked}) { { - std::vector strides = calculate_strides(sizes, layout); + std::vector dim_order = + calculate_dim_order(sizes.size(), layout); + std::vector strides = calculate_strides(sizes, dim_order); std::vector ref_strides = get_reference_strides(sizes, layout); ASSERT_TRUE(strides == ref_strides); @@ -187,6 +232,25 @@ TEST_F(VulkanComputeAPITest, calculate_tensor_strides_test) { get_reference_strides(sizes, layout, true); ASSERT_TRUE(unsqueezed_strides == ref_unsqueezed_strides); + + // Create new vTensor and check that the strides are correct + vTensor new_v_tensor( + context(), + sizes, + vkapi::kFloat, + utils::kBuffer, + layout, + /*allocate_memory = */ false); + + ASSERT_TRUE(new_v_tensor.strides() == ref_strides); + ASSERT_TRUE( + new_v_tensor.unsqueezed_strides() == ref_unsqueezed_strides); + + // Resize vtensor and check that updated metadata is correct + v_tensor_to_resize.virtual_reconfigure(sizes, dim_order); + ASSERT_TRUE(v_tensor_to_resize.strides() == ref_strides); + ASSERT_TRUE( + v_tensor_to_resize.unsqueezed_strides() == ref_unsqueezed_strides); } } } @@ -542,9 +606,10 @@ TEST_F(VulkanComputeAPITest, tensor_copy_test) { std::vector sizes = {9, 9}; std::vector strides = get_reference_strides(sizes, utils::kWidthPacked); + std::vector dim_order = {0, 1}; vTensor original = CREATE_FLOAT_BUFFER(sizes, /*allocate_memory=*/true); - vTensor copy = vTensor(original, sizes, strides); + vTensor copy = vTensor(original, sizes, dim_order); EXPECT_TRUE(get_vma_allocation_count() == 1); // Fill original tensor with some data @@ -557,7 +622,6 @@ TEST_F(VulkanComputeAPITest, tensor_copy_test) { for (size_t i = 0; i < data_out.size(); ++i) { CHECK_VALUE(data_out, i, 2.5f + i); } - std::cout << std::endl; } TEST_F(VulkanComputeAPITest, tensor_no_copy_transpose_test) { @@ -569,7 +633,7 @@ TEST_F(VulkanComputeAPITest, tensor_no_copy_transpose_test) { std::vector mat2_t_sizes = {K, N}; std::vector out_sizes = {M, N}; - std::vector transposed_strides = {1, K}; + std::vector transposed_dim_order = {1, 0}; vTensor mat1 = CREATE_FLOAT_BUFFER(mat1_sizes, /*allocate_memory=*/true); vTensor mat2 = CREATE_FLOAT_BUFFER(mat2_sizes, /*allocate_memory=*/true); @@ -581,8 +645,8 @@ TEST_F(VulkanComputeAPITest, tensor_no_copy_transpose_test) { std::vector mat2_data = create_random_float_buffer(mat2.staging_buffer_numel()); - vTensor mat2_t = vTensor(mat2, mat2_t_sizes, transposed_strides); - EXPECT_TRUE(mat2_t.gpu_memory_layout() == utils::kHeightPacked); + // Create direct view and modify sizes and strides later + vTensor mat2_t = vTensor(mat2); std::vector mat2_t_data = transpose_matrix(mat2_data, N, K); std::vector ref_out = @@ -594,6 +658,10 @@ TEST_F(VulkanComputeAPITest, tensor_no_copy_transpose_test) { record_reference_matmul(api::context(), out, mat1, mat2_t); + // Update sizes and strides of mat2_t to be that of a transposed tensor + mat2_t.virtual_reconfigure(mat2_t_sizes, transposed_dim_order); + EXPECT_TRUE(mat2_t.gpu_memory_layout() == utils::kHeightPacked); + std::vector data_out(out.staging_buffer_numel()); // Extract the copy tensor; should contain the data of the original tensor extract_vtensor(out, data_out); @@ -601,7 +669,7 @@ TEST_F(VulkanComputeAPITest, tensor_no_copy_transpose_test) { EXPECT_TRUE(data_out.size() == ref_out.size()); for (size_t i = 0; i < data_out.size(); ++i) { - EXPECT_TRUE(data_out[i] == ref_out[i]); + EXPECT_TRUE(check_close(data_out[i], ref_out[i])); } } @@ -615,7 +683,7 @@ TEST_F(VulkanComputeAPITest, tensor_no_copy_slice_test) { constexpr int L_S2 = 7; constexpr int O_S2 = 3; - std::vector strides = {1}; + std::vector dim_order = {0}; std::vector t_sizes = {L}; std::vector s1_sizes = {L_S1}; @@ -625,8 +693,8 @@ TEST_F(VulkanComputeAPITest, tensor_no_copy_slice_test) { fill_vtensor(orig, 0); - vTensor s1 = vTensor(orig, s1_sizes, strides, O_S1); - vTensor s2 = vTensor(s1, s2_sizes, strides, O_S2); + vTensor s1 = vTensor(orig, s1_sizes, dim_order, O_S1); + vTensor s2 = vTensor(s1, s2_sizes, dim_order, O_S2); record_scalar_add_buffer(api::context(), s1, 4.5f); record_scalar_add_buffer(api::context(), s2, 7.5f); @@ -975,6 +1043,19 @@ TEST(VulkanComputeGraphTest, test_values_string) { EXPECT_TRUE(stored == "hello, world"); } +TEST(VulkanComputeGraphTest, empty_init_executenode_test) { + ExecuteNode node(nullptr, {}); + EXPECT_FALSE(node); + + GraphConfig config; + ComputeGraph graph(config); + + // Encode an empty ExecuteNode and check that command buffer encoding does not + // crash. + graph.execute_nodes().emplace_back(new ExecuteNode(nullptr, {})); + EXPECT_NO_FATAL_FAILURE(graph.encode_execute()); +} + TEST(VulkanComputeGraphTest, test_zero_dim_tensor) { GraphConfig config; ComputeGraph graph(config); @@ -1073,7 +1154,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_view) { config.set_storage_type_override(utils::kBuffer); ComputeGraph graph(config); - std::vector strides = {W, 1}; + std::vector dim_order = {0, 1}; std::vector orig_sizes = {H, W}; std::vector slice_sizes = {S_H, W}; @@ -1083,7 +1164,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_view) { IOValueRef orig = graph.add_input_tensor(orig_sizes, vkapi::kFloat); ValueRef slice = - graph.add_tensor_view(orig.value, slice_sizes, strides, offset); + graph.add_tensor_view(orig.value, slice_sizes, dim_order, offset); IOValueRef out = {}; diff --git a/backends/xnnpack/test/TARGETS b/backends/xnnpack/test/TARGETS index abedffb8e6..629ac8275b 100644 --- a/backends/xnnpack/test/TARGETS +++ b/backends/xnnpack/test/TARGETS @@ -36,10 +36,10 @@ runtime.python_test( deps = [ "//executorch/backends/xnnpack/partition:xnnpack_partitioner", "//executorch/backends/xnnpack/test/tester:tester", + "//executorch/devtools:lib", + "//executorch/devtools/bundled_program:config", + "//executorch/devtools/bundled_program/serialize:lib", "//executorch/exir/passes:constant_prop_pass", - "//executorch/sdk:lib", - "//executorch/sdk/bundled_program:config", - "//executorch/sdk/bundled_program/serialize:lib", "//pytorch/ao:torchao", # @manual ], external_deps = [ diff --git a/backends/xnnpack/test/test_xnnpack_utils.py b/backends/xnnpack/test/test_xnnpack_utils.py index c6b1513d31..3f5359a3f4 100644 --- a/backends/xnnpack/test/test_xnnpack_utils.py +++ b/backends/xnnpack/test/test_xnnpack_utils.py @@ -25,6 +25,12 @@ # import the xnnpack backend implementation from executorch.backends.xnnpack.xnnpack_preprocess import XnnpackBackend +from executorch.devtools import BundledProgram + +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite +from executorch.devtools.bundled_program.serialize import ( + serialize_from_bundled_program_to_flatbuffer, +) from executorch.exir import ExecutorchProgram, ExirExportedProgram from executorch.exir.backend.backend_api import to_backend, validation_disabled @@ -34,12 +40,6 @@ _load_for_executorch_from_buffer, ) from executorch.extension.pytree import tree_flatten -from executorch.sdk import BundledProgram - -from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite -from executorch.sdk.bundled_program.serialize import ( - serialize_from_bundled_program_to_flatbuffer, -) from torch.ao.quantization import ( # @manual default_per_channel_symmetric_qnnpack_qconfig, diff --git a/sdk/CMakeLists.txt b/devtools/CMakeLists.txt similarity index 89% rename from sdk/CMakeLists.txt rename to devtools/CMakeLists.txt index 79903fc315..4c4d15fd73 100644 --- a/sdk/CMakeLists.txt +++ b/devtools/CMakeLists.txt @@ -78,8 +78,8 @@ set_property(TARGET flatccrt PROPERTY POSITION_INDEPENDENT_CODE ON) include(ExternalProject) # The include directory that will contain the generated schema headers. -set(_program_schema__include_dir "${CMAKE_BINARY_DIR}/sdk/include") -set(_bundled_schema__include_dir "${CMAKE_BINARY_DIR}/sdk/bundled_program") +set(_program_schema__include_dir "${CMAKE_BINARY_DIR}/devtools/include") +set(_bundled_schema__include_dir "${CMAKE_BINARY_DIR}/devtools/bundled_program") # TODO(dbort): Only enable this when cross-compiling. It can cause build race # conditions (libflatcc.a errors) when enabled. @@ -128,11 +128,11 @@ set(_etdump_schema__outputs) foreach(fbs_file ${_etdump_schema_names}) string(REGEX REPLACE "[.]fbs$" "_reader.h" generated "${fbs_file}") list(APPEND _etdump_schema__outputs - "${_program_schema__include_dir}/executorch/sdk/etdump/${generated}" + "${_program_schema__include_dir}/executorch/devtools/etdump/${generated}" ) string(REGEX REPLACE "[.]fbs$" "_builder.h" generated "${fbs_file}") list(APPEND _etdump_schema__outputs - "${_program_schema__include_dir}/executorch/sdk/etdump/${generated}" + "${_program_schema__include_dir}/executorch/devtools/etdump/${generated}" ) endforeach() @@ -143,7 +143,7 @@ foreach(fbs_file ${_bundled_input_schema_names}) list( APPEND _bundled_program_schema__outputs - "${_bundled_schema__include_dir}/executorch/sdk/bundled_program/schema/${generated}" + "${_bundled_schema__include_dir}/executorch/devtools/bundled_program/schema/${generated}" ) endforeach() @@ -152,9 +152,9 @@ add_library( bundled_program_schema INTERFACE ${_bundled_program_schema__outputs} ) -file(MAKE_DIRECTORY ${_program_schema__include_dir}/executorch/sdk/etdump) +file(MAKE_DIRECTORY ${_program_schema__include_dir}/executorch/devtools/etdump) file(MAKE_DIRECTORY - ${_program_schema__include_dir}/executorch/sdk/bundled_program + ${_program_schema__include_dir}/executorch/devtools/bundled_program ) add_custom_command( @@ -164,7 +164,7 @@ add_custom_command( # tree instead of under the binary directory, and there's no way to change # that behavior. ${_flatcc_source_dir}/bin/flatcc -cwr -o - ${_program_schema__include_dir}/executorch/sdk/etdump + ${_program_schema__include_dir}/executorch/devtools/etdump ${_etdump_schema__srcs} COMMAND rm -f ${_etdump_schema_cleanup_paths} DEPENDS ${_etdump_schema_gen_dep} @@ -186,9 +186,9 @@ add_custom_command( OUTPUT ${_bundled_program_schema__outputs} COMMAND ${FLATC_EXECUTABLE} --cpp --cpp-std c++11 --gen-mutable --scoped-enums -o - "${_bundled_schema__include_dir}/executorch/sdk/bundled_program/schema" + "${_bundled_schema__include_dir}/executorch/devtools/bundled_program/schema" ${_bundled_program_schema__srcs} - WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/sdk + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/devtools DEPENDS ${FLATC_EXECUTABLE} ${_bundled_program_schema__srcs} COMMENT "Generating bundled_program headers" VERBATIM diff --git a/sdk/TARGETS b/devtools/TARGETS similarity index 54% rename from sdk/TARGETS rename to devtools/TARGETS index 56d38a4ad3..06964b8387 100644 --- a/sdk/TARGETS +++ b/devtools/TARGETS @@ -6,8 +6,8 @@ python_library( name = "lib", srcs = ["__init__.py"], deps = [ - "//executorch/sdk/bundled_program:core", - "//executorch/sdk/etrecord:etrecord", - "//executorch/sdk/inspector:lib", + "//executorch/devtools/bundled_program:core", + "//executorch/devtools/etrecord:etrecord", + "//executorch/devtools/inspector:lib", ], ) diff --git a/sdk/__init__.py b/devtools/__init__.py similarity index 57% rename from sdk/__init__.py rename to devtools/__init__.py index 11134bf276..821d75901f 100644 --- a/sdk/__init__.py +++ b/devtools/__init__.py @@ -4,10 +4,10 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import executorch.sdk.inspector as inspector -from executorch.sdk.bundled_program.core import BundledProgram -from executorch.sdk.etrecord import ETRecord, generate_etrecord, parse_etrecord -from executorch.sdk.inspector import Inspector +import executorch.devtools.inspector as inspector +from executorch.devtools.bundled_program.core import BundledProgram +from executorch.devtools.etrecord import ETRecord, generate_etrecord, parse_etrecord +from executorch.devtools.inspector import Inspector __all__ = [ "ETRecord", diff --git a/sdk/backend_debug/TARGETS b/devtools/backend_debug/TARGETS similarity index 100% rename from sdk/backend_debug/TARGETS rename to devtools/backend_debug/TARGETS diff --git a/sdk/backend_debug/__init__.py b/devtools/backend_debug/__init__.py similarity index 83% rename from sdk/backend_debug/__init__.py rename to devtools/backend_debug/__init__.py index c1c9726b86..b457b7d11d 100644 --- a/sdk/backend_debug/__init__.py +++ b/devtools/backend_debug/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.sdk.backend_debug.delegation_info import ( +from executorch.devtools.backend_debug.delegation_info import ( DelegationBreakdown, get_delegation_info, ) diff --git a/sdk/backend_debug/delegation_info.py b/devtools/backend_debug/delegation_info.py similarity index 100% rename from sdk/backend_debug/delegation_info.py rename to devtools/backend_debug/delegation_info.py diff --git a/sdk/backend_debug/tests/TARGETS b/devtools/backend_debug/tests/TARGETS similarity index 86% rename from sdk/backend_debug/tests/TARGETS rename to devtools/backend_debug/tests/TARGETS index 3c9f6c2e64..ae234df8ce 100644 --- a/sdk/backend_debug/tests/TARGETS +++ b/devtools/backend_debug/tests/TARGETS @@ -10,8 +10,8 @@ python_unittest( deps = [ "fbsource//third-party/pypi/pandas:pandas", "//caffe2:torch", + "//executorch/devtools/backend_debug:delegation_info", "//executorch/exir:lib", "//executorch/exir/backend/test:op_partitioner_demo", - "//executorch/sdk/backend_debug:delegation_info", ], ) diff --git a/sdk/backend_debug/tests/test_delegation_info.py b/devtools/backend_debug/tests/test_delegation_info.py similarity index 96% rename from sdk/backend_debug/tests/test_delegation_info.py rename to devtools/backend_debug/tests/test_delegation_info.py index 2d98e9a595..6ff5169094 100644 --- a/sdk/backend_debug/tests/test_delegation_info.py +++ b/devtools/backend_debug/tests/test_delegation_info.py @@ -9,9 +9,9 @@ import pandas as pd import torch +from executorch.devtools.backend_debug import DelegationBreakdown, get_delegation_info from executorch.exir import to_edge from executorch.exir.backend.test.op_partitioner_demo import AddMulPartitionerDemo -from executorch.sdk.backend_debug import DelegationBreakdown, get_delegation_info from pandas.testing import assert_frame_equal diff --git a/sdk/bundled_program/TARGETS b/devtools/bundled_program/TARGETS similarity index 88% rename from sdk/bundled_program/TARGETS rename to devtools/bundled_program/TARGETS index c731606217..27560f7087 100644 --- a/sdk/bundled_program/TARGETS +++ b/devtools/bundled_program/TARGETS @@ -18,10 +18,10 @@ runtime.python_library( ":config", ":version", "//caffe2:torch", + "//executorch/devtools/bundled_program/schema:bundled_program_schema_py", "//executorch/exir:schema", "//executorch/exir:tensor", "//executorch/exir/_serialize:lib", - "//executorch/sdk/bundled_program/schema:bundled_program_schema_py", ], ) @@ -46,6 +46,6 @@ runtime.python_library( "version.py", ], visibility = [ - "//executorch/sdk/...", + "//executorch/devtools/...", ], ) diff --git a/sdk/bundled_program/bundled_program.cpp b/devtools/bundled_program/bundled_program.cpp similarity index 98% rename from sdk/bundled_program/bundled_program.cpp rename to devtools/bundled_program/bundled_program.cpp index 63affa5c7f..d174cbdcda 100644 --- a/sdk/bundled_program/bundled_program.cpp +++ b/devtools/bundled_program/bundled_program.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ -#include +#include #include #include @@ -16,12 +16,12 @@ #include #endif // USE_ATEN_LIB +#include #include #include #include #include #include -#include namespace torch { namespace executor { diff --git a/sdk/bundled_program/bundled_program.h b/devtools/bundled_program/bundled_program.h similarity index 100% rename from sdk/bundled_program/bundled_program.h rename to devtools/bundled_program/bundled_program.h diff --git a/sdk/bundled_program/config.py b/devtools/bundled_program/config.py similarity index 100% rename from sdk/bundled_program/config.py rename to devtools/bundled_program/config.py diff --git a/sdk/bundled_program/core.py b/devtools/bundled_program/core.py similarity index 98% rename from sdk/bundled_program/core.py rename to devtools/bundled_program/core.py index 56fc817bbe..c775fb1510 100644 --- a/sdk/bundled_program/core.py +++ b/devtools/bundled_program/core.py @@ -8,19 +8,19 @@ import typing from typing import Dict, List, Optional, Sequence, Type, Union -import executorch.exir.schema as core_schema +import executorch.devtools.bundled_program.schema as bp_schema -import executorch.sdk.bundled_program.schema as bp_schema +import executorch.exir.schema as core_schema import torch import torch.fx +from executorch.devtools.bundled_program.config import ConfigValue, MethodTestSuite + +from executorch.devtools.bundled_program.version import BUNDLED_PROGRAM_SCHEMA_VERSION from executorch.exir import ExecutorchProgram, ExecutorchProgramManager from executorch.exir._serialize import _serialize_pte_binary from executorch.exir.tensor import get_scalar_type, scalar_type_enum, TensorSpec -from executorch.sdk.bundled_program.config import ConfigValue, MethodTestSuite - -from executorch.sdk.bundled_program.version import BUNDLED_PROGRAM_SCHEMA_VERSION # pyre-ignore supported_program_type_table: Dict[Type[core_schema.KernelTypes], ConfigValue] = { diff --git a/sdk/bundled_program/schema/README.md b/devtools/bundled_program/schema/README.md similarity index 100% rename from sdk/bundled_program/schema/README.md rename to devtools/bundled_program/schema/README.md diff --git a/sdk/bundled_program/schema/TARGETS b/devtools/bundled_program/schema/TARGETS similarity index 84% rename from sdk/bundled_program/schema/TARGETS rename to devtools/bundled_program/schema/TARGETS index e9bd642069..51c004cbec 100644 --- a/sdk/bundled_program/schema/TARGETS +++ b/devtools/bundled_program/schema/TARGETS @@ -15,8 +15,8 @@ runtime.python_library( "bundled_program_schema.py", ], visibility = [ - "//executorch/sdk/bundled_program/...", - "//executorch/sdk/etrecord/...", + "//executorch/devtools/bundled_program/...", + "//executorch/devtools/etrecord/...", ], deps = [ "//executorch/exir:scalar_type", diff --git a/sdk/bundled_program/schema/__init__.py b/devtools/bundled_program/schema/__init__.py similarity index 100% rename from sdk/bundled_program/schema/__init__.py rename to devtools/bundled_program/schema/__init__.py diff --git a/sdk/bundled_program/schema/bundled_program_schema.fbs b/devtools/bundled_program/schema/bundled_program_schema.fbs similarity index 100% rename from sdk/bundled_program/schema/bundled_program_schema.fbs rename to devtools/bundled_program/schema/bundled_program_schema.fbs diff --git a/sdk/bundled_program/schema/bundled_program_schema.py b/devtools/bundled_program/schema/bundled_program_schema.py similarity index 100% rename from sdk/bundled_program/schema/bundled_program_schema.py rename to devtools/bundled_program/schema/bundled_program_schema.py diff --git a/sdk/bundled_program/schema/scalar_type.fbs b/devtools/bundled_program/schema/scalar_type.fbs similarity index 100% rename from sdk/bundled_program/schema/scalar_type.fbs rename to devtools/bundled_program/schema/scalar_type.fbs diff --git a/sdk/bundled_program/schema/targets.bzl b/devtools/bundled_program/schema/targets.bzl similarity index 93% rename from sdk/bundled_program/schema/targets.bzl rename to devtools/bundled_program/schema/targets.bzl index a25d792c5a..532a01e039 100644 --- a/sdk/bundled_program/schema/targets.bzl +++ b/devtools/bundled_program/schema/targets.bzl @@ -49,14 +49,14 @@ def define_common_targets(): runtime.export_file( name = INPUT_BUNDLED, visibility = [ - "//executorch/sdk/bundled_program/serialize/...", + "//executorch/devtools/bundled_program/serialize/...", ], ) runtime.export_file( name = INPUT_SCALAR_TYPE, visibility = [ - "//executorch/sdk/bundled_program/serialize/...", + "//executorch/devtools/bundled_program/serialize/...", ], ) @@ -72,7 +72,7 @@ def define_common_targets(): name = BUNDLED_LIBRARY_NAME, srcs = [], visibility = [ - "//executorch/sdk/bundled_program/...", + "//executorch/devtools/bundled_program/...", "//executorch/extension/pybindings/...", ], exported_headers = { diff --git a/sdk/bundled_program/schema/test/TARGETS b/devtools/bundled_program/schema/test/TARGETS similarity index 100% rename from sdk/bundled_program/schema/test/TARGETS rename to devtools/bundled_program/schema/test/TARGETS diff --git a/sdk/bundled_program/schema/test/test_schema.py b/devtools/bundled_program/schema/test/test_schema.py similarity index 79% rename from sdk/bundled_program/schema/test/test_schema.py rename to devtools/bundled_program/schema/test/test_schema.py index ab3d2760d2..c2a19adef7 100644 --- a/sdk/bundled_program/schema/test/test_schema.py +++ b/devtools/bundled_program/schema/test/test_schema.py @@ -20,8 +20,8 @@ def test_schema_sync(self) -> None: self.assertTrue( filecmp.cmp( - prefix + "sdk/bundled_program/schema/scalar_type.fbs", + prefix + "devtools/bundled_program/schema/scalar_type.fbs", prefix + "schema/scalar_type.fbs", ), - 'Please run "hg cp fbcode//executorch/schema/scalar_type.fbs fbcode//executorch/sdk/bundled_program/schema/scalar_type.fbs" to sync schema changes.', + 'Please run "hg cp fbcode//executorch/schema/scalar_type.fbs fbcode//executorch/devtools/bundled_program/schema/scalar_type.fbs" to sync schema changes.', ) diff --git a/sdk/bundled_program/serialize/TARGETS b/devtools/bundled_program/serialize/TARGETS similarity index 76% rename from sdk/bundled_program/serialize/TARGETS rename to devtools/bundled_program/serialize/TARGETS index 20abccd7fd..11c5839977 100644 --- a/sdk/bundled_program/serialize/TARGETS +++ b/devtools/bundled_program/serialize/TARGETS @@ -10,8 +10,8 @@ runtime.python_library( "__init__.py", ], resources = { - "//executorch/sdk/bundled_program/schema:bundled_program_schema.fbs": "bundled_program_schema.fbs", - "//executorch/sdk/bundled_program/schema:scalar_type.fbs": "scalar_type.fbs", + "//executorch/devtools/bundled_program/schema:bundled_program_schema.fbs": "bundled_program_schema.fbs", + "//executorch/devtools/bundled_program/schema:scalar_type.fbs": "scalar_type.fbs", }, # Currently serialization API should only be used in some dedicated targets, # to avoid ODR violation when linking with another Flatbuffers library. @@ -20,18 +20,18 @@ runtime.python_library( "//executorch/bacends/...", "//executorch/backends/xnnpack/test/...", "//executorch/codegen/...", + "//executorch/devtools/bundled_program/tests/...", "//executorch/examples/async_exec:emit_program_lib", "//executorch/exir:lib", "//executorch/extension/pybindings/test:test", "//executorch/extension/pybindings/test:test-library", "//executorch/profiler/...", - "//executorch/sdk/bundled_program/tests/...", "//executorch/test/...", "@EXECUTORCH_CLIENTS", ], deps = [ "fbsource//third-party/pypi/setuptools:setuptools", + "//executorch/devtools/bundled_program/schema:bundled_program_schema_py", "//executorch/exir/_serialize:lib", - "//executorch/sdk/bundled_program/schema:bundled_program_schema_py", ], ) diff --git a/sdk/bundled_program/serialize/__init__.py b/devtools/bundled_program/serialize/__init__.py similarity index 97% rename from sdk/bundled_program/serialize/__init__.py rename to devtools/bundled_program/serialize/__init__.py index e0c75574c9..075436e9c1 100644 --- a/sdk/bundled_program/serialize/__init__.py +++ b/devtools/bundled_program/serialize/__init__.py @@ -12,14 +12,14 @@ import os import tempfile -import executorch.sdk.bundled_program.schema as bp_schema +import executorch.devtools.bundled_program.schema as bp_schema # @manual=fbsource//third-party/pypi/setuptools:setuptools import pkg_resources +from executorch.devtools.bundled_program.core import BundledProgram from executorch.exir._serialize._dataclass import _DataclassEncoder, _json_to_dataclass from executorch.exir._serialize._flatbuffer import _flatc_compile, _flatc_decompile -from executorch.sdk.bundled_program.core import BundledProgram # The prefix of schema files used for bundled program BUNDLED_PROGRAM_SCHEMA_NAME = "bundled_program_schema" diff --git a/sdk/bundled_program/serialize/test/TARGETS b/devtools/bundled_program/serialize/test/TARGETS similarity index 51% rename from sdk/bundled_program/serialize/test/TARGETS rename to devtools/bundled_program/serialize/test/TARGETS index 85f55c02f8..dd92f63f2d 100644 --- a/sdk/bundled_program/serialize/test/TARGETS +++ b/devtools/bundled_program/serialize/test/TARGETS @@ -10,9 +10,8 @@ python_unittest( "test_serialize.py", ], deps = [ - "//executorch/exir:print_program", - "//executorch/sdk/bundled_program:core", - "//executorch/sdk/bundled_program/serialize:lib", - "//executorch/sdk/bundled_program/util:test_util", + "//executorch/devtools/bundled_program:core", + "//executorch/devtools/bundled_program/serialize:lib", + "//executorch/devtools/bundled_program/util:test_util", ], ) diff --git a/sdk/bundled_program/serialize/test/test_serialize.py b/devtools/bundled_program/serialize/test/test_serialize.py similarity index 82% rename from sdk/bundled_program/serialize/test/test_serialize.py rename to devtools/bundled_program/serialize/test/test_serialize.py index 1db6871fc0..48a914d144 100644 --- a/sdk/bundled_program/serialize/test/test_serialize.py +++ b/devtools/bundled_program/serialize/test/test_serialize.py @@ -8,13 +8,15 @@ import unittest -from executorch.sdk.bundled_program.core import BundledProgram +from executorch.devtools.bundled_program.core import BundledProgram -from executorch.sdk.bundled_program.serialize import ( +from executorch.devtools.bundled_program.serialize import ( deserialize_from_flatbuffer_to_bundled_program, serialize_from_bundled_program_to_flatbuffer, ) -from executorch.sdk.bundled_program.util.test_util import get_common_executorch_program +from executorch.devtools.bundled_program.util.test_util import ( + get_common_executorch_program, +) class TestSerialize(unittest.TestCase): diff --git a/sdk/bundled_program/targets.bzl b/devtools/bundled_program/targets.bzl similarity index 91% rename from sdk/bundled_program/targets.bzl rename to devtools/bundled_program/targets.bzl index a3268dff2c..7035b3b31f 100644 --- a/sdk/bundled_program/targets.bzl +++ b/devtools/bundled_program/targets.bzl @@ -19,7 +19,7 @@ def define_common_targets(): ], deps = [ "//executorch/runtime/core/exec_aten/util:dim_order_util" + aten_suffix, - "//executorch/sdk/bundled_program/schema:bundled_program_schema_fbs", + "//executorch/devtools/bundled_program/schema:bundled_program_schema_fbs", ], exported_deps = [ "//executorch/runtime/core:memory_allocator", diff --git a/sdk/bundled_program/test/TARGETS b/devtools/bundled_program/test/TARGETS similarity index 68% rename from sdk/bundled_program/test/TARGETS rename to devtools/bundled_program/test/TARGETS index caf69be60e..652c74b8f4 100644 --- a/sdk/bundled_program/test/TARGETS +++ b/devtools/bundled_program/test/TARGETS @@ -1,4 +1,5 @@ # @noautodeps + load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest") oncall("executorch") @@ -10,11 +11,11 @@ python_unittest( ], deps = [ "//caffe2:torch", + "//executorch/devtools/bundled_program:config", + "//executorch/devtools/bundled_program:core", + "//executorch/devtools/bundled_program/schema:bundled_program_schema_py", + "//executorch/devtools/bundled_program/util:test_util", "//executorch/exir/_serialize:lib", - "//executorch/sdk/bundled_program:config", - "//executorch/sdk/bundled_program:core", - "//executorch/sdk/bundled_program/schema:bundled_program_schema_py", - "//executorch/sdk/bundled_program/util:test_util", ], ) @@ -25,9 +26,9 @@ python_unittest( ], deps = [ "//caffe2:torch", + "//executorch/devtools/bundled_program:config", + "//executorch/devtools/bundled_program/util:test_util", "//executorch/extension/pytree:pylib", - "//executorch/sdk/bundled_program:config", - "//executorch/sdk/bundled_program/util:test_util", ], ) @@ -38,6 +39,10 @@ python_unittest( ], deps = [ "//caffe2:torch", + "//executorch/devtools/bundled_program:config", + "//executorch/devtools/bundled_program:core", + "//executorch/devtools/bundled_program/serialize:lib", + "//executorch/devtools/bundled_program/util:test_util", "//executorch/exir:dynamic_shape", "//executorch/exir:lib", "//executorch/exir:memory", @@ -54,9 +59,5 @@ python_unittest( "//executorch/extension/pybindings:portable_lib", "//executorch/extension/pytree:pybindings", "//executorch/kernels/portable:custom_ops_generated_lib", - "//executorch/sdk/bundled_program:config", - "//executorch/sdk/bundled_program:core", - "//executorch/sdk/bundled_program/serialize:lib", - "//executorch/sdk/bundled_program/util:test_util", ], ) diff --git a/sdk/bundled_program/test/test_bundle_data.py b/devtools/bundled_program/test/test_bundle_data.py similarity index 93% rename from sdk/bundled_program/test/test_bundle_data.py rename to devtools/bundled_program/test/test_bundle_data.py index a8d9485c5f..565539cbf1 100644 --- a/sdk/bundled_program/test/test_bundle_data.py +++ b/devtools/bundled_program/test/test_bundle_data.py @@ -9,13 +9,15 @@ import unittest from typing import List -import executorch.sdk.bundled_program.schema as bp_schema +import executorch.devtools.bundled_program.schema as bp_schema import torch +from executorch.devtools.bundled_program.config import ConfigValue +from executorch.devtools.bundled_program.core import BundledProgram +from executorch.devtools.bundled_program.util.test_util import ( + get_common_executorch_program, +) from executorch.exir._serialize import _serialize_pte_binary -from executorch.sdk.bundled_program.config import ConfigValue -from executorch.sdk.bundled_program.core import BundledProgram -from executorch.sdk.bundled_program.util.test_util import get_common_executorch_program class TestBundle(unittest.TestCase): diff --git a/sdk/bundled_program/test/test_config.py b/devtools/bundled_program/test/test_config.py similarity index 97% rename from sdk/bundled_program/test/test_config.py rename to devtools/bundled_program/test/test_config.py index 3183ad907f..21f3d48042 100644 --- a/sdk/bundled_program/test/test_config.py +++ b/devtools/bundled_program/test/test_config.py @@ -10,14 +10,14 @@ from typing import get_args, List, Union import torch -from executorch.extension.pytree import tree_flatten -from executorch.sdk.bundled_program.config import DataContainer +from executorch.devtools.bundled_program.config import DataContainer -from executorch.sdk.bundled_program.util.test_util import ( +from executorch.devtools.bundled_program.util.test_util import ( get_random_test_suites, get_random_test_suites_with_eager_model, SampleModel, ) +from executorch.extension.pytree import tree_flatten class TestConfig(unittest.TestCase): diff --git a/sdk/bundled_program/test/test_end2end.py b/devtools/bundled_program/test/test_end2end.py similarity index 88% rename from sdk/bundled_program/test/test_end2end.py rename to devtools/bundled_program/test/test_end2end.py index 99d58ee15c..7cee073be0 100644 --- a/sdk/bundled_program/test/test_end2end.py +++ b/devtools/bundled_program/test/test_end2end.py @@ -21,12 +21,12 @@ import torch -from executorch.sdk.bundled_program.core import BundledProgram -from executorch.sdk.bundled_program.serialize import ( +from executorch.devtools.bundled_program.core import BundledProgram +from executorch.devtools.bundled_program.serialize import ( serialize_from_bundled_program_to_flatbuffer, ) -from executorch.sdk.bundled_program.util.test_util import ( +from executorch.devtools.bundled_program.util.test_util import ( get_common_executorch_program, SampleModel, ) @@ -45,7 +45,7 @@ pass try: - from executorch.extension.pybindings.aten_lib import ( + from executorch.extension.pybindings.aten_lib import ( # @manual=//executorch/extension/pybindings:aten_lib _load_bundled_program_from_buffer, _load_for_executorch_from_buffer, _load_for_executorch_from_bundled_program, diff --git a/sdk/bundled_program/util/TARGETS b/devtools/bundled_program/util/TARGETS similarity index 68% rename from sdk/bundled_program/util/TARGETS rename to devtools/bundled_program/util/TARGETS index 17d19dfb29..7d019ce30f 100644 --- a/sdk/bundled_program/util/TARGETS +++ b/devtools/bundled_program/util/TARGETS @@ -7,10 +7,10 @@ python_library( srcs = [ "test_util.py", ], - visibility = ["//executorch/sdk/bundled_program/..."], + visibility = ["//executorch/devtools/bundled_program/..."], deps = [ "//caffe2:torch", + "//executorch/devtools/bundled_program:config", "//executorch/exir:lib", - "//executorch/sdk/bundled_program:config", ], ) diff --git a/sdk/bundled_program/util/test_util.py b/devtools/bundled_program/util/test_util.py similarity index 99% rename from sdk/bundled_program/util/test_util.py rename to devtools/bundled_program/util/test_util.py index bfea8158ac..505186f3a0 100644 --- a/sdk/bundled_program/util/test_util.py +++ b/devtools/bundled_program/util/test_util.py @@ -10,14 +10,14 @@ from typing import List, Tuple import torch - -from executorch.exir import ExecutorchProgramManager, to_edge -from executorch.sdk.bundled_program.config import ( +from executorch.devtools.bundled_program.config import ( MethodInputType, MethodOutputType, MethodTestCase, MethodTestSuite, ) + +from executorch.exir import ExecutorchProgramManager, to_edge from torch.export import export from torch.export.unflatten import _assign_attr, _AttrKind diff --git a/sdk/bundled_program/version.py b/devtools/bundled_program/version.py similarity index 100% rename from sdk/bundled_program/version.py rename to devtools/bundled_program/version.py diff --git a/sdk/debug_format/TARGETS b/devtools/debug_format/TARGETS similarity index 100% rename from sdk/debug_format/TARGETS rename to devtools/debug_format/TARGETS diff --git a/sdk/debug_format/base_schema.py b/devtools/debug_format/base_schema.py similarity index 100% rename from sdk/debug_format/base_schema.py rename to devtools/debug_format/base_schema.py diff --git a/sdk/debug_format/et_schema.py b/devtools/debug_format/et_schema.py similarity index 99% rename from sdk/debug_format/et_schema.py rename to devtools/debug_format/et_schema.py index 9a6af4edba..abe155233a 100644 --- a/sdk/debug_format/et_schema.py +++ b/devtools/debug_format/et_schema.py @@ -21,7 +21,7 @@ import torch from executorch import exir -from executorch.sdk.debug_format.base_schema import ( +from executorch.devtools.debug_format.base_schema import ( Node, OperatorGraph, OperatorNode, diff --git a/sdk/etdump/TARGETS b/devtools/etdump/TARGETS similarity index 81% rename from sdk/etdump/TARGETS rename to devtools/etdump/TARGETS index 22d07478cb..7dcc4c1e84 100644 --- a/sdk/etdump/TARGETS +++ b/devtools/etdump/TARGETS @@ -11,7 +11,7 @@ runtime.python_library( "schema_flatcc.py", ], visibility = [ - "//executorch/sdk/...", + "//executorch/devtools/...", ], deps = [ "//executorch/exir:scalar_type", @@ -24,11 +24,11 @@ runtime.python_library( "serialize.py", ], resources = { + "//executorch/devtools/etdump:etdump_schema_flatcc.fbs": "etdump_schema_flatcc.fbs", "//executorch/schema:scalar_type.fbs": "scalar_type.fbs", - "//executorch/sdk/etdump:etdump_schema_flatcc.fbs": "etdump_schema_flatcc.fbs", }, visibility = [ - "//executorch/sdk/...", + "//executorch/devtools/...", ], deps = [ "fbsource//third-party/pypi/setuptools:setuptools", diff --git a/sdk/etdump/emitter.cpp b/devtools/etdump/emitter.cpp similarity index 98% rename from sdk/etdump/emitter.cpp rename to devtools/etdump/emitter.cpp index 1b3cba9d19..dfca629530 100644 --- a/sdk/etdump/emitter.cpp +++ b/devtools/etdump/emitter.cpp @@ -9,8 +9,8 @@ #include #include +#include "executorch/devtools/etdump/emitter.h" #include "executorch/runtime/platform/assert.h" -#include "executorch/sdk/etdump/emitter.h" namespace torch { namespace executor { diff --git a/sdk/etdump/emitter.h b/devtools/etdump/emitter.h similarity index 92% rename from sdk/etdump/emitter.h rename to devtools/etdump/emitter.h index 3910d3bd27..bf8ab0b1e1 100644 --- a/sdk/etdump/emitter.h +++ b/devtools/etdump/emitter.h @@ -9,7 +9,7 @@ #include #include -#include +#include #include #pragma once diff --git a/sdk/etdump/etdump_flatcc.cpp b/devtools/etdump/etdump_flatcc.cpp similarity index 98% rename from sdk/etdump/etdump_flatcc.cpp rename to devtools/etdump/etdump_flatcc.cpp index dab1443b55..ca46c12f51 100644 --- a/sdk/etdump/etdump_flatcc.cpp +++ b/devtools/etdump/etdump_flatcc.cpp @@ -6,16 +6,16 @@ * LICENSE file in the root directory of this source tree. */ -#include "executorch/sdk/etdump/etdump_flatcc.h" -#include -#include +#include "executorch/devtools/etdump/etdump_flatcc.h" +#include +#include #include #include #include +#include "executorch/devtools/etdump/emitter.h" #include "executorch/runtime/core/exec_aten/exec_aten.h" #include "executorch/runtime/core/exec_aten/util/scalar_type_util.h" #include "executorch/runtime/platform/assert.h" -#include "executorch/sdk/etdump/emitter.h" namespace torch { namespace executor { diff --git a/sdk/etdump/etdump_flatcc.h b/devtools/etdump/etdump_flatcc.h similarity index 100% rename from sdk/etdump/etdump_flatcc.h rename to devtools/etdump/etdump_flatcc.h diff --git a/sdk/etdump/etdump_schema_flatcc.fbs b/devtools/etdump/etdump_schema_flatcc.fbs similarity index 100% rename from sdk/etdump/etdump_schema_flatcc.fbs rename to devtools/etdump/etdump_schema_flatcc.fbs diff --git a/sdk/etdump/scalar_type.fbs b/devtools/etdump/scalar_type.fbs similarity index 100% rename from sdk/etdump/scalar_type.fbs rename to devtools/etdump/scalar_type.fbs diff --git a/sdk/etdump/schema_flatcc.py b/devtools/etdump/schema_flatcc.py similarity index 97% rename from sdk/etdump/schema_flatcc.py rename to devtools/etdump/schema_flatcc.py index eaad876a53..f19f328d3f 100644 --- a/sdk/etdump/schema_flatcc.py +++ b/devtools/etdump/schema_flatcc.py @@ -7,7 +7,7 @@ # pyre-strict """ This file is the python representation of the schema contained in -executorch/sdk/etdump/etdump_schema.fbs. Any changes made to that +executorch/devtools/etdump/etdump_schema.fbs. Any changes made to that flatbuffer schema should accordingly be reflected here also. """ diff --git a/sdk/etdump/serialize.py b/devtools/etdump/serialize.py similarity index 98% rename from sdk/etdump/serialize.py rename to devtools/etdump/serialize.py index 0cc6682bfc..4ed63bc385 100644 --- a/sdk/etdump/serialize.py +++ b/devtools/etdump/serialize.py @@ -11,11 +11,11 @@ import tempfile import pkg_resources +from executorch.devtools.etdump.schema_flatcc import ETDumpFlatCC from executorch.exir._serialize._dataclass import _DataclassEncoder, _json_to_dataclass from executorch.exir._serialize._flatbuffer import _flatc_compile, _flatc_decompile -from executorch.sdk.etdump.schema_flatcc import ETDumpFlatCC # The prefix of schema files used for etdump ETDUMP_FLATCC_SCHEMA_NAME = "etdump_schema_flatcc" diff --git a/sdk/etdump/targets.bzl b/devtools/etdump/targets.bzl similarity index 100% rename from sdk/etdump/targets.bzl rename to devtools/etdump/targets.bzl diff --git a/sdk/etdump/tests/CMakeLists.txt b/devtools/etdump/tests/CMakeLists.txt similarity index 100% rename from sdk/etdump/tests/CMakeLists.txt rename to devtools/etdump/tests/CMakeLists.txt diff --git a/sdk/etdump/tests/TARGETS b/devtools/etdump/tests/TARGETS similarity index 75% rename from sdk/etdump/tests/TARGETS rename to devtools/etdump/tests/TARGETS index ad48948c48..51e807891d 100644 --- a/sdk/etdump/tests/TARGETS +++ b/devtools/etdump/tests/TARGETS @@ -11,8 +11,8 @@ python_unittest( "serialize_test.py", ], deps = [ + "//executorch/devtools/etdump:schema_flatcc", + "//executorch/devtools/etdump:serialize", "//executorch/exir/_serialize:lib", - "//executorch/sdk/etdump:schema_flatcc", - "//executorch/sdk/etdump:serialize", ], ) diff --git a/sdk/etdump/tests/etdump_test.cpp b/devtools/etdump/tests/etdump_test.cpp similarity index 99% rename from sdk/etdump/tests/etdump_test.cpp rename to devtools/etdump/tests/etdump_test.cpp index d30bd9a303..de8c0abc39 100644 --- a/sdk/etdump/tests/etdump_test.cpp +++ b/devtools/etdump/tests/etdump_test.cpp @@ -9,12 +9,12 @@ #include #include +#include +#include +#include #include #include #include -#include -#include -#include #include #include #include diff --git a/sdk/etdump/tests/serialize_test.py b/devtools/etdump/tests/serialize_test.py similarity index 97% rename from sdk/etdump/tests/serialize_test.py rename to devtools/etdump/tests/serialize_test.py index 2b1497f597..1a7f3bd93f 100644 --- a/sdk/etdump/tests/serialize_test.py +++ b/devtools/etdump/tests/serialize_test.py @@ -12,13 +12,13 @@ from pprint import pformat from typing import List -import executorch.sdk.etdump.schema_flatcc as flatcc -from executorch.exir._serialize._dataclass import _DataclassEncoder +import executorch.devtools.etdump.schema_flatcc as flatcc -from executorch.sdk.etdump.serialize import ( +from executorch.devtools.etdump.serialize import ( deserialize_from_etdump_flatcc, serialize_to_etdump_flatcc, ) +from executorch.exir._serialize._dataclass import _DataclassEncoder def diff_jsons(a: str, b: str) -> List[str]: diff --git a/sdk/etdump/tests/targets.bzl b/devtools/etdump/tests/targets.bzl similarity index 82% rename from sdk/etdump/tests/targets.bzl rename to devtools/etdump/tests/targets.bzl index 41b19ca65e..5299b7c1cb 100644 --- a/sdk/etdump/tests/targets.bzl +++ b/devtools/etdump/tests/targets.bzl @@ -13,8 +13,8 @@ def define_common_targets(): "etdump_test.cpp", ], deps = [ - "//executorch/sdk/etdump:etdump_flatcc", - "//executorch/sdk/etdump:etdump_schema_flatcc", + "//executorch/devtools/etdump:etdump_flatcc", + "//executorch/devtools/etdump:etdump_schema_flatcc", "//executorch/runtime/platform:platform", "//executorch/runtime/core/exec_aten/testing_util:tensor_util", ], diff --git a/sdk/etrecord/TARGETS b/devtools/etrecord/TARGETS similarity index 71% rename from sdk/etrecord/TARGETS rename to devtools/etrecord/TARGETS index c7de63a81f..09fc3212bf 100644 --- a/sdk/etrecord/TARGETS +++ b/devtools/etrecord/TARGETS @@ -9,10 +9,10 @@ python_library( "_etrecord.py", ], deps = [ + "//executorch/devtools/bundled_program:core", + "//executorch/devtools/bundled_program/schema:bundled_program_schema_py", "//executorch/exir:lib", "//executorch/exir/emit:emit", "//executorch/exir/serde:serialize", - "//executorch/sdk/bundled_program:core", - "//executorch/sdk/bundled_program/schema:bundled_program_schema_py", ], ) diff --git a/sdk/etrecord/__init__.py b/devtools/etrecord/__init__.py similarity index 86% rename from sdk/etrecord/__init__.py rename to devtools/etrecord/__init__.py index 29c29462a7..59ff4e44c2 100644 --- a/sdk/etrecord/__init__.py +++ b/devtools/etrecord/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.sdk.etrecord._etrecord import ( +from executorch.devtools.etrecord._etrecord import ( ETRecord, generate_etrecord, parse_etrecord, diff --git a/sdk/etrecord/_etrecord.py b/devtools/etrecord/_etrecord.py similarity index 98% rename from sdk/etrecord/_etrecord.py rename to devtools/etrecord/_etrecord.py index 1ae46f27aa..cd21325498 100644 --- a/sdk/etrecord/_etrecord.py +++ b/devtools/etrecord/_etrecord.py @@ -12,6 +12,9 @@ from zipfile import BadZipFile, ZipFile from executorch import exir +from executorch.devtools.bundled_program.core import BundledProgram + +from executorch.devtools.bundled_program.schema.bundled_program_schema import Value from executorch.exir import ( EdgeProgramManager, ExecutorchProgram, @@ -23,9 +26,6 @@ from executorch.exir.serde.export_serialize import SerializedArtifact from executorch.exir.serde.serialize import deserialize, serialize -from executorch.sdk.bundled_program.core import BundledProgram - -from executorch.sdk.bundled_program.schema.bundled_program_schema import Value ProgramOutput = List[Value] diff --git a/sdk/etrecord/tests/TARGETS b/devtools/etrecord/tests/TARGETS similarity index 64% rename from sdk/etrecord/tests/TARGETS rename to devtools/etrecord/tests/TARGETS index 0984c755a4..fffa7f1834 100644 --- a/sdk/etrecord/tests/TARGETS +++ b/devtools/etrecord/tests/TARGETS @@ -8,11 +8,11 @@ python_unittest( srcs = ["etrecord_test.py"], deps = [ "//caffe2:torch", + "//executorch/devtools/bundled_program:config", + "//executorch/devtools/bundled_program:core", + "//executorch/devtools/etrecord:etrecord", "//executorch/exir:lib", "//executorch/exir/tests:models", - "//executorch/sdk/bundled_program:config", - "//executorch/sdk/bundled_program:core", - "//executorch/sdk/etrecord:etrecord", ], ) @@ -21,10 +21,10 @@ python_library( srcs = ["etrecord_test.py"], deps = [ "//caffe2:torch", + "//executorch/devtools/bundled_program:config", + "//executorch/devtools/bundled_program:core", + "//executorch/devtools/etrecord:etrecord", "//executorch/exir:lib", "//executorch/exir/tests:models", - "//executorch/sdk/bundled_program:config", - "//executorch/sdk/bundled_program:core", - "//executorch/sdk/etrecord:etrecord", ], ) diff --git a/sdk/etrecord/tests/etrecord_test.py b/devtools/etrecord/tests/etrecord_test.py similarity index 96% rename from sdk/etrecord/tests/etrecord_test.py rename to devtools/etrecord/tests/etrecord_test.py index bc534fd487..b8e08dfe8c 100644 --- a/sdk/etrecord/tests/etrecord_test.py +++ b/devtools/etrecord/tests/etrecord_test.py @@ -12,14 +12,14 @@ import executorch.exir.tests.models as models import torch from executorch import exir -from executorch.exir import EdgeCompileConfig, EdgeProgramManager, to_edge -from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite -from executorch.sdk.bundled_program.core import BundledProgram -from executorch.sdk.etrecord import generate_etrecord, parse_etrecord -from executorch.sdk.etrecord._etrecord import ( +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite +from executorch.devtools.bundled_program.core import BundledProgram +from executorch.devtools.etrecord import generate_etrecord, parse_etrecord +from executorch.devtools.etrecord._etrecord import ( _get_reference_outputs, ETRecordReservedFileNames, ) +from executorch.exir import EdgeCompileConfig, EdgeProgramManager, to_edge from torch.export import export diff --git a/sdk/inspector/TARGETS b/devtools/inspector/TARGETS similarity index 70% rename from sdk/inspector/TARGETS rename to devtools/inspector/TARGETS index bc53c90c11..2b1cbecff3 100644 --- a/sdk/inspector/TARGETS +++ b/devtools/inspector/TARGETS @@ -14,10 +14,10 @@ python_library( "fbsource//third-party/pypi/pandas:pandas", "fbsource//third-party/pypi/tabulate:tabulate", ":inspector_utils", + "//executorch/devtools/debug_format:et_schema", + "//executorch/devtools/etdump:schema_flatcc", + "//executorch/devtools/etrecord:etrecord", "//executorch/exir:lib", - "//executorch/sdk/debug_format:et_schema", - "//executorch/sdk/etdump:schema_flatcc", - "//executorch/sdk/etrecord:etrecord", ], ) @@ -27,7 +27,7 @@ python_binary( main_src = "inspector_cli.py", deps = [ ":inspector_utils", - "//executorch/sdk:lib", + "//executorch/devtools:lib", ], ) @@ -40,11 +40,11 @@ python_library( "fbsource//third-party/pypi/matplotlib:matplotlib", "fbsource//third-party/pypi/numpy:numpy", "//caffe2:torch", - "//executorch/sdk/debug_format:base_schema", - "//executorch/sdk/debug_format:et_schema", - "//executorch/sdk/etdump:schema_flatcc", - "//executorch/sdk/etdump:serialize", - "//executorch/sdk/etrecord:etrecord", + "//executorch/devtools/debug_format:base_schema", + "//executorch/devtools/debug_format:et_schema", + "//executorch/devtools/etdump:schema_flatcc", + "//executorch/devtools/etdump:serialize", + "//executorch/devtools/etrecord:etrecord", ], ) diff --git a/sdk/inspector/__init__.py b/devtools/inspector/__init__.py similarity index 60% rename from sdk/inspector/__init__.py rename to devtools/inspector/__init__.py index bef3d363d5..ff9bb81479 100644 --- a/sdk/inspector/__init__.py +++ b/devtools/inspector/__init__.py @@ -4,7 +4,12 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.sdk.inspector._inspector import Event, EventBlock, Inspector, PerfData -from executorch.sdk.inspector._inspector_utils import TimeScale +from executorch.devtools.inspector._inspector import ( + Event, + EventBlock, + Inspector, + PerfData, +) +from executorch.devtools.inspector._inspector_utils import TimeScale __all__ = ["Event", "EventBlock", "Inspector", "PerfData", "TimeScale"] diff --git a/sdk/inspector/_inspector.py b/devtools/inspector/_inspector.py similarity index 99% rename from sdk/inspector/_inspector.py rename to devtools/inspector/_inspector.py index 5f9bfafee7..f98e3cd3a5 100644 --- a/sdk/inspector/_inspector.py +++ b/devtools/inspector/_inspector.py @@ -26,16 +26,19 @@ Union, ) -import executorch.sdk.etdump.schema_flatcc as flatcc +import executorch.devtools.etdump.schema_flatcc as flatcc import numpy as np import pandas as pd -from executorch.exir import ExportedProgram -from executorch.sdk.debug_format.et_schema import OperatorGraph, OperatorNode -from executorch.sdk.etdump.schema_flatcc import DebugEvent, ETDumpFlatCC, ProfileEvent -from executorch.sdk.etrecord import ETRecord, parse_etrecord -from executorch.sdk.inspector._inspector_utils import ( +from executorch.devtools.debug_format.et_schema import OperatorGraph, OperatorNode +from executorch.devtools.etdump.schema_flatcc import ( + DebugEvent, + ETDumpFlatCC, + ProfileEvent, +) +from executorch.devtools.etrecord import ETRecord, parse_etrecord +from executorch.devtools.inspector._inspector_utils import ( create_debug_handle_to_op_node_mapping, EDGE_DIALECT_GRAPH_KEY, EXCLUDED_COLUMNS_WHEN_PRINTING, @@ -53,6 +56,7 @@ TimeScale, verify_debug_data_equivalence, ) +from executorch.exir import ExportedProgram from tabulate import tabulate diff --git a/sdk/inspector/_inspector_utils.py b/devtools/inspector/_inspector_utils.py similarity index 97% rename from sdk/inspector/_inspector_utils.py rename to devtools/inspector/_inspector_utils.py index 6879e85505..98b5fdc722 100644 --- a/sdk/inspector/_inspector_utils.py +++ b/devtools/inspector/_inspector_utils.py @@ -8,14 +8,14 @@ from enum import Enum from typing import Dict, List, Mapping, Optional, Tuple, TypeAlias, Union -import executorch.sdk.etdump.schema_flatcc as flatcc +import executorch.devtools.etdump.schema_flatcc as flatcc import torch -from executorch.sdk.debug_format.base_schema import OperatorNode +from executorch.devtools.debug_format.base_schema import OperatorNode -from executorch.sdk.debug_format.et_schema import FXOperatorGraph, OperatorGraph -from executorch.sdk.etdump.schema_flatcc import ( +from executorch.devtools.debug_format.et_schema import FXOperatorGraph, OperatorGraph +from executorch.devtools.etdump.schema_flatcc import ( DebugEvent, ETDumpFlatCC, ProfileEvent, @@ -25,8 +25,8 @@ ValueType, ) -from executorch.sdk.etdump.serialize import deserialize_from_etdump_flatcc -from executorch.sdk.etrecord import ETRecord +from executorch.devtools.etdump.serialize import deserialize_from_etdump_flatcc +from executorch.devtools.etrecord import ETRecord FORWARD = "forward" EDGE_DIALECT_GRAPH_KEY = "edge_dialect_graph_module" diff --git a/sdk/inspector/inspector_cli.py b/devtools/inspector/inspector_cli.py similarity index 93% rename from sdk/inspector/inspector_cli.py rename to devtools/inspector/inspector_cli.py index d6c8d5442f..bd76607a94 100644 --- a/sdk/inspector/inspector_cli.py +++ b/devtools/inspector/inspector_cli.py @@ -6,8 +6,8 @@ import argparse -from executorch.sdk import Inspector -from executorch.sdk.inspector._inspector_utils import compare_results, TimeScale +from executorch.devtools import Inspector +from executorch.devtools.inspector._inspector_utils import compare_results, TimeScale def main() -> None: diff --git a/devtools/inspector/tests/TARGETS b/devtools/inspector/tests/TARGETS new file mode 100644 index 0000000000..eada6817bc --- /dev/null +++ b/devtools/inspector/tests/TARGETS @@ -0,0 +1,41 @@ +load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest") + +oncall("executorch") + +python_unittest( + name = "inspector_test", + srcs = ["inspector_test.py"], + deps = [ + "//executorch/devtools:lib", + "//executorch/devtools/debug_format:et_schema", + "//executorch/devtools/etdump:schema_flatcc", + "//executorch/devtools/etrecord/tests:etrecord_test_library", + "//executorch/devtools/inspector:inspector", + "//executorch/devtools/inspector:lib", + "//executorch/exir:lib", + ], +) + +python_unittest( + name = "event_blocks_test", + srcs = ["event_blocks_test.py"], + deps = [ + "//executorch/devtools/etdump:schema_flatcc", + "//executorch/devtools/inspector:inspector", + "//executorch/devtools/inspector:lib", + ], +) + +python_unittest( + name = "inspector_utils_test", + srcs = ["inspector_utils_test.py"], + deps = [ + "//caffe2:torch", + "//executorch/devtools:lib", + "//executorch/devtools/debug_format:base_schema", + "//executorch/devtools/debug_format:et_schema", + "//executorch/devtools/etdump:schema_flatcc", + "//executorch/devtools/etrecord/tests:etrecord_test_library", + "//executorch/devtools/inspector:inspector_utils", + ], +) diff --git a/sdk/inspector/tests/event_blocks_test.py b/devtools/inspector/tests/event_blocks_test.py similarity index 98% rename from sdk/inspector/tests/event_blocks_test.py rename to devtools/inspector/tests/event_blocks_test.py index 7c7da00186..4101035f99 100644 --- a/sdk/inspector/tests/event_blocks_test.py +++ b/devtools/inspector/tests/event_blocks_test.py @@ -8,10 +8,10 @@ import unittest from typing import List, Optional, Tuple, Union -import executorch.sdk.etdump.schema_flatcc as flatcc -from executorch.sdk.etdump.schema_flatcc import ETDumpFlatCC, ProfileEvent -from executorch.sdk.inspector import Event, EventBlock, PerfData -from executorch.sdk.inspector._inspector import ( +import executorch.devtools.etdump.schema_flatcc as flatcc +from executorch.devtools.etdump.schema_flatcc import ETDumpFlatCC, ProfileEvent +from executorch.devtools.inspector import Event, EventBlock, PerfData +from executorch.devtools.inspector._inspector import ( DelegateMetadata, EventSignature, InstructionEvent, diff --git a/sdk/inspector/tests/inspector_test.py b/devtools/inspector/tests/inspector_test.py similarity index 97% rename from sdk/inspector/tests/inspector_test.py rename to devtools/inspector/tests/inspector_test.py index a372c7c569..55f0cd10ae 100644 --- a/sdk/inspector/tests/inspector_test.py +++ b/devtools/inspector/tests/inspector_test.py @@ -14,14 +14,19 @@ from unittest.mock import patch -from executorch.exir import ExportedProgram -from executorch.sdk import generate_etrecord, parse_etrecord -from executorch.sdk.debug_format.et_schema import OperatorNode -from executorch.sdk.etdump.schema_flatcc import ProfileEvent -from executorch.sdk.etrecord.tests.etrecord_test import TestETRecord - -from executorch.sdk.inspector import _inspector, Event, EventBlock, Inspector, PerfData -from executorch.sdk.inspector._inspector import ( +from executorch.devtools import generate_etrecord, parse_etrecord +from executorch.devtools.debug_format.et_schema import OperatorNode +from executorch.devtools.etdump.schema_flatcc import ProfileEvent +from executorch.devtools.etrecord.tests.etrecord_test import TestETRecord + +from executorch.devtools.inspector import ( + _inspector, + Event, + EventBlock, + Inspector, + PerfData, +) +from executorch.devtools.inspector._inspector import ( DebugEventSignature, flatcc, InstructionEvent, @@ -29,6 +34,8 @@ ProfileEventSignature, ) +from executorch.exir import ExportedProgram + OP_TYPE = "aten::add" EVENT_BLOCK_NAME = "block_0" diff --git a/sdk/inspector/tests/inspector_utils_test.py b/devtools/inspector/tests/inspector_utils_test.py similarity index 94% rename from sdk/inspector/tests/inspector_utils_test.py rename to devtools/inspector/tests/inspector_utils_test.py index b5b9b54d6c..d853732fcc 100644 --- a/sdk/inspector/tests/inspector_utils_test.py +++ b/devtools/inspector/tests/inspector_utils_test.py @@ -10,19 +10,19 @@ import torch -from executorch.sdk import generate_etrecord, parse_etrecord +from executorch.devtools import generate_etrecord, parse_etrecord -from executorch.sdk.debug_format.base_schema import ( +from executorch.devtools.debug_format.base_schema import ( OperatorGraph, OperatorNode, ValueNode, ) -from executorch.sdk.debug_format.et_schema import FXOperatorGraph -from executorch.sdk.etdump import schema_flatcc as flatcc +from executorch.devtools.debug_format.et_schema import FXOperatorGraph +from executorch.devtools.etdump import schema_flatcc as flatcc -from executorch.sdk.etrecord.tests.etrecord_test import TestETRecord -from executorch.sdk.inspector._inspector_utils import ( +from executorch.devtools.etrecord.tests.etrecord_test import TestETRecord +from executorch.devtools.inspector._inspector_utils import ( create_debug_handle_to_op_node_mapping, EDGE_DIALECT_GRAPH_KEY, find_populated_event, diff --git a/sdk/size_analysis_tool/TARGETS b/devtools/size_analysis_tool/TARGETS similarity index 86% rename from sdk/size_analysis_tool/TARGETS rename to devtools/size_analysis_tool/TARGETS index 44ae0aa6f8..c365ba152d 100644 --- a/sdk/size_analysis_tool/TARGETS +++ b/devtools/size_analysis_tool/TARGETS @@ -12,9 +12,9 @@ python_library( visibility = ["PUBLIC"], deps = [ "//caffe2:torch", + "//executorch/devtools:lib", "//executorch/exir:lib", "//executorch/exir/backend:backend_api", - "//executorch/sdk:lib", ], ) @@ -23,13 +23,13 @@ python_binary( srcs = [ "size_analysis_tool.py", ], - main_function = "executorch.sdk.size_analysis_tool.size_analysis_tool.main", + main_function = "executorch.devtools.size_analysis_tool.size_analysis_tool.main", visibility = ["PUBLIC"], deps = [ "//caffe2:torch", + "//executorch/devtools:lib", "//executorch/exir:lib", "//executorch/exir/backend:backend_api", - "//executorch/sdk:lib", ], ) @@ -43,9 +43,9 @@ python_unittest( "//caffe2:torch", "//executorch/backends/xnnpack/partition:xnnpack_partitioner", "//executorch/backends/xnnpack/utils:xnnpack_utils", + "//executorch/devtools:lib", "//executorch/exir:lib", "//executorch/exir/backend:backend_api", "//executorch/exir/passes:spec_prop_pass", - "//executorch/sdk:lib", ], ) diff --git a/sdk/size_analysis_tool/size_analysis_tool.py b/devtools/size_analysis_tool/size_analysis_tool.py similarity index 99% rename from sdk/size_analysis_tool/size_analysis_tool.py rename to devtools/size_analysis_tool/size_analysis_tool.py index d17ec5ac47..8ea8ddbbf4 100644 --- a/sdk/size_analysis_tool/size_analysis_tool.py +++ b/devtools/size_analysis_tool/size_analysis_tool.py @@ -9,10 +9,10 @@ from typing import Any, Callable, Dict, List, Optional, Tuple import torch +from executorch.devtools import parse_etrecord from executorch.exir import ExportedProgram from executorch.exir.backend.backend_api import LoweredBackendModule -from executorch.sdk import parse_etrecord def _get_tensor_data(node: torch.fx.Node, tensor: torch.Tensor) -> Dict[str, Any]: diff --git a/sdk/size_analysis_tool/size_analysis_tool_test.py b/devtools/size_analysis_tool/size_analysis_tool_test.py similarity index 98% rename from sdk/size_analysis_tool/size_analysis_tool_test.py rename to devtools/size_analysis_tool/size_analysis_tool_test.py index 3e1efec77b..96feae7e42 100644 --- a/sdk/size_analysis_tool/size_analysis_tool_test.py +++ b/devtools/size_analysis_tool/size_analysis_tool_test.py @@ -14,12 +14,12 @@ get_xnnpack_executorch_backend_config, ) from executorch.backends.xnnpack.utils.utils import capture_graph_for_xnnpack -from executorch.exir.backend.backend_api import to_backend, validation_disabled -from executorch.exir.passes.spec_prop_pass import SpecPropPass -from executorch.sdk.size_analysis_tool.size_analysis_tool import ( +from executorch.devtools.size_analysis_tool.size_analysis_tool import ( generate_model_size_information, ) +from executorch.exir.backend.backend_api import to_backend, validation_disabled +from executorch.exir.passes.spec_prop_pass import SpecPropPass class SizeAnalysisToolTest(unittest.TestCase): diff --git a/sdk/targets.bzl b/devtools/targets.bzl similarity index 100% rename from sdk/targets.bzl rename to devtools/targets.bzl diff --git a/docs/source/extension-module.md b/docs/source/extension-module.md index 9e236e8e48..97528c9540 100644 --- a/docs/source/extension-module.md +++ b/docs/source/extension-module.md @@ -132,7 +132,7 @@ Use [ExecuTorch Dump](sdk-etdump.md) to trace model execution. Create an instanc #include #include #include -#include +#include using namespace ::torch::executor; diff --git a/docs/source/llm/getting-started.md b/docs/source/llm/getting-started.md index 5fffb7e8ca..6d79e1e0fd 100644 --- a/docs/source/llm/getting-started.md +++ b/docs/source/llm/getting-started.md @@ -763,7 +763,7 @@ In your export script, after calling `to_edge()` and `to_executorch()`, call `ge ``` import copy -from executorch.sdk import generate_etrecord +from executorch.devtools import generate_etrecord # Make the deep copy immediately after to to_edge() edge_manager_copy = copy.deepcopy(edge_manager) @@ -784,7 +784,7 @@ Include the ETDump header in your code. ```cpp // main.cpp -#include +#include ``` Create an Instance of the ETDumpGen class and pass it to the Module constructor. @@ -835,7 +835,7 @@ Run the runner, you will see “etdump.etdp” generated. Once you’ve collected debug artifacts ETDump (and optionally an ETRecord), you can use the Inspector API to view performance information. ```python -from executorch.sdk import Inspector +from executorch.devtools import Inspector inspector = Inspector(etdump_path="etdump.etdp") # If you also generated an ETRecord, then pass that in as well: `inspector = Inspector(etdump_path="etdump.etdp", etrecord="etrecord.bin")` diff --git a/docs/source/sdk-bundled-io.md b/docs/source/sdk-bundled-io.md index 33deae3904..288fce93df 100644 --- a/docs/source/sdk-bundled-io.md +++ b/docs/source/sdk-bundled-io.md @@ -28,7 +28,7 @@ In `BundledProgram`, we create two new classes, `MethodTestCase` and `MethodTest :::{dropdown} `MethodTestCase` ```{eval-rst} -.. autofunction:: executorch.sdk.bundled_program.config.MethodTestCase.__init__ +.. autofunction:: executorch.devtools.bundled_program.config.MethodTestCase.__init__ :noindex: ``` ::: @@ -38,7 +38,7 @@ In `BundledProgram`, we create two new classes, `MethodTestCase` and `MethodTest :::{dropdown} `MethodTestSuite` ```{eval-rst} -.. autofunction:: executorch.sdk.bundled_program.config.MethodTestSuite +.. autofunction:: executorch.devtools.bundled_program.config.MethodTestSuite :noindex: ``` ::: @@ -48,13 +48,13 @@ Since each model may have multiple inference methods, we need to generate `List[ ### Step 3: Generate `BundledProgram` -We provide `BundledProgram` class under `executorch/sdk/bundled_program/core.py` to bundled the `ExecutorchProgram`-like variable, including +We provide `BundledProgram` class under `executorch/devtools/bundled_program/core.py` to bundled the `ExecutorchProgram`-like variable, including `ExecutorchProgram`, `MultiMethodExecutorchProgram` or `ExecutorchProgramManager`, with the `List[MethodTestSuite]`: :::{dropdown} `BundledProgram` ```{eval-rst} -.. autofunction:: executorch.sdk.bundled_program.core.BundledProgram.__init__ +.. autofunction:: executorch.devtools.bundled_program.core.BundledProgram.__init__ :noindex: ``` ::: @@ -65,18 +65,18 @@ Construtor of `BundledProgram `will do sannity check internally to see if the gi ### Step 4: Serialize `BundledProgram` to Flatbuffer. -To serialize `BundledProgram` to make runtime APIs use it, we provide two APIs, both under `executorch/sdk/bundled_program/serialize/__init__.py`. +To serialize `BundledProgram` to make runtime APIs use it, we provide two APIs, both under `executorch/devtools/bundled_program/serialize/__init__.py`. :::{dropdown} Serialize and Deserialize ```{eval-rst} -.. currentmodule:: executorch.sdk.bundled_program.serialize +.. currentmodule:: executorch.devtools.bundled_program.serialize .. autofunction:: serialize_from_bundled_program_to_flatbuffer :noindex: ``` ```{eval-rst} -.. currentmodule:: executorch.sdk.bundled_program.serialize +.. currentmodule:: executorch.devtools.bundled_program.serialize .. autofunction:: deserialize_from_flatbuffer_to_bundled_program :noindex: ``` @@ -90,10 +90,10 @@ Here is a flow highlighting how to generate a `BundledProgram` given a PyTorch m import torch from executorch.exir import to_edge -from executorch.sdk import BundledProgram +from executorch.devtools import BundledProgram -from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite -from executorch.sdk.bundled_program.serialize import ( +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite +from executorch.devtools.bundled_program.serialize import ( serialize_from_bundled_program_to_flatbuffer, ) from torch._export import capture_pre_autograd_graph @@ -187,7 +187,7 @@ with open(save_path, "wb") as f: We can also regenerate `BundledProgram` from flatbuffer file if needed: ```python -from executorch.sdk.bundled_program.serialize import deserialize_from_flatbuffer_to_bundled_program +from executorch.devtools.bundled_program.serialize import deserialize_from_flatbuffer_to_bundled_program save_path = "bundled_program.bpte" with open(save_path, "rb") as f: serialized_bundled_program = f.read() @@ -313,9 +313,9 @@ Here's the example of the dtype of test input not meet model's requirement: import torch from executorch.exir import to_edge -from executorch.sdk import BundledProgram +from executorch.devtools import BundledProgram -from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite from torch.export import export @@ -400,7 +400,7 @@ Cell In[1], line 72 68 ] 70 # Step 3: Generate BundledProgram ---> 72 bundled_program = create_bundled_program(program, method_test_suites) -File /executorch/sdk/bundled_program/core.py:276, in create_bundled_program(program, method_test_suites) +File /executorch/devtools/bundled_program/core.py:276, in create_bundled_program(program, method_test_suites) 264 """Create bp_schema.BundledProgram by bundling the given program and method_test_suites together. 265 266 Args: @@ -411,7 +411,7 @@ File /executorch/sdk/bundled_program/core.py:276, in create_bundled_program(prog --> 276 assert_valid_bundle(program, method_test_suites) 278 bundled_method_test_suites: List[bp_schema.BundledMethodTestSuite] = [] 280 # Emit data and metadata of bundled tensor -File /executorch/sdk/bundled_program/core.py:219, in assert_valid_bundle(program, method_test_suites) +File /executorch/devtools/bundled_program/core.py:219, in assert_valid_bundle(program, method_test_suites) 215 # type of tensor input should match execution plan 216 if type(cur_plan_test_inputs[j]) == torch.Tensor: 217 # pyre-fixme[16]: Undefined attribute [16]: Item `bool` of `typing.Union[bool, float, int, torch._tensor.Tensor]` @@ -449,9 +449,9 @@ Another common error would be the method name in any `MethodTestSuite` does not import torch from executorch.exir import to_edge -from executorch.sdk import BundledProgram +from executorch.devtools import BundledProgram -from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite from torch.export import export @@ -532,7 +532,7 @@ Cell In[3], line 73 70 method_test_suites[0].method_name = "MISSING_METHOD_NAME" 72 # Generate BundledProgram ---> 73 bundled_program = create_bundled_program(program, method_test_suites) -File /executorch/sdk/bundled_program/core.py:276, in create_bundled_program(program, method_test_suites) +File /executorch/devtools/bundled_program/core.py:276, in create_bundled_program(program, method_test_suites) 264 """Create bp_schema.BundledProgram by bundling the given program and method_test_suites together. 265 266 Args: @@ -543,7 +543,7 @@ File /executorch/sdk/bundled_program/core.py:276, in create_bundled_program(prog --> 276 assert_valid_bundle(program, method_test_suites) 278 bundled_method_test_suites: List[bp_schema.BundledMethodTestSuite] = [] 280 # Emit data and metadata of bundled tensor -File /executorch/sdk/bundled_program/core.py:141, in assert_valid_bundle(program, method_test_suites) +File /executorch/devtools/bundled_program/core.py:141, in assert_valid_bundle(program, method_test_suites) 138 method_name_of_program = {e.name for e in program.execution_plan} 139 method_name_of_test_suites = {t.method_name for t in method_test_suites} --> 141 assert method_name_of_test_suites.issubset( diff --git a/docs/source/sdk-debugging.md b/docs/source/sdk-debugging.md index 45e50b44e8..14d4af0f15 100644 --- a/docs/source/sdk-debugging.md +++ b/docs/source/sdk-debugging.md @@ -38,7 +38,7 @@ For a real example reflecting the steps below, please refer to [sdk_example_runn Once a model has been run, using the generated ETDump and debug buffers, users can leverage the [Inspector API's](./sdk-inspector.rst) to inspect these debug outputs. ```python -from executorch.sdk import Inspector +from executorch.devtools import Inspector # Create an Inspector instance with etdump and the debug buffer. inspector = Inspector(etdump_path=etdump_path, @@ -67,7 +67,7 @@ We've also provided a simple set of utilities that let users perform quality ana ```python -from executorch.sdk.inspector._inspector_utils import compare_results +from executorch.devtools.inspector._inspector_utils import compare_results # Run a simple quality analysis between the model outputs sourced from the # runtime and a set of reference outputs. diff --git a/docs/source/sdk-etdump.md b/docs/source/sdk-etdump.md index 4eacb18b14..aad623efc8 100644 --- a/docs/source/sdk-etdump.md +++ b/docs/source/sdk-etdump.md @@ -9,7 +9,7 @@ Generating an ETDump is a relatively straightforward process. Users can follow t 1. ***Include*** the ETDump header in your code. ```C++ -#include +#include ``` 2. ***Create*** an Instance of the ETDumpGen class and pass it into the `load_method` call that is invoked in the runtime. diff --git a/docs/source/sdk-etrecord.rst b/docs/source/sdk-etrecord.rst index 43ed5095c6..b3b7f042cc 100644 --- a/docs/source/sdk-etrecord.rst +++ b/docs/source/sdk-etrecord.rst @@ -31,7 +31,7 @@ they are interested in working with via our tooling. .. warning:: Users should do a deepcopy of the output of ``to_edge()`` and pass in the deepcopy to the ``generate_etrecord`` API. This is needed because the subsequent call, ``to_executorch()``, does an in-place mutation and will lose debug data in the process. -.. currentmodule:: executorch.sdk.etrecord._etrecord +.. currentmodule:: executorch.devtools.etrecord._etrecord .. autofunction:: generate_etrecord Using an ``ETRecord`` diff --git a/docs/source/sdk-inspector.rst b/docs/source/sdk-inspector.rst index e15c1f2a39..448f30cfb5 100644 --- a/docs/source/sdk-inspector.rst +++ b/docs/source/sdk-inspector.rst @@ -26,26 +26,26 @@ Inspector Methods Constructor ~~~~~~~~~~~ -.. autofunction:: executorch.sdk.Inspector.__init__ +.. autofunction:: executorch.devtools.Inspector.__init__ **Example Usage:** .. code:: python - from executorch.sdk import Inspector + from executorch.devtools import Inspector inspector = Inspector(etdump_path="/path/to/etdump.etdp", etrecord="/path/to/etrecord.bin") to_dataframe ~~~~~~~~~~~~~~~~ -.. autofunction:: executorch.sdk.Inspector.to_dataframe +.. autofunction:: executorch.devtools.Inspector.to_dataframe print_data_tabular ~~~~~~~~~~~~~~~~~~ -.. autofunction:: executorch.sdk.Inspector.print_data_tabular +.. autofunction:: executorch.devtools.Inspector.print_data_tabular .. _example-usage-1: @@ -62,7 +62,7 @@ Note that the unit of delegate profiling events is "cycles". We're working on pr find_total_for_module ~~~~~~~~~~~~~~~~~~~~~ -.. autofunction:: executorch.sdk.Inspector.find_total_for_module +.. autofunction:: executorch.devtools.Inspector.find_total_for_module .. _example-usage-2: @@ -80,7 +80,7 @@ find_total_for_module get_exported_program ~~~~~~~~~~~~~~~~~~~~ -.. autofunction:: executorch.sdk.Inspector.get_exported_program +.. autofunction:: executorch.devtools.Inspector.get_exported_program .. _example-usage-3: @@ -119,7 +119,7 @@ of an ``Inspector`` instance, for example: inspector.event_blocks -.. autoclass:: executorch.sdk.inspector.EventBlock +.. autoclass:: executorch.devtools.inspector.EventBlock ``Event`` Class ~~~~~~~~~~~~~~~ @@ -127,7 +127,7 @@ of an ``Inspector`` instance, for example: Access ``Event`` instances through the ``events`` attribute of an ``EventBlock`` instance. -.. autoclass:: executorch.sdk.inspector.Event +.. autoclass:: executorch.devtools.inspector.Event **Example Usage:** @@ -152,7 +152,7 @@ table. This command produces the identical table output as calling the .. code:: bash - python3 -m sdk.inspector.inspector_cli --etdump_path --etrecord_path + python3 -m devtools.inspector.inspector_cli --etdump_path --etrecord_path Note that the `etrecord_path` argument is optional. diff --git a/docs/source/tutorials_source/sdk-integration-tutorial.py b/docs/source/tutorials_source/sdk-integration-tutorial.py index ccc2e480ad..35d200204c 100644 --- a/docs/source/tutorials_source/sdk-integration-tutorial.py +++ b/docs/source/tutorials_source/sdk-integration-tutorial.py @@ -38,9 +38,9 @@ # # The first step is to generate an ``ETRecord``. ``ETRecord`` contains model # graphs and metadata for linking runtime results (such as profiling) to -# the eager model. This is generated via ``executorch.sdk.generate_etrecord``. +# the eager model. This is generated via ``executorch.devtools.generate_etrecord``. # -# ``executorch.sdk.generate_etrecord`` takes in an output file path (str), the +# ``executorch.devtools.generate_etrecord`` takes in an output file path (str), the # edge dialect model (``EdgeProgramManager``), the ExecuTorch dialect model # (``ExecutorchProgramManager``), and an optional dictionary of additional models. # @@ -51,6 +51,7 @@ import torch import torch.nn as nn import torch.nn.functional as F +from executorch.devtools import generate_etrecord from executorch.exir import ( EdgeCompileConfig, @@ -58,7 +59,6 @@ ExecutorchProgramManager, to_edge, ) -from executorch.sdk import generate_etrecord from torch.export import export, ExportedProgram @@ -129,14 +129,14 @@ def forward(self, x): # In this tutorial, a `Bundled Program` is created from the example model above. import torch +from executorch.devtools import BundledProgram -from executorch.exir import to_edge -from executorch.sdk import BundledProgram - -from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite -from executorch.sdk.bundled_program.serialize import ( +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite +from executorch.devtools.bundled_program.serialize import ( serialize_from_bundled_program_to_flatbuffer, ) + +from executorch.exir import to_edge from torch.export import export # Step 1: ExecuTorch Program Export @@ -188,7 +188,7 @@ def forward(self, x): # # To visualize all runtime events, call Inspector's ``print_data_tabular``. -from executorch.sdk import Inspector +from executorch.devtools import Inspector # sphinx_gallery_start_ignore inspector_patch = patch.object(Inspector, "__init__", return_value=None) diff --git a/docs/website/docs/tutorials/bundled_program.md b/docs/website/docs/tutorials/bundled_program.md index ac67d6f628..fb119df731 100644 --- a/docs/website/docs/tutorials/bundled_program.md +++ b/docs/website/docs/tutorials/bundled_program.md @@ -122,7 +122,7 @@ ET_NODISCARD Error VerifyResultWithBundledExpectedOutput( ### Example -Here we provide an example about how to run the bundled program step by step. Most of the code are borrowed from "fbcode/executorch/sdk/fb/runners/executor_runner.cpp" and please review that file if you need more info and context: +Here we provide an example about how to run the bundled program step by step. Most of the code are borrowed from "fbcode/executorch/devtools/fb/runners/executor_runner.cpp" and please review that file if you need more info and context: ```c++ // method_name is the name for the method we want to test diff --git a/examples/apple/coreml/executor_runner/main.mm b/examples/apple/coreml/executor_runner/main.mm index 4cc21ba30a..2475d68fa9 100644 --- a/examples/apple/coreml/executor_runner/main.mm +++ b/examples/apple/coreml/executor_runner/main.mm @@ -13,7 +13,7 @@ #import #import #import -#import +#import #import #import #import diff --git a/examples/apple/coreml/scripts/build_executor_runner.sh b/examples/apple/coreml/scripts/build_executor_runner.sh index 16c5dea02a..b57a8f12e7 100755 --- a/examples/apple/coreml/scripts/build_executor_runner.sh +++ b/examples/apple/coreml/scripts/build_executor_runner.sh @@ -56,7 +56,7 @@ mkdir -p "$EXECUTORCH_INCLUDE_DIR_PATH" find extension \( -name "*.h" -o -name "*.hpp" \) -exec rsync -R '{}' "$EXECUTORCH_INCLUDE_DIR_PATH" \; find runtime \( -name "*.h" -o -name "*.hpp" \) -exec rsync -R '{}' "$EXECUTORCH_INCLUDE_DIR_PATH" \; find util \( -name "*.h" -o -name "*.hpp" \) -exec rsync -R '{}' "$EXECUTORCH_INCLUDE_DIR_PATH" \; -find sdk \( -name "*.h" -o -name "*.hpp" \) -exec rsync -R '{}' "$EXECUTORCH_INCLUDE_DIR_PATH" \; +find devtools \( -name "*.h" -o -name "*.hpp" \) -exec rsync -R '{}' "$EXECUTORCH_INCLUDE_DIR_PATH" \; cp -rf "$COREML_DIR_PATH/runtime/include/" "$INCLUDE_DIR_PATH" # Copy required libraries diff --git a/examples/apple/coreml/scripts/export.py b/examples/apple/coreml/scripts/export.py index 4bf26a7f3e..5a8c9b227f 100644 --- a/examples/apple/coreml/scripts/export.py +++ b/examples/apple/coreml/scripts/export.py @@ -17,10 +17,10 @@ from executorch.backends.apple.coreml.compiler import CoreMLBackend from executorch.backends.apple.coreml.partition import CoreMLPartitioner +from executorch.devtools.etrecord import generate_etrecord from executorch.exir import to_edge from executorch.exir.backend.backend_api import to_backend -from executorch.sdk.etrecord import generate_etrecord from torch.export import export REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent.parent.parent diff --git a/examples/apple/coreml/scripts/inspector_cli.py b/examples/apple/coreml/scripts/inspector_cli.py index 768465f770..e0b81d4aff 100644 --- a/examples/apple/coreml/scripts/inspector_cli.py +++ b/examples/apple/coreml/scripts/inspector_cli.py @@ -8,8 +8,8 @@ from pathlib import Path -from executorch.sdk import Inspector -from executorch.sdk.inspector._inspector_utils import compare_results +from executorch.devtools import Inspector +from executorch.devtools.inspector._inspector_utils import compare_results def get_root_dir_path() -> Path: diff --git a/examples/apple/coreml/scripts/inspector_utils.py b/examples/apple/coreml/scripts/inspector_utils.py index 1736c2cefb..c5674ec520 100644 --- a/examples/apple/coreml/scripts/inspector_utils.py +++ b/examples/apple/coreml/scripts/inspector_utils.py @@ -20,6 +20,13 @@ from executorch.backends.apple.coreml.compiler import CoreMLBackend from executorch.backends.apple.coreml.partition import CoreMLPartitioner +from executorch.devtools import BundledProgram, generate_etrecord, Inspector +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite +from executorch.devtools.bundled_program.serialize import ( + serialize_from_bundled_program_to_flatbuffer, +) +from executorch.devtools.inspector import Event + from executorch.exir import ( EdgeProgramManager, ExecutorchBackendConfig, @@ -30,14 +37,6 @@ from executorch.exir.backend.compile_spec_schema import CompileSpec from executorch.exir.tracer import Value -from executorch.sdk import BundledProgram, generate_etrecord, Inspector - -from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite -from executorch.sdk.bundled_program.serialize import ( - serialize_from_bundled_program_to_flatbuffer, -) -from executorch.sdk.inspector import Event - from torch.export import export, ExportedProgram COREML_METADATA_KEYS: Final[List[Tuple[str, str]]] = [ diff --git a/examples/apple/mps/CMakeLists.txt b/examples/apple/mps/CMakeLists.txt index d1dd8e93d7..319d8159ce 100644 --- a/examples/apple/mps/CMakeLists.txt +++ b/examples/apple/mps/CMakeLists.txt @@ -92,8 +92,8 @@ if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$") include(${EXECUTORCH_SRCS_FILE}) target_include_directories( bundled_program - INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/../../../sdk/include - ${CMAKE_CURRENT_BINARY_DIR}/../../../sdk/bundled_program + INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/../../../devtools/include + ${CMAKE_CURRENT_BINARY_DIR}/../../../devtools/bundled_program ${EXECUTORCH_ROOT}/third-party/flatbuffers/include ${EXECUTORCH_ROOT}/third-party/flatcc/include ${_mps_schema_headers} diff --git a/examples/apple/mps/executor_runner/mps_executor_runner.mm b/examples/apple/mps/executor_runner/mps_executor_runner.mm index 604419a620..040b2fcd99 100644 --- a/examples/apple/mps/executor_runner/mps_executor_runner.mm +++ b/examples/apple/mps/executor_runner/mps_executor_runner.mm @@ -30,8 +30,8 @@ #include #include #include -#include -#include +#include +#include #include using namespace std::chrono; diff --git a/examples/apple/mps/executor_runner/targets.bzl b/examples/apple/mps/executor_runner/targets.bzl index fd0a7a5046..14399411ae 100644 --- a/examples/apple/mps/executor_runner/targets.bzl +++ b/examples/apple/mps/executor_runner/targets.bzl @@ -28,9 +28,9 @@ def define_common_targets(): "//executorch/extension/data_loader:file_data_loader", "//executorch/kernels/portable:generated_lib", "//executorch/extension/data_loader:file_data_loader", - "//executorch/sdk/etdump:etdump_flatcc", + "//executorch/devtools/etdump:etdump_flatcc", "//executorch/extension/data_loader:buffer_data_loader", - "//executorch/sdk/bundled_program:runtime", + "//executorch/devtools/bundled_program:runtime", ], external_deps = [ "gflags", diff --git a/examples/apple/mps/scripts/mps_example.py b/examples/apple/mps/scripts/mps_example.py index e561afb185..636444e2b7 100644 --- a/examples/apple/mps/scripts/mps_example.py +++ b/examples/apple/mps/scripts/mps_example.py @@ -14,6 +14,11 @@ from executorch import exir from executorch.backends.apple.mps import MPSBackend from executorch.backends.apple.mps.partition import MPSPartitioner +from executorch.devtools import BundledProgram, generate_etrecord +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite +from executorch.devtools.bundled_program.serialize import ( + serialize_from_bundled_program_to_flatbuffer, +) from executorch.exir import ( EdgeCompileConfig, @@ -24,11 +29,6 @@ from executorch.exir.backend.backend_details import CompileSpec from executorch.exir.capture._config import ExecutorchBackendConfig from executorch.extension.export_util.utils import export_to_edge, save_pte_program -from executorch.sdk import BundledProgram, generate_etrecord -from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite -from executorch.sdk.bundled_program.serialize import ( - serialize_from_bundled_program_to_flatbuffer, -) from ....models import MODEL_NAME_TO_MODEL from ....models.model_factory import EagerModelFactory diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj index e3a74456b3..fd5cdc7117 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj @@ -94,7 +94,7 @@ 03729ED52BB1F8DE00152F2E /* LLaMARunner.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = LLaMARunner.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 03729F072BB203B300152F2E /* runner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = runner.cpp; sourceTree = ""; }; 03729F082BB203B300152F2E /* runner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = runner.h; sourceTree = ""; }; - 03729F092BB203B300152F2E /* util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = util.h; sourceTree = ""; }; + 03729F092BB203B300152F2E /* util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = util.h; path = ../../../../extension/llm/runner/util.h; sourceTree = ""; }; 03729F102BB2042B00152F2E /* sampler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sampler.h; sourceTree = ""; }; 03729F112BB2042B00152F2E /* sampler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sampler.cpp; sourceTree = ""; }; 03729F142BB2043600152F2E /* bpe_tokenizer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = bpe_tokenizer.cpp; path = ../../../../extension/llm/tokenizer/bpe_tokenizer.cpp; sourceTree = ""; }; @@ -264,7 +264,7 @@ 03729F102BB2042B00152F2E /* sampler.h */, ); name = sampler; - path = ../../../../../models/llama2/sampler; + path = ../../../../../../extension/llm/sampler; sourceTree = ""; }; /* End PBXGroup section */ diff --git a/examples/models/flamingo/cross_attention/cross_attention_mask.cpp b/examples/models/flamingo/cross_attention/cross_attention_mask.cpp new file mode 100644 index 0000000000..b2a2a6a806 --- /dev/null +++ b/examples/models/flamingo/cross_attention/cross_attention_mask.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include + +#include +#include + +namespace torch::executor { + +// Fowrward declaration needed for ARM compilers. +int32_t safe_size_t_to_sizes_type(size_t value); +std::vector> _get_image_attention_intervals( + const std::vector& tokens, + int image_token_id); + +int32_t safe_size_t_to_sizes_type(size_t value) { + if (value > + static_cast(std::numeric_limits::max())) { + throw std::overflow_error( + "size_t value too large for TensorImpl::SizesType"); + } + return static_cast(value); +} + +/** + * Returns a list of lists of the form [start, end) where start is the index + * of the current image token and end is the index of the next image token, + * exclusive. + * + * Example: + * >>> text = "These are two dogs. This is a cat." + * >>> size_t image_token_id = 1; + * >>> std::vector tokens = {1, 1, 9673, 527, 1403, 12875, 13, 1, 1115, + * 374, 264, 8415]}; + * >>> transform = VisionCrossAttentionMask(tile_size=400, patch_size=40, + * image_token_id=1) + * >>> intervals = _get_image_attention_intervals(tokens, image_token_id) + * [[0, 7], [1, 7], [7, 12]] + * + * @param tokens List of token IDs in the text sequence. + * @param image_token_id The value of the image token. + * + * @returns Vector of vectors of the form [start, end) indicating the range of + * positions in the text sequence that should attend to the image. + */ +std::vector> _get_image_attention_intervals( + const std::vector& tokens, + int image_token_id) { + std::vector> vision_masks; + int end = tokens.size(); + std::vector vision_token_locations; + + // Find all vision token locations. + for (int i = 0; i < tokens.size(); ++i) { + if (tokens[i] == image_token_id) { + vision_token_locations.push_back(i); + } + } + + // Return empty vector if there are no images. + if (vision_token_locations.empty()) { + return vision_masks; + } + + // If there is only one image, it will attend to subsequent text until end. + if (vision_token_locations.size() == 1) { + vision_masks.push_back({vision_token_locations[0], end}); + return vision_masks; + } + + // Construct intervals from previous image token to next image token. + for (int i = 0; i < vision_token_locations.size() - 1; ++i) { + vision_masks.push_back( + {vision_token_locations[i], vision_token_locations[i + 1]}); + } + + // Last image will attend to subsequent text until end. + vision_masks.push_back({vision_token_locations.back(), end}); + + // If there are consecutive vision tokens, they should all attend to the + // same subsequent text. + int last_mask_end = vision_masks.back()[1]; + for (auto it = vision_masks.rbegin(); it != vision_masks.rend(); ++it) { + if ((*it)[0] == (*it)[1] - 1) { + (*it)[1] = last_mask_end; + } + last_mask_end = (*it)[1]; + } + + return vision_masks; +} + +std::vector cross_attention_mask( + const std::vector& tokens, + const std::vector& images, + size_t tile_size, + size_t patch_size, + int image_token_id, + std::vector>& out) { + size_t patch_grid_size = tile_size / patch_size; + size_t patches_per_tile = patch_grid_size * patch_grid_size; + + std::vector> image_intervals = + _get_image_attention_intervals(tokens, image_token_id); + + if (image_intervals.size() != images.size()) { + throw std::runtime_error( + "The number of image tokens (" + + std::to_string(image_intervals.size()) + + ") does not match the number of images (" + + std::to_string(images.size()) + ")"); + } + + // Create mask for each individual image based on its number of tokens, + // which can vary based on number of tiles since they are not yet tile padded. + // The masks are padded and concatenated together in the batch collator. + std::vector cross_attention_masks; + size_t text_seq_len = tokens.size(); + for (size_t image_idx = 0; image_idx < image_intervals.size(); ++image_idx) { + size_t n_tiles = images[image_idx].size(0); + size_t image_seq_len = + n_tiles * (patches_per_tile + 1); // +1 for the CLS token. + + // Mask will be block of 1s at the corresponding interval in the text. + // It is not a causal block because all the image tokens correspond + // to a single image, so text tokens attend to all the image's tokens. + std::vector sizes = { + safe_size_t_to_sizes_type(text_seq_len), + safe_size_t_to_sizes_type(image_seq_len)}; + + // Allocate the underlying data to be handled by the managed tensor. + size_t num_elements = text_seq_len * image_seq_len; + size_t stride = image_seq_len; + std::vector mask_data(num_elements); + + ManagedTensor mask(mask_data.data(), sizes, ScalarType::Int); + cross_attention_masks.emplace_back(std::move(mask)); + + // Add the allocated data to the output vector. + out.emplace_back(std::move(mask_data)); + + // All rows of tensor in the text_seq_len dimension within the interval are + // set to 1 (true). + size_t start = image_intervals[image_idx][0]; + size_t end = image_intervals[image_idx][1]; // End is exclusive. + for (size_t i = start; i < end; ++i) { + for (size_t j = 0; j < image_seq_len; ++j) { + size_t unrolled_index = i * image_seq_len + j; + if (unrolled_index >= out[image_idx].size()) { + throw std::out_of_range( + "Index " + std::to_string(unrolled_index) + + " out of range of output tensor."); + } + out[image_idx][i * stride + j] = 1; + } + } + } + + return cross_attention_masks; +} + +} // namespace torch::executor diff --git a/examples/models/flamingo/cross_attention/cross_attention_mask.h b/examples/models/flamingo/cross_attention/cross_attention_mask.h new file mode 100644 index 0000000000..6998d91ad4 --- /dev/null +++ b/examples/models/flamingo/cross_attention/cross_attention_mask.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +#include + +namespace torch { +namespace executor { + +/** + * Computes the cross-attention mask for text + image inputs. Text tokens that + * participate in cross-attention with an image token will show True in the mask + * and follow the interleaved structure laid out in Fig. 7 of the Flamingo paper + * (https://arxiv.org/pdf/2204.14198): + * + * (1) Text tokens immediately following the image token up until the next + * image token (2) Consecutive image tokens attend to subsequent text tokens + * + * :: + * + * ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ + * img1 │ ■ │ │ ■ │ │ ■ │ │ ■ │ │ ■ │ │ ■ │ │ │ │ │ │ │ │ │ │ │ + * └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ + * ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ + * img2 │ │ │ ■ │ │ ■ │ │ ■ │ │ ■ │ │ ■ │ │ │ │ │ │ │ │ │ │ │ + * └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ + * ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ + * img3 │ │ │ │ │ │ │ │ │ │ │ │ │ ■ │ │ ■ │ │ ■ │ │ ■ │ │ ■ │ + * └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ + * These are two dogs. This is a cat. + * + * + * + * Resultant mask is constructed per image and is of shape (text_seq_len, + * image_seq_len), where True indicates that the token outputted from the image + * encoder attends to the token in the text sequence in cross-attention. A list + * of these masks are returned with length equal to number of images in the + * sample. + * + * @param tokens Vector of tokens participating in the cross attention. + * @param images Vector of images participating in the cross attention. + * @param tile_size The size of the image tiles from the image transform. + * @param patch_size The size of each patch. Used to divide the tiles into + * patches. E.g. for patch_size = 40, a tile of shape (400, 400) will have 10x10 + * grid of patches with shape (40, 40) each. image_token_id (int): Token ID of + * the image special token. + * @param image_token_id The value of the image token. + * @param out Out vector holding the raw data wrapped by the returned cross + * attention masks. + * + * @returns A vector of cross attention masks, as Tensors, one for each image. + */ +std::vector cross_attention_mask( + const std::vector& tokens, + const std::vector& images, + size_t tile_size, + size_t patch_size, + int image_token_id, + std::vector>& out); + +} // namespace executor +} // namespace torch diff --git a/examples/models/flamingo/cross_attention/cross_attention_mask_test.cpp b/examples/models/flamingo/cross_attention/cross_attention_mask_test.cpp new file mode 100644 index 0000000000..5b9e58c216 --- /dev/null +++ b/examples/models/flamingo/cross_attention/cross_attention_mask_test.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include + +using namespace ::testing; +using torch::executor::ManagedTensor; +using torch::executor::ScalarType; +using torch::executor::Tensor; +using torch::executor::TensorImpl; + +TEST(CrossAttentxnMaskTest, TestCrossAttentionMask) { + std::vector tokens = { + 1, 1, 9673, 527, 1403, 12875, 13, 1, 1115, 374, 264, 8415}; + + // Initialize image tensors. + TensorImpl::SizesType sizes[2] = {2, 2}; + TensorImpl::DimOrderType dim_order[2] = {0, 1}; + TensorImpl::StridesType strides[2] = {2, 1}; + + int32_t a_data[4] = {1, 2, 3, 4}; + auto a_impl = + TensorImpl(ScalarType::Int, 2, sizes, a_data, dim_order, strides); + Tensor a(&a_impl); + + int32_t b_data[4] = {5, 6, 7, 8}; + auto b_impl = + TensorImpl(ScalarType::Int, 2, sizes, b_data, dim_order, strides); + Tensor b(&b_impl); + + int32_t c_data[4] = {9, 10, 11, 12}; + auto c_impl = + TensorImpl(ScalarType::Int, 2, sizes, c_data, dim_order, strides); + Tensor c(&c_impl); + + std::vector images = {a, b, c}; + std::vector> mask_data; + std::vector output_masks = + torch::executor::cross_attention_mask( + tokens, + images, + /*tile_size=*/1, + /*patch_size=*/1, + /*image_token_id=*/1, + /*out=*/mask_data); + + // Check contents of the mask. + std::vector> expected_intervals = { + {0, 7}, {1, 7}, {7, 12}}; + for (size_t mask_idx = 0; mask_idx < output_masks.size(); ++mask_idx) { + ManagedTensor& output_mask = output_masks[mask_idx]; + Tensor output_tensor = output_mask.get_aliasing_tensor(); + for (size_t i = 0; i < output_tensor.size(0); ++i) { + for (size_t j = 0; j < output_tensor.strides()[0]; ++j) { + size_t unrolled_index = i * output_tensor.strides()[0] + j; + if (i >= expected_intervals[mask_idx][0] && + i < expected_intervals[mask_idx][1]) { + EXPECT_EQ(output_tensor.const_data_ptr()[unrolled_index], 1); + } else { + EXPECT_EQ(output_tensor.const_data_ptr()[unrolled_index], 0); + } + } + } + } +} diff --git a/examples/models/flamingo/cross_attention/targets.bzl b/examples/models/flamingo/cross_attention/targets.bzl new file mode 100644 index 0000000000..7bc13270aa --- /dev/null +++ b/examples/models/flamingo/cross_attention/targets.bzl @@ -0,0 +1,25 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") + +def define_common_targets(): + """Defines targets that should be shared between fbcode and xplat. + + The directory containing this targets.bzl file should also contain both + TARGETS and BUCK files that call this function. + """ + + runtime.cxx_library( + name = "cross_attention_mask", + srcs = ["cross_attention_mask.cpp"], + exported_headers = ["cross_attention_mask.h"], + exported_deps = [ + "//executorch/runtime/core/exec_aten:lib", + "//executorch/extension/runner_util:managed_tensor", + "//executorch/runtime/core/exec_aten/util:tensor_util", + ], + ) + + runtime.cxx_test( + name = "cross_attention_mask_test", + srcs = ["cross_attention_mask_test.cpp"], + deps = [":cross_attention_mask"], + ) diff --git a/examples/models/llama2/TARGETS b/examples/models/llama2/TARGETS index 9bdbff5fbb..467949a5eb 100644 --- a/examples/models/llama2/TARGETS +++ b/examples/models/llama2/TARGETS @@ -93,7 +93,7 @@ runtime.python_library( # "//executorch/extension/pybindings:aten_lib", # "//executorch/extension/pybindings:portable_lib", # "//executorch/extension/pybindings:portable_lib_plus_custom", - "//executorch/sdk/etrecord:etrecord", + "//executorch/devtools/etrecord:etrecord", "//executorch/util:memory_profiler", "//executorch/util:python_profiler", "fbsource//third-party/pypi/coremltools:coremltools", diff --git a/examples/models/llama2/eval_llama.py b/examples/models/llama2/eval_llama.py index 0495c76bbf..4daeaf7afa 100644 --- a/examples/models/llama2/eval_llama.py +++ b/examples/models/llama2/eval_llama.py @@ -22,6 +22,8 @@ def main() -> None: modelname = "llama2" parser = build_args_parser() args = parser.parse_args() + # Overrides this arg, because evaluation requires full logits. + args.generate_full_logits = True eval_llama(modelname, args) diff --git a/examples/models/llama2/export_llama_lib.py b/examples/models/llama2/export_llama_lib.py index c22c0a3c3c..221f2f75bc 100644 --- a/examples/models/llama2/export_llama_lib.py +++ b/examples/models/llama2/export_llama_lib.py @@ -22,6 +22,8 @@ import torch +from executorch.devtools.etrecord import generate_etrecord + from executorch.examples.models.llama2.llama_transformer import ModelArgs from executorch.extension.llm.export.builder import DType, LLMEdgeManager @@ -40,8 +42,6 @@ get_pt2e_quantizers, get_qnn_quantizer, ) - -from executorch.sdk.etrecord import generate_etrecord from executorch.util.activation_memory_profiler import generate_memory_trace from ..model_factory import EagerModelFactory @@ -296,6 +296,13 @@ def build_args_parser() -> argparse.ArgumentParser: help="Generate the ETRecord debug artifact.", ) + parser.add_argument( + "--generate_full_logits", + action="store_true", + required=False, + default=True, + help="Generate logits for all inputs.", + ) return parser @@ -405,6 +412,7 @@ def _prepare_for_llama_export(modelname: str, args) -> LLMEdgeManager: params_path=params_path, use_kv_cache=args.use_kv_cache, use_sdpa_with_kv_cache=args.use_sdpa_with_kv_cache, + generate_full_logits=args.generate_full_logits, weight_type=weight_type, enable_dynamic_shape=args.enable_dynamic_shape, verbose=args.verbose, @@ -590,6 +598,7 @@ def _load_llama_model( params_path: str, use_kv_cache: bool = False, use_sdpa_with_kv_cache: bool = False, + generate_full_logits: bool = True, weight_type: WeightType = WeightType.LLAMA, enable_dynamic_shape: bool = False, verbose: bool = False, @@ -616,6 +625,7 @@ def _load_llama_model( params=params_path, use_kv_cache=use_kv_cache, use_sdpa_with_kv_cache=use_sdpa_with_kv_cache, + generate_full_logits=generate_full_logits, fairseq2=weight_type == WeightType.FAIRSEQ2, max_seq_len=max_seq_len, enable_dynamic_shape=enable_dynamic_shape, diff --git a/examples/models/llama2/llama_transformer.py b/examples/models/llama2/llama_transformer.py index 4ae12b0f64..81b47a3a5d 100644 --- a/examples/models/llama2/llama_transformer.py +++ b/examples/models/llama2/llama_transformer.py @@ -96,6 +96,10 @@ class ModelArgs: use_sdpa_with_kv_cache_op: bool = ( False # Use custom sdpa op that updates kv cache in-place ) + # Generate logits for all inputs. When it's True, it would take big memory usage + # at runtime. Enable it only necessary (e.g., use perplexity tools that requires + # logits for all input tokens.) + generate_full_logits: bool = True enable_dynamic_shape: bool = False # export model with dynamic shape support use_hf_rope: bool = False # Use HuggingFace's RoPE implementation rope_theta: Optional[float] = ( @@ -442,6 +446,7 @@ def __init__(self, params: ModelArgs): self.norm = RMSNorm(params.dim, eps=params.norm_eps) self.output = nn.Linear(params.dim, params.vocab_size, bias=False) self.use_kv_cache = params.use_kv_cache + self.generate_full_logits = params.generate_full_logits self.max_seq_len = params.max_seq_len if params.use_hf_rope: self.precompute_freqs_cis = hf_precompute_freqs_cis @@ -512,6 +517,10 @@ def forward( input_pos, ) + if not self.generate_full_logits: + # Only the last logit is used for the new generated token + h = h[:, -1, :] + h = self.norm(h) logits = self.output(h) diff --git a/examples/models/llama2/model.py b/examples/models/llama2/model.py index fdf0dc707e..b375399f33 100644 --- a/examples/models/llama2/model.py +++ b/examples/models/llama2/model.py @@ -61,6 +61,7 @@ def __init__(self, **kwargs): self.use_kv_cache = kwargs.get("use_kv_cache", False) self.use_sdpa_with_kv_cache_op = kwargs.get("use_sdpa_with_kv_cache", False) + self.generate_full_logits = kwargs.get("generate_full_logits", True) self.enable_dynamic_shape = kwargs.get("enable_dynamic_shape", False) self.max_seq_len = kwargs.get("max_seq_len", 128) @@ -145,6 +146,7 @@ def __init__(self, **kwargs): max_batch_size=max_batch_size, use_kv_cache=self.use_kv_cache, use_sdpa_with_kv_cache_op=self.use_sdpa_with_kv_cache_op, + generate_full_logits=self.generate_full_logits, enable_dynamic_shape=self.enable_dynamic_shape, **params, ) diff --git a/examples/models/llava/runner/llava_runner.cpp b/examples/models/llava/runner/llava_runner.cpp index c5ce03b88d..a58fdfd5e5 100644 --- a/examples/models/llava/runner/llava_runner.cpp +++ b/examples/models/llava/runner/llava_runner.cpp @@ -20,6 +20,8 @@ #include #include +using ::executorch::extension::llm::Stats; + namespace torch::executor { bool LlavaRunner::is_loaded() { diff --git a/examples/models/llava/runner/llava_runner.h b/examples/models/llava/runner/llava_runner.h index d9805a0c91..13d842e30f 100644 --- a/examples/models/llava/runner/llava_runner.h +++ b/examples/models/llava/runner/llava_runner.h @@ -35,7 +35,8 @@ class LlavaRunner : public MultimodalRunner { const std::string& prompt, int32_t seq_len = 1024, std::function token_callback = {}, - std::function stats_callback = {}); + std::function + stats_callback = {}); private: inline static const std::string kPresetPrompt = diff --git a/examples/qualcomm/executor_runner/qnn_executor_runner.cpp b/examples/qualcomm/executor_runner/qnn_executor_runner.cpp index 7cd3709b95..c2a6c2c46c 100644 --- a/examples/qualcomm/executor_runner/qnn_executor_runner.cpp +++ b/examples/qualcomm/executor_runner/qnn_executor_runner.cpp @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -26,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/examples/qualcomm/scripts/export_example.py b/examples/qualcomm/scripts/export_example.py index b12a44993d..8339b9f5b5 100644 --- a/examples/qualcomm/scripts/export_example.py +++ b/examples/qualcomm/scripts/export_example.py @@ -15,12 +15,12 @@ generate_htp_compiler_spec, generate_qnn_executorch_compiler_spec, ) +from executorch.devtools import generate_etrecord from executorch.examples.models import MODEL_NAME_TO_MODEL from executorch.examples.models.model_factory import EagerModelFactory from executorch.exir.backend.backend_api import to_backend, validation_disabled from executorch.exir.capture._config import ExecutorchBackendConfig from executorch.extension.export_util.utils import save_pte_program -from executorch.sdk import generate_etrecord from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e diff --git a/examples/sdk/CMakeLists.txt b/examples/sdk/CMakeLists.txt index 76034b0760..af7e9b15bc 100644 --- a/examples/sdk/CMakeLists.txt +++ b/examples/sdk/CMakeLists.txt @@ -49,7 +49,7 @@ add_executable(sdk_example_runner sdk_example_runner/sdk_example_runner.cpp) target_compile_options(executorch INTERFACE -DET_EVENT_TRACER_ENABLED) target_include_directories( - etdump INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/../../sdk/include + etdump INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/../../devtools/include ${EXECUTORCH_ROOT}/third-party/flatcc/include ) target_link_libraries( diff --git a/examples/sdk/README.md b/examples/sdk/README.md index 68043517fb..096f90864e 100644 --- a/examples/sdk/README.md +++ b/examples/sdk/README.md @@ -59,11 +59,11 @@ Running the program will generate an `ETDump` file (`.etdp`) at the location spe Once an `ETDump` has been generated, it can be viewed using the CLI inspector. This will print a tabular view of the data recorded in the ETDump. ```bash - python3 -m sdk.inspector.inspector_cli --etdump_path mv2_etdump.etdp + python3 -m devtools.inspector.inspector_cli --etdump_path mv2_etdump.etdp ``` ### ETDump C++ API -ETDump profiling can also be used in a custom C++ program. `ETDumpGen` is an implementation of the abstract `EventTracer` class. Include the header file located at `sdk/etdump/etdump_flatcc.h`. To initialize the ETDump generator, construct it before loading the method from the program. +ETDump profiling can also be used in a custom C++ program. `ETDumpGen` is an implementation of the abstract `EventTracer` class. Include the header file located at `devtools/etdump/etdump_flatcc.h`. To initialize the ETDump generator, construct it before loading the method from the program. ```cpp torch::executor::ETDumpGen etdump_gen = torch::executor::ETDumpGen(); diff --git a/examples/sdk/scripts/export_bundled_program.py b/examples/sdk/scripts/export_bundled_program.py index a34a0ab4d3..052f5e9962 100644 --- a/examples/sdk/scripts/export_bundled_program.py +++ b/examples/sdk/scripts/export_bundled_program.py @@ -11,19 +11,19 @@ from typing import List import torch - -from executorch.exir import ExecutorchProgramManager -from executorch.extension.export_util.utils import export_to_exec_prog -from executorch.sdk import BundledProgram -from executorch.sdk.bundled_program.config import ( +from executorch.devtools import BundledProgram +from executorch.devtools.bundled_program.config import ( MethodInputType, MethodTestCase, MethodTestSuite, ) -from executorch.sdk.bundled_program.serialize import ( +from executorch.devtools.bundled_program.serialize import ( serialize_from_bundled_program_to_flatbuffer, ) +from executorch.exir import ExecutorchProgramManager +from executorch.extension.export_util.utils import export_to_exec_prog + from ...models import MODEL_NAME_TO_MODEL from ...models.model_factory import EagerModelFactory diff --git a/examples/sdk/scripts/gen_sample_etrecord.py b/examples/sdk/scripts/gen_sample_etrecord.py index c219ed4094..d2c4913b03 100644 --- a/examples/sdk/scripts/gen_sample_etrecord.py +++ b/examples/sdk/scripts/gen_sample_etrecord.py @@ -10,6 +10,7 @@ from typing import Any import torch +from executorch.devtools import generate_etrecord from executorch.exir import ( EdgeCompileConfig, EdgeProgramManager, @@ -18,7 +19,6 @@ to_edge, ) from executorch.exir.capture._config import ExecutorchBackendConfig -from executorch.sdk import generate_etrecord from torch.export import export from ...models import MODEL_NAME_TO_MODEL diff --git a/examples/sdk/sdk_example_runner/sdk_example_runner.cpp b/examples/sdk/sdk_example_runner/sdk_example_runner.cpp index e2e42ab670..7e979937d1 100644 --- a/examples/sdk/sdk_example_runner/sdk_example_runner.cpp +++ b/examples/sdk/sdk_example_runner/sdk_example_runner.cpp @@ -22,13 +22,13 @@ #include +#include +#include #include #include #include #include #include -#include -#include #include static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4MB diff --git a/examples/sdk/sdk_example_runner/targets.bzl b/examples/sdk/sdk_example_runner/targets.bzl index a5e8feb33c..025d42fee1 100644 --- a/examples/sdk/sdk_example_runner/targets.bzl +++ b/examples/sdk/sdk_example_runner/targets.bzl @@ -20,8 +20,8 @@ def define_common_targets(): "//executorch/extension/data_loader:file_data_loader", "//executorch/extension/data_loader:buffer_data_loader", "//executorch/util:util", - "//executorch/sdk/etdump:etdump_flatcc", - "//executorch/sdk/bundled_program:runtime", + "//executorch/devtools/etdump:etdump_flatcc", + "//executorch/devtools/bundled_program:runtime", ], external_deps = [ "gflags", diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py index a816c4f0e7..32d67e0cd4 100644 --- a/examples/xnnpack/aot_compiler.py +++ b/examples/xnnpack/aot_compiler.py @@ -12,9 +12,9 @@ import torch from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner +from executorch.devtools import generate_etrecord from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig from executorch.extension.export_util.utils import export_to_edge, save_pte_program -from executorch.sdk import generate_etrecord from ..models import MODEL_NAME_TO_MODEL from ..models.model_factory import EagerModelFactory diff --git a/examples/xnnpack/targets.bzl b/examples/xnnpack/targets.bzl index 30cafa56fa..35df8999b4 100644 --- a/examples/xnnpack/targets.bzl +++ b/examples/xnnpack/targets.bzl @@ -32,7 +32,7 @@ def define_common_targets(): "//executorch/examples/xnnpack/quantization:quant_utils", "//executorch/exir:lib", "//executorch/exir/backend:backend_api", - "//executorch/sdk:lib", + "//executorch/devtools:lib", ], ) diff --git a/exir/_serialize/TARGETS b/exir/_serialize/TARGETS index 8ddf830039..49419a4159 100644 --- a/exir/_serialize/TARGETS +++ b/exir/_serialize/TARGETS @@ -14,8 +14,8 @@ cpp_python_extension( "//executorch/backends/fb/qnnpack/...", "//executorch/backends/vulkan/...", "//executorch/backends/xnnpack/...", - "//executorch/sdk/bundled_program/...", - "//executorch/sdk/etdump/...", + "//executorch/devtools/bundled_program/...", + "//executorch/devtools/etdump/...", ], deps = [ "fbsource//third-party/flatbuffers:flatc_library", @@ -45,6 +45,10 @@ runtime.python_library( visibility = [ "//executorch/backends/...", "//executorch/codegen/...", + "//executorch/devtools:lib", + "//executorch/devtools/bundled_program/serialize:lib", + "//executorch/devtools/bundled_program/tests/...", + "//executorch/devtools/experimental/...", "//executorch/examples/async_exec:emit_program_lib", "//executorch/exir/...", "//executorch/exir/tests/...", @@ -52,10 +56,6 @@ runtime.python_library( "//executorch/extension/pybindings/test:test", "//executorch/extension/pybindings/test:test-library", "//executorch/profiler/...", - "//executorch/sdk:lib", - "//executorch/sdk/bundled_program/serialize:lib", - "//executorch/sdk/bundled_program/tests/...", - "//executorch/sdk/experimental/...", "//executorch/test/...", "@EXECUTORCH_CLIENTS", ], diff --git a/exir/emit/_emit_program.py b/exir/emit/_emit_program.py index 0aebab649e..bf40a78bb6 100644 --- a/exir/emit/_emit_program.py +++ b/exir/emit/_emit_program.py @@ -78,6 +78,29 @@ def _remove_non_user_outputs(exported_program: ExportedProgram) -> torch.fx.Grap return gm +# For each entry point in the model, determine if its a joint graph, +# and if it is return a map of the indices in the model output that the +# gradient outputs start at and that the parameter outputs start at. +def _get_training_metadata(methods: Dict[str, ExportedProgram]) -> Dict[str, int]: + gradients_method_prefix = "__et_training_gradients_index_" + parameters_method_prefix = "__et_training_parameters_index_" + training_metadata = {} + for name, method in methods.items(): + found_grad = False + found_param = False + i = 0 + for output_spec in method.graph_signature.output_specs: + if output_spec.kind == OutputKind.GRADIENT_TO_PARAMETER and not found_grad: + training_metadata[gradients_method_prefix + name] = i + found_grad = True + elif output_spec.kind == OutputKind.TOKEN and not found_param: + assert found_grad # Params must come after gradients + training_metadata[parameters_method_prefix + name] = i + found_param = True + i += 1 + return training_metadata + + def emit_program( methods: Union[ExportedProgram, Dict[str, ExportedProgram]], emit_stacktrace: bool = False, @@ -143,6 +166,10 @@ def emit_program( emitter.instr_id_to_delegate_debug_id_map ) + training_metadata = _get_training_metadata(methods) + if len(training_metadata) > 0: + plans.extend(emitter._emit_prim_getters(training_metadata)) + # emit any primitive getters if prim_getters is not None: plans.extend(emitter._emit_prim_getters(prim_getters)) diff --git a/exir/tests/test_joint_graph.py b/exir/tests/test_joint_graph.py index 7c80439610..0e5a322397 100644 --- a/exir/tests/test_joint_graph.py +++ b/exir/tests/test_joint_graph.py @@ -108,3 +108,23 @@ def forward(self, x, y): self.assertTrue(torch.allclose(m.linear.bias.grad, et_outputs[2])) self.assertTrue(torch.allclose(m.linear.weight, et_outputs[3])) self.assertTrue(torch.allclose(m.linear.bias, et_outputs[4])) + + self.assertEqual( + len(et.executorch_program.execution_plan), 3 + ) # forward + 2 training metadata functions + + # gradient outputs start at index 1 + self.assertEqual( + et.executorch_program.execution_plan[1] # pyre-ignore + .values[0] + .val.int_val, + 1, + ) + + # parameter outputs start at index 3 + self.assertEqual( + et.executorch_program.execution_plan[2] # pyre-ignore + .values[0] + .val.int_val, + 3, + ) diff --git a/extension/llm/custom_ops/op_sdpa.cpp b/extension/llm/custom_ops/op_sdpa.cpp index 727c04774b..d31cbaf369 100644 --- a/extension/llm/custom_ops/op_sdpa.cpp +++ b/extension/llm/custom_ops/op_sdpa.cpp @@ -158,7 +158,7 @@ static inline scalar_t* conditional_data_ptr(scalar_t* ptr, scalar_t* ptr2) { template < typename scalar_t, typename std::enable_if_t< - torch::executor::is_reduced_floating_point::value, + ::executorch::runtime::is_reduced_floating_point::value, int> = 0> static inline scalar_t* conditional_data_ptr(float* ptr, scalar_t* ptr2) { (void)ptr; @@ -247,7 +247,7 @@ void cpu_flash_attention( "KV_split_size must be greater than q_split_size"); constexpr bool is_reduced_type = - torch::executor::is_reduced_floating_point::value; + ::executorch::runtime::is_reduced_floating_point::value; ET_CHECK_MSG( !is_reduced_type, "FlashAttention does not support reduced types."); diff --git a/extension/llm/custom_ops/op_sdpa_test.cpp b/extension/llm/custom_ops/op_sdpa_test.cpp index 116be2508d..43f2022917 100644 --- a/extension/llm/custom_ops/op_sdpa_test.cpp +++ b/extension/llm/custom_ops/op_sdpa_test.cpp @@ -17,6 +17,7 @@ #include using namespace ::testing; +using executorch::runtime::testing::TensorFactory; exec_aten::Tensor op_scaled_dot_product_attention( const exec_aten::Tensor& query, @@ -37,7 +38,7 @@ Most tests are generated by FACTO */ TEST(OpScaledDotProductAttentionTest, CorrectnessTest_105) { - torch::executor::testing::TensorFactory tfFloat; + TensorFactory tfFloat; exec_aten::Tensor query = tfFloat.make( {1, 1, 4, 4}, @@ -123,7 +124,7 @@ TEST(OpScaledDotProductAttentionTest, CorrectnessTest_105) { } TEST(OpScaledDotProductAttentionTest, CorrectnessTest_11) { - torch::executor::testing::TensorFactory tfFloat; + TensorFactory tfFloat; exec_aten::Tensor query = tfFloat.make( {1, 1, 1, 8}, @@ -152,7 +153,7 @@ TEST(OpScaledDotProductAttentionTest, CorrectnessTest_11) { } TEST(OpScaledDotProductAttentionTest, CorrectnessTest_13) { - torch::executor::testing::TensorFactory tfFloat; + TensorFactory tfFloat; exec_aten::Tensor query = tfFloat.make( {1, 8, 1, 1}, {-47.0, 21.25, 74.75, 46.375, 21.0, -29.0, 2.625, 83.125}); @@ -181,7 +182,7 @@ TEST(OpScaledDotProductAttentionTest, CorrectnessTest_13) { } TEST(OpScaledDotProductAttentionTest, CorrectnessTest_17) { - torch::executor::testing::TensorFactory tfFloat; + TensorFactory tfFloat; exec_aten::Tensor query = tfFloat.make( {3, 2, 2, 6}, @@ -257,7 +258,7 @@ TEST(OpScaledDotProductAttentionTest, CorrectnessTest_17) { } TEST(OpScaledDotProductAttentionTest, CorrectnessTest_18) { - torch::executor::testing::TensorFactory tfFloat; + TensorFactory tfFloat; exec_aten::Tensor query = tfFloat.make( {3, 2, 2, 6}, @@ -333,7 +334,7 @@ TEST(OpScaledDotProductAttentionTest, CorrectnessTest_18) { // Disabling this test because right now we are enforcing that // attention mask must be 2D TEST(OpScaledDotProductAttentionTest, CorrectnessTest_19) { - torch::executor::testing::TensorFactory tfFloat; + TensorFactory tfFloat; exec_aten::Tensor query = tfFloat.make( {3, 2, 2, 6}, @@ -479,7 +480,7 @@ TEST(OpScaledDotProductAttentionTest, CorrectnessTest_19) { */ TEST(OpScaledDotProductAttentionTest, CorrectnessTest_51) { - torch::executor::testing::TensorFactory tfFloat; + TensorFactory tfFloat; exec_aten::Tensor query = tfFloat.make( {1, 1, 8, 3}, diff --git a/extension/llm/custom_ops/op_sdpa_with_kv_cache_test.cpp b/extension/llm/custom_ops/op_sdpa_with_kv_cache_test.cpp index 819dd70217..2a8124bc1e 100644 --- a/extension/llm/custom_ops/op_sdpa_with_kv_cache_test.cpp +++ b/extension/llm/custom_ops/op_sdpa_with_kv_cache_test.cpp @@ -16,6 +16,7 @@ #include using namespace ::testing; +using executorch::runtime::testing::TensorFactory; exec_aten::Tensor op_sdpa_with_kv_cache( const exec_aten::Tensor& query, @@ -79,7 +80,7 @@ Missing tests: 5. Different dtypes, fp16, bf16, double (or expect throw) */ TEST(OpScaledDotProductAttentionTest, BasicTest) { - torch::executor::testing::TensorFactory tfFloat; + TensorFactory tfFloat; exec_aten::Tensor query = tfFloat.make( {1, 1, 4, 4}, @@ -360,7 +361,7 @@ TEST(OpScaledDotProductAttentionTest, BasicTest) { } TEST(OpScaledDotProductAttentionTest, LargerTest) { - torch::executor::testing::TensorFactory tfFloat; + TensorFactory tfFloat; exec_aten::Tensor query = tfFloat.make( {1, 1, 7, 4}, {0.8823, 0.9150, 0.3829, 0.9593, 0.3904, 0.6009, 0.2566, @@ -524,7 +525,7 @@ TEST(OpScaledDotProductAttentionTest, LargerTest) { } TEST(OpScaledDotProductAttentionTest, BasicTestWithAttnMask) { - torch::executor::testing::TensorFactory tfFloat; + TensorFactory tfFloat; exec_aten::Tensor query = tfFloat.make( {1, 1, 4, 4}, @@ -807,7 +808,7 @@ TEST(OpScaledDotProductAttentionTest, BasicTestWithAttnMask) { } TEST(OpScaledDotProductAttentionTest, SequenceTest) { - torch::executor::testing::TensorFactory tfFloat; + TensorFactory tfFloat; exec_aten::Tensor query = tfFloat.make( {1, 1, 8, 4}, diff --git a/extension/llm/custom_ops/op_tile_crop_test.cpp b/extension/llm/custom_ops/op_tile_crop_test.cpp index 565f510913..36841b80f1 100644 --- a/extension/llm/custom_ops/op_tile_crop_test.cpp +++ b/extension/llm/custom_ops/op_tile_crop_test.cpp @@ -15,7 +15,7 @@ using namespace ::testing; using exec_aten::ScalarType; using exec_aten::Tensor; -using torch::executor::testing::TensorFactory; +using executorch::runtime::testing::TensorFactory; class OpTileCropOutTest : public OperatorTest { protected: diff --git a/extension/llm/runner/image.h b/extension/llm/runner/image.h index e18353dda9..32a9f87818 100644 --- a/extension/llm/runner/image.h +++ b/extension/llm/runner/image.h @@ -13,7 +13,9 @@ // patternlint-disable-next-line executorch-cpp-nostdinc #include -namespace torch::executor { +namespace executorch { +namespace extension { +namespace llm { struct Image { // Assuming NCHW format @@ -23,4 +25,14 @@ struct Image { int32_t channels; }; -} // namespace torch::executor +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::Image; +} // namespace executor +} // namespace torch diff --git a/extension/llm/runner/image_prefiller.h b/extension/llm/runner/image_prefiller.h index 64b623be36..879b0a6e21 100644 --- a/extension/llm/runner/image_prefiller.h +++ b/extension/llm/runner/image_prefiller.h @@ -13,23 +13,27 @@ #include #include -namespace torch::executor { +namespace executorch { +namespace extension { +namespace llm { // Assuming kv cache and parallel prefill are enabled. class ImagePrefiller { public: - explicit ImagePrefiller(Module* module) : module_(module) {} + explicit ImagePrefiller(::executorch::extension::Module* module) + : module_(module) {} + /** * Prefill an LLM Module with the given image input. * @param image The image input to the multimodal LLM. * @param start_pos The starting position in KV cache of the input in the LLM * @return The next token of the LLM Module after prefill. */ - virtual Result prefill( + virtual ::executorch::runtime::Result prefill( Image& image, int64_t start_pos = 0) = 0; - virtual Error load() = 0; + virtual ::executorch::runtime::Error load() = 0; virtual bool is_method_loaded() = 0; virtual ~ImagePrefiller() = default; @@ -38,4 +42,14 @@ class ImagePrefiller { Module* module_; }; -} // namespace torch::executor +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::ImagePrefiller; +} // namespace executor +} // namespace torch diff --git a/extension/llm/runner/metadata_util.h b/extension/llm/runner/metadata_util.h index 4ea2d9eebd..5f55dad538 100644 --- a/extension/llm/runner/metadata_util.h +++ b/extension/llm/runner/metadata_util.h @@ -14,7 +14,10 @@ #include -namespace torch::executor { +namespace executorch { +namespace extension { +namespace llm { + template T get_module_metadata( Module* module, @@ -26,9 +29,10 @@ T get_module_metadata( T res = default_val; if (model_methods.count(method_name)) { - Result> outputs = module->execute(method_name); + ::executorch::runtime::Result> + outputs = module->execute(method_name); if (outputs.ok()) { - std::vector outs = outputs.get(); + std::vector<::executorch::runtime::EValue> outs = outputs.get(); if (outs.size() > 0) { res = outs[0].to(); } @@ -43,4 +47,7 @@ T get_module_metadata( ET_LOG(Info, "%s: %lld", method_name.c_str(), (long long)res); return res; } -} // namespace torch::executor + +} // namespace llm +} // namespace extension +} // namespace executorch diff --git a/extension/llm/runner/multimodal_runner.h b/extension/llm/runner/multimodal_runner.h index ac38085be4..745f086f80 100644 --- a/extension/llm/runner/multimodal_runner.h +++ b/extension/llm/runner/multimodal_runner.h @@ -33,8 +33,9 @@ #include #include -namespace torch::executor { -using Stats = ::executorch::llm::Stats; +namespace executorch { +namespace extension { +namespace llm { class MultimodalRunner { public: @@ -53,8 +54,8 @@ class MultimodalRunner { } virtual bool is_loaded() = 0; - virtual Error load() = 0; - virtual Error generate( + virtual ::executorch::runtime::Error load() = 0; + virtual ::executorch::runtime::Error generate( std::vector& images, const std::string& prompt, int32_t seq_len = 1024, @@ -91,4 +92,14 @@ class MultimodalRunner { Stats stats_; }; -} // namespace torch::executor +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::MultimodalRunner; +} // namespace executor +} // namespace torch diff --git a/extension/llm/runner/stats.h b/extension/llm/runner/stats.h index f62be0940c..902ba89296 100644 --- a/extension/llm/runner/stats.h +++ b/extension/llm/runner/stats.h @@ -14,7 +14,10 @@ #include // patternlint-disable-next-line executorch-cpp-nostdinc #include -namespace executorch::llm { + +namespace executorch { +namespace extension { +namespace llm { struct Stats { // Scaling factor for timestamps - in this case, we use ms. @@ -41,12 +44,11 @@ struct Stats { // Token count from generated (total - prompt) int64_t num_generated_tokens; inline void on_sampling_begin() { - aggregate_sampling_timer_start_timestamp = - ::torch::executor::util::time_in_ms(); + aggregate_sampling_timer_start_timestamp = time_in_ms(); } inline void on_sampling_end() { - aggregate_sampling_time_ms += ::torch::executor::util::time_in_ms() - - aggregate_sampling_timer_start_timestamp; + aggregate_sampling_time_ms += + time_in_ms() - aggregate_sampling_timer_start_timestamp; aggregate_sampling_timer_start_timestamp = 0; } @@ -132,4 +134,16 @@ inline void print_report(const Stats& stats) { stats.SCALING_FACTOR_UNITS_PER_SECOND); } -} // namespace executorch::llm +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace executorch { +namespace llm { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::kTopp; +using ::executorch::extension::llm::print_report; +using ::executorch::extension::llm::Stats; +} // namespace llm +} // namespace executorch diff --git a/extension/llm/runner/text_decoder_runner.cpp b/extension/llm/runner/text_decoder_runner.cpp index 3de75ceccb..a0963769ea 100644 --- a/extension/llm/runner/text_decoder_runner.cpp +++ b/extension/llm/runner/text_decoder_runner.cpp @@ -8,11 +8,15 @@ // Given inputs, run a text decoder and return logits. -#include #include + #include -namespace torch::executor { +#include + +namespace executorch { +namespace extension { +namespace llm { // NOTE: we observed ~2x loading performance increase on iPhone 15 // and a ~5% improvement on Galaxy S22 by switching to @@ -26,22 +30,22 @@ TextDecoderRunner::TextDecoderRunner( sampler_(std::make_unique( vocab_size, temperature, - ::executorch::llm::kTopp, + kTopp, static_cast(std::time(nullptr)))), use_kv_cache_(use_kv_cache) {} // This function is functional, meaning it shouldn't modify any state of the // input. It should be safe to call multiple times with the same inputs. The // outer loop (call site) is responsible for managing state. -Result TextDecoderRunner::step( +::executorch::runtime::Result TextDecoderRunner::step( ManagedTensor& managed_tokens, ManagedTensor& managed_start_pos) { auto tokens = managed_tokens.get_aliasing_tensor(); // ET_LOG(Info, "Input token %" PRIu64, input_token); if (use_kv_cache_) { auto start_pos = managed_start_pos.get_aliasing_tensor(); - Result> outputs_res = - module_->forward({tokens, start_pos}); + ::executorch::runtime::Result> + outputs_res = module_->forward({tokens, start_pos}); ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error()); ET_CHECK_MSG( outputs_res.get().size() == 1, @@ -55,7 +59,8 @@ Result TextDecoderRunner::step( } else { // no kv cache (void)managed_start_pos; // unused - Result> outputs_res = module_->forward({tokens}); + ::executorch::runtime::Result> + outputs_res = module_->forward({tokens}); ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error()); ET_CHECK_MSG( outputs_res.get().size() == 1, @@ -69,4 +74,6 @@ Result TextDecoderRunner::step( } } -} // namespace torch::executor +} // namespace llm +} // namespace extension +} // namespace executorch diff --git a/extension/llm/runner/text_decoder_runner.h b/extension/llm/runner/text_decoder_runner.h index 49ddea6629..6a8e3396fe 100644 --- a/extension/llm/runner/text_decoder_runner.h +++ b/extension/llm/runner/text_decoder_runner.h @@ -16,7 +16,9 @@ // patternlint-disable-next-line executorch-cpp-nostdinc #include -namespace torch::executor { +namespace executorch { +namespace extension { +namespace llm { class TextDecoderRunner { public: @@ -35,7 +37,7 @@ class TextDecoderRunner { * Module. * @return The output of the LLM Module. This will be a tensor of logits. */ - virtual Result step( + virtual ::executorch::runtime::Result step( ManagedTensor& input, ManagedTensor& start_pos); @@ -43,7 +45,7 @@ class TextDecoderRunner { * Load the Module for text decode purpose. * @return The error code. */ - virtual Error load() { + virtual ::executorch::runtime::Error load() { return module_->load_method("forward"); } @@ -70,13 +72,13 @@ class TextDecoderRunner { auto vocab_size = logits_tensor.size(2); switch (logits_tensor.scalar_type()) { - case ScalarType::Float: { + case exec_aten::ScalarType::Float: { float* logits = logits_tensor.mutable_data_ptr(); float* logits_last = logits; logits_last += (num_tokens - 1) * vocab_size; return sampler_->sample(logits_last); } - case ScalarType::Half: { + case exec_aten::ScalarType::Half: { exec_aten::Half* logits = logits_tensor.mutable_data_ptr(); exec_aten::Half* logits_last = logits; @@ -99,4 +101,14 @@ class TextDecoderRunner { bool should_stop_{false}; }; -} // namespace torch::executor +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::TextDecoderRunner; +} // namespace executor +} // namespace torch diff --git a/extension/llm/runner/text_prefiller.cpp b/extension/llm/runner/text_prefiller.cpp index beafb21434..19fc2d5936 100644 --- a/extension/llm/runner/text_prefiller.cpp +++ b/extension/llm/runner/text_prefiller.cpp @@ -11,7 +11,9 @@ #include -namespace torch::executor { +namespace executorch { +namespace extension { +namespace llm { TextPrefiller::TextPrefiller( Tokenizer* tokenizer, @@ -23,7 +25,7 @@ TextPrefiller::TextPrefiller( use_kv_cache_(use_kv_cache), enable_parallel_prefill_(enable_parallel_prefill) {} -Result TextPrefiller::prefill( +::executorch::runtime::Result TextPrefiller::prefill( std::vector& prompt_tokens, int64_t start_pos, std::function token_callback) { @@ -40,11 +42,14 @@ Result TextPrefiller::prefill( if (enable_parallel_prefill_ || !use_kv_cache_) { // initialize tensor wrappers ManagedTensor managed_tokens( - prompt_tokens.data(), {1, num_prompt_tokens}, ScalarType::Long); + prompt_tokens.data(), + {1, num_prompt_tokens}, + exec_aten::ScalarType::Long); - ManagedTensor managed_start_pos(&start_pos, {1}, ScalarType::Long); + ManagedTensor managed_start_pos( + &start_pos, {1}, exec_aten::ScalarType::Long); - Result outputs_res = + ::executorch::runtime::Result outputs_res = text_decoder_runner_->step(managed_tokens, managed_start_pos); ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error()); @@ -76,9 +81,11 @@ Result TextPrefiller::prefill( cur_token = prompt_tokens[0]; // initialize tensor wrappers - ManagedTensor managed_tokens(&cur_token, {1, 1}, ScalarType::Long); + ManagedTensor managed_tokens( + &cur_token, {1, 1}, exec_aten::ScalarType::Long); - ManagedTensor managed_start_pos(&pos_data, {1}, ScalarType::Long); + ManagedTensor managed_start_pos( + &pos_data, {1}, exec_aten::ScalarType::Long); // run the first token and get back logits tensor. Assuming the first token // is bos so don't callback. @@ -114,4 +121,6 @@ Result TextPrefiller::prefill( return cur_token; } -} // namespace torch::executor +} // namespace llm +} // namespace extension +} // namespace executorch diff --git a/extension/llm/runner/text_prefiller.h b/extension/llm/runner/text_prefiller.h index 7293fdca2a..bcec2b895f 100644 --- a/extension/llm/runner/text_prefiller.h +++ b/extension/llm/runner/text_prefiller.h @@ -16,7 +16,9 @@ // patternlint-disable-next-line executorch-cpp-nostdinc #include -namespace torch::executor { +namespace executorch { +namespace extension { +namespace llm { class TextPrefiller { public: @@ -35,7 +37,7 @@ class TextPrefiller { * token in the prompt. * @return The next token of the LLM Module after prefill. */ - Result prefill( + ::executorch::runtime::Result prefill( std::vector& prompt_tokens, int64_t start_pos = 0, std::function token_callback = {}); @@ -47,4 +49,14 @@ class TextPrefiller { bool enable_parallel_prefill_; }; -} // namespace torch::executor +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::TextPrefiller; +} // namespace executor +} // namespace torch diff --git a/extension/llm/runner/text_token_generator.h b/extension/llm/runner/text_token_generator.h index 9b3a31f3f7..46d682a4e4 100644 --- a/extension/llm/runner/text_token_generator.h +++ b/extension/llm/runner/text_token_generator.h @@ -13,8 +13,9 @@ #include #include -namespace torch::executor { -using Stats = ::executorch::llm::Stats; +namespace executorch { +namespace extension { +namespace llm { class TextTokenGenerator { public: @@ -41,7 +42,7 @@ class TextTokenGenerator { * @param token_callback what to do after a token is generated. * @return how many tokens are generated. */ - inline Result generate( + inline ::executorch::runtime::Result generate( std::vector tokens, int64_t start_pos, int32_t seq_len, @@ -69,14 +70,14 @@ class TextTokenGenerator { // initialize tensor wrappers ManagedTensor tokens_managed( - token_data.data(), token_shape, ScalarType::Long); + token_data.data(), token_shape, exec_aten::ScalarType::Long); - ManagedTensor start_pos_managed(&pos, {1}, ScalarType::Long); + ManagedTensor start_pos_managed(&pos, {1}, exec_aten::ScalarType::Long); // Generate our tokens while (pos < seq_len - 1) { // Run the model - Result logits_res = + ::executorch::runtime::Result logits_res = text_decoder_runner_->step(tokens_managed, start_pos_managed); ET_CHECK_OK_OR_RETURN_ERROR(logits_res.error()); @@ -136,4 +137,15 @@ class TextTokenGenerator { // stats Stats* stats_; }; -} // namespace torch::executor + +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::TextTokenGenerator; +} // namespace executor +} // namespace torch diff --git a/extension/llm/runner/util.h b/extension/llm/runner/util.h index 5d4792b641..baf6af328b 100644 --- a/extension/llm/runner/util.h +++ b/extension/llm/runner/util.h @@ -11,9 +11,9 @@ #include #include -namespace torch { -namespace executor { -namespace util { +namespace executorch { +namespace extension { +namespace llm { void inline safe_printf(const char* piece) { // piece might be a raw byte token, and we only want to print printable chars @@ -44,6 +44,17 @@ long inline time_in_ms() { return time.tv_sec * 1000 + time.tv_nsec / 1000000; } +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +namespace util { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::safe_printf; +using ::executorch::extension::llm::time_in_ms; } // namespace util } // namespace executor } // namespace torch diff --git a/extension/llm/sampler/sampler.cpp b/extension/llm/sampler/sampler.cpp index 6b0f155f12..64e1307d26 100644 --- a/extension/llm/sampler/sampler.cpp +++ b/extension/llm/sampler/sampler.cpp @@ -35,8 +35,9 @@ #include #include -namespace torch { -namespace executor { +namespace executorch { +namespace extension { +namespace llm { // sampler stuff template @@ -192,5 +193,6 @@ int32_t Sampler::sample(T* logits) { template int32_t Sampler::sample(float* logits); template int32_t Sampler::sample(exec_aten::Half* logits); -} // namespace executor -} // namespace torch +} // namespace llm +} // namespace extension +} // namespace executorch diff --git a/extension/llm/sampler/sampler.h b/extension/llm/sampler/sampler.h index 584a010bba..9d6d742e59 100644 --- a/extension/llm/sampler/sampler.h +++ b/extension/llm/sampler/sampler.h @@ -20,8 +20,9 @@ #include -namespace torch { -namespace executor { +namespace executorch { +namespace extension { +namespace llm { // A simple llama2 sampler. template @@ -57,5 +58,15 @@ class Sampler { unsigned long long rng_state_; }; +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::ProbIndex; +using ::executorch::extension::llm::Sampler; } // namespace executor } // namespace torch diff --git a/extension/llm/sampler/test/test_sampler.cpp b/extension/llm/sampler/test/test_sampler.cpp index 2dac03d976..044a39458e 100644 --- a/extension/llm/sampler/test/test_sampler.cpp +++ b/extension/llm/sampler/test/test_sampler.cpp @@ -12,14 +12,10 @@ #include using namespace ::testing; +using ::executorch::extension::llm::Sampler; -namespace torch { -namespace executor { - -class SamplerTest : public Test {}; - -TEST_F(SamplerTest, TestArgMax) { - torch::executor::Sampler sampler{ +TEST(SamplerTest, TestArgMax) { + Sampler sampler{ /*vocab_size*/ 32000, /*temperature*/ 0.0f, /*topp*/ 0.9f, @@ -31,8 +27,8 @@ TEST_F(SamplerTest, TestArgMax) { EXPECT_EQ(sampler.sample(input.data_ptr()), 396); } -TEST_F(SamplerTest, TestArgMaxWithFP16) { - torch::executor::Sampler sampler{ +TEST(SamplerTest, TestArgMaxWithFP16) { + Sampler sampler{ /*vocab_size*/ 32000, /*temperature*/ 0.0f, /*topp*/ 0.9f, @@ -43,6 +39,3 @@ TEST_F(SamplerTest, TestArgMaxWithFP16) { input[0][0][396] = 1.0f; EXPECT_EQ(sampler.sample(input.data_ptr()), 396); } - -} // namespace executor -} // namespace torch diff --git a/extension/llm/tokenizer/base64.h b/extension/llm/tokenizer/base64.h index 9fb1b5129b..7337ecead4 100644 --- a/extension/llm/tokenizer/base64.h +++ b/extension/llm/tokenizer/base64.h @@ -29,8 +29,10 @@ #include #include -namespace torch { -namespace executor { +namespace executorch { +namespace extension { +namespace llm { + namespace base64 { std::string decode(const std::string_view& input); @@ -176,5 +178,16 @@ inline std::string decode(const std::string_view& input) { } // namespace base64 +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +namespace base64 { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::base64::decode; +} // namespace base64 } // namespace executor } // namespace torch diff --git a/extension/llm/tokenizer/bpe_tokenizer.cpp b/extension/llm/tokenizer/bpe_tokenizer.cpp index 07d138548d..1548f000a5 100644 --- a/extension/llm/tokenizer/bpe_tokenizer.cpp +++ b/extension/llm/tokenizer/bpe_tokenizer.cpp @@ -10,8 +10,12 @@ #include -namespace torch { -namespace executor { +using ::executorch::runtime::Error; +using ::executorch::runtime::Result; + +namespace executorch { +namespace extension { +namespace llm { static int compare_tokens(const void* a, const void* b) { if (((TokenIndex*)a)->str == nullptr) { @@ -311,5 +315,6 @@ BPETokenizer::encode(const std::string& text, int8_t bos, int8_t eos) const { return Result(tokens); } -} // namespace executor -} // namespace torch +} // namespace llm +} // namespace extension +} // namespace executorch diff --git a/extension/llm/tokenizer/bpe_tokenizer.h b/extension/llm/tokenizer/bpe_tokenizer.h index 7ea8402583..7fc7306c10 100644 --- a/extension/llm/tokenizer/bpe_tokenizer.h +++ b/extension/llm/tokenizer/bpe_tokenizer.h @@ -11,8 +11,9 @@ #include #include -namespace torch { -namespace executor { +namespace executorch { +namespace extension { +namespace llm { struct TokenIndex { const char* str; @@ -26,13 +27,14 @@ class BPETokenizer : public Tokenizer { explicit BPETokenizer(); ~BPETokenizer() override; - Error load(const std::string& tokenizer_path) override; + ::executorch::runtime::Error load(const std::string& tokenizer_path) override; - Result> + ::executorch::runtime::Result> encode(const std::string& input, int8_t bos, int8_t eos) const override; - Result decode(uint64_t prev_token, uint64_t token) - const override; + ::executorch::runtime::Result decode( + uint64_t prev_token, + uint64_t token) const override; private: std::unique_ptr vocab_ = nullptr; @@ -41,5 +43,16 @@ class BPETokenizer : public Tokenizer { unsigned int max_token_length_ = 0; unsigned char byte_pieces_[512]; // stores all single-byte strings }; + +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::BPETokenizer; +using ::executorch::extension::llm::TokenIndex; } // namespace executor } // namespace torch diff --git a/extension/llm/tokenizer/test/test_bpe_tokenizer.cpp b/extension/llm/tokenizer/test/test_bpe_tokenizer.cpp index 17bb83e2f4..c553fe59f9 100644 --- a/extension/llm/tokenizer/test/test_bpe_tokenizer.cpp +++ b/extension/llm/tokenizer/test/test_bpe_tokenizer.cpp @@ -13,13 +13,15 @@ using namespace ::testing; -namespace torch { -namespace executor { +using ::executorch::extension::llm::BPETokenizer; +using ::executorch::extension::llm::Tokenizer; +using ::executorch::runtime::Error; +using ::executorch::runtime::Result; class TokenizerExtensionTest : public Test { public: void SetUp() override { - torch::executor::runtime_init(); + executorch::runtime::runtime_init(); tokenizer_ = std::make_unique(); modelPath_ = std::getenv("RESOURCES_PATH") + std::string("/test_bpe_tokenizer.bin"); @@ -65,6 +67,3 @@ TEST_F(TokenizerExtensionTest, SafeToDestruct) { tokenizer_ = std::make_unique(); tokenizer_.reset(); } - -} // namespace executor -} // namespace torch diff --git a/extension/llm/tokenizer/test/test_tiktoken.cpp b/extension/llm/tokenizer/test/test_tiktoken.cpp index f423183b8a..a81b20bcf8 100644 --- a/extension/llm/tokenizer/test/test_tiktoken.cpp +++ b/extension/llm/tokenizer/test/test_tiktoken.cpp @@ -12,9 +12,10 @@ #include using namespace ::testing; - -namespace torch { -namespace executor { +using ::executorch::extension::llm::Tiktoken; +using ::executorch::extension::llm::Tokenizer; +using ::executorch::runtime::Error; +using ::executorch::runtime::Result; namespace { // Test case based on Llama 2 @@ -49,7 +50,7 @@ static inline std::unique_ptr> _get_special_tokens() { class TiktokenExtensionTest : public Test { public: void SetUp() override { - torch::executor::runtime_init(); + executorch::runtime::runtime_init(); tokenizer_ = std::make_unique( _get_special_tokens(), kBOSTokenIndex, kEOSTokenIndex); modelPath_ = std::getenv("RESOURCES_PATH") + @@ -139,5 +140,3 @@ TEST_F(TiktokenExtensionTest, ConstructionWithInvalidEOSIndex) { ""); #endif } -} // namespace executor -} // namespace torch diff --git a/extension/llm/tokenizer/tiktoken.cpp b/extension/llm/tokenizer/tiktoken.cpp index 67d1f916f2..7b15d25f0d 100644 --- a/extension/llm/tokenizer/tiktoken.cpp +++ b/extension/llm/tokenizer/tiktoken.cpp @@ -30,8 +30,12 @@ #include #include -namespace torch { -namespace executor { +using ::executorch::runtime::Error; +using ::executorch::runtime::Result; + +namespace executorch { +namespace extension { +namespace llm { // ------------------------------Util start------------------------------------ @@ -415,5 +419,6 @@ Result Tiktoken::decode(uint64_t prev, uint64_t cur) const { } // -------------------------public method end------------------------------- -} // namespace executor -} // namespace torch +} // namespace llm +} // namespace extension +} // namespace executorch diff --git a/extension/llm/tokenizer/tiktoken.h b/extension/llm/tokenizer/tiktoken.h index 0b1b1fa61e..7d78f8b60d 100644 --- a/extension/llm/tokenizer/tiktoken.h +++ b/extension/llm/tokenizer/tiktoken.h @@ -14,8 +14,9 @@ #include #include -namespace torch { -namespace executor { +namespace executorch { +namespace extension { +namespace llm { using Encoder = std::unordered_map; using Decoder = std::unordered_map; @@ -33,13 +34,14 @@ class Tiktoken : public Tokenizer { size_t bos_token_index, size_t eos_token_index); - Error load(const std::string& tokenizer_path) override; + ::executorch::runtime::Error load(const std::string& tokenizer_path) override; - Result> + ::executorch::runtime::Result> encode(const std::string& input, int8_t bos, int8_t eos) const override; - Result decode(uint64_t prev_token, uint64_t token) - const override; + ::executorch::runtime::Result decode( + uint64_t prev_token, + uint64_t token) const override; private: template @@ -74,5 +76,18 @@ class Tiktoken : public Tokenizer { Re2UPtr _regex; Re2UPtr _special_token_regex; }; + +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::Decoder; +using ::executorch::extension::llm::Encoder; +using ::executorch::extension::llm::Re2UPtr; +using ::executorch::extension::llm::Tiktoken; } // namespace executor } // namespace torch diff --git a/extension/llm/tokenizer/tokenizer.h b/extension/llm/tokenizer/tokenizer.h index b49dc245eb..3115cbdff7 100644 --- a/extension/llm/tokenizer/tokenizer.h +++ b/extension/llm/tokenizer/tokenizer.h @@ -17,8 +17,9 @@ #include #include -namespace torch { -namespace executor { +namespace executorch { +namespace extension { +namespace llm { // A tokenizer interface. class Tokenizer { @@ -26,15 +27,16 @@ class Tokenizer { explicit Tokenizer() {} virtual ~Tokenizer() {} - virtual Error load(const std::string& tokenizer_path) = 0; + virtual ::executorch::runtime::Error load( + const std::string& tokenizer_path) = 0; - virtual Result> + virtual ::executorch::runtime::Result> encode(const std::string& input, int8_t bos, int8_t eos) const = 0; - Error decode_verify(uint64_t token) const { + ::executorch::runtime::Error decode_verify(uint64_t token) const { if (!initialized_) { ET_LOG(Error, "Tokenizer not initialized"); - return Error::NotSupported; + return ::executorch::runtime::Error::NotSupported; } if (token >= vocab_size_) { ET_LOG( @@ -42,13 +44,14 @@ class Tokenizer { "token %" PRIu64 " is out side of vacab range %d", token, vocab_size_); - return Error::NotSupported; + return ::executorch::runtime::Error::NotSupported; } - return Error::Ok; + return ::executorch::runtime::Error::Ok; } - virtual Result decode(uint64_t prev_token, uint64_t token) - const = 0; + virtual ::executorch::runtime::Result decode( + uint64_t prev_token, + uint64_t token) const = 0; // getters int32_t vocab_size() const { @@ -70,5 +73,14 @@ class Tokenizer { uint64_t eos_tok_ = 0; }; +} // namespace llm +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::llm::Tokenizer; } // namespace executor } // namespace torch diff --git a/extension/pybindings/pybindings.cpp b/extension/pybindings/pybindings.cpp index 83cec280b8..7c98ee4aa0 100644 --- a/extension/pybindings/pybindings.cpp +++ b/extension/pybindings/pybindings.cpp @@ -17,6 +17,9 @@ #include #include +#include +#include +#include #include #include #include @@ -28,9 +31,6 @@ #include #include #include -#include -#include -#include #include #include diff --git a/extension/training/test/training_loop_test.cpp b/extension/training/test/training_loop_test.cpp index 28931fbfc0..8e62663c9f 100644 --- a/extension/training/test/training_loop_test.cpp +++ b/extension/training/test/training_loop_test.cpp @@ -23,7 +23,7 @@ // @lint-ignore-every CLANGTIDY facebook-hte-CArray using namespace ::testing; -using namespace torch::executor::training::optimizer; +using namespace executorch::extension::training::optimizer; using namespace torch::executor::testing; using exec_aten::ScalarType; using exec_aten::Tensor; diff --git a/pytest.ini b/pytest.ini index 5ed1780e61..7298773255 100644 --- a/pytest.ini +++ b/pytest.ini @@ -13,8 +13,7 @@ addopts = --ignore-glob=backends/arm/**/* # explicitly list out tests that are running successfully in oss examples/models/test - # sdk/ - sdk/ + devtools/ # examples examples/models/llama2/tests # examples/models/llava/test TODO: enable this diff --git a/runtime/executor/test/targets.bzl b/runtime/executor/test/targets.bzl index 401581421d..5ba989ef86 100644 --- a/runtime/executor/test/targets.bzl +++ b/runtime/executor/test/targets.bzl @@ -19,7 +19,7 @@ def define_common_targets(is_fbcode = False): "//executorch/exir/backend/test/...", "//executorch/runtime/backend/...", "//executorch/extension/pybindings/...", - "//executorch/sdk/fb/runners/...", + "//executorch/devtools/fb/runners/...", "//executorch/test/...", "//executorch/examples/...", ], @@ -43,7 +43,7 @@ def define_common_targets(is_fbcode = False): "//executorch/exir/backend/test/...", "//executorch/runtime/backend/...", "//executorch/extension/pybindings/...", - "//executorch/sdk/fb/runners/...", + "//executorch/devtools/fb/runners/...", "//executorch/test/...", "//executorch/examples/...", ], diff --git a/schema/targets.bzl b/schema/targets.bzl index 2c797baa16..40c6d8d5c8 100644 --- a/schema/targets.bzl +++ b/schema/targets.bzl @@ -57,7 +57,7 @@ def define_common_targets(): name = INPUT_SCALAR_TYPE, visibility = [ "//executorch/exir/_serialize/...", - "//executorch/sdk/etdump/...", + "//executorch/devtools/etdump/...", ], ) diff --git a/sdk/inspector/tests/TARGETS b/sdk/inspector/tests/TARGETS deleted file mode 100644 index 374d2ea753..0000000000 --- a/sdk/inspector/tests/TARGETS +++ /dev/null @@ -1,40 +0,0 @@ -load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest") - -oncall("executorch") - -python_unittest( - name = "inspector_test", - srcs = ["inspector_test.py"], - deps = [ - "//executorch/exir:lib", - "//executorch/sdk:lib", - "//executorch/sdk/debug_format:et_schema", - "//executorch/sdk/etdump:schema_flatcc", - "//executorch/sdk/etrecord/tests:etrecord_test_library", - "//executorch/sdk/inspector:inspector", - "//executorch/sdk/inspector:lib", - ], -) - -python_unittest( - name = "event_blocks_test", - srcs = ["event_blocks_test.py"], - deps = [ - "//executorch/sdk/etdump:schema_flatcc", - "//executorch/sdk/inspector:inspector", - "//executorch/sdk/inspector:lib", - ], -) - -python_unittest( - name = "inspector_utils_test", - srcs = ["inspector_utils_test.py"], - deps = [ - "//executorch/sdk:lib", - "//executorch/sdk/debug_format:base_schema", - "//executorch/sdk/debug_format:et_schema", - "//executorch/sdk/etdump:schema_flatcc", - "//executorch/sdk/etrecord/tests:etrecord_test_library", - "//executorch/sdk/inspector:inspector_utils", - ], -) diff --git a/setup.py b/setup.py index 58a9973c9f..75b3ece526 100644 --- a/setup.py +++ b/setup.py @@ -360,12 +360,12 @@ def run(self): ("schema/scalar_type.fbs", "exir/_serialize/scalar_type.fbs"), ("schema/program.fbs", "exir/_serialize/program.fbs"), ( - "sdk/bundled_program/schema/bundled_program_schema.fbs", - "sdk/bundled_program/serialize/bundled_program_schema.fbs", + "devtools/bundled_program/schema/bundled_program_schema.fbs", + "devtools/bundled_program/serialize/bundled_program_schema.fbs", ), ( - "sdk/bundled_program/schema/scalar_type.fbs", - "sdk/bundled_program/serialize/scalar_type.fbs", + "devtools/bundled_program/schema/scalar_type.fbs", + "devtools/bundled_program/serialize/scalar_type.fbs", ), ] for src, dst in src_to_dst: @@ -606,8 +606,8 @@ def get_ext_modules() -> List[Extension]: "executorch/extension": "extension", "executorch/kernels/quantized": "kernels/quantized", "executorch/schema": "schema", - "executorch/sdk": "sdk", - "executorch/sdk/bundled_program": "sdk/bundled_program", + "executorch/devtools": "devtools", + "executorch/devtools/bundled_program": "devtools/bundled_program", "executorch/util": "util", # Note: This will install a top-level module called "serializer", # which seems too generic and might conflict with other pip packages. diff --git a/shim/xplat/executorch/extension/pybindings/pybindings.bzl b/shim/xplat/executorch/extension/pybindings/pybindings.bzl index f62c567ba4..813b420dba 100644 --- a/shim/xplat/executorch/extension/pybindings/pybindings.bzl +++ b/shim/xplat/executorch/extension/pybindings/pybindings.bzl @@ -10,29 +10,29 @@ MODELS_ATEN_OPS_LEAN_MODE_GENERATED_LIB = [ PORTABLE_MODULE_DEPS = [ "//executorch/runtime/kernel:operator_registry", "//executorch/runtime/executor:program", - "//executorch/sdk/bundled_program/schema:bundled_program_schema_fbs", + "//executorch/devtools/bundled_program/schema:bundled_program_schema_fbs", "//executorch/extension/aten_util:aten_bridge", - "//executorch/sdk/bundled_program:runtime", + "//executorch/devtools/bundled_program:runtime", "//executorch/extension/data_loader:buffer_data_loader", "//executorch/extension/data_loader:mmap_data_loader", "//executorch/extension/memory_allocator:malloc_memory_allocator", "//executorch/util:util", "//executorch/runtime/executor/test:test_backend_compiler_lib", - "//executorch/sdk/etdump:etdump_flatcc", + "//executorch/devtools/etdump:etdump_flatcc", ] + get_all_cpu_backend_targets() ATEN_MODULE_DEPS = [ "//executorch/runtime/kernel:operator_registry", "//executorch/runtime/executor:program_aten", "//executorch/runtime/core/exec_aten:lib", - "//executorch/sdk/bundled_program/schema:bundled_program_schema_fbs", + "//executorch/devtools/bundled_program/schema:bundled_program_schema_fbs", "//executorch/extension/data_loader:buffer_data_loader", "//executorch/extension/data_loader:mmap_data_loader", "//executorch/extension/memory_allocator:malloc_memory_allocator", "//executorch/util:read_file", - "//executorch/sdk/bundled_program:runtime_aten", + "//executorch/devtools/bundled_program:runtime_aten", "//executorch/runtime/executor/test:test_backend_compiler_lib_aten", - "//executorch/sdk/etdump:etdump_flatcc", + "//executorch/devtools/etdump:etdump_flatcc", ] # Generated lib for all ATen ops with aten kernel used by models in model inventory diff --git a/test/end2end/TARGETS b/test/end2end/TARGETS index 8c0885e32e..fdac0e4887 100644 --- a/test/end2end/TARGETS +++ b/test/end2end/TARGETS @@ -42,6 +42,9 @@ python_unittest( ":exported_module", ":register_scratch_meta_fns", "//caffe2:torch", + "//executorch/devtools:lib", + "//executorch/devtools/bundled_program:config", + "//executorch/devtools/bundled_program/serialize:lib", "//executorch/exir:dynamic_shape", "//executorch/exir:lib", "//executorch/exir:memory", @@ -57,9 +60,6 @@ python_unittest( "//executorch/exir/tests:transformer", "//executorch/extension/pybindings:aten_lib", "//executorch/extension/pytree:pybindings", - "//executorch/sdk:lib", - "//executorch/sdk/bundled_program:config", - "//executorch/sdk/bundled_program/serialize:lib", ], ) @@ -73,6 +73,9 @@ python_unittest( ":exported_module", ":register_scratch_meta_fns", "//caffe2:torch", + "//executorch/devtools:lib", + "//executorch/devtools/bundled_program:config", + "//executorch/devtools/bundled_program/serialize:lib", "//executorch/exir:dynamic_shape", "//executorch/exir:lib", "//executorch/exir:memory", @@ -88,8 +91,5 @@ python_unittest( "//executorch/exir/tests:transformer", "//executorch/extension/pybindings:portable_lib", "//executorch/extension/pytree:pybindings", - "//executorch/sdk:lib", - "//executorch/sdk/bundled_program:config", - "//executorch/sdk/bundled_program/serialize:lib", ], ) diff --git a/test/models/generate_linear_out_bundled_program.py b/test/models/generate_linear_out_bundled_program.py index 9201e43adf..93fd1445ef 100644 --- a/test/models/generate_linear_out_bundled_program.py +++ b/test/models/generate_linear_out_bundled_program.py @@ -17,15 +17,15 @@ from typing import List import torch +from executorch.devtools import BundledProgram +from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite +from executorch.devtools.bundled_program.serialize import ( + serialize_from_bundled_program_to_flatbuffer, +) from executorch.exir import ExecutorchBackendConfig, to_edge from executorch.exir.passes import MemoryPlanningPass, ToOutVarPass from executorch.exir.print_program import pretty_print -from executorch.sdk import BundledProgram -from executorch.sdk.bundled_program.config import MethodTestCase, MethodTestSuite -from executorch.sdk.bundled_program.serialize import ( - serialize_from_bundled_program_to_flatbuffer, -) from executorch.test.models.linear_model import LinearModel from torch.export import export diff --git a/test/models/targets.bzl b/test/models/targets.bzl index ad907304ed..aea47c9e03 100644 --- a/test/models/targets.bzl +++ b/test/models/targets.bzl @@ -22,9 +22,9 @@ def define_common_targets(): deps = [ ":linear_model", "//caffe2:torch", - "//executorch/sdk/bundled_program:config", - "//executorch/sdk:lib", - "//executorch/sdk/bundled_program/serialize:lib", + "//executorch/devtools/bundled_program:config", + "//executorch/devtools:lib", + "//executorch/devtools/bundled_program/serialize:lib", "//executorch/exir:lib", "//executorch/exir/_serialize:lib", ], diff --git a/test/run_oss_cpp_tests.sh b/test/run_oss_cpp_tests.sh index 2d2f816209..3f17a9ead6 100755 --- a/test/run_oss_cpp_tests.sh +++ b/test/run_oss_cpp_tests.sh @@ -121,7 +121,7 @@ probe_tests() { kernels runtime schema - sdk + devtools test )