From 5793e3d05e4cf8c7e31b63df88d34571d70d989b Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Wed, 4 Sep 2024 11:35:03 +0000 Subject: [PATCH] 2024-09-04 nightly release (a4092c59066f045043b18a7fbe8844e7216b15fc) --- .ci/scripts/test.sh | 3 + .github/workflows/android-perf.yml | 2 +- .github/workflows/trunk.yml | 2 +- backends/arm/arm_partitioner.py | 2 + backends/arm/operators/__init__.py | 2 + backends/arm/operators/op_conv2d.py | 2 +- backends/arm/operators/op_exp.py | 81 ++++++++ backends/arm/operators/op_log.py | 81 ++++++++ .../annotate_channels_last_dim_order_pass.py | 4 +- backends/arm/passes/arm_pass_manager.py | 2 + .../arm/passes/size_adjust_conv2d_pass.py | 129 ++++++++++++ backends/arm/quantizer/arm_quantizer.py | 1 + .../quantization_annotation/__init__.py | 1 + .../one_to_one_annotator.py | 55 ++++++ backends/arm/runtime/ArmBackendEthosU.cpp | 18 +- backends/arm/test/ops/test_conv.py | 24 ++- backends/arm/test/ops/test_exp.py | 110 +++++++++++ backends/arm/test/ops/test_log.py | 110 +++++++++++ backends/qualcomm/partition/common_defs.py | 6 +- backends/vulkan/runtime/api/api.h | 2 +- .../{StorageBuffer.h => StagingBuffer.h} | 22 +-- .../vulkan/runtime/api/containers/Tensor.cpp | 2 +- .../vulkan/runtime/graph/ComputeGraph.cpp | 4 +- backends/vulkan/runtime/graph/ComputeGraph.h | 2 +- .../vulkan/runtime/graph/containers/Value.h | 8 +- .../vulkan/runtime/graph/ops/PrepackNode.cpp | 8 +- .../vulkan/runtime/graph/ops/PrepackNode.h | 2 +- .../runtime/graph/ops/utils/BindingUtils.cpp | 2 +- .../runtime/graph/ops/utils/BindingUtils.h | 2 +- .../runtime/graph/ops/utils/StagingUtils.cpp | 6 +- .../runtime/graph/ops/utils/StagingUtils.h | 6 +- .../runtime/vk_api/memory/Allocator.cpp | 47 ++--- .../vulkan/runtime/vk_api/memory/Allocator.h | 5 +- backends/vulkan/test/utils/test_utils.cpp | 6 +- backends/vulkan/test/utils/test_utils.h | 10 +- .../vulkan/test/vulkan_compute_api_test.cpp | 16 +- .../tools/gpuinfo/include/architecture.h | 6 +- .../vulkan/tools/gpuinfo/include/buffers.h | 8 +- .../vulkan/tools/gpuinfo/include/textures.h | 4 +- build/build_android_llm_demo.sh | 1 + codegen/tools/gen_oplist.py | 39 +++- codegen/tools/test/test_gen_oplist.py | 2 +- examples/arm/executor_runner/CMakeLists.txt | 2 +- .../executor_runner/arm_executor_runner.cpp | 7 + .../arm/executor_runner/arm_perf_monitor.cpp | 173 ++++++++++++++++ .../arm/executor_runner/arm_perf_monitor.h | 10 + examples/models/llama2/eval_llama_lib.py | 8 +- examples/models/llava/main.cpp | 2 +- examples/models/llava/runner/llava_runner.cpp | 2 +- examples/models/llava/runner/llava_runner.h | 2 +- examples/models/llava/runner/targets.bzl | 3 + examples/qualcomm/scripts/torchvision_vit.py | 23 ++- extension/android/CMakeLists.txt | 11 +- extension/android/benchmark/.gitignore | 16 ++ extension/android/benchmark/app/.gitignore | 1 + .../android/benchmark/app/build.gradle.kts | 41 ++++ .../android/benchmark/app/proguard-rules.pro | 21 ++ .../minibench/ExampleInstrumentedTest.java | 26 +++ .../app/src/main/AndroidManifest.xml | 21 ++ .../pytorch/minibench/BenchmarkActivity.java | 46 +++++ .../app/src/main/res/values/colors.xml | 10 + .../app/src/main/res/values/strings.xml | 3 + .../app/src/main/res/values/themes.xml | 5 + .../pytorch/minibench/ExampleUnitTest.java | 17 ++ extension/android/benchmark/build.gradle.kts | 4 + extension/android/benchmark/gradle.properties | 21 ++ .../gradle/wrapper/gradle-wrapper.jar | Bin 0 -> 59203 bytes .../gradle/wrapper/gradle-wrapper.properties | 6 + extension/android/benchmark/gradlew | 185 
++++++++++++++++++ extension/android/benchmark/gradlew.bat | 89 +++++++++ .../android/benchmark/settings.gradle.kts | 17 ++ extension/android/jni/jni_layer.cpp | 16 +- extension/android/jni/jni_layer_constants.h | 4 +- extension/android/jni/jni_layer_llama.cpp | 2 +- .../java/org/pytorch/executorch/Module.java | 6 + .../org/pytorch/executorch/NativePeer.java | 18 ++ extension/llm/custom_ops/TARGETS | 2 +- extension/llm/custom_ops/targets.bzl | 86 ++++---- .../llm/custom_ops/test_sdpa_with_kv_cache.py | 91 +++++++-- extension/llm/runner/multimodal_runner.h | 2 +- extension/module/module.h | 2 + kernels/optimized/lib_defs.bzl | 99 ++++++---- kernels/portable/CMakeLists.txt | 5 +- kernels/portable/cpu/op_abs.cpp | 2 + kernels/test/TestUtil.h | 36 ++++ kernels/test/op_abs_test.cpp | 25 +++ .../exec_aten/testing_util/tensor_factory.h | 156 +++++++++++++-- .../testing_util/test/tensor_factory_test.cpp | 2 +- runtime/core/exec_aten/util/tensor_util.h | 116 +++++++---- .../core/exec_aten/util/tensor_util_aten.cpp | 58 ++++++ .../exec_aten/util/tensor_util_portable.cpp | 34 ++++ runtime/core/exec_aten/util/test/targets.bzl | 22 ++- .../exec_aten/util/test/tensor_util_test.cpp | 56 +++++- shim/tools/build_defs/fb_native_wrapper.bzl | 10 + 94 files changed, 2170 insertions(+), 297 deletions(-) create mode 100644 backends/arm/operators/op_exp.py create mode 100644 backends/arm/operators/op_log.py create mode 100644 backends/arm/passes/size_adjust_conv2d_pass.py create mode 100644 backends/arm/quantizer/quantization_annotation/one_to_one_annotator.py create mode 100644 backends/arm/test/ops/test_exp.py create mode 100644 backends/arm/test/ops/test_log.py rename backends/vulkan/runtime/api/containers/{StorageBuffer.h => StagingBuffer.h} (71%) create mode 100644 examples/arm/executor_runner/arm_perf_monitor.cpp create mode 100644 examples/arm/executor_runner/arm_perf_monitor.h create mode 100644 extension/android/benchmark/.gitignore create mode 100644 extension/android/benchmark/app/.gitignore create mode 100644 extension/android/benchmark/app/build.gradle.kts create mode 100644 extension/android/benchmark/app/proguard-rules.pro create mode 100644 extension/android/benchmark/app/src/androidTest/java/org/pytorch/minibench/ExampleInstrumentedTest.java create mode 100644 extension/android/benchmark/app/src/main/AndroidManifest.xml create mode 100644 extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java create mode 100644 extension/android/benchmark/app/src/main/res/values/colors.xml create mode 100644 extension/android/benchmark/app/src/main/res/values/strings.xml create mode 100644 extension/android/benchmark/app/src/main/res/values/themes.xml create mode 100644 extension/android/benchmark/app/src/test/java/org/pytorch/minibench/ExampleUnitTest.java create mode 100644 extension/android/benchmark/build.gradle.kts create mode 100644 extension/android/benchmark/gradle.properties create mode 100644 extension/android/benchmark/gradle/wrapper/gradle-wrapper.jar create mode 100644 extension/android/benchmark/gradle/wrapper/gradle-wrapper.properties create mode 100755 extension/android/benchmark/gradlew create mode 100644 extension/android/benchmark/gradlew.bat create mode 100644 extension/android/benchmark/settings.gradle.kts create mode 100644 shim/tools/build_defs/fb_native_wrapper.bzl diff --git a/.ci/scripts/test.sh b/.ci/scripts/test.sh index 338f2868e3..1dbf4a8ce9 100755 --- a/.ci/scripts/test.sh +++ b/.ci/scripts/test.sh @@ -170,6 +170,9 @@ test_model_with_qnn() { elif 
[[ "${MODEL_NAME}" == "ic3" ]]; then EXPORT_SCRIPT=inception_v3 EXPORTED_MODEL_NAME=ic3_qnn.pte + elif [[ "${MODEL_NAME}" == "vit" ]]; then + EXPORT_SCRIPT=torchvision_vit + EXPORTED_MODEL_NAME=vit_qnn.pte fi "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m SM8550 --compile_only diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index cf89944abd..028cd4c5a2 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -84,7 +84,7 @@ jobs: # Separate default values from the workflow dispatch. To ensure defaults are accessible # during scheduled runs and to provide flexibility for different defaults between # on-demand and periodic benchmarking. - CRON_DEFAULT_MODELS: "stories110M,dl3,mv3,mv2,ic4,ic3" + CRON_DEFAULT_MODELS: "stories110M,dl3,mv3,mv2,ic4,ic3,vit" CRON_DEFAULT_DEVICES: "samsung_galaxy_s2x" CRON_DEFAULT_DELEGATES: "xnnpack,qnn" run: | diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 6d08675b8e..9d41f39172 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -305,7 +305,7 @@ jobs: strategy: matrix: dtype: [fp32] - model: [dl3, mv3, mv2, ic4, ic3] + model: [dl3, mv3, mv2, ic4, ic3, vit] fail-fast: false with: runner: linux.2xlarge diff --git a/backends/arm/arm_partitioner.py b/backends/arm/arm_partitioner.py index bee8b8a27f..524316613f 100644 --- a/backends/arm/arm_partitioner.py +++ b/backends/arm/arm_partitioner.py @@ -45,6 +45,8 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool: exir_ops.edge.aten.hardtanh.default, exir_ops.edge.aten.convolution.default, exir_ops.edge.aten.div.Tensor, + exir_ops.edge.aten.exp.default, + exir_ops.edge.aten.log.default, exir_ops.edge.aten.split_with_sizes_copy.default, exir_ops.edge.aten.full.default, exir_ops.edge.aten.mul.Tensor, diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py index fb5e46c5c2..5c1109eec1 100644 --- a/backends/arm/operators/__init__.py +++ b/backends/arm/operators/__init__.py @@ -14,9 +14,11 @@ op_conv2d, op_dequant, op_div, + op_exp, op_full, op_get_item, op_hardtanh, + op_log, op_mean_dim, op_mm, op_mul, diff --git a/backends/arm/operators/op_conv2d.py b/backends/arm/operators/op_conv2d.py index 323b11601c..9437e96f5e 100644 --- a/backends/arm/operators/op_conv2d.py +++ b/backends/arm/operators/op_conv2d.py @@ -40,7 +40,7 @@ def adjust_pad_if_needed(self, input, weight, stride, pad, dilation): if mod_remainder > pad: raise RuntimeError( - f"ignoring input element is not currently supported, got a large stride {stride}" + "This case should be handled by the SizeAdjustConv2d pass, is it enabled?" ) return pad - mod_remainder diff --git a/backends/arm/operators/op_exp.py b/backends/arm/operators/op_exp.py new file mode 100644 index 0000000000..f9319b5ea8 --- /dev/null +++ b/backends/arm/operators/op_exp.py @@ -0,0 +1,81 @@ +# Copyright 2024 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+from typing import List + +import numpy as np + +import serializer.tosa_serializer as ts +from executorch.backends.arm.operators.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.arm.tosa_mapping import TosaArg + +from executorch.backends.arm.tosa_quant_utils import ( + dequantize_value, + get_quant_node_args, + QuantArgs, + quantize_value, +) +from serializer.tosa_serializer import TosaOp +from torch.fx import Node + + +@register_node_visitor +class ExpVisitor(NodeVisitor): + target = "aten.exp.default" + + def __init__(self, *args): + super().__init__(*args) + + def define_node( + self, + node: Node, + tosa_graph: ts.TosaSerializer, + inputs: List[TosaArg], + output: TosaArg, + is_quant_node: bool, + ) -> None: + + assert len(node.all_input_nodes) == 1 + assert len(node.users) == 1 + + if is_quant_node: + # Assume quantized input is 8 bit. + + # Create attribute for 8 bit table lookup. + input_node = node.all_input_nodes[0] + in_quantargs = get_quant_node_args(input_node) + output_node = list(node.users)[0] + out_quantargs = get_quant_node_args(output_node) + + table = exp_table_8bit(in_quantargs, out_quantargs) + table_attr = ts.TosaSerializerAttribute() + table_attr.TableAttribute(table) + + tosa_graph.addOperator( + TosaOp.Op().TABLE, [inputs[0].name], [output.name], table_attr + ) + else: + tosa_graph.addOperator(TosaOp.Op().EXP, [inputs[0].name], [output.name]) + + +def exp_table_8bit(in_quantargs: QuantArgs, out_quantargs: QuantArgs): + """ + Returns a table mapping 256 entries to exp([qmin,qmax]) + """ + + def exp(x): + # Convert quantized input to floating point exp input space. + v = dequantize_value(x, in_quantargs) + # Compute exp. + v = np.exp(v) + # Convert exp output back to quantized space. + return quantize_value(v, out_quantargs) + + return [ + exp(x) + for x in np.linspace(in_quantargs.qmin, in_quantargs.qmax, 256, dtype=np.int8) + ] diff --git a/backends/arm/operators/op_log.py b/backends/arm/operators/op_log.py new file mode 100644 index 0000000000..a76eb57f71 --- /dev/null +++ b/backends/arm/operators/op_log.py @@ -0,0 +1,81 @@ +# Copyright 2024 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +from typing import List + +import numpy as np + +import serializer.tosa_serializer as ts +from executorch.backends.arm.operators.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.arm.tosa_mapping import TosaArg + +from executorch.backends.arm.tosa_quant_utils import ( + dequantize_value, + get_quant_node_args, + QuantArgs, + quantize_value, +) +from serializer.tosa_serializer import TosaOp +from torch.fx import Node + + +@register_node_visitor +class LogVisitor(NodeVisitor): + target = "aten.log.default" + + def __init__(self, *args): + super().__init__(*args) + + def define_node( + self, + node: Node, + tosa_graph: ts.TosaSerializer, + inputs: List[TosaArg], + output: TosaArg, + is_quant_node: bool, + ) -> None: + + assert len(node.all_input_nodes) == 1 + assert len(node.users) == 1 + + if is_quant_node: + # Assume quantized input is 8 bit. + + # Create attribute for 8 bit table lookup. 
+ input_node = node.all_input_nodes[0] + in_quantargs = get_quant_node_args(input_node) + output_node = list(node.users)[0] + out_quantargs = get_quant_node_args(output_node) + + table = log_table_8bit(in_quantargs, out_quantargs) + table_attr = ts.TosaSerializerAttribute() + table_attr.TableAttribute(table) + + tosa_graph.addOperator( + TosaOp.Op().TABLE, [inputs[0].name], [output.name], table_attr + ) + else: + tosa_graph.addOperator(TosaOp.Op().LOG, [inputs[0].name], [output.name]) + + +def log_table_8bit(in_quantargs: QuantArgs, out_quantargs: QuantArgs): + """ + Returns a table mapping 256 entries to log([qmin,qmax]) + """ + + def log(x): + # Convert quantized input to floating point log input space. + v = dequantize_value(x, in_quantargs) + # Compute log. + v = np.log(v) + # Convert log output back to quantized space. + return quantize_value(v, out_quantargs) + + return [ + log(x) + for x in np.linspace(in_quantargs.qmin, in_quantargs.qmax, 256, dtype=np.int8) + ] diff --git a/backends/arm/passes/annotate_channels_last_dim_order_pass.py b/backends/arm/passes/annotate_channels_last_dim_order_pass.py index 9bb45c504a..ea3c171c58 100644 --- a/backends/arm/passes/annotate_channels_last_dim_order_pass.py +++ b/backends/arm/passes/annotate_channels_last_dim_order_pass.py @@ -46,7 +46,9 @@ def call(self, graph_module: torch.fx.GraphModule): NHWC_Order = (0, 2, 3, 1) HWCM_Order = (2, 3, 0, 1) for node in graph_module.graph.nodes: - if isinstance(node.meta["val"], tuple): + if isinstance( + node.meta["val"], (tuple, torch.fx.immutable_collections.immutable_list) + ): node_data = node.meta["val"][0].data else: node_data = node.meta["val"].data diff --git a/backends/arm/passes/arm_pass_manager.py b/backends/arm/passes/arm_pass_manager.py index 054d823dbb..8cac53b134 100644 --- a/backends/arm/passes/arm_pass_manager.py +++ b/backends/arm/passes/arm_pass_manager.py @@ -16,6 +16,7 @@ ConvertSplitToSlicePass, ) from executorch.backends.arm.passes.remove_clone_pass import RemoveClonePass +from executorch.backends.arm.passes.size_adjust_conv2d_pass import SizeAdjustConv2DPass from executorch.exir.backend.compile_spec_schema import CompileSpec from executorch.exir.pass_manager import PassManager @@ -29,6 +30,7 @@ def transform_to_backend_pipeline( self, graph_module: torch.fx.Graph, compile_spec: CompileSpec ): """Apply passes before transforming program to backend""" + self.add_pass(SizeAdjustConv2DPass()) self.add_pass(RemoveClonePass()) self.add_pass(ConvertExpandCopyToRepeatPass()) self.add_pass(ConvertSplitToSlicePass()) diff --git a/backends/arm/passes/size_adjust_conv2d_pass.py b/backends/arm/passes/size_adjust_conv2d_pass.py new file mode 100644 index 0000000000..25d27e7f40 --- /dev/null +++ b/backends/arm/passes/size_adjust_conv2d_pass.py @@ -0,0 +1,129 @@ +# Copyright 2024 Arm Limited and/or its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import cast, Optional + +import torch.fx +from executorch.backends.arm.tosa_quant_utils import is_quant_node +from executorch.exir.dialects._ops import ops as exir_ops +from executorch.exir.pass_base import ExportPass, PassResult +from torch._ops import OpOverload + + +def conv_remainder(input_length, pad, dilation, weight, stride): + """ + Returns the size + """ + return (input_length + 2 * pad - dilation * (weight - 1) - 1) % stride + + +def insert_q_dq_pair( + graph: torch.fx.Graph, + anchor: torch.fx.Node, + q_params: tuple, +): + with graph.inserting_after(anchor): + q = create_node( + graph=graph, + op_target=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default, + args=(), # We add the argument last + ) + q.meta = anchor.meta + + with graph.inserting_after(q): + dq = create_node( + graph=graph, + op_target=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default, + args=(q,) + q_params, + ) + dq.meta = q.meta + + anchor.replace_all_uses_with(dq) + # We add this last so the replace all uses above does not replace the quantized + # node's first use + q.args = (anchor,) + q_params + return dq + + +def create_node( + graph: torch.fx.Graph, + op_target: OpOverload, + args: tuple = (), + kwargs: Optional[dict] = None, +): + return graph.create_node( + "call_function", + op_target, + args=args, + kwargs=kwargs or {}, + ) + + +class SizeAdjustConv2DPass(ExportPass): + """ + Adjust the convolution input size to match perfectly with the + weight size, padding, stride and dilation parameters. + This is done by inserting a slice op to remove the uneven end of the input. + """ + + conv2d_op = exir_ops.edge.aten.convolution.default + slice_op = exir_ops.edge.aten.slice_copy.Tensor + + def call(self, graph_module: torch.fx.GraphModule): + graph = graph_module.graph + modified_graph = False + for node in graph.nodes: + if node.op != "call_function": + continue + if node.target != self.conv2d_op: + continue + + conv_node = cast(torch.fx.Node, node) + input_node, weight, _, stride_hw, pad_hw, dilation_hw, _, _, _ = ( + conv_node.args + ) + weight_shape = weight.meta["val"].shape + input_shape = input_node.meta["val"].shape + + slice_args = [] + for stride, pad, dilation, dim in zip( + cast(list, stride_hw), + cast(list, pad_hw), + cast(list, dilation_hw), + (2, 3), + ): + remainder = conv_remainder( + input_shape[dim], pad, dilation, weight_shape[dim], stride + ) + if remainder > pad: + adjustment = remainder - pad + args = (dim, 0, input_shape[dim] - adjustment) + slice_args.append(args) + if len(slice_args) == 0: + continue + + with graph_module.graph.inserting_before(node): + last_node = cast(torch.fx.Node, input_node) + for args in slice_args: + slice_node = graph.create_node( + "call_function", self.slice_op, (last_node,) + args + ) + if is_quant_node(last_node): + q_params = last_node.args[1:] + dq_node = insert_q_dq_pair( + graph_module.graph, slice_node, q_params + ) + last_node = dq_node + else: + last_node = slice_node + conv_node.replace_input_with(input_node, last_node) + modified_graph = True + + if modified_graph: + graph_module = super().call(graph_module).graph_module + graph.eliminate_dead_code() + graph_module.recompile() + return PassResult(graph_module, True) diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py index 2692038352..e8fb78fea4 100644 --- a/backends/arm/quantizer/arm_quantizer.py +++ b/backends/arm/quantizer/arm_quantizer.py @@ -268,6 +268,7 @@ class ArmQuantizer(Quantizer): "sigmoid", "mm", 
"cat", + "one_to_one", ] def __init__(self) -> None: diff --git a/backends/arm/quantizer/quantization_annotation/__init__.py b/backends/arm/quantizer/quantization_annotation/__init__.py index 68ad522fee..f3017c2d7d 100644 --- a/backends/arm/quantizer/quantization_annotation/__init__.py +++ b/backends/arm/quantizer/quantization_annotation/__init__.py @@ -55,6 +55,7 @@ def decorator(annotator: AnnotatorType): max_pool2d_annotator, mm_annotator, mul_annotator, + one_to_one_annotator, sigmoid_annotator, sub_annotator, ) diff --git a/backends/arm/quantizer/quantization_annotation/one_to_one_annotator.py b/backends/arm/quantizer/quantization_annotation/one_to_one_annotator.py new file mode 100644 index 0000000000..2c3c485b05 --- /dev/null +++ b/backends/arm/quantizer/quantization_annotation/one_to_one_annotator.py @@ -0,0 +1,55 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright 2024 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Callable, List, Optional + +import torch +import torch.fx +from executorch.backends.arm.quantizer import arm_quantizer_utils +from executorch.backends.arm.quantizer.quantization_annotation import register_annotator +from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig +from torch.ao.quantization.quantizer.utils import ( + _annotate_input_qspec_map, + _annotate_output_qspec, +) +from torch.fx import Node + + +@register_annotator("one_to_one") +def _annotate_one_to_one( + gm: torch.fx.GraphModule, + quantization_config: QuantizationConfig, + filter_fn: Optional[Callable[[Node], bool]] = None, +) -> Optional[List[List[Node]]]: + """ + This annotator adds the input and output qspec from the quantization config to + ops in 'one_to_one_ops' that have the following properties: + - Have a single input and single output. + - Can handle different qspecs on the input and output. + + Typical ops are ops implemented with a lookup table. 
+ """ + annotated_partitions = [] + one_to_one_ops = (torch.ops.aten.exp.default, torch.ops.aten.log.default) + for node in gm.graph.nodes: + if node.op != "call_function" or node.target not in one_to_one_ops: + continue + if filter_fn and not filter_fn(node): + continue + input_node = node.args[0] + + if not arm_quantizer_utils.is_annotated(node): + _annotate_input_qspec_map( + node, + input_node, + quantization_config.get_input_act_qspec(), + ) + _annotate_output_qspec(node, quantization_config.get_output_act_qspec()) + + arm_quantizer_utils.mark_nodes_as_annotated([node]) + annotated_partitions.append([node]) + + return annotated_partitions diff --git a/backends/arm/runtime/ArmBackendEthosU.cpp b/backends/arm/runtime/ArmBackendEthosU.cpp index 7404293551..9f9ea8ec9f 100644 --- a/backends/arm/runtime/ArmBackendEthosU.cpp +++ b/backends/arm/runtime/ArmBackendEthosU.cpp @@ -14,7 +14,6 @@ #include #include -#include #include "executorch/backends/arm/runtime/VelaBinStream.h" #include "executorch/runtime/backend/interface.h" @@ -32,6 +31,21 @@ typedef struct { bool permuted_io_flag; } ExecutionHandle; +extern "C" { +void __attribute__((weak)) ArmBackend_execute_begin() {} +void __attribute__((weak)) ArmBackend_execute_end() {} +} + +class ArmBackendExecuteCallbacks { + public: + ArmBackendExecuteCallbacks() { + ArmBackend_execute_begin(); + } + ~ArmBackendExecuteCallbacks() { + ArmBackend_execute_end(); + } +}; + class ArmBackend final : public PyTorchBackendInterface { public: ArmBackend() {} @@ -83,6 +97,7 @@ class ArmBackend final : public PyTorchBackendInterface { ExecutionHandle* execution_handle = (ExecutionHandle*)input_handle; VelaHandles handles; + ArmBackendExecuteCallbacks ArmBackend_execute_callbacks; // Command stream - we know at this point it's aligned char* data = (char*)execution_handle->processed->data(); ET_LOG(Info, "ArmBackend::execute %p", data); @@ -233,7 +248,6 @@ class ArmBackend final : public PyTorchBackendInterface { } } } - return Error::Ok; } diff --git a/backends/arm/test/ops/test_conv.py b/backends/arm/test/ops/test_conv.py index ae1c5a65a8..8274879953 100644 --- a/backends/arm/test/ops/test_conv.py +++ b/backends/arm/test/ops/test_conv.py @@ -155,14 +155,14 @@ def forward(self, x): batches=1, ) -conv2d_2x2_1x1x14x14_st2 = Conv2d( +conv2d_2x2_1x1x14x13_st2 = Conv2d( in_channels=1, out_channels=1, kernel_size=(2, 2), stride=2, padding=0, width=14, - height=14, + height=13, batches=1, ) @@ -188,6 +188,18 @@ def forward(self, x): batches=1, ) +conv2d_5x5_1x3x14x15_st3_pd1 = Conv2d( + in_channels=3, + out_channels=16, + kernel_size=(5, 5), + stride=3, + padding=1, + width=14, + height=15, + batches=1, +) + + two_conv2d_nobias = Conv2d( nbr_conv=2, width=256, @@ -221,7 +233,8 @@ def forward(self, x): ("3x3_1x3x256x256_st1", conv2d_3x3_1x3x256x256_st1), ("3x3_1x3x12x12_st2_pd1", conv2d_3x3_1x3x12x12_st2_pd1), ("1x1_1x2x128x128_st1", conv2d_1x1_1x2x128x128_st1), - ("2x2_1x1x14x14_st2", conv2d_2x2_1x1x14x14_st2), + ("2x2_1x1x14x13_st2_needs_adjust_pass", conv2d_2x2_1x1x14x13_st2), + ("conv2d_5x5_1x3x14x15_st3_pd1_needs_adjust_pass", conv2d_5x5_1x3x14x15_st3_pd1), ("5x5_3x2x128x128_st1", conv2d_5x5_3x2x128x128_st1), ("3x3_1x3x224x224_st2_pd1", conv2d_3x3_1x3x224x224_st2_pd1), ("two_conv2d_nobias", two_conv2d_nobias), @@ -236,7 +249,10 @@ def forward(self, x): testsuite_u55.remove(("5x5_3x2x128x128_st1", conv2d_5x5_3x2x128x128_st1)) # Fails when enabling CompileSpec.set_quantize_io(True). MLETORCH-191. 
-testsuite_u55.remove(("2x2_1x1x14x14_st2", conv2d_2x2_1x1x14x14_st2)) +testsuite_u55.remove(("2x2_1x1x14x13_st2_needs_adjust_pass", conv2d_2x2_1x1x14x13_st2)) +testsuite_u55.remove( + ("conv2d_5x5_1x3x14x15_st3_pd1_needs_adjust_pass", conv2d_5x5_1x3x14x15_st3_pd1) +) class TestConv2D(unittest.TestCase): diff --git a/backends/arm/test/ops/test_exp.py b/backends/arm/test/ops/test_exp.py new file mode 100644 index 0000000000..79020ade25 --- /dev/null +++ b/backends/arm/test/ops/test_exp.py @@ -0,0 +1,110 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright 2024 Arm Limited and/or its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +from typing import Tuple + +import torch +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.arm_tester import ArmTester +from parameterized import parameterized + +test_data_suite = [ + # (test_name, test_data) + ("zeros", torch.zeros(1, 10, 10, 10)), + ("ones", torch.ones(10, 10, 10)), + ("rand", torch.rand(10, 10) - 0.5), + ("randn_pos", torch.randn(10) + 10), + ("randn_neg", torch.randn(10) - 10), + ("ramp", torch.arange(-16, 16, 0.2)), +] + + +class TestExp(unittest.TestCase): + """Tests lowering of aten.exp""" + + class Exp(torch.nn.Module): + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.exp(x) + + def _test_exp_tosa_MI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + ): + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_tosa_compile_spec(), + ) + .export() + .check(["torch.ops.aten.exp.default"]) + .check_not(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_exp_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=test_data) + ) + + def _test_exp_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple): + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_tosa_compile_spec(), + ) + .quantize() + .export() + .check(["torch.ops.aten.exp.default"]) + .check(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_exp_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=test_data) + ) + + def _test_exp_tosa_u55_BI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + ): + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_u55_compile_spec(), + ) + .quantize() + .export() + .check_count({"torch.ops.aten.exp.default": 1}) + .check(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_exp_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + @parameterized.expand(test_data_suite) + def test_exp_tosa_MI( + self, + test_name: str, + test_data: torch.Tensor, + ): + self._test_exp_tosa_MI_pipeline(self.Exp(), (test_data,)) + + @parameterized.expand(test_data_suite) + def test_exp_tosa_BI(self, test_name: str, test_data: torch.Tensor): + self._test_exp_tosa_BI_pipeline(self.Exp(), (test_data,)) + + # Fails due to Vela diff from Tosa spec, expected to work with Regor. 
+ @parameterized.expand(test_data_suite) + @unittest.expectedFailure + def test_exp_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor): + self._test_exp_tosa_u55_BI_pipeline(self.Exp(), (test_data,)) diff --git a/backends/arm/test/ops/test_log.py b/backends/arm/test/ops/test_log.py new file mode 100644 index 0000000000..80bc17c987 --- /dev/null +++ b/backends/arm/test/ops/test_log.py @@ -0,0 +1,110 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright 2024 Arm Limited and/or its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +from typing import Tuple + +import torch +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.arm_tester import ArmTester +from parameterized import parameterized + +test_data_suite = [ + # (test_name, test_data) + ("ones_rank4", torch.ones(1, 10, 10, 10)), + ("ones_rank3", torch.ones(10, 10, 10)), + ("rand", torch.rand(10, 10) + 0.001), + ("randn_pos", torch.randn(10) + 10), + ("randn_spread", torch.max(torch.Tensor([0.0]), torch.randn(10) * 100)), + ("ramp", torch.arange(0.01, 20, 0.2)), +] + + +class TestLog(unittest.TestCase): + """Tests lowering of aten.log""" + + class Log(torch.nn.Module): + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.log(x) + + def _test_log_tosa_MI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + ): + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_tosa_compile_spec(), + ) + .export() + .check(["torch.ops.aten.log.default"]) + .check_not(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_log_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=test_data) + ) + + def _test_log_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple): + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_tosa_compile_spec(), + ) + .quantize() + .export() + .check(["torch.ops.aten.log.default"]) + .check(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_log_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=test_data) + ) + + def _test_log_tosa_u55_BI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + ): + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_u55_compile_spec(), + ) + .quantize() + .export() + .check_count({"torch.ops.aten.log.default": 1}) + .check(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_log_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + @parameterized.expand(test_data_suite) + def test_log_tosa_MI( + self, + test_name: str, + test_data: torch.Tensor, + ): + self._test_log_tosa_MI_pipeline(self.Log(), (test_data,)) + + @parameterized.expand(test_data_suite) + def test_log_tosa_BI(self, test_name: str, test_data: torch.Tensor): + self._test_log_tosa_BI_pipeline(self.Log(), (test_data,)) + + # Fails due to Vela diff from Tosa spec, expected to work with Regor.
+ @parameterized.expand(test_data_suite) + @unittest.expectedFailure + def test_log_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor): + self._test_log_tosa_u55_BI_pipeline(self.Log(), (test_data,)) diff --git a/backends/qualcomm/partition/common_defs.py b/backends/qualcomm/partition/common_defs.py index 353169bc18..d68441c2f7 100644 --- a/backends/qualcomm/partition/common_defs.py +++ b/backends/qualcomm/partition/common_defs.py @@ -17,7 +17,11 @@ ] to_be_implemented_operator = [ - exir_ops.edge.aten.where.default, + exir_ops.edge.aten.any.dim, + exir_ops.edge.aten.eq.Scalar, + exir_ops.edge.aten.full_like.default, + exir_ops.edge.aten.logical_not.default, + exir_ops.edge.aten.where.self, ] allow_list_operator = [ diff --git a/backends/vulkan/runtime/api/api.h b/backends/vulkan/runtime/api/api.h index de77c57fb0..0f496a4af8 100644 --- a/backends/vulkan/runtime/api/api.h +++ b/backends/vulkan/runtime/api/api.h @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include diff --git a/backends/vulkan/runtime/api/containers/StorageBuffer.h b/backends/vulkan/runtime/api/containers/StagingBuffer.h similarity index 71% rename from backends/vulkan/runtime/api/containers/StorageBuffer.h rename to backends/vulkan/runtime/api/containers/StagingBuffer.h index 17c3470605..a24728470b 100644 --- a/backends/vulkan/runtime/api/containers/StorageBuffer.h +++ b/backends/vulkan/runtime/api/containers/StagingBuffer.h @@ -17,7 +17,7 @@ namespace vkcompute { namespace api { -class StorageBuffer final { +class StagingBuffer final { private: Context* context_p_; vkapi::ScalarType dtype_; @@ -26,26 +26,24 @@ class StorageBuffer final { vkapi::VulkanBuffer vulkan_buffer_; public: - StorageBuffer( + StagingBuffer( Context* context_p, const vkapi::ScalarType dtype, - const size_t numel, - const bool gpuonly = false) + const size_t numel) : context_p_(context_p), dtype_(dtype), numel_(numel), nbytes_(element_size(dtype_) * numel_), - vulkan_buffer_(context_p_->adapter_ptr()->vma().create_storage_buffer( - nbytes_, - gpuonly)) {} + vulkan_buffer_( + context_p_->adapter_ptr()->vma().create_staging_buffer(nbytes_)) {} - StorageBuffer(const StorageBuffer&) = delete; - StorageBuffer& operator=(const StorageBuffer&) = delete; + StagingBuffer(const StagingBuffer&) = delete; + StagingBuffer& operator=(const StagingBuffer&) = delete; - StorageBuffer(StorageBuffer&&) = default; - StorageBuffer& operator=(StorageBuffer&&) = default; + StagingBuffer(StagingBuffer&&) = default; + StagingBuffer& operator=(StagingBuffer&&) = default; - ~StorageBuffer() { + ~StagingBuffer() { context_p_->register_buffer_cleanup(vulkan_buffer_); } diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp index 578898ad19..7b9d30ef65 100644 --- a/backends/vulkan/runtime/api/containers/Tensor.cpp +++ b/backends/vulkan/runtime/api/containers/Tensor.cpp @@ -540,7 +540,7 @@ vkapi::VulkanBuffer allocate_buffer( } return adapter_ptr->vma().create_storage_buffer( - element_size(dtype) * numel, /*gpu_only = */ true, allocate_memory); + element_size(dtype) * numel, allocate_memory); } vTensorStorage::vTensorStorage( diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index 9fa0091b29..6c3ec88eaa 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -38,7 +38,7 @@ namespace vkcompute { VALUE_PTR_CLASS_IMPL(vTensorPtr, api::vTensor, Tensor) 
VALUE_PTR_CLASS_IMPL(TensorRefPtr, TensorRef, TensorRef) -VALUE_PTR_CLASS_IMPL(StagingPtr, api::StorageBuffer, Staging) +VALUE_PTR_CLASS_IMPL(StagingPtr, api::StagingBuffer, Staging) VALUE_PTR_CLASS_IMPL(IntListPtr, std::vector, IntList) VALUE_PTR_CLASS_IMPL(DoubleListPtr, std::vector, DoubleList) VALUE_PTR_CLASS_IMPL(BoolListPtr, std::vector, BoolList) @@ -236,7 +236,7 @@ ValueRef ComputeGraph::add_staging( const size_t numel) { ValueRef idx(static_cast(values_.size())); check_no_active_value_ptrs(); - values_.emplace_back(api::StorageBuffer(context(), dtype, numel)); + values_.emplace_back(api::StagingBuffer(context(), dtype, numel)); return idx; } diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index 5740d24a44..9b04b08a70 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -58,7 +58,7 @@ class ComputeGraph; DECL_VALUE_PTR_CLASS(vTensorPtr, api::vTensor) DECL_VALUE_PTR_CLASS(TensorRefPtr, TensorRef) -DECL_VALUE_PTR_CLASS(StagingPtr, api::StorageBuffer) +DECL_VALUE_PTR_CLASS(StagingPtr, api::StagingBuffer) DECL_VALUE_PTR_CLASS(IntListPtr, std::vector) DECL_VALUE_PTR_CLASS(DoubleListPtr, std::vector) DECL_VALUE_PTR_CLASS(BoolListPtr, std::vector) diff --git a/backends/vulkan/runtime/graph/containers/Value.h b/backends/vulkan/runtime/graph/containers/Value.h index ba82213c6f..6e03bbd4a2 100644 --- a/backends/vulkan/runtime/graph/containers/Value.h +++ b/backends/vulkan/runtime/graph/containers/Value.h @@ -53,7 +53,7 @@ struct Value final { } u; api::vTensor as_tensor; - api::StorageBuffer as_staging; + api::StagingBuffer as_staging; TensorRef as_tensorref; std::vector as_int_list; @@ -108,7 +108,7 @@ struct Value final { CASE_MOVE_MOVEABLE_TYPE( TypeTag::TENSOR, api::vTensor, as_tensor, vTensor); CASE_MOVE_MOVEABLE_TYPE( - TypeTag::STAGING, api::StorageBuffer, as_staging, StorageBuffer); + TypeTag::STAGING, api::StagingBuffer, as_staging, StagingBuffer); CASE_MOVE_MOVEABLE_TYPE( TypeTag::TENSORREF, TensorRef, as_tensorref, TensorRef); // Scalar lists @@ -152,7 +152,7 @@ struct Value final { payload.as_tensor.~vTensor(); break; case TypeTag::STAGING: - payload.as_staging.~StorageBuffer(); + payload.as_staging.~StagingBuffer(); break; case TypeTag::TENSORREF: payload.as_tensorref.~TensorRef(); @@ -247,7 +247,7 @@ struct Value final { as_tensor); SUPPORT_TRIVIALLY_MOVEABLE_TYPE( - api::StorageBuffer, + api::StagingBuffer, Staging, TypeTag::STAGING, as_staging); diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp index b77c62920d..a9c2f6c9b6 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp @@ -45,14 +45,14 @@ PrepackNode::PrepackNode( graph.update_descriptor_counts(noop_shader_, /*execute = */ false); } -api::StorageBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) { +api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) { vTensorPtr packed = graph->get_tensor(packed_); // If no TensorRef is provided, create a staging buffer of zeros according to // the vkapi::vTensor metadata. 
if (graph->val_is_none(tref_)) { size_t numel = utils::multiply_integers(packed->sizes()); - api::StorageBuffer staging(graph->context(), packed->dtype(), numel); + api::StagingBuffer staging(graph->context(), packed->dtype(), numel); size_t nbytes = numel * vkapi::element_size(packed->dtype()); set_staging_zeros(staging, nbytes); return staging; @@ -60,7 +60,7 @@ api::StorageBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) { TensorRefPtr tref = graph->get_tref(tref_); size_t numel = utils::multiply_integers(tref->sizes); - api::StorageBuffer staging(graph->context(), tref->dtype, numel); + api::StagingBuffer staging(graph->context(), tref->dtype, numel); size_t nbytes = numel * vkapi::element_size(tref->dtype); copy_ptr_to_staging(tref->data, staging, nbytes); return staging; @@ -70,7 +70,7 @@ void PrepackNode::encode(ComputeGraph* graph) { api::Context* const context = graph->context(); vTensorPtr packed = graph->get_tensor(packed_); - api::StorageBuffer staging = create_staging_buffer(graph); + api::StagingBuffer staging = create_staging_buffer(graph); std::unique_lock cmd_lock = context->dispatch_lock(); diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.h b/backends/vulkan/runtime/graph/ops/PrepackNode.h index c3ac8b963f..3e713303c3 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.h +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.h @@ -56,7 +56,7 @@ class PrepackNode final { const vkapi::SpecVarList spec_vars_; private: - api::StorageBuffer create_staging_buffer(ComputeGraph* graph); + api::StagingBuffer create_staging_buffer(ComputeGraph* graph); }; } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp index b0964ace22..2cfb34a052 100644 --- a/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp @@ -66,7 +66,7 @@ uint32_t bind_params_to_descriptor_set( } void bind_staging_to_descriptor_set( - api::StorageBuffer& staging, + api::StagingBuffer& staging, vkapi::DescriptorSet& descriptor_set, const uint32_t idx) { descriptor_set.bind(idx, staging.buffer()); diff --git a/backends/vulkan/runtime/graph/ops/utils/BindingUtils.h b/backends/vulkan/runtime/graph/ops/utils/BindingUtils.h index 3a7ec029da..eed39a9797 100644 --- a/backends/vulkan/runtime/graph/ops/utils/BindingUtils.h +++ b/backends/vulkan/runtime/graph/ops/utils/BindingUtils.h @@ -40,7 +40,7 @@ uint32_t bind_params_to_descriptor_set( const uint32_t base_idx); void bind_staging_to_descriptor_set( - api::StorageBuffer& staging, + api::StagingBuffer& staging, vkapi::DescriptorSet& descriptor_set, const uint32_t idx); diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp index 294e36b9a8..9cb715e202 100644 --- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp @@ -73,7 +73,7 @@ void memcpy_from_mapping( void copy_ptr_to_staging( const void* src, - api::StorageBuffer& staging, + api::StagingBuffer& staging, const size_t nbytes) { vkapi::MemoryMap mapping(staging.buffer(), vkapi::MemoryAccessType::WRITE); mapping.invalidate(); @@ -81,7 +81,7 @@ void copy_ptr_to_staging( } void copy_staging_to_ptr( - api::StorageBuffer& staging, + api::StagingBuffer& staging, void* dst, const size_t nbytes) { vkapi::MemoryMap mapping(staging.buffer(), vkapi::MemoryAccessType::READ); @@ -89,7 +89,7 @@ void 
copy_staging_to_ptr( memcpy_from_mapping(mapping, dst, nbytes, staging.dtype()); } -void set_staging_zeros(api::StorageBuffer& staging, const size_t nbytes) { +void set_staging_zeros(api::StagingBuffer& staging, const size_t nbytes) { vkapi::MemoryMap mapping(staging.buffer(), vkapi::MemoryAccessType::WRITE); uint8_t* data_ptr = mapping.template data(); memset(data_ptr, 0, staging.nbytes()); diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h index cabc17f30e..f16c52ecf3 100644 --- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h +++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h @@ -18,14 +18,14 @@ namespace vkcompute { void copy_ptr_to_staging( const void* src, - api::StorageBuffer& staging, + api::StagingBuffer& staging, const size_t nbytes); void copy_staging_to_ptr( - api::StorageBuffer& staging, + api::StagingBuffer& staging, void* dst, const size_t nbytes); -void set_staging_zeros(api::StorageBuffer& staging, const size_t nbytes); +void set_staging_zeros(api::StagingBuffer& staging, const size_t nbytes); // // Functions to get shaders diff --git a/backends/vulkan/runtime/vk_api/memory/Allocator.cpp b/backends/vulkan/runtime/vk_api/memory/Allocator.cpp index 1dadca27a0..b990cf6a11 100644 --- a/backends/vulkan/runtime/vk_api/memory/Allocator.cpp +++ b/backends/vulkan/runtime/vk_api/memory/Allocator.cpp @@ -132,45 +132,36 @@ VulkanImage Allocator::create_image( allocate_memory); } -VulkanBuffer Allocator::create_storage_buffer( - const VkDeviceSize size, - const bool gpu_only, - const bool allocate_memory) { +VulkanBuffer Allocator::create_staging_buffer(const VkDeviceSize size) { const VkBufferUsageFlags buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; VmaAllocationCreateInfo alloc_create_info = {}; alloc_create_info.flags = DEFAULT_ALLOCATION_STRATEGY; alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; - // The create storage buffer will be accessed by both the CPU and GPU, so set - // the appropriate flags to indicate that the host device will be accessing + // Staging buffers are accessed by both the CPU and GPU, so set the + // appropriate flags to indicate that the host device will be accessing // the data from this buffer. - if (!gpu_only) { - // Deferred memory allocation should only be used for GPU only buffers. 
- VK_CHECK_COND( - allocate_memory, - "Only GPU-only buffers should use deferred memory allocation"); - - alloc_create_info.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; - alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; - alloc_create_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - } + alloc_create_info.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + alloc_create_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + alloc_create_info.preferredFlags = + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - return VulkanBuffer( - allocator_, size, alloc_create_info, buffer_usage, allocate_memory); + return VulkanBuffer(allocator_, size, alloc_create_info, buffer_usage); } -VulkanBuffer Allocator::create_staging_buffer(const VkDeviceSize size) { +VulkanBuffer Allocator::create_storage_buffer( + const VkDeviceSize size, + const bool allocate_memory) { + const VkBufferUsageFlags buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + VmaAllocationCreateInfo alloc_create_info = {}; alloc_create_info.flags = DEFAULT_ALLOCATION_STRATEGY; - alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; - - VkBufferUsageFlags buffer_usage = - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; - return VulkanBuffer(allocator_, size, alloc_create_info, buffer_usage); + return VulkanBuffer( + allocator_, size, alloc_create_info, buffer_usage, allocate_memory); } VulkanBuffer Allocator::create_uniform_buffer(const VkDeviceSize size) { @@ -181,9 +172,7 @@ VulkanBuffer Allocator::create_uniform_buffer(const VkDeviceSize size) { VkBufferUsageFlags buffer_usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - VulkanBuffer uniform_buffer( - allocator_, size, alloc_create_info, buffer_usage); - return uniform_buffer; + return VulkanBuffer(allocator_, size, alloc_create_info, buffer_usage); } } // namespace vkapi diff --git a/backends/vulkan/runtime/vk_api/memory/Allocator.h b/backends/vulkan/runtime/vk_api/memory/Allocator.h index 904163cefb..7d02ffe54e 100644 --- a/backends/vulkan/runtime/vk_api/memory/Allocator.h +++ b/backends/vulkan/runtime/vk_api/memory/Allocator.h @@ -62,13 +62,12 @@ class Allocator final { const bool allow_transfer = false, const bool allocate_memory = true); + VulkanBuffer create_staging_buffer(const VkDeviceSize); + VulkanBuffer create_storage_buffer( const VkDeviceSize, - const bool gpu_only = true, const bool allocate_memory = true); - VulkanBuffer create_staging_buffer(const VkDeviceSize); - /* * Create a uniform buffer with a specified size */ diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp index 4a3a41d6c7..4c2972419d 100644 --- a/backends/vulkan/test/utils/test_utils.cpp +++ b/backends/vulkan/test/utils/test_utils.cpp @@ -112,7 +112,7 @@ void record_image_to_nchw_op( void record_int8_image_to_nchw_noint8_op( api::Context* const context, api::vTensor& v_src, - api::StorageBuffer& dst_buffer) { + api::StagingBuffer& dst_buffer) { vkapi::PipelineBarrier pipeline_barrier{}; uint32_t buffer_len = utils::safe_downcast(dst_buffer.numel() / 4); utils::uvec3 global_wg_size = {buffer_len, 1, 1}; @@ -324,7 +324,7 @@ void record_reference_matmul( _(int8_t, QInt8) void fill_vtensor(api::vTensor& vten, std::vector& data) { - 
api::StorageBuffer staging_buffer(api::context(), vten.dtype(), data.size()); + api::StagingBuffer staging_buffer(api::context(), vten.dtype(), data.size()); #define CASE(ctype, name) \ case vkapi::ScalarType::name: { \ @@ -411,7 +411,7 @@ void fill_vtensor( } void extract_vtensor(api::vTensor& vten, std::vector& data) { - api::StorageBuffer staging_buffer( + api::StagingBuffer staging_buffer( api::context(), vten.dtype(), vten.staging_buffer_numel()); if (vten.storage_type() == utils::StorageType::BUFFER) { diff --git a/backends/vulkan/test/utils/test_utils.h b/backends/vulkan/test/utils/test_utils.h index c8af547086..3bc12c472d 100644 --- a/backends/vulkan/test/utils/test_utils.h +++ b/backends/vulkan/test/utils/test_utils.h @@ -37,13 +37,13 @@ using namespace vkcompute; allocate_memory); #define DEFINE_STAGING_BUFFER_AND_RECORD_TO_GPU_FOR(tensor) \ - api::StorageBuffer staging_buffer_##tensor( \ + api::StagingBuffer staging_buffer_##tensor( \ api::context(), vkapi::kFloat, tensor.staging_buffer_numel()); \ record_nchw_to_image_op( \ api::context(), staging_buffer_##tensor.buffer(), tensor); #define DEFINE_STAGING_BUFFER_AND_RECORD_FROM_GPU_FOR(tensor) \ - api::StorageBuffer staging_buffer_##tensor( \ + api::StagingBuffer staging_buffer_##tensor( \ api::context(), vkapi::kFloat, tensor.staging_buffer_numel()); \ record_image_to_nchw_op( \ api::context(), tensor, staging_buffer_##tensor.buffer()); @@ -85,7 +85,7 @@ void record_image_to_nchw_op( void record_int8_image_to_nchw_noint8_op( api::Context* const context, api::vTensor& v_src, - api::StorageBuffer& dst_buffer); + api::StagingBuffer& dst_buffer); void record_conv2d_prepack_weights_op( api::Context* const context, @@ -126,7 +126,7 @@ void record_reference_matmul( // inline void -fill_staging(api::StorageBuffer& staging, float val, int numel = -1) { +fill_staging(api::StagingBuffer& staging, float val, int numel = -1) { if (numel < 0) { numel = staging.numel(); } @@ -164,7 +164,7 @@ inline std::vector extract_vtensor(api::vTensor& vten) { } inline void -check_staging_buffer(api::StorageBuffer& staging, float val, int numel = -1) { +check_staging_buffer(api::StagingBuffer& staging, float val, int numel = -1) { if (numel < 0) { numel = staging.numel(); } diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index 3d172f490c..f3c60a2137 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -336,7 +336,7 @@ TEST_F(VulkanComputeAPITest, spec_var_classes_test) { TEST_F(VulkanComputeAPITest, spec_var_shader_test) { size_t len = 16; - StorageBuffer buffer(context(), vkapi::kFloat, len); + StagingBuffer buffer(context(), vkapi::kFloat, len); float scale = 3.0f; float offset = 1.5f; @@ -407,7 +407,7 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) { params.buffer()); } - StorageBuffer staging_buffer( + StagingBuffer staging_buffer( context(), vkapi::kFloat, a.staging_buffer_numel()); record_image_to_nchw_op(context(), a, staging_buffer.buffer()); @@ -428,7 +428,7 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) { template void test_storage_buffer_type(const size_t len) { - StorageBuffer buffer(context(), dtype, len); + StagingBuffer buffer(context(), dtype, len); std::string kernel_name("idx_fill_buffer"); switch (dtype) { @@ -2040,7 +2040,7 @@ void run_from_gpu_test( vten.sizes_ubo()); } - StorageBuffer staging_buffer(context(), dtype, vten.staging_buffer_numel()); + StagingBuffer 
staging_buffer(context(), dtype, vten.staging_buffer_numel()); if (dtype == vkapi::kChar && !context()->adapter_ptr()->has_full_int8_buffers_support()) { @@ -2073,7 +2073,7 @@ void round_trip_test( vTensor vten = vTensor(context(), sizes, dtype, storage_type, memory_layout); // Create and fill input staging buffer - StorageBuffer staging_buffer_in( + StagingBuffer staging_buffer_in( context(), dtype, vten.staging_buffer_numel()); std::vector data_in(staging_buffer_in.numel()); @@ -2084,7 +2084,7 @@ void round_trip_test( data_in.data(), staging_buffer_in, vten.staging_buffer_nbytes()); // Output staging buffer - StorageBuffer staging_buffer_out( + StagingBuffer staging_buffer_out( context(), dtype, vten.staging_buffer_numel()); record_nchw_to_image_op(context(), staging_buffer_in.buffer(), vten); @@ -2538,7 +2538,7 @@ void test_conv2d( // Create and fill input staging buffer const int64_t in_numel = utils::multiply_integers(original_sizes); - StorageBuffer staging_buffer_in(context(), vkapi::kFloat, in_numel); + StagingBuffer staging_buffer_in(context(), vkapi::kFloat, in_numel); std::vector data_in(in_numel); for (int i = 0; i < in_numel; i++) { @@ -2550,7 +2550,7 @@ void test_conv2d( // Output staging buffer const int64_t out_numel = padded_sizes[0] * padded_sizes[1] * original_sizes[2] * original_sizes[3]; - StorageBuffer staging_buffer_out(context(), vkapi::kFloat, out_numel); + StagingBuffer staging_buffer_out(context(), vkapi::kFloat, out_numel); // Copy data in and out of the tensor record_conv2d_prepack_weights_op( diff --git a/backends/vulkan/tools/gpuinfo/include/architecture.h b/backends/vulkan/tools/gpuinfo/include/architecture.h index 0d312ee87c..20c6254e1a 100644 --- a/backends/vulkan/tools/gpuinfo/include/architecture.h +++ b/backends/vulkan/tools/gpuinfo/include/architecture.h @@ -40,7 +40,7 @@ void reg_count(const App& app) { uint32_t NITER; auto bench = [&](uint32_t ngrp, uint32_t nreg) { - StorageBuffer buffer(context(), vkapi::kFloat, 1); + StagingBuffer buffer(context(), vkapi::kFloat, 1); vkapi::PipelineBarrier pipeline_barrier{}; auto shader_name = "reg_count_" + std::to_string(nreg); @@ -164,7 +164,7 @@ void warp_size(const App& app, const bool verbose = false) { uint32_t NITER; auto bench = [&](uint32_t nthread) { - StorageBuffer out_buf(context(), vkapi::kInt, app.nthread_logic); + StagingBuffer out_buf(context(), vkapi::kInt, app.nthread_logic); vkapi::PipelineBarrier pipeline_barrier{}; auto shader_name = "warp_size_physical"; @@ -224,7 +224,7 @@ void warp_size(const App& app, const bool verbose = false) { // doesn't depend on kernel timing, so the extra wait time doesn't lead to // inaccuracy. 
auto bench_sm = [&](uint32_t nthread) { - StorageBuffer out_buf(context(), vkapi::kInt, app.nthread_logic); + StagingBuffer out_buf(context(), vkapi::kInt, app.nthread_logic); vkapi::PipelineBarrier pipeline_barrier{}; auto shader_name = "warp_size_scheduler"; diff --git a/backends/vulkan/tools/gpuinfo/include/buffers.h b/backends/vulkan/tools/gpuinfo/include/buffers.h index c8cf93c4a1..31137b11ee 100644 --- a/backends/vulkan/tools/gpuinfo/include/buffers.h +++ b/backends/vulkan/tools/gpuinfo/include/buffers.h @@ -35,8 +35,8 @@ void buf_cacheline_size(const App& app) { uint32_t NITER; auto bench = [&](int stride) { - StorageBuffer in_buf(context(), vkapi::kFloat, BUF_SIZE); - StorageBuffer out_buf(context(), vkapi::kFloat, 1); + StagingBuffer in_buf(context(), vkapi::kFloat, BUF_SIZE); + StagingBuffer out_buf(context(), vkapi::kFloat, 1); vkapi::PipelineBarrier pipeline_barrier{}; auto shader_name = "buf_cacheline_size"; @@ -132,8 +132,8 @@ void _bandwidth( // workgroups, once the size of the access excedes the workgroup width. const uint32_t workgroup_width = local_x * NITER * NUNROLL; - StorageBuffer in_buf(context(), vkapi::kFloat, range / sizeof(float)); - StorageBuffer out_buf( + StagingBuffer in_buf(context(), vkapi::kFloat, range / sizeof(float)); + StagingBuffer out_buf( context(), vkapi::kFloat, VEC_WIDTH * app.nthread_logic); vkapi::PipelineBarrier pipeline_barrier{}; diff --git a/backends/vulkan/tools/gpuinfo/include/textures.h b/backends/vulkan/tools/gpuinfo/include/textures.h index 7679f11b0c..c9ff133f1e 100644 --- a/backends/vulkan/tools/gpuinfo/include/textures.h +++ b/backends/vulkan/tools/gpuinfo/include/textures.h @@ -61,7 +61,7 @@ void tex_cacheline_concurr(const App& app) { vTensor in_tensor = api::vTensor(api::context(), sizes_nchw, vkapi::kFloat); - StorageBuffer out_buf(context(), vkapi::kFloat, TEXEL_WIDTH); + StagingBuffer out_buf(context(), vkapi::kFloat, TEXEL_WIDTH); vkapi::PipelineBarrier pipeline_barrier{}; @@ -173,7 +173,7 @@ void tex_bandwidth(const App& app) { // workgroups, once the size of the access excedes the workgroup width. const uint32_t workgroup_width = local_x * NITER * NUNROLL; - StorageBuffer out_buf( + StagingBuffer out_buf( context(), vkapi::kFloat, VEC_WIDTH * app.nthread_logic); vkapi::PipelineBarrier pipeline_barrier{}; diff --git a/build/build_android_llm_demo.sh b/build/build_android_llm_demo.sh index 4d34eb95b2..7b7150de21 100644 --- a/build/build_android_llm_demo.sh +++ b/build/build_android_llm_demo.sh @@ -30,6 +30,7 @@ build_android_native_library() { -DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ diff --git a/codegen/tools/gen_oplist.py b/codegen/tools/gen_oplist.py index f21fb8dc6b..fbb191a6a8 100644 --- a/codegen/tools/gen_oplist.py +++ b/codegen/tools/gen_oplist.py @@ -230,7 +230,7 @@ def gen_oplist( if model_file_path: assert os.path.isfile( model_file_path - ), "The value for --model_file_path needs to be a valid file." 
+ ), f"The value for --model_file_path needs to be a valid file, got {model_file_path}" op_set.update(_get_operators(model_file_path)) source_name = model_file_path et_kernel_metadata = merge_et_kernel_metadata( @@ -239,7 +239,7 @@ def gen_oplist( if ops_schema_yaml_path: assert os.path.isfile( ops_schema_yaml_path - ), "The value for --ops_schema_yaml_path needs to be a valid file." + ), f"The value for --ops_schema_yaml_path needs to be a valid file, got {ops_schema_yaml_path}" et_kernel_metadata = merge_et_kernel_metadata( et_kernel_metadata, _get_et_kernel_metadata_from_ops_yaml(ops_schema_yaml_path), @@ -300,14 +300,33 @@ def main(args: List[Any]) -> None: ) options = parser.parse_args(args) - gen_oplist( - output_path=options.output_path, - model_file_path=options.model_file_path, - ops_schema_yaml_path=options.ops_schema_yaml_path, - root_ops=options.root_ops, - ops_dict=options.ops_dict, - include_all_operators=options.include_all_operators, - ) + try: + gen_oplist( + output_path=options.output_path, + model_file_path=options.model_file_path, + ops_schema_yaml_path=options.ops_schema_yaml_path, + root_ops=options.root_ops, + ops_dict=options.ops_dict, + include_all_operators=options.include_all_operators, + ) + except Exception as e: + command = ["python codegen/tools/gen_oplist.py"] + if options.model_file_path: + command.append(f"--model_file_path {options.model_file_path}") + if options.ops_schema_yaml_path: + command.append(f"--ops_schema_yaml_path {options.ops_schema_yaml_path}") + if options.root_ops: + command.append(f"--root_ops {options.root_ops}") + if options.ops_dict: + command.append(f"--ops_dict {options.ops_dict}") + if options.include_all_operators: + command.append("--include-all-operators") + repro_command = " ".join(command) + raise RuntimeError( + f"""Failed to generate selected_operators.yaml. 
Repro command: + {repro_command} + """ + ) from e if __name__ == "__main__": diff --git a/codegen/tools/test/test_gen_oplist.py b/codegen/tools/test/test_gen_oplist.py index d455ddb689..bd1d008248 100644 --- a/codegen/tools/test/test_gen_oplist.py +++ b/codegen/tools/test/test_gen_oplist.py @@ -42,7 +42,7 @@ def test_gen_op_list_with_wrong_path( mock_get_operators: NonCallableMock, ) -> None: args = ["--output_path=wrong_path", "--model_file_path=path2"] - with self.assertRaises(AssertionError): + with self.assertRaises(RuntimeError): gen_oplist.main(args) @patch("executorch.codegen.tools.gen_oplist._get_kernel_metadata_for_model") diff --git a/examples/arm/executor_runner/CMakeLists.txt b/examples/arm/executor_runner/CMakeLists.txt index 136f72ee75..b32b2d8d2b 100644 --- a/examples/arm/executor_runner/CMakeLists.txt +++ b/examples/arm/executor_runner/CMakeLists.txt @@ -133,7 +133,7 @@ endif() # The arm_executor_runner executable add_executable(arm_executor_runner) -target_sources(arm_executor_runner PRIVATE arm_executor_runner.cpp) +target_sources(arm_executor_runner PRIVATE arm_executor_runner.cpp arm_perf_monitor.cpp) # Include the target's bare-metal linker script ethosu_eval_link_options(arm_executor_runner) diff --git a/examples/arm/executor_runner/arm_executor_runner.cpp b/examples/arm/executor_runner/arm_executor_runner.cpp index 8605038936..f8f9d34ecf 100644 --- a/examples/arm/executor_runner/arm_executor_runner.cpp +++ b/examples/arm/executor_runner/arm_executor_runner.cpp @@ -20,6 +20,8 @@ #include #include +#include "arm_perf_monitor.h" + /** * This header file is generated by the build process based on the .pte file * specified in the ET_PTE_FILE_PATH variable to the cmake build. @@ -350,7 +352,10 @@ int main(int argc, const char* argv[]) { ET_LOG(Info, "Input prepared."); ET_LOG(Info, "Starting the model execution..."); + StartMeasurements(); Error status = method->execute(); + StopMeasurements(); + if (status != Error::Ok) { ET_LOG( Info, @@ -368,6 +373,8 @@ int main(int argc, const char* argv[]) { for (int i = 0; i < outputs.size(); ++i) { Tensor t = outputs[i].toTensor(); #ifndef SEMIHOSTING + // The output might be collected and parsed so printf() is used instead + // of ET_LOG() here for (int j = 0; j < outputs[i].toTensor().numel(); ++j) { if (t.scalar_type() == ScalarType::Int) { printf( diff --git a/examples/arm/executor_runner/arm_perf_monitor.cpp b/examples/arm/executor_runner/arm_perf_monitor.cpp new file mode 100644 index 0000000000..c53d28baab --- /dev/null +++ b/examples/arm/executor_runner/arm_perf_monitor.cpp @@ -0,0 +1,173 @@ +/* Copyright 2024 Arm Limited and/or its affiliates. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */
+
+#include
+#include
+
+#include "arm_perf_monitor.h"
+
+#ifdef ETHOSU
+#include
+#include
+#include
+
+static uint32_t ethosu_inference_count = 0;
+static uint64_t ethosu_ArmBackendExecuteCycleCountStart = 0;
+static uint64_t ethosu_ArmBackendExecuteCycleCount = 0;
+static uint64_t ethosu_ArmWhenNPURunCycleCountStart = 0;
+static uint64_t ethosu_ArmWhenNPURunCycleCount = 0;
+static uint64_t ethosu_pmuCycleCount = 0;
+static std::vector ethosu_pmuEventCounts(
+    ETHOSU_PMU_Get_NumEventCounters(),
+    0);
+
+static const uint32_t ethosu_pmuCountersUsed = 4;
+// ethosu_pmuCountersUsed should match the number of counters set up in
+// ethosu_inference_begin() and must not be more than the HW supports
+static_assert(ETHOSU_PMU_NCOUNTERS >= ethosu_pmuCountersUsed);
+
+extern "C" {
+
+// Callback invoked at start of NPU execution
+void ethosu_inference_begin(struct ethosu_driver* drv, void*) {
+  // Enable PMU
+  ETHOSU_PMU_Enable(drv);
+  ETHOSU_PMU_PMCCNTR_CFG_Set_Stop_Event(drv, ETHOSU_PMU_NPU_IDLE);
+  ETHOSU_PMU_PMCCNTR_CFG_Set_Start_Event(drv, ETHOSU_PMU_NPU_ACTIVE);
+
+  // Set up 4 counters
+  ETHOSU_PMU_Set_EVTYPER(drv, 0, ETHOSU_PMU_AXI0_RD_DATA_BEAT_RECEIVED);
+  ETHOSU_PMU_Set_EVTYPER(drv, 1, ETHOSU_PMU_AXI1_RD_DATA_BEAT_RECEIVED);
+  ETHOSU_PMU_Set_EVTYPER(drv, 2, ETHOSU_PMU_AXI0_WR_DATA_BEAT_WRITTEN);
+  ETHOSU_PMU_Set_EVTYPER(drv, 3, ETHOSU_PMU_NPU_IDLE);
+  // Enable 4 counters
+  ETHOSU_PMU_CNTR_Enable(drv, 0xf);
+
+  ETHOSU_PMU_CNTR_Enable(drv, ETHOSU_PMU_CCNT_Msk);
+  ETHOSU_PMU_CYCCNT_Reset(drv);
+
+  // Reset all counters
+  ETHOSU_PMU_EVCNTR_ALL_Reset(drv);
+
+  // Save Cortex-M cycle clock to calculate total CPU cycles used in
+  // ethosu_inference_end()
+  ethosu_ArmWhenNPURunCycleCountStart = ARM_PMU_Get_CCNTR();
+}
+
+// Callback invoked at end of NPU execution
+void ethosu_inference_end(struct ethosu_driver* drv, void*) {
+  ethosu_inference_count++;
+  ethosu_pmuCycleCount += ETHOSU_PMU_Get_CCNTR(drv);
+
+  for (size_t i = 0; i < ethosu_pmuCountersUsed; i++) {
+    ethosu_pmuEventCounts[i] += ETHOSU_PMU_Get_EVCNTR(drv, i);
+  }
+  ETHOSU_PMU_Disable(drv);
+  // Add Cortex-M cycle clock used during this NPU execution
+  ethosu_ArmWhenNPURunCycleCount +=
+      (ARM_PMU_Get_CCNTR() - ethosu_ArmWhenNPURunCycleCountStart);
+}
+
+// Callback invoked at start of ArmBackend::execute()
+void ArmBackend_execute_begin() {
+  // Save Cortex-M cycle clock to calculate total CPU cycles used in
+  // ArmBackend_execute_end()
+  ethosu_ArmBackendExecuteCycleCountStart = ARM_PMU_Get_CCNTR();
+}
+
+// Callback invoked at end of ArmBackend::execute()
+void ArmBackend_execute_end() {
+  // Add Cortex-M cycle clock used during this ArmBackend::execute()
+  ethosu_ArmBackendExecuteCycleCount +=
+      (ARM_PMU_Get_CCNTR() - ethosu_ArmBackendExecuteCycleCountStart);
+}
+}
+
+void StartMeasurements() {
+  ethosu_ArmBackendExecuteCycleCount = 0;
+  ethosu_ArmWhenNPURunCycleCount = 0;
+  ethosu_pmuCycleCount = 0;
+
+  for (size_t i = 0; i < ethosu_pmuCountersUsed; i++) {
+    ethosu_pmuEventCounts[i] = 0;
+  }
+  ARM_PMU_Enable();
+  DCB->DEMCR |= DCB_DEMCR_TRCENA_Msk; // Trace enable
+  ARM_PMU_CYCCNT_Reset();
+  ARM_PMU_CNTR_Enable(PMU_CNTENSET_CCNTR_ENABLE_Msk);
+}
+
+void StopMeasurements() {
+  ARM_PMU_CNTR_Disable(
+      PMU_CNTENCLR_CCNTR_ENABLE_Msk | PMU_CNTENCLR_CNT0_ENABLE_Msk |
+      PMU_CNTENCLR_CNT1_ENABLE_Msk);
+  uint32_t cycle_count = ARM_PMU_Get_CCNTR();
+
+  // Number of command streams handled by the NPU
+  ET_LOG(Info, "NPU Inferences : %d", ethosu_inference_count);
+  ET_LOG(Info, "Profiler report, CPU cycles per operator:");
+  // This is the number of CPU cycles for the ethos-u operator from start to
+  // finish in the framework. If there is more than one command stream, the
+  // times are added together.
+  ET_LOG(
+      Info,
+      "ethos-u : cycle_cnt : %d cycles",
+      ethosu_ArmBackendExecuteCycleCount);
+  // We could print a list of the cycles used by the other delegates here in
+  // the future, but for now we only print ethos-u. This means that
+  // "Operator(s) total: ..." will be the same number as ethos-u : cycle_cnt
+  // and not the sum of all delegates.
+  ET_LOG(
+      Info,
+      "Operator(s) total: %d CPU cycles",
+      ethosu_ArmBackendExecuteCycleCount);
+  // Total CPU cycles used in the executorch method->execute()
+  // Other delegates and non-delegated code are counted in this
+  ET_LOG(Info, "Inference runtime: %d CPU cycles total", cycle_count);
+
+  ET_LOG(
+      Info,
+      "NOTE: CPU cycle values and ratio calculations require FPGA and identical CPU/NPU frequency");
+
+  // Avoid division by zero if ARM_PMU_Get_CCNTR() is not enabled properly.
+  if (cycle_count == 0) {
+    ET_LOG(Info, "Inference CPU ratio: ?.?? %%");
+    ET_LOG(Info, "Inference NPU ratio: ?.?? %%");
+  } else {
+    ET_LOG(
+        Info,
+        "Inference CPU ratio: %.2f %%",
+        100.0 * (cycle_count - ethosu_ArmWhenNPURunCycleCount) / cycle_count);
+    ET_LOG(
+        Info,
+        "Inference NPU ratio: %.2f %%",
+        100.0 * ethosu_ArmWhenNPURunCycleCount / cycle_count);
+  }
+
+  // CPU cycles used by the NPU, i.e. the number of CPU cycles spent between
+  // ethosu_inference_begin() and ethosu_inference_end()
+  // If there is more than one command stream, the times are added together
+  ET_LOG(
+      Info,
+      "cpu_wait_for_npu_cntr : %" PRIu64 " CPU cycles",
+      ethosu_ArmWhenNPURunCycleCount);
+
+  ET_LOG(Info, "Ethos-U PMU report:");
+  ET_LOG(Info, "ethosu_pmu_cycle_cntr : %" PRIu64, ethosu_pmuCycleCount);
+
+  for (size_t i = 0; i < ethosu_pmuCountersUsed; i++) {
+    ET_LOG(Info, "ethosu_pmu_cntr%zd : %" PRIu64, i, ethosu_pmuEventCounts[i]);
+  }
+  ET_LOG(
+      Info,
+      "Ethos-U PMU Events:[ETHOSU_PMU_AXI0_RD_DATA_BEAT_RECEIVED, ETHOSU_PMU_AXI1_RD_DATA_BEAT_RECEIVED, ETHOSU_PMU_AXI0_WR_DATA_BEAT_WRITTEN, ETHOSU_PMU_NPU_IDLE]");
+}
+
+#else
+void StartMeasurements() {}
+
+void StopMeasurements() {}
+
+#endif
diff --git a/examples/arm/executor_runner/arm_perf_monitor.h b/examples/arm/executor_runner/arm_perf_monitor.h
new file mode 100644
index 0000000000..3925a9a571
--- /dev/null
+++ b/examples/arm/executor_runner/arm_perf_monitor.h
@@ -0,0 +1,10 @@
+/* Copyright 2024 Arm Limited and/or its affiliates.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+void StartMeasurements();
+void StopMeasurements();
diff --git a/examples/models/llama2/eval_llama_lib.py b/examples/models/llama2/eval_llama_lib.py
index 9e27b987bb..3ea4e77a1a 100644
--- a/examples/models/llama2/eval_llama_lib.py
+++ b/examples/models/llama2/eval_llama_lib.py
@@ -62,15 +62,19 @@ def _model_call(self, inps):
         # Given inps (tokens), return the logits from a single forward call
         # inps: Tensor of shape (1, max_seq_len - 1)
         # logits: Tensor of shape (1, max_seq_len - 1, vocab_size)
+        result = []
         if self._use_kv_cache:
             pos_tensor = torch.tensor([0], dtype=torch.int64, device=self.device)
             result = self._et_model.forward(
                 (inps[:, : self._max_seq_length], pos_tensor)
             )
-            return result[0]
         else:
             result = self._et_model.forward((inps,))
-            return result[0]
+        if result[0].dim() != 3:
+            raise ValueError(
+                f"Dim of logits must be 3 for evaluation. Got {result[0].dim()} here.
Add --generate_full_logits in export_llama to generate a pte file with full logits." + ) + return result[0] class ETRunnerEvalWrapper(EagerEvalWrapper): diff --git a/examples/models/llava/main.cpp b/examples/models/llava/main.cpp index 431f86c906..171eb77077 100644 --- a/examples/models/llava/main.cpp +++ b/examples/models/llava/main.cpp @@ -103,6 +103,6 @@ int32_t main(int32_t argc, char** argv) { .width = static_cast(image_tensor.size(2)), .height = static_cast(image_tensor.size(1))}}; // generate - runner.generate(images, prompt, seq_len); + runner.generate(std::move(images), prompt, seq_len); return 0; } diff --git a/examples/models/llava/runner/llava_runner.cpp b/examples/models/llava/runner/llava_runner.cpp index b186af892f..cb968ca88d 100644 --- a/examples/models/llava/runner/llava_runner.cpp +++ b/examples/models/llava/runner/llava_runner.cpp @@ -74,7 +74,7 @@ Error LlavaRunner::load() { } Error LlavaRunner::generate( - std::vector& images, + std::vector images, const std::string& prompt, int32_t seq_len, std::function token_callback, diff --git a/examples/models/llava/runner/llava_runner.h b/examples/models/llava/runner/llava_runner.h index 13d842e30f..9b14bc9283 100644 --- a/examples/models/llava/runner/llava_runner.h +++ b/examples/models/llava/runner/llava_runner.h @@ -31,7 +31,7 @@ class LlavaRunner : public MultimodalRunner { bool is_loaded(); Error load(); Error generate( - std::vector& images, + std::vector images, const std::string& prompt, int32_t seq_len = 1024, std::function token_callback = {}, diff --git a/examples/models/llava/runner/targets.bzl b/examples/models/llava/runner/targets.bzl index 435ab2a8c7..72942acf16 100644 --- a/examples/models/llava/runner/targets.bzl +++ b/examples/models/llava/runner/targets.bzl @@ -8,6 +8,9 @@ def define_common_targets(): visibility = [ "@EXECUTORCH_CLIENTS", ], + compiler_flags = [ + "-Wno-global-constructors", + ], exported_deps = [ "//executorch/backends/xnnpack:xnnpack_backend", "//executorch/extension/llm/runner:runner_lib", diff --git a/examples/qualcomm/scripts/torchvision_vit.py b/examples/qualcomm/scripts/torchvision_vit.py index dc9459bb13..c9fc988d56 100755 --- a/examples/qualcomm/scripts/torchvision_vit.py +++ b/examples/qualcomm/scripts/torchvision_vit.py @@ -6,6 +6,7 @@ import json import os +import sys from multiprocessing.connection import Client import numpy as np @@ -61,10 +62,14 @@ def main(args): os.makedirs(args.artifact, exist_ok=True) data_num = 100 - inputs, targets, input_list = get_dataset( - dataset_path=f"{args.dataset}", - data_size=data_num, - ) + if args.compile_only: + inputs = [(torch.rand(1, 3, 224, 224),)] + else: + inputs, targets, input_list = get_dataset( + dataset_path=f"{args.dataset}", + data_size=data_num, + ) + pte_filename = "vit_qnn" instance = TorchVisionViTModel() build_executorch_binary( @@ -77,6 +82,9 @@ def main(args): shared_buffer=args.shared_buffer, ) + if args.compile_only: + sys.exit(0) + adb = SimpleADB( qnn_sdk=os.getenv("QNN_SDK_ROOT"), build_path=f"{args.build_folder}", @@ -126,13 +134,14 @@ def main(args): "for https://www.kaggle.com/datasets/ifigotin/imagenetmini-1000)" ), type=str, - required=True, + required=False, ) parser.add_argument( "-a", "--artifact", - help="path for storing generated artifacts by this example. " "Default ./vit", - default="./vit", + help="path for storing generated artifacts by this example. 
" + "Default ./torchvision_vit", + default="./torchvision_vit", type=str, ) diff --git a/extension/android/CMakeLists.txt b/extension/android/CMakeLists.txt index daa9c7c249..6827ae7904 100644 --- a/extension/android/CMakeLists.txt +++ b/extension/android/CMakeLists.txt @@ -32,8 +32,15 @@ find_package(executorch CONFIG REQUIRED) target_link_options_shared_lib(executorch) set(link_libraries) -list(APPEND link_libraries extension_data_loader extension_module extension_threadpool executorch - fbjni +list( + APPEND + link_libraries + executorch + extension_data_loader + extension_module + extension_runner_util + extension_threadpool + fbjni ) if(TARGET optimized_native_cpu_ops_lib) diff --git a/extension/android/benchmark/.gitignore b/extension/android/benchmark/.gitignore new file mode 100644 index 0000000000..0d02171028 --- /dev/null +++ b/extension/android/benchmark/.gitignore @@ -0,0 +1,16 @@ +*.iml +.gradle +/local.properties +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties +*.aar diff --git a/extension/android/benchmark/app/.gitignore b/extension/android/benchmark/app/.gitignore new file mode 100644 index 0000000000..42afabfd2a --- /dev/null +++ b/extension/android/benchmark/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/extension/android/benchmark/app/build.gradle.kts b/extension/android/benchmark/app/build.gradle.kts new file mode 100644 index 0000000000..b48404f8ff --- /dev/null +++ b/extension/android/benchmark/app/build.gradle.kts @@ -0,0 +1,41 @@ +plugins { + id("com.android.application") +} + +android { + namespace = "org.pytorch.minibench" + compileSdk = 34 + + defaultConfig { + applicationId = "org.pytorch.minibench" + minSdk = 28 + targetSdk = 33 + versionCode = 1 + versionName = "1.0" + + testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" + } + + buildTypes { + release { + isMinifyEnabled = false + proguardFiles( + getDefaultProguardFile("proguard-android-optimize.txt"), + "proguard-rules.pro" + ) + } + } + compileOptions { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 + } +} + +dependencies { + implementation(files("libs/executorch.aar")) + implementation("com.facebook.soloader:soloader:0.10.5") + implementation("com.facebook.fbjni:fbjni:0.5.1") + testImplementation("junit:junit:4.13.2") + androidTestImplementation("androidx.test.ext:junit:1.2.1") + androidTestImplementation("androidx.test.espresso:espresso-core:3.6.1") +} diff --git a/extension/android/benchmark/app/proguard-rules.pro b/extension/android/benchmark/app/proguard-rules.pro new file mode 100644 index 0000000000..481bb43481 --- /dev/null +++ b/extension/android/benchmark/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. 
+#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. +#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/extension/android/benchmark/app/src/androidTest/java/org/pytorch/minibench/ExampleInstrumentedTest.java b/extension/android/benchmark/app/src/androidTest/java/org/pytorch/minibench/ExampleInstrumentedTest.java new file mode 100644 index 0000000000..c5887aebcc --- /dev/null +++ b/extension/android/benchmark/app/src/androidTest/java/org/pytorch/minibench/ExampleInstrumentedTest.java @@ -0,0 +1,26 @@ +package org.pytorch.minibench; + +import android.content.Context; + +import androidx.test.platform.app.InstrumentationRegistry; +import androidx.test.ext.junit.runners.AndroidJUnit4; + +import org.junit.Test; +import org.junit.runner.RunWith; + +import static org.junit.Assert.*; + +/** + * Instrumented test, which will execute on an Android device. + * + * @see Testing documentation + */ +@RunWith(AndroidJUnit4.class) +public class ExampleInstrumentedTest { + @Test + public void useAppContext() { + // Context of the app under test. + Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext(); + assertEquals("org.pytorch.minibench", appContext.getPackageName()); + } +} diff --git a/extension/android/benchmark/app/src/main/AndroidManifest.xml b/extension/android/benchmark/app/src/main/AndroidManifest.xml new file mode 100644 index 0000000000..49711b6830 --- /dev/null +++ b/extension/android/benchmark/app/src/main/AndroidManifest.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java new file mode 100644 index 0000000000..17897d0d36 --- /dev/null +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +package org.pytorch.minibench; + +import android.app.Activity; +import android.content.Intent; +import android.os.Bundle; + +import org.pytorch.executorch.Module; + +import java.io.FileWriter; +import java.io.IOException; + +public class BenchmarkActivity extends Activity { + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + Intent intent = getIntent(); + String modelPath = intent.getStringExtra("model_path"); + int numIter = intent.getIntExtra("num_iter", 10); + + // TODO: Format the string with a parsable format + StringBuilder resultText = new StringBuilder(); + + Module module = Module.load(modelPath); + for (int i = 0; i < numIter; i++) { + long start = System.currentTimeMillis(); + module.forward(); + long forwardMs = System.currentTimeMillis() - start; + resultText.append(forwardMs).append(";"); + } + + try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.txt")) { + writer.write(resultText.toString()); + } catch (IOException e) { + e.printStackTrace(); + } + + } +} diff --git a/extension/android/benchmark/app/src/main/res/values/colors.xml b/extension/android/benchmark/app/src/main/res/values/colors.xml new file mode 100644 index 0000000000..ca1931bca9 --- /dev/null +++ b/extension/android/benchmark/app/src/main/res/values/colors.xml @@ -0,0 +1,10 @@ + + + #FFBB86FC + #FF6200EE + #FF3700B3 + #FF03DAC5 + #FF018786 + #FF000000 + #FFFFFFFF + diff --git a/extension/android/benchmark/app/src/main/res/values/strings.xml b/extension/android/benchmark/app/src/main/res/values/strings.xml new file mode 100644 index 0000000000..34062786b9 --- /dev/null +++ b/extension/android/benchmark/app/src/main/res/values/strings.xml @@ -0,0 +1,3 @@ + + MiniBench + \ No newline at end of file diff --git a/extension/android/benchmark/app/src/main/res/values/themes.xml b/extension/android/benchmark/app/src/main/res/values/themes.xml new file mode 100644 index 0000000000..8a63cb8955 --- /dev/null +++ b/extension/android/benchmark/app/src/main/res/values/themes.xml @@ -0,0 +1,5 @@ + + + + diff --git a/extension/android/benchmark/app/src/test/java/org/pytorch/minibench/ExampleUnitTest.java b/extension/android/benchmark/app/src/test/java/org/pytorch/minibench/ExampleUnitTest.java new file mode 100644 index 0000000000..134410482b --- /dev/null +++ b/extension/android/benchmark/app/src/test/java/org/pytorch/minibench/ExampleUnitTest.java @@ -0,0 +1,17 @@ +package org.pytorch.minibench; + +import org.junit.Test; + +import static org.junit.Assert.*; + +/** + * Example local unit test, which will execute on the development machine (host). + * + * @see Testing documentation + */ +public class ExampleUnitTest { + @Test + public void addition_isCorrect() { + assertEquals(4, 2 + 2); + } +} diff --git a/extension/android/benchmark/build.gradle.kts b/extension/android/benchmark/build.gradle.kts new file mode 100644 index 0000000000..cc9db8a5cc --- /dev/null +++ b/extension/android/benchmark/build.gradle.kts @@ -0,0 +1,4 @@ +// Top-level build file where you can add configuration options common to all sub-projects/modules. +plugins { + id("com.android.application") version "8.1.0" apply false +} diff --git a/extension/android/benchmark/gradle.properties b/extension/android/benchmark/gradle.properties new file mode 100644 index 0000000000..a03b354896 --- /dev/null +++ b/extension/android/benchmark/gradle.properties @@ -0,0 +1,21 @@ +# Project-wide Gradle settings. +# IDE (e.g. 
Android Studio) users:
+# Gradle settings configured through the IDE *will override*
+# any settings specified in this file.
+# For more details on how to configure your build environment visit
+# http://www.gradle.org/docs/current/userguide/build_environment.html
+# Specifies the JVM arguments used for the daemon process.
+# The setting is particularly useful for tweaking memory settings.
+org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
+# When configured, Gradle will run in incubating parallel mode.
+# This option should only be used with decoupled projects. More details, visit
+# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
+# org.gradle.parallel=true
+# AndroidX package structure to make it clearer which packages are bundled with the
+# Android operating system, and which are packaged with your app's APK
+# https://developer.android.com/topic/libraries/support-library/androidx-rn
+android.useAndroidX=true
+# Enables namespacing of each library's R class so that its R class includes only the
+# resources declared in the library itself and none from the library's dependencies,
+# thereby reducing the size of the R class for that library
+android.nonTransitiveRClass=true
diff --git a/extension/android/benchmark/gradle/wrapper/gradle-wrapper.jar b/extension/android/benchmark/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e708b1c023ec8b20f512888fe07c5bd3ff77bb8f
GIT binary patch
literal 59203
zeKX)M?Dt(|{r5t7IenkAXo%&EXIb-i^w+0CX0D=xApC=|Xy(`xy+QG^UyFe z+#J6h_&T5i#sV)hj3D4WN%z;2+jJcZxcI3*CHXGmOF3^)JD5j&wfX)e?-|V0GPuA+ zQFot%aEqGNJJHn$!_}#PaAvQ^{3-Ye7b}rWwrUmX53(|~i0v{}G_sI9uDch_brX&6 zWl5Ndj-AYg(W9CGfQf<6!YmY>Ey)+uYd_JNXH=>|`OH-CDCmcH(0%iD_aLlNHKH z7bcW-^5+QV$jK?R*)wZ>r9t}loM@XN&M-Pw=F#xn(;u3!(3SXXY^@=aoj70;_=QE9 zGghsG3ekq#N||u{4We_25U=y#T*S{4I{++Ku)> zQ!DZW;pVcn>b;&g2;YE#+V`v*Bl&Y-i@X6D*OpNA{G@JAXho&aOk(_j^weW{#3X5Y z%$q_wpb07EYPdmyH(1^09i$ca{O<}7) zRWncXdSPgBE%BM#by!E>tdnc$8RwUJg1*x($6$}ae$e9Knj8gvVZe#bLi!<+&BkFj zg@nOpDneyc+hU9P-;jmOSMN|*H#>^Ez#?;%C3hg_65leSUm;iz)UkW)jX#p)e&S&M z1|a?wDzV5NVnlhRBCd_;F87wp>6c<&nkgvC+!@KGiIqWY4l}=&1w7|r6{oBN8xyzh zG$b#2=RJp_iq6)#t5%yLkKx(0@D=C3w+oiXtSuaQ%I1WIb-eiE$d~!)b@|4XLy!CZ z9p=t=%3ad@Ep+<9003D2KZ5VyP~_n$=;~r&YUg5UZ0KVD&tR1DHy9x)qWtKJp#Kq# zP*8p#W(8JJ_*h_3W}FlvRam?<4Z+-H77^$Lvi+#vmhL9J zJ<1SV45xi;SrO2f=-OB(7#iNA5)x1uNC-yNxUw|!00vcW2PufRm>e~toH;M0Q85MQLWd?3O{i8H+5VkR@l9Dg-ma ze2fZ%>G(u5(k9EHj2L6!;(KZ8%8|*-1V|B#EagbF(rc+5iL_5;Eu)L4Z-V;0HfK4d z*{utLse_rvHZeQ>V5H=f78M3Ntg1BPxFCVD{HbNA6?9*^YIq;B-DJd{Ca2L#)qWP? zvX^NhFmX?CTWw&Ns}lgs;r3i+Bq@y}Ul+U%pzOS0Fcv9~aB(0!>GT0)NO?p=25LjN z2bh>6RhgqD7bQj#k-KOm@JLgMa6>%-ok1WpOe)FS^XOU{c?d5shG(lIn3GiVBxmg`u%-j=)^v&pX1JecJics3&jvPI)mDut52? z3jEA)DM%}BYbxxKrizVYwq?(P&19EXlwD9^-6J+4!}9{ywR9Gk42jjAURAF&EO|~N z)?s>$Da@ikI4|^z0e{r`J8zIs>SpM~Vn^{3fArRu;?+43>lD+^XtUcY1HidJwnR6+ z!;oG2=B6Z_=M%*{z-RaHc(n|1RTKQdNjjV!Pn9lFt^4w|AeN06*j}ZyhqZ^!-=cyGP_ShV1rGxkx8t zB;8`h!S{LD%ot``700d0@Grql(DTt4Awgmi+Yr0@#jbe=2#UkK%rv=OLqF)9D7D1j z!~McAwMYkeaL$~kI~90)5vBhBzWYc3Cj1WI0RS`z000R8-@ET0dA~*r(gSiCJmQMN&4%1D zyVNf0?}sBH8zNbBLn>~(W{d3%@kL_eQ6jEcR{l>C|JK z(R-fA!z|TTRG40|zv}7E@PqCAXP3n`;%|SCQ|ZS%ym$I{`}t3KPL&^l5`3>yah4*6 zifO#{VNz3)?ZL$be;NEaAk9b#{tV?V7 zP|wf5YA*1;s<)9A4~l3BHzG&HH`1xNr#%){4xZ!jq%o=7nN*wMuXlFV{HaiQLJ`5G zBhDi#D(m`Q1pLh@Tq+L;OwuC52RdW7b8}~60WCOK5iYMUad9}7aWBuILb({5=z~YF zt?*Jr5NG+WadM{mDL>GyiByCuR)hd zA=HM?J6l1Xv0Dl+LW@w$OTcEoOda^nFCw*Sy^I@$sSuneMl{4ys)|RY#9&NxW4S)9 zq|%83IpslTLoz~&vTo!Ga@?rj_kw{|k{nv+w&Ku?fyk4Ki4I?);M|5Axm)t+BaE)D zm(`AQ#k^DWrjbuXoJf2{Aj^KT zFb1zMSqxq|vceV+Mf-)$oPflsO$@*A0n0Z!R{&(xh8s}=;t(lIy zv$S8x>m;vQNHuRzoaOo?eiWFe{0;$s`Bc+Osz~}Van${u;g(su`3lJ^TEfo~nERfP z)?aFzpDgnLYiERsKPu|0tq4l2wT)Atr6Qb%m-AUn6HnCue*yWICp7TjW$@sO zm5rm4aTcPQ(rfi7a`xP7cKCFrJD}*&_~xgLyr^-bmsL}y;A5P|al8J3WUoBSjqu%v zxC;mK!g(7r6RRJ852Z~feoC&sD3(6}^5-uLK8o)9{8L_%%rItZK9C){UxB|;G>JbP zsRRtS4-3B*5c+K2kvmgZK8472%l>3cntWUOVHxB|{Ay~aOg5RN;{PJgeVD*H%ac+y!h#wi%o2bF2Ca8IyMyH{>4#{E_8u^@+l-+n=V}Sq?$O z{091@v%Bd*3pk0^2UtiF9Z+(a@wy6 zUdw8J*ze$K#=$48IBi1U%;hmhO>lu!uU;+RS}p&6@rQila7WftH->*A4=5W|Fmtze z)7E}jh@cbmr9iup^i%*(uF%LG&!+Fyl@LFA-}Ca#bxRfDJAiR2dt6644TaYw1Ma79 zt8&DYj31j^5WPNf5P&{)J?WlCe@<3u^78wnd(Ja4^a>{^Tw}W>|Cjt^If|7l^l)^Q zbz|7~CF(k_9~n|h;ysZ+jHzkXf(*O*@5m zLzUmbHp=x!Q|!9NVXyipZ3)^GuIG$k;D)EK!a5=8MFLI_lpf`HPKl=-Ww%z8H_0$j ztJ||IfFG1lE9nmQ0+jPQy zCBdKkjArH@K7jVcMNz);Q(Q^R{d5G?-kk;Uu_IXSyWB)~KGIizZL(^&qF;|1PI7!E zTP`%l)gpX|OFn&)M%txpQ2F!hdA~hX1Cm5)IrdljqzRg!f{mN%G~H1&oqe`5eJCIF zHdD7O;AX-{XEV(a`gBFJ9ews#CVS2y!&>Cm_dm3C8*n3MA*e67(WC?uP@8TXuMroq z{#w$%z@CBIkRM7?}Xib+>hRjy?%G!fiw8! 
z8(gB+8J~KOU}yO7UGm&1g_MDJ$IXS!`+*b*QW2x)9>K~Y*E&bYMnjl6h!{17_8d!%&9D`a7r&LKZjC<&XOvTRaKJ1 zUY@hl5^R&kZl3lU3njk`3dPzxj$2foOL26r(9zsVF3n_F#v)s5vv3@dgs|lP#eylq62{<-vczqP!RpVBTgI>@O6&sU>W|do17+#OzQ7o5A$ICH z?GqwqnK^n2%LR;$^oZM;)+>$X3s2n}2jZ7CdWIW0lnGK-b#EG01)P@aU`pg}th&J-TrU`tIpb5t((0eu|!u zQz+3ZiOQ^?RxxK4;zs=l8q!-n7X{@jSwK(iqNFiRColuEOg}!7cyZi`iBX4g1pNBj zAPzL?P^Ljhn;1$r8?bc=#n|Ed7wB&oHcw()&*k#SS#h}jO?ZB246EGItsz*;^&tzp zu^YJ0=lwsi`eP_pU8}6JA7MS;9pfD;DsSsLo~ogzMNP70@@;Fm8f0^;>$Z>~}GWRw!W5J3tNX*^2+1f3hz{~rIzJo z6W%J(H!g-eI_J1>0juX$X4Cl6i+3wbc~k146UIX&G22}WE>0ga#WLsn9tY(&29zBvH1$`iWtTe zG2jYl@P!P)eb<5DsR72BdI7-zP&cZNI{7q3e@?N8IKc4DE#UVr->|-ryuJXk^u^>4 z$3wE~=q390;XuOQP~TNoDR?#|NSPJ%sTMInA6*rJ%go|=YjGe!B>z6u$IhgQSwoV* zjy3F2#I>uK{42{&IqP59)Y(1*Z>>#W8rCf4_eVsH)`v!P#^;BgzKDR`ARGEZzkNX+ zJUQu=*-ol=Xqqt5=`=pA@BIn@6a9G8C{c&`i^(i+BxQO9?YZ3iu%$$da&Kb?2kCCo zo7t$UpSFWqmydXf@l3bVJ=%K?SSw)|?srhJ-1ZdFu*5QhL$~-IQS!K1s@XzAtv6*Y zl8@(5BlWYLt1yAWy?rMD&bwze8bC3-GfNH=p zynNFCdxyX?K&G(ZZ)afguQ2|r;XoV^=^(;Cku#qYn4Lus`UeKt6rAlFo_rU`|Rq z&G?~iWMBio<78of-2X(ZYHx~=U0Vz4btyXkctMKdc9UM!vYr~B-(>)(Hc|D zMzkN4!PBg%tZoh+=Gba!0++d193gbMk2&krfDgcbx0jI92cq?FFESVg0D$>F+bil} zY~$)|>1HZsX=5sAZ2WgPB5P=8X#TI+NQ(M~GqyVB53c6IdX=k>Wu@A0Svf5#?uHaF zsYn|koIi3$(%GZ2+G+7Fv^lHTb#5b8sAHSTnL^qWZLM<(1|9|QFw9pnRU{svj}_Al zL)b9>fN{QiA($8peNEJyy`(a{&uh-T4_kdZFIVsKKVM(?05}76EEz?#W za^fiZOAd14IJ4zLX-n7Lq0qlQ^lW8Cvz4UKkV9~P}>sq0?xD3vg+$4vLm~C(+ zM{-3Z#qnZ09bJ>}j?6ry^h+@PfaD7*jZxBEY4)UG&daWb??6)TP+|3#Z&?GL?1i+280CFsE|vIXQbm| zM}Pk!U`U5NsNbyKzkrul-DzwB{X?n3E6?TUHr{M&+R*2%yOiXdW-_2Yd6?38M9Vy^ z*lE%gA{wwoSR~vN0=no}tP2Ul5Gk5M(Xq`$nw#ndFk`tcpd5A=Idue`XZ!FS>Q zG^0w#>P4pPG+*NC9gLP4x2m=cKP}YuS!l^?sHSFftZy{4CoQrb_ z^20(NnG`wAhMI=eq)SsIE~&Gp9Ne0nD4%Xiu|0Fj1UFk?6avDqjdXz{O1nKao*46y zT8~iA%Exu=G#{x=KD;_C&M+Zx4+n`sHT>^>=-1YM;H<72k>$py1?F3#T1*ef9mLZw z5naLQr?n7K;2l+{_uIw*_1nsTn~I|kkCgrn;|G~##hM;9l7Jy$yJfmk+&}W@JeKcF zx@@Woiz8qdi|D%aH3XTx5*wDlbs?dC1_nrFpm^QbG@wM=i2?Zg;$VK!c^Dp8<}BTI zyRhAq@#%2pGV49*Y5_mV4+OICP|%I(dQ7x=6Ob}>EjnB_-_18*xrY?b%-yEDT(wrO z9RY2QT0`_OpGfMObKHV;QLVnrK%mc?$WAdIT`kJQT^n%GuzE7|9@k3ci5fYOh(287 zuIbg!GB3xLg$YN=n)^pHGB0jH+_iIiC=nUcD;G6LuJsjn2VI1cyZx=a?ShCsF==QK z;q~*m&}L<-cb+mDDXzvvrRsybcgQ;Vg21P(uLv5I+eGc7o7tc6`;OA9{soHFOz zT~2?>Ts}gprIX$wRBb4yE>ot<8+*Bv`qbSDv*VtRi|cyWS>)Fjs>fkNOH-+PX&4(~ z&)T8Zam2L6puQl?;5zg9h<}k4#|yH9czHw;1jw-pwBM*O2hUR6yvHATrI%^mvs9q_ z&ccT0>f#eDG<^WG^q@oVqlJrhxH)dcq2cty@l3~|5#UDdExyXUmLQ}f4#;6fI{f^t zDCsgIJ~0`af%YR%Ma5VQq-p21k`vaBu6WE?66+5=XUd%Ay%D$irN>5LhluRWt7 zov-=f>QbMk*G##&DTQyou$s7UqjjW@k6=!I@!k+S{pP8R(2=e@io;N8E`EOB;OGoI zw6Q+{X1_I{OO0HPpBz!X!@`5YQ2)t{+!?M_iH25X(d~-Zx~cXnS9z>u?+If|iNJbx zyFU2d1!ITX64D|lE0Z{dLRqL1Ajj=CCMfC4lD3&mYR_R_VZ>_7_~|<^o*%_&jevU+ zQ4|qzci=0}Jydw|LXLCrOl1_P6Xf@c0$ieK2^7@A9UbF{@V_0p%lqW|L?5k>bVM8|p5v&2g;~r>B8uo<4N+`B zH{J)h;SYiIVx@#jI&p-v3dwL5QNV1oxPr8J%ooezTnLW>i*3Isb49%5i!&ac_dEXv zvXmVUck^QHmyrF8>CGXijC_R-y(Qr{3Zt~EmW)-nC!tiH`wlw5D*W7Pip;T?&j%kX z6DkZX4&}iw>hE(boLyjOoupf6JpvBG8}jIh!!VhnD0>}KSMMo{1#uU6kiFcA04~|7 zVO8eI&x1`g4CZ<2cYUI(n#wz2MtVFHx47yE5eL~8bot~>EHbevSt}LLMQX?odD{Ux zJMnam{d)W4da{l7&y-JrgiU~qY3$~}_F#G7|MxT)e;G{U`In&?`j<5D->}cb{}{T(4DF0BOk-=1195KB-E*o@c?`>y#4=dMtYtSY=&L{!TAjFVcq0y@AH`vH! 
z$41+u!Ld&}F^COPgL(EE{0X7LY&%D7-(?!kjFF7=qw<;`V{nwWBq<)1QiGJgUc^Vz ztMUlq1bZqKn17|6x6iAHbWc~l1HcmAxr%$Puv!znW)!JiukwIrqQ00|H$Z)OmGG@= zv%A8*4cq}(?qn4rN6o`$Y))(MyXr8R<2S^J+v(wmFmtac!%VOfN?&(8Nr!T@kV`N; z*Q33V3t`^rN&aBiHet)18wy{*wi1=W!B%B-Q6}SCrUl$~Hl{@!95ydml@FK8P=u4s z4e*7gV2s=YxEvskw2Ju!2%{8h01rx-3`NCPc(O zH&J0VH5etNB2KY6k4R@2Wvl^Ck$MoR3=)|SEclT2ccJ!RI9Nuter7u9@;sWf-%um;GfI!=eEIQ2l2p_YWUd{|6EG ze{yO6;lMc>;2tPrsNdi@&1K6(1;|$xe8vLgiouj%QD%gYk`4p{Ktv9|j+!OF-P?@p z;}SV|oIK)iwlBs+`ROXkhd&NK zzo__r!B>tOXpBJMDcv!Mq54P+n4(@dijL^EpO1wdg~q+!DT3lB<>9AANSe!T1XgC=J^)IP0XEZ()_vpu!!3HQyJhwh?r`Ae%Yr~b% zO*NY9t9#qWa@GCPYOF9aron7thfWT`eujS4`t2uG6)~JRTI;f(ZuoRQwjZjp5Pg34 z)rp$)Kr?R+KdJ;IO;pM{$6|2y=k_siqvp%)2||cHTe|b5Ht8&A{wazGNca zX$Ol?H)E_R@SDi~4{d-|8nGFhZPW;Cts1;08TwUvLLv&_2$O6Vt=M)X;g%HUr$&06 zISZb(6)Q3%?;3r~*3~USIg=HcJhFtHhIV(siOwV&QkQe#J%H9&E21!C*d@ln3E@J* zVqRO^<)V^ky-R|%{(9`l-(JXq9J)1r$`uQ8a}$vr9E^nNiI*thK8=&UZ0dsFN_eSl z(q~lnD?EymWLsNa3|1{CRPW60>DSkY9YQ;$4o3W7Ms&@&lv9eH!tk~N&dhqX&>K@} zi1g~GqglxkZ5pEFkllJ)Ta1I^c&Bt6#r(QLQ02yHTaJB~- zCcE=5tmi`UA>@P=1LBfBiqk)HB4t8D?02;9eXj~kVPwv?m{5&!&TFYhu>3=_ zsGmYZ^mo*-j69-42y&Jj0cBLLEulNRZ9vXE)8~mt9C#;tZs;=#M=1*hebkS;7(aGf zcs7zH(I8Eui9UU4L--))yy`&d&$In&VA2?DAEss4LAPCLd>-$i?lpXvn!gu^JJ$(DoUlc6wE98VLZ*z`QGQov5l4Fm_h?V-;mHLYDVOwKz7>e4+%AzeO>P6v}ndPW| zM>m#6Tnp7K?0mbK=>gV}=@k*0Mr_PVAgGMu$j+pWxzq4MAa&jpCDU&-5eH27Iz>m^ zax1?*HhG%pJ((tkR(V(O(L%7v7L%!_X->IjS3H5kuXQT2!ow(;%FDE>16&3r){!ex zhf==oJ!}YU89C9@mfDq!P3S4yx$aGB?rbtVH?sHpg?J5C->!_FHM%Hl3#D4eplxzQ zRA+<@LD%LKSkTk2NyWCg7u=$%F#;SIL44~S_OGR}JqX}X+=bc@swpiClB`Zbz|f!4 z7Ysah7OkR8liXfI`}IIwtEoL}(URrGe;IM8%{>b1SsqXh)~w}P>yiFRaE>}rEnNkT z!HXZUtxUp1NmFm)Dm@-{FI^aRQqpSkz}ZSyKR%Y}YHNzBk)ZIp} zMtS=aMvkgWKm9&oTcU0?S|L~CDqA+sHpOxwnswF-fEG)cXCzUR?ps@tZa$=O)=L+5 zf%m58cq8g_o}3?Bhh+c!w4(7AjxwQ3>WnVi<{{38g7yFboo>q|+7qs<$8CPXUFAN< zG&}BHbbyQ5n|qqSr?U~GY{@GJ{(Jny{bMaOG{|IkUj7tj^9pa9|FB_<+KHLxSxR;@ zHpS$4V)PP+tx}22fWx(Ku9y+}Ap;VZqD0AZW4gCDTPCG=zgJmF{|x;(rvdM|2|9a}cex6xrMkERnkE;}jvU-kmzd%_J50$M`lIPCKf+^*zL=@LW`1SaEc%=m zQ+lT06Gw+wVwvQ9fZ~#qd430v2HndFsBa9WjD0P}K(rZYdAt^5WQIvb%D^Q|pkVE^ zte$&#~zmULFACGfS#g=2OLOnIf2Of-k!(BIHjs77nr!5Q1*I9 z1%?=~#Oss!rV~?-6Gm~BWJiA4mJ5TY&iPm_$)H1_rTltuU1F3I(qTQ^U$S>%$l z)Wx1}R?ij0idp@8w-p!Oz{&*W;v*IA;JFHA9%nUvVDy7Q8woheC#|8QuDZb-L_5@R zOqHwrh|mVL9b=+$nJxM`3eE{O$sCt$UK^2@L$R(r^-_+z?lOo+me-VW=Zw z-Bn>$4ovfWd%SPY`ab-u9{INc*k2h+yH%toDHIyqQ zO68=u`N}RIIs7lsn1D){)~%>ByF<>i@qFb<-axvu(Z+6t7v<^z&gm9McRB~BIaDn$ z#xSGT!rzgad8o>~kyj#h1?7g96tOcCJniQ+*#=b7wPio>|6a1Z?_(TS{)KrPe}(8j z!#&A=k(&Pj^F;r)CI=Z{LVu>uj!_W1q4b`N1}E(i%;BWjbEcnD=mv$FL$l?zS6bW!{$7j1GR5ocn94P2u{ z70tAAcpqtQo<@cXw~@i-@6B23;317|l~S>CB?hR5qJ%J3EFgyBdJd^fHZu7AzHF(BQ!tyAz^L0`X z23S4Fe{2X$W0$zu9gm%rg~A>ijaE#GlYlrF9$ds^QtaszE#4M(OLVP2O-;XdT(XIC zatwzF*)1c+t~c{L=fMG8Z=k5lv>U0;C{caN1NItnuSMp)6G3mbahu>E#sj&oy94KC zpH}8oEw{G@N3pvHhp{^-YaZeH;K+T_1AUv;IKD<=mv^&Ueegrb!yf`4VlRl$M?wsl zZyFol(2|_QM`e_2lYSABpKR{{NlxlDSYQNkS;J66aT#MSiTx~;tUmvs-b*CrR4w=f z8+0;*th6kfZ3|5!Icx3RV11sp=?`0Jy3Fs0N4GZQMN=8HmT6%x9@{Dza)k}UwL6JT zHRDh;%!XwXr6yuuy`4;Xsn0zlR$k%r%9abS1;_v?`HX_hI|+EibVnlyE@3aL5vhQq zlIG?tN^w@0(v9M*&L+{_+RQZw=o|&BRPGB>e5=ys7H`nc8nx)|-g;s7mRc7hg{GJC zAe^vCIJhajmm7C6g! 
zL&!WAQ~5d_5)00?w_*|*H>3$loHrvFbitw#WvLB!JASO?#5Ig5$Ys10n>e4|3d;tS zELJ0|R4n3Az(Fl3-r^QiV_C;)lQ1_CW{5bKS15U|E9?ZgLec@%kXr84>5jV2a5v=w z?pB1GPdxD$IQL4)G||B_lI+A=08MUFFR4MxfGOu07vfIm+j=z9tp~5i_6jb`tR>qV z$#`=BQ*jpCjm$F0+F)L%xRlnS%#&gro6PiRfu^l!EVan|r3y}AHJQOORGx4~ z&<)3=K-tx518DZyp%|!EqpU!+X3Et7n2AaC5(AtrkW>_57i}$eqs$rupubg0a1+WO zGHZKLN2L0D;ab%{_S1Plm|hx8R?O14*w*f&2&bB050n!R2by zw!@XOQx$SqZ5I<(Qu$V6g>o#A!JVwErWv#(Pjx=KeS0@hxr4?13zj#oWwPS(7Ro|v z>Mp@Kmxo79q|}!5qtX2-O@U&&@6s~!I&)1WQIl?lTnh6UdKT_1R640S4~f=_xoN3- zI+O)$R@RjV$F=>Ti7BlnG1-cFKCC(t|Qjm{SalS~V-tX#+2ekRhwmN zZr`8{QF6y~Z!D|{=1*2D-JUa<(1Z=;!Ei!KiRNH?o{p5o3crFF=_pX9O-YyJchr$~ zRC`+G+8kx~fD2k*ZIiiIGR<8r&M@3H?%JVOfE>)})7ScOd&?OjgAGT@WVNSCZ8N(p zuQG~76GE3%(%h1*vUXg$vH{ua0b`sQ4f0*y=u~lgyb^!#CcPJa2mkSEHGLsnO^kb$ zru5_l#nu=Y{rSMWiYx?nO{8I!gH+?wEj~UM?IrG}E|bRIBUM>UlY<`T1EHpRr36vv zBi&dG8oxS|J$!zoaq{+JpJy+O^W(nt*|#g32bd&K^w-t>!Vu9N!k9eA8r!Xc{utY> zg9aZ(D2E0gL#W0MdjwES-7~Wa8iubPrd?8-$C4BP?*wok&O8+ykOx{P=Izx+G~hM8 z*9?BYz!T8~dzcZr#ux8kS7u7r@A#DogBH8km8Ry4slyie^n|GrTbO|cLhpqgMdsjX zJ_LdmM#I&4LqqsOUIXK8gW;V0B(7^$y#h3h>J0k^WJfAMeYek%Y-Dcb_+0zPJez!GM zAmJ1u;*rK=FNM0Nf}Y!!P9c4)HIkMnq^b;JFd!S3?_Qi2G#LIQ)TF|iHl~WKK6JmK zbv7rPE6VkYr_%_BT}CK8h=?%pk@3cz(UrZ{@h40%XgThP*-Oeo`T0eq9 zA8BnWZKzCy5e&&_GEsU4*;_k}(8l_&al5K-V*BFM=O~;MgRkYsOs%9eOY6s6AtE*<7GQAR2ulC3RAJrG_P1iQK5Z~&B z&f8X<>yJV6)oDGIlS$Y*D^Rj(cszTy5c81a5IwBr`BtnC6_e`ArI8CaTX_%rx7;cn zR-0?J_LFg*?(#n~G8cXut(1nVF0Oka$A$1FGcERU<^ggx;p@CZc?3UB41RY+wLS`LWFNSs~YP zuw1@DNN3lTd|jDL7gjBsd9}wIw}4xT2+8dBQzI00m<@?c2L%>}QLfK5%r!a-iII`p zX@`VEUH)uj^$;7jVUYdADQ2k*!1O3WdfgF?OMtUXNpQ1}QINamBTKDuv19^{$`8A1 zeq%q*O0mi@(%sZU>Xdb0Ru96CFqk9-L3pzLVsMQ`Xpa~N6CR{9Rm2)A|CI21L(%GW zh&)Y$BNHa=FD+=mBw3{qTgw)j0b!Eahs!rZnpu)z!!E$*eXE~##yaXz`KE5(nQM`s zD!$vW9XH)iMxu9R>r$VlLk9oIR%HxpUiW=BK@4U)|1WNQ=mz9a z^!KkO=>GaJ!GBXm{KJj^;kh-MkUlEQ%lza`-G&}C5y1>La1sR6hT=d*NeCnuK%_LV zOXt$}iP6(YJKc9j-Fxq~*ItVUqljQ8?oaysB-EYtFQp9oxZ|5m0^Hq(qV!S+hq#g( z?|i*H2MIr^Kxgz+3vIljQ*Feejy6S4v~jKEPTF~Qhq!(ms5>NGtRgO5vfPPc4Z^AM zTj!`5xEreIN)vaNxa|q6qWdg>+T`Ol0Uz)ckXBXEGvPNEL3R8hB3=C5`@=SYgAju1 z!)UBr{2~=~xa{b8>x2@C7weRAEuatC)3pkRhT#pMPTpSbA|tan%U7NGMvzmF?c!V8 z=pEWxbdXbTAGtWTyI?Fml%lEr-^AE}w#l(<7OIw;ctw}imYax&vR4UYNJZK6P7ZOd zP87XfhnUHxCUHhM@b*NbTi#(-8|wcv%3BGNs#zRCVV(W?1Qj6^PPQa<{yaBwZ`+<`w|;rqUY_C z&AeyKwwf*q#OW-F()lir=T^<^wjK65Lif$puuU5+tk$;e_EJ;Lu+pH>=-8=PDhkBg z8cWt%@$Sc#C6F$Vd+0507;{OOyT7Hs%nKS88q-W!$f~9*WGBpHGgNp}=C*7!RiZ5s zn1L_DbKF@B8kwhDiLKRB@lsXVVLK|ph=w%_`#owlf@s@V(pa`GY$8h%;-#h@TsO|Y8V=n@*!Rog7<7Cid%apR|x zOjhHCyfbIt%+*PCveTEcuiDi%Wx;O;+K=W?OFUV%)%~6;gl?<0%)?snDDqIvkHF{ zyI02)+lI9ov42^hL>ZRrh*HhjF9B$A@=H94iaBESBF=eC_KT$8A@uB^6$~o?3Wm5t1OIaqF^~><2?4e3c&)@wKn9bD? 
zoeCs;H>b8DL^F&>Xw-xjZEUFFTv>JD^O#1E#)CMBaG4DX9bD(Wtc8Rzq}9soQ8`jf zeSnHOL}<+WVSKp4kkq&?SbETjq6yr@4%SAqOG=9E(3YeLG9dtV+8vmzq+6PFPk{L; z(&d++iu=^F%b+ea$i2UeTC{R*0Isk;vFK!no<;L+(`y`3&H-~VTdKROkdyowo1iqR zbVW(3`+(PQ2>TKY>N!jGmGo7oeoB8O|P_!Ic@ zZ^;3dnuXo;WJ?S+)%P>{Hcg!Jz#2SI(s&dY4QAy_vRlmOh)QHvs_7c&zkJCmJGVvV zX;Mtb>QE+xp`KyciG$Cn*0?AK%-a|=o!+7x&&yzHQOS>8=B*R=niSnta^Pxp1`=md z#;$pS$4WCT?mbiCYU?FcHGZ#)kHVJTTBt^%XE(Q};aaO=Zik0UgLcc0I(tUpt(>|& zcxB_|fxCF7>&~5eJ=Dpn&5Aj{A^cV^^}(7w#p;HG&Q)EaN~~EqrE1qKrMAc&WXIE;>@<&)5;gD2?={Xf@Mvn@OJKw=8Mgn z!JUFMwD+s==JpjhroT&d{$kQAy%+d`a*XxDEVxy3`NHzmITrE`o!;5ClXNPb4t*8P zzAivdr{j_v!=9!^?T3y?gzmqDWX6mkzhIzJ-3S{T5bcCFMr&RPDryMcdwbBuZbsgN zGrp@^i?rcfN7v0NKGzDPGE#4yszxu=I_`MI%Z|10nFjU-UjQXXA?k8Pk|OE<(?ae) zE%vG#eZAlj*E7_3dx#Zz4kMLj>H^;}33UAankJiDy5ZvEhrjr`!9eMD8COp}U*hP+ zF}KIYx@pkccIgyxFm#LNw~G&`;o&5)2`5aogs`1~7cMZQ7zj!%L4E`2yzlQN6REX20&O<9 zKV6fyr)TScJPPzNTC2gL+0x#=u>(({{D7j)c-%tvqls3#Y?Z1m zV5WUE)zdJ{$p>yX;^P!UcXP?UD~YM;IRa#Rs5~l+*$&nO(;Ers`G=0D!twR(0GF@c zHl9E5DQI}Oz74n zfKP>&$q0($T4y$6w(p=ERAFh+>n%iaeRA%!T%<^+pg?M)@ucY<&59$x9M#n+V&>}=nO9wCV{O~lg&v#+jcUj(tQ z`0u1YH)-`U$15a{pBkGyPL0THv1P|4e@pf@3IBZS4dVJPo#H>pWq%Lr0YS-SeWash z8R7=jb28KPMI|_lo#GEO|5B?N_e``H*23{~a!AmUJ+fb4HX-%QI@lSEUxKlGV7z7Q zSKw@-TR>@1RL%w{x}dW#k1NgW+q4yt2Xf1J62Bx*O^WG8OJ|FqI4&@d3_o8Id@*)4 zYrk=>@!wv~mh7YWv*bZhxqSmFh2Xq)o=m;%n$I?GSz49l1$xRpPu_^N(vZ>*>Z<04 z2+rP70oM=NDysd!@fQdM2OcyT?3T^Eb@lIC-UG=Bw{BjQ&P`KCv$AcJ;?`vdZ4){d z&gkoUK{$!$$K`3*O-jyM1~p-7T*qb)Ys>Myt^;#1&a%O@x8A+E>! zY8=eD`ZG)LVagDLBeHg>=atOG?Kr%h4B%E6m@J^C+U|y)XX@f z8oyJDW|9g=<#f<{JRr{y#~euMnv)`7j=%cHWLc}ngjq~7k**6%4u>Px&W%4D94(r* z+akunK}O0DC2A%Xo9jyF;DobX?!1I(7%}@7F>i%&nk*LMO)bMGg2N+1iqtg+r(70q zF5{Msgsm5GS7DT`kBsjMvOrkx&|EU!{{~gL4d2MWrAT=KBQ-^zQCUq{5PD1orxlIL zq;CvlWx#f1NWvh`hg011I%?T_s!e38l*lWVt|~z-PO4~~1g)SrJ|>*tXh=QfXT)%( z+ex+inPvD&O4Ur;JGz>$sUOnWdpSLcm1X%aQDw4{dB!cnj`^muI$CJ2%p&-kULVCE z>$eMR36kN$wCPR+OFDM3-U(VOrp9k3)lI&YVFqd;Kpz~K)@Fa&FRw}L(SoD z9B4a+hQzZT-BnVltst&=kq6Y(f^S4hIGNKYBgMxGJ^;2yrO}P3;r)(-I-CZ)26Y6? 
z&rzHI_1GCvGkgy-t1E;r^3Le30|%$ebDRu2+gdLG)r=A~Qz`}~&L@aGJ{}vVs_GE* zVUjFnzHiXfKQbpv&bR&}l2bzIjAooB)=-XNcYmrGmBh(&iu@o!^hn0^#}m2yZZUK8 zufVm7Gq0y`Mj;9b>`c?&PZkU0j4>IL=UL&-Lp3j&47B5pAW4JceG{!XCA)kT<%2nqCxj<)uy6XR_uws~>_MEKPOpAQ!H zkn>FKh)<9DwwS*|Y(q?$^N!6(51O0 z^JM~Ax{AI1Oj$fs-S5d4T7Z_i1?{%0SsIuQ&r8#(JA=2iLcTN+?>wOL532%&dMYkT z*T5xepC+V6zxhS@vNbMoi|i)=rpli@R9~P!39tWbSSb904ekv7D#quKbgFEMTb48P zuq(VJ+&L8aWU(_FCD$3^uD!YM%O^K(dvy~Wm2hUuh6bD|#(I39Xt>N1Y{ZqXL`Fg6 zKQ?T2htHN!(Bx;tV2bfTtIj7e)liN-29s1kew>v(D^@)#v;}C4-G=7x#;-dM4yRWm zyY`cS21ulzMK{PoaQ6xChEZ}o_#}X-o}<&0)$1#3we?+QeLt;aVCjeA)hn!}UaKt< zat1fHEx13y-rXNMvpUUmCVzocPmN~-Y4(YJvQ#db)4|%B!rBsgAe+*yor~}FrNH08 z3V!97S}D7d$zbSD{$z;@IYMxM6aHdypIuS*pr_U6;#Y!_?0i|&yU*@16l z*dcMqDQgfNBf}?quiu4e>H)yTVfsp#f+Du0@=Kc41QockXkCkvu>FBd6Q+@FL!(Yx z2`YuX#eMEiLEDhp+9uFqME_E^faV&~9qjBHJkIp~%$x^bN=N)K@kvSVEMdDuzA0sn z88CBG?`RX1@#hQNd`o^V{37)!w|nA)QfiYBE^m=yQKv-fQF+UCMcuEe1d4BH7$?>b zJl-r9@0^Ie=)guO1vOd=i$_4sz>y3x^R7n4ED!5oXL3@5**h(xr%Hv)_gILarO46q+MaDOF%ChaymKoI6JU5Pg;7#2n9-18|S1;AK+ zgsn6;k6-%!QD>D?cFy}8F;r@z8H9xN1jsOBw2vQONVqBVEbkiNUqgw~*!^##ht>w0 zUOykwH=$LwX2j&nLy=@{hr)2O&-wm-NyjW7n~Zs9UlH;P7iP3 zI}S(r0YFVYacnKH(+{*)Tbw)@;6>%=&Th=+Z6NHo_tR|JCI8TJiXv2N7ei7M^Q+RM z?9o`meH$5Yi;@9XaNR#jIK^&{N|DYNNbtdb)XW1Lv2k{E>;?F`#Pq|&_;gm~&~Zc9 zf+6ZE%{x4|{YdtE?a^gKyzr}dA>OxQv+pq|@IXL%WS0CiX!V zm$fCePA%lU{%pTKD7|5NJHeXg=I0jL@$tOF@K*MI$)f?om)D63K*M|r`gb9edD1~Y zc|w7N)Y%do7=0{RC|AziW7#am$)9jciRJ?IWl9PE{G3U+$%FcyKs_0Cgq`=K3@ttV z9g;M!3z~f_?P%y3-ph%vBMeS@p7P&Ea8M@97+%XEj*(1E6vHj==d zjsoviB>j^$_^OI_DEPvFkVo(BGRo%cJeD){6Uckei=~1}>sp299|IRjhXe)%?uP0I zF5+>?0#Ye}T^Y$u_rc4=lPcq4K^D(TZG-w30-YiEM=dcK+4#o*>lJ8&JLi+3UcpZk z!^?95S^C0ja^jwP`|{<+3cBVog$(mRdQmadS+Vh~z zS@|P}=|z3P6uS+&@QsMp0no9Od&27O&14zHXGAOEy zh~OKpymK5C%;LLb467@KgIiVwYbYd6wFxI{0-~MOGfTq$nBTB!{SrWmL9Hs}C&l&l#m?s*{tA?BHS4mVKHAVMqm63H<|c5n0~k)-kbg zXidai&9ZUy0~WFYYKT;oe~rytRk?)r8bptITsWj(@HLI;@=v5|XUnSls7$uaxFRL+ zRVMGuL3w}NbV1`^=Pw*0?>bm8+xfeY(1PikW*PB>>Tq(FR`91N0c2&>lL2sZo5=VD zQY{>7dh_TX98L2)n{2OV=T10~*YzX27i2Q7W86M4$?gZIXZaBq#sA*{PH8){|GUi;oM>e?ua7eF4WFuFYZSG| zze?srg|5Ti8Og{O zeFxuw9!U+zhyk?@w zjsA6(oKD=Ka;A>Ca)oPORxK+kxH#O@zhC!!XS4@=swnuMk>t+JmLmFiE^1aX3f<)D@`%K0FGK^gg1a1j>zi z2KhV>sjU7AX3F$SEqrXSC}fRx64GDoc%!u2Yag68Lw@w9v;xOONf@o)Lc|Uh3<21ctTYu-mFZuHk*+R{GjXHIGq3p)tFtQp%TYqD=j1&y)>@zxoxUJ!G@ zgI0XKmP6MNzw>nRxK$-Gbzs}dyfFzt>#5;f6oR27ql!%+{tr+(`(>%51|k`ML} zY4eE)Lxq|JMas(;JibNQds1bUB&r}ydMQXBY4x(^&fY_&LlQC)3hylc$~8&~|06-D z#T+%66rYbHX%^KuqJED_wuGB+=h`nWA!>1n0)3wZrBG3%`b^Ozv6__dNa@%V14|!D zQ?o$z5u0^8`giv%qE!BzZ!3j;BlDlJDk)h@9{nSQeEk!z9RGW) z${RSF3phEM*ce*>Xdp}585vj$|40=&S{S-GTiE?Op*vY&Lvr9}BO$XWy80IF+6@%n z5*2ueT_g@ofP#u5pxb7n*fv^Xtt7&?SRc{*2Ka-*!BuOpf}neHGCiHy$@Ka1^Dint z;DkmIL$-e)rj4o2WQV%Gy;Xg(_Bh#qeOsTM2f@KEe~4kJ8kNLQ+;(!j^bgJMcNhvklP5Z6I+9Fq@c&D~8Fb-4rmDT!MB5QC{Dsb;BharP*O;SF4& zc$wj-7Oep7#$WZN!1nznc@Vb<_Dn%ga-O#J(l=OGB`dy=Sy&$(5-n3zzu%d7E#^8`T@}V+5B;PP8J14#4cCPw-SQTdGa2gWL0*zKM z#DfSXs_iWOMt)0*+Y>Lkd=LlyoHjublNLefhKBv@JoC>P7N1_#> zv=mLWe96%EY;!ZGSQDbZWb#;tzqAGgx~uk+-$+2_8U`!ypbwXl z^2E-FkM1?lY@yt8=J3%QK+xaZ6ok=-y%=KXCD^0r!5vUneW>95PzCkOPO*t}p$;-> ze5j-BLT_;)cZQzR2CEsm@rU7GZfFtdp*a|g4wDr%8?2QkIGasRfDWT-Dvy*U{?IHT z*}wGnzdlSptl#ZF^sf)KT|BJs&kLG91^A6ls{CzFprZ6-Y!V0Xysh%9p%iMd7HLsS zN+^Un$tDV)T@i!v?3o0Fsx2qI(AX_$dDkBzQ@fRM%n zRXk6hb9Py#JXUs+7)w@eo;g%QQ95Yq!K_d=z{0dGS+pToEI6=Bo8+{k$7&Z zo4>PH(`ce8E-Ps&uv`NQ;U$%t;w~|@E3WVOCi~R4oj5wP?%<*1C%}Jq%a^q~T7u>K zML5AKfQDv6>PuT`{SrKHRAF+^&edg6+5R_#H?Lz3iGoWo#PCEd0DS;)2U({{X#zU^ zw_xv{4x7|t!S)>44J;KfA|DC?;uQ($l+5Vp7oeqf7{GBF9356nx|&B~gs+@N^gSdd 
zvb*>&W)|u#F{Z_b`f#GVtQ`pYv3#||N{xj1NgB<#=Odt6{eB%#9RLt5v zIi|0u70`#ai}9fJjKv7dE!9ZrOIX!3{$z_K5FBd-Kp-&e4(J$LD-)NMTp^_pB`RT; zftVVlK2g@+1Ahv2$D){@Y#cL#dUj9*&%#6 zd2m9{1NYp>)6=oAvqdCn5#cx{AJ%S8skUgMglu2*IAtd+z1>B&`MuEAS(D(<6X#Lj z?f4CFx$)M&$=7*>9v1ER4b6!SIz-m0e{o0BfkySREchp?WdVPpQCh!q$t>?rL!&Jg zd#heM;&~A}VEm8Dvy&P|J*eAV&w!&Nx6HFV&B8jJFVTmgLaswn!cx$&%JbTsloz!3 zMEz1d`k==`Ueub_JAy_&`!ogbwx27^ZXgFNAbx=g_I~5nO^r)}&myw~+yY*cJl4$I znNJ32M&K=0(2Dj_>@39`3=FX!v3nZHno_@q^!y}%(yw0PqOo=);6Y@&ylVe>nMOZ~ zd>j#QQSBn3oaWd;qy$&5(5H$Ayi)0haAYO6TH>FR?rhqHmNOO+(})NB zLI@B@v0)eq!ug`>G<@htRlp3n!EpU|n+G+AvXFrWSUsLMBfL*ZB`CRsIVHNTR&b?K zxBgsN0BjfB>UVcJ|x%=-zb%OV7lmZc& zxiupadZVF7)6QuhoY;;FK2b*qL0J-Rn-8!X4ZY$-ZSUXV5DFd7`T41c(#lAeLMoeT z4%g655v@7AqT!i@)Edt5JMbN(=Q-6{=L4iG8RA%}w;&pKmtWvI4?G9pVRp|RTw`g0 zD5c12B&A2&P6Ng~8WM2eIW=wxd?r7A*N+&!Be7PX3s|7~z=APxm=A?5 zt>xB4WG|*Td@VX{Rs)PV0|yK`oI3^xn(4c_j&vgxk_Y3o(-`_5o`V zRTghg6%l@(qodXN;dB#+OKJEEvhfcnc#BeO2|E(5df-!fKDZ!%9!^BJ_4)9P+9Dq5 zK1=(v?KmIp34r?z{NEWnLB3Px{XYwy-akun4F7xTRr2^zeYW{gcK9)>aJDdU5;w5@ zak=<+-PLH-|04pelTb%ULpuuuJC7DgyT@D|p{!V!0v3KpDnRjANN12q6SUR3mb9<- z>2r~IApQGhstZ!3*?5V z8#)hJ0TdZg0M-BK#nGFP>$i=qk82DO z7h;Ft!D5E15OgW)&%lej*?^1~2=*Z5$2VX>V{x8SC+{i10BbtUk9@I#Vi&hX)q
Q!LwySI{Bnv%Sm)yh{^sSVJ8&h_D-BJ_YZe5eCaAWU9b$O2c z$T|{vWVRtOL!xC0DTc(Qbe`ItNtt5hr<)VijD0{U;T#bUEp381_y`%ZIav?kuYG{iyYdEBPW=*xNSc;Rlt6~F4M`5G+VtOjc z*0qGzCb@gME5udTjJA-9O<&TWd~}ysBd(eVT1-H82-doyH9RST)|+Pb{o*;$j9Tjs zhU!IlsPsj8=(x3bAKJTopW3^6AKROHR^7wZ185wJGVhA~hEc|LP;k7NEz-@4p5o}F z`AD6naG3(n=NF9HTH81=F+Q|JOz$7wm9I<+#BSmB@o_cLt2GkW9|?7mM;r!JZp89l zbo!Hp8=n!XH1{GwaDU+k)pGp`C|cXkCU5%vcH)+v@0eK>%7gWxmuMu9YLlChA|_D@ zi#5zovN_!a-0?~pUV-Rj*1P)KwdU-LguR>YM&*Nen+ln8Q$?WFCJg%DY%K}2!!1FE zDv-A%Cbwo^p(lzac&_TZ-l#9kq`mhLcY3h9ZTUVCM(Ad&=EriQY5{jJv<5K&g|*Lk zgV%ILnf1%8V2B0E&;Sp4sYbYOvvMebLwYwzkRQ#F8GpTQq#uv=J`uaSJ34OWITeSGo6+-8Xw znCk*n{kdDEi)Hi&u^)~cs@iyCkFWB2SWZU|Uc%^43ZIZQ-vWNExCCtDWjqHs;;tWf$v{}0{p0Rvxkq``)*>+Akq%|Na zA`@~-Vfe|+(AIlqru+7Ceh4nsVmO9p9jc8}HX^W&ViBDXT+uXbT#R#idPn&L>+#b6 zflC-4C5-X;kUnR~L>PSLh*gvL68}RBsu#2l`s_9KjUWRhiqF`j)`y`2`YU(>3bdBj z?>iyjEhe-~$^I5!nn%B6Wh+I`FvLNvauve~eX<+Ipl&04 zT}};W&1a3%W?dJ2=N#0t?e+aK+%t}5q%jSLvp3jZ%?&F}nOOWr>+{GFIa%wO_2`et z=JzoRR~}iKuuR+azPI8;Gf9)z3kyA4EIOSl!sRR$DlW}0>&?GbgPojmjmnln;cTqCt=ADbE zZ8GAnoM+S1(5$i8^O4t`ue;vO4i}z0wz-QEIVe5_u03;}-!G1NyY8;h^}y;tzY}i5 zqQr#Ur3Fy8sSa$Q0ys+f`!`+>9WbvU_I`Sj;$4{S>O3?#inLHCrtLy~!s#WXV=oVP zeE93*Nc`PBi4q@%Ao$x4lw9vLHM!6mn3-b_cebF|n-2vt-zYVF_&sDE--J-P;2WHo z+@n2areE0o$LjvjlV2X7ZU@j+`{*8zq`JR3gKF#EW|#+{nMyo-a>nFFTg&vhyT=b} zDa8+v0(Dgx0yRL@ZXOYIlVSZ0|MFizy0VPW8;AfA5|pe!#j zX}Py^8fl5SyS4g1WSKKtnyP+_PoOwMMwu`(i@Z)diJp~U54*-miOchy7Z35eL>^M z4p<-aIxH4VUZgS783@H%M7P9hX>t{|RU7$n4T(brCG#h9e9p! z+o`i;EGGq3&pF;~5V~eBD}lC)>if$w%Vf}AFxGqO88|ApfHf&Bvu+xdG)@vuF}Yvk z)o;~k-%+0K0g+L`Wala!$=ZV|z$e%>f0%XoLib%)!R^RoS+{!#X?h-6uu zF&&KxORdZU&EwQFITIRLo(7TA3W}y6X{?Y%y2j0It!ekU#<)$qghZtpcS>L3uh`Uj z7GY;6f$9qKynP#oS3$$a{p^{D+0oJQ71`1?OAn_m8)UGZmj3l*ZI)`V-a>MKGGFG< z&^jg#Ok%(hhm>hSrZ5;Qga4u(?^i>GiW_j9%_7M>j(^|Om$#{k+^*ULnEgzW_1gCICtAD^WpC`A z{9&DXkG#01Xo)U$OC(L5Y$DQ|Q4C6CjUKk1UkPj$nXH##J{c8e#K|&{mA*;b$r0E4 zUNo0jthwA(c&N1l=PEe8Rw_8cEl|-eya9z&H3#n`B$t#+aJ03RFMzrV@gowbe8v(c zIFM60^0&lCFO10NU4w@|61xiZ4CVXeaKjd;d?sv52XM*lS8XiVjgWpRB;&U_C0g+`6B5V&w|O6B*_q zsATxL!M}+$He)1eOWECce#eS@2n^xhlB4<_Nn?yCVEQWDs(r`|@2GqLe<#(|&P0U? 
z$7V5IgpWf09uIf_RazRwC?qEqRaHyL?iiS05UiGesJy%^>-C{{ypTBI&B0-iUYhk> zIk<5xpsuV@g|z(AZD+C-;A!fTG=df1=<%nxy(a(IS+U{ME4ZbDEBtcD_3V=icT6*_ z)>|J?>&6%nvHhZERBtjK+s4xnut*@>GAmA5m*OTp$!^CHTr}vM4n(X1Q*;{e-Rd2BCF-u@1ZGm z!S8hJ6L=Gl4T_SDa7Xx|-{4mxveJg=ctf`BJ*fy!yF6Dz&?w(Q_6B}WQVtNI!BVBC zKfX<>7vd6C96}XAQmF-Jd?1Q4eTfRB3q7hCh0f!(JkdWT5<{iAE#dKy*Jxq&3a1@~ z8C||Dn2mFNyrUV|<-)C^_y7@8c2Fz+2jrae9deBDu;U}tJ{^xAdxCD248(k;dCJ%o z`y3sADe>U%suxwwv~8A1+R$VB=Q?%U?4joI$um;aH+eCrBqpn- z%79D_7rb;R-;-9RTrwi9dPlg8&@tfWhhZ(Vx&1PQ+6(huX`;M9x~LrW~~#3{j0Bh2kDU$}@!fFQej4VGkJv?M4rU^x!RU zEwhu$!CA_iDjFjrJa`aocySDX16?~;+wgav;}Zut6Mg%C4>}8FL?8)Kgwc(Qlj{@#2Pt0?G`$h7P#M+qoXtlV@d}%c&OzO+QYKK`kyXaK{U(O^2DyIXCZlNQjt0^8~8JzNGrIxhj}}M z&~QZlbx%t;MJ(Vux;2tgNKGlAqphLq%pd}JG9uoVHUo?|hN{pLQ6Em%r*+7t^<);X zm~6=qChlNAVXNN*Sow->*4;}T;l;D1I-5T{Bif@4_}=>l`tK;qqDdt5zvisCKhMAH z#r}`)7VW?LZqfdmXQ%zo5bJ00{Xb9^YKrk0Nf|oIW*K@(=`o2Vndz}ZDyk{!u}PVx zzd--+_WC*U{~DH3{?GI64IB+@On&@9X>EUAo&L+G{L^dozaI4C3G#2wr~hseW@K&g zKWs{uHu-9Je!3;4pE>eBltKUXb^*hG8I&413)$J&{D4N%7PcloU6bn%jPxJyQL?g* z9g+YFFEDiE`8rW^laCNzQmi7CTnPfwyg3VDHRAl>h=In6jeaVOP@!-CP60j3+#vpL zEYmh_oP0{-gTe7Or`L6x)6w?77QVi~jD8lWN@3RHcm80iV%M1A!+Y6iHM)05iC64tb$X2lV_%Txk@0l^hZqi^%Z?#- zE;LE0uFx)R08_S-#(wC=dS&}vj6P4>5ZWjhthP=*Hht&TdLtKDR;rXEX4*z0h74FA zMCINqrh3Vq;s%3MC1YL`{WjIAPkVL#3rj^9Pj9Ss7>7duy!9H0vYF%>1jh)EPqvlr6h%R%CxDsk| z!BACz7E%j?bm=pH6Eaw{+suniuY7C9Ut~1cWfOX9KW9=H><&kQlinPV3h9R>3nJvK z4L9(DRM=x;R&d#a@oFY7mB|m8h4692U5eYfcw|QKwqRsshN(q^v$4$)HgPpAJDJ`I zkqjq(8Cd!K!+wCd=d@w%~e$=gdUgD&wj$LQ1r>-E=O@c ze+Z$x{>6(JA-fNVr)X;*)40Eym1TtUZI1Pwwx1hUi+G1Jlk~vCYeXMNYtr)1?qwyg zsX_e*$h?380O00ou?0R@7-Fc59o$UvyVs4cUbujHUA>sH!}L54>`e` zHUx#Q+Hn&Og#YVOuo*niy*GU3rH;%f``nk#NN5-xrZ34NeH$l`4@t);4(+0|Z#I>Y z)~Kzs#exIAaf--65L0UHT_SvV8O2WYeD>Mq^Y6L!Xu8%vnpofG@w!}R7M28?i1*T&zp3X4^OMCY6(Dg<-! zXmcGQrRgHXGYre7GfTJ)rhl|rs%abKT_Nt24_Q``XH{88NVPW+`x4ZdrMuO0iZ0g` z%p}y};~T5gbb9SeL8BSc`SO#ixC$@QhXxZ=B}L`tP}&k?1oSPS=4%{UOHe0<_XWln zwbl5cn(j-qK`)vGHY5B5C|QZd5)W7c@{bNVXqJ!!n$^ufc?N9C-BF2QK1(kv++h!>$QbAjq)_b$$PcJdV+F7hz0Hu@ zqj+}m0qn{t^tD3DfBb~0B36|Q`bs*xs|$i^G4uNUEBl4g;op-;Wl~iThgga?+dL7s zUP(8lMO?g{GcYpDS{NM!UA8Hco?#}eNEioRBHy4`mq!Pd-9@-97|k$hpEX>xoX+dY zDr$wfm^P&}Wu{!%?)U_(%Mn79$(ywvu*kJ9r4u|MyYLI_67U7%6Gd_vb##Nerf@>& z8W11z$$~xEZt$dPG}+*IZky+os5Ju2eRi;1=rUEeIn>t-AzC_IGM-IXWK3^6QNU+2pe=MBn4I*R@A%-iLDCOHTE-O^wo$sL_h{dcPl=^muAQb`_BRm};=cy{qSkui;`WSsj9%c^+bIDQ z0`_?KX0<-=o!t{u(Ln)v>%VGL z0pC=GB7*AQ?N7N{ut*a%MH-tdtNmNC+Yf$|KS)BW(gQJ*z$d{+{j?(e&hgTy^2|AR9vx1Xre2fagGv0YXWqtNkg*v%40v?BJBt|f9wX5 z{QTlCM}b-0{mV?IG>TW_BdviUKhtosrBqdfq&Frdz>cF~yK{P@(w{Vr7z2qKFwLhc zQuogKO@~YwyS9%+d-zD7mJG~@?EFJLSn!a&mhE5$_4xBl&6QHMzL?CdzEnC~C3$X@ zvY!{_GR06ep5;<#cKCSJ%srxX=+pn?ywDwtJ2{TV;0DKBO2t++B(tIO4)Wh`rD13P z4fE$#%zkd=UzOB74gi=-*CuID&Z3zI^-`4U^S?dHxK8fP*;fE|a(KYMgMUo`THIS1f!*6dOI2 zFjC3O=-AL`6=9pp;`CYPTdVX z8(*?V&%QoipuH0>WKlL8A*zTKckD!paN@~hh zmXzm~qZhMGVdQGd=AG8&20HW0RGV8X{$9LldFZYm zE?}`Q3i?xJRz43S?VFMmqRyvWaS#(~Lempg9nTM$EFDP(Gzx#$r)W&lpFKqcAoJh-AxEw$-bjW>`_+gEi z2w`99#UbFZGiQjS8kj~@PGqpsPX`T{YOj`CaEqTFag;$jY z8_{Wzz>HXx&G*Dx<5skhpETxIdhKH?DtY@b9l8$l?UkM#J-Snmts7bd7xayKTFJ(u zyAT&@6cAYcs{PBfpqZa%sxhJ5nSZBPji?Zlf&}#L?t)vC4X5VLp%~fz2Sx<*oN<7` z?ge=k<=X7r<~F7Tvp9#HB{!mA!QWBOf%EiSJ6KIF8QZNjg&x~-%e*tflL(ji_S^sO ztmib1rp09uon}RcsFi#k)oLs@$?vs(i>5k3YN%$T(5Or(TZ5JW9mA6mIMD08=749$ z!d+l*iu{Il7^Yu}H;lgw=En1sJpCKPSqTCHy4(f&NPelr31^*l%KHq^QE>z>Ks_bH zjbD?({~8Din7IvZeJ>8Ey=e;I?thpzD=zE5UHeO|neioJwG;IyLk?xOz(yO&0DTU~ z^#)xcs|s>Flgmp;SmYJ4g(|HMu3v7#;c*Aa8iF#UZo7CvDq4>8#qLJ|YdZ!AsH%^_7N1IQjCro

K7UpUK$>l@ zw`1S}(D?mUXu_C{wupRS-jiX~w=Uqqhf|Vb3Cm9L=T+w91Cu^ z*&Ty%sN?x*h~mJc4g~k{xD4ZmF%FXZNC;oVDwLZ_WvrnzY|{v8hc1nmx4^}Z;yriXsAf+Lp+OFLbR!&Ox?xABwl zu8w&|5pCxmu#$?Cv2_-Vghl2LZ6m7}VLEfR5o2Ou$x02uA-%QB2$c(c1rH3R9hesc zfpn#oqpbKuVsdfV#cv@5pV4^f_!WS+F>SV6N0JQ9E!T90EX((_{bSSFv9ld%I0&}9 zH&Jd4MEX1e0iqDtq~h?DBrxQX1iI0lIs<|kB$Yrh&cpeK0-^K%=FBsCBT46@h#yi!AyDq1V(#V}^;{{V*@T4WJ&U-NTq43w=|K>z8%pr_nC>%C(Wa_l78Ufib$r8Od)IIN=u>417 z`Hl{9A$mI5A(;+-Q&$F&h-@;NR>Z<2U;Y21>>Z;s@0V@SbkMQQj%_;~+qTuQ?c|AV zcWm3XZQHhP&R%QWarS%mJ!9R^&!_)*s(v+VR@I#QrAT}`17Y+l<`b-nvmDNW`De%y zrwTZ9EJrj1AFA>B`1jYDow}~*dfPs}IZMO3=a{Fy#IOILc8F0;JS4x(k-NSpbN@qM z`@aE_e}5{!$v3+qVs7u?sOV(y@1Os*Fgu`fCW9=G@F_#VQ%xf$hj0~wnnP0$hFI+@ zkQj~v#V>xn)u??YutKsX>pxKCl^p!C-o?+9;!Nug^ z{rP!|+KsP5%uF;ZCa5F;O^9TGac=M|=V z_H(PfkV1rz4jl?gJ(ArXMyWT4y(86d3`$iI4^l9`vLdZkzpznSd5Ikfrs8qcSy&>z zTIZgWZGXw0n9ibQxYWE@gI0(3#KA-dAdPcsL_|hg2@~C!VZDM}5;v_Nykfq!*@*Zf zE_wVgx82GMDryKO{U{D>vSzSc%B~|cjDQrt5BN=Ugpsf8H8f1lR4SGo#hCuXPL;QQ z#~b?C4MoepT3X`qdW2dNn& zo8)K}%Lpu>0tQei+{>*VGErz|qjbK#9 zvtd8rcHplw%YyQCKR{kyo6fgg!)6tHUYT(L>B7er5)41iG`j$qe*kSh$fY!PehLcD zWeKZHn<492B34*JUQh=CY1R~jT9Jt=k=jCU2=SL&&y5QI2uAG2?L8qd2U(^AW#{(x zThSy=C#>k+QMo^7caQcpU?Qn}j-`s?1vXuzG#j8(A+RUAY})F@=r&F(8nI&HspAy4 z4>(M>hI9c7?DCW8rw6|23?qQMSq?*Vx?v30U%luBo)B-k2mkL)Ljk5xUha3pK>EEj z@(;tH|M@xkuN?gsz;*bygizwYR!6=(Xgcg^>WlGtRYCozY<rFX2E>kaZo)O<^J7a`MX8Pf`gBd4vrtD|qKn&B)C&wp0O-x*@-|m*0egT=-t@%dD zgP2D+#WPptnc;_ugD6%zN}Z+X4=c61XNLb7L1gWd8;NHrBXwJ7s0ce#lWnnFUMTR& z1_R9Fin4!d17d4jpKcfh?MKRxxQk$@)*hradH2$3)nyXep5Z;B z?yX+-Bd=TqO2!11?MDtG0n(*T^!CIiF@ZQymqq1wPM_X$Iu9-P=^}v7npvvPBu!d$ z7K?@CsA8H38+zjA@{;{kG)#AHME>Ix<711_iQ@WWMObXyVO)a&^qE1GqpP47Q|_AG zP`(AD&r!V^MXQ^e+*n5~Lp9!B+#y3#f8J^5!iC@3Y@P`;FoUH{G*pj*q7MVV)29+j z>BC`a|1@U_v%%o9VH_HsSnM`jZ-&CDvbiqDg)tQEnV>b%Ptm)T|1?TrpIl)Y$LnG_ zzKi5j2Fx^K^PG1=*?GhK;$(UCF-tM~^=Z*+Wp{FSuy7iHt9#4n(sUuHK??@v+6*|10Csdnyg9hAsC5_OrSL;jVkLlf zHXIPukLqbhs~-*oa^gqgvtpgTk_7GypwH><53riYYL*M=Q@F-yEPLqQ&1Sc zZB%w}T~RO|#jFjMWcKMZccxm-SL)s_ig?OC?y_~gLFj{n8D$J_Kw%{r0oB8?@dWzn zB528d-wUBQzrrSSLq?fR!K%59Zv9J4yCQhhDGwhptpA5O5U?Hjqt>8nOD zi{)0CI|&Gu%zunGI*XFZh(ix)q${jT8wnnzbBMPYVJc4HX*9d^mz|21$=R$J$(y7V zo0dxdbX3N#=F$zjstTf*t8vL)2*{XH!+<2IJ1VVFa67|{?LP&P41h$2i2;?N~RA30LV`BsUcj zfO9#Pg1$t}7zpv#&)8`mis3~o+P(DxOMgz-V*(?wWaxi?R=NhtW}<#^Z?(BhSwyar zG|A#Q7wh4OfK<|DAcl9THc-W4*>J4nTevsD%dkj`U~wSUCh15?_N@uMdF^Kw+{agk zJ`im^wDqj`Ev)W3k3stasP`88-M0ZBs7;B6{-tSm3>I@_e-QfT?7|n0D~0RRqDb^G zyHb=is;IwuQ&ITzL4KsP@Z`b$d%B0Wuhioo1CWttW8yhsER1ZUZzA{F*K=wmi-sb#Ju+j z-l@In^IKnb{bQG}Ps>+Vu_W#grNKNGto+yjA)?>0?~X`4I3T@5G1)RqGUZuP^NJCq&^HykuYtMDD8qq+l8RcZNJsvN(10{ zQ1$XcGt}QH-U^WU!-wRR1d--{B$%vY{JLWIV%P4-KQuxxDeJaF#{eu&&r!3Qu{w}0f--8^H|KwE>)ORrcR+2Qf zb})DRcH>k0zWK8@{RX}NYvTF;E~phK{+F;MkIP$)T$93Ba2R2TvKc>`D??#mv9wg$ zd~|-`Qx5LwwsZ2hb*Rt4S9dsF%Cny5<1fscy~)d;0m2r$f=83<->c~!GNyb!U)PA; zq^!`@@)UaG)Ew(9V?5ZBq#c%dCWZrplmuM`o~TyHjAIMh0*#1{B>K4po-dx$Tk-Cq z=WZDkP5x2W&Os`N8KiYHRH#UY*n|nvd(U>yO=MFI-2BEp?x@=N<~CbLJBf6P)}vLS?xJXYJ2^<3KJUdrwKnJnTp{ zjIi|R=L7rn9b*D#Xxr4*R<3T5AuOS+#U8hNlfo&^9JO{VbH!v9^JbK=TCGR-5EWR@ zN8T-_I|&@A}(hKeL4_*eb!1G8p~&_Im8|wc>Cdir+gg90n1dw?QaXcx6Op_W1r=axRw>4;rM*UOpT#Eb9xU1IiWo@h?|5uP zka>-XW0Ikp@dIe;MN8B01a7+5V@h3WN{J=HJ*pe0uwQ3S&MyWFni47X32Q7SyCTNQ z+sR!_9IZa5!>f&V$`q!%H8ci!a|RMx5}5MA_kr+bhtQy{-^)(hCVa@I!^TV4RBi zAFa!Nsi3y37I5EK;0cqu|9MRj<^r&h1lF}u0KpKQD^5Y+LvFEwM zLU@@v4_Na#Axy6tn3P%sD^5P#<7F;sd$f4a7LBMk zGU^RZHBcxSA%kCx*eH&wgA?Qwazm8>9SCSz_!;MqY-QX<1@p$*T8lc?@`ikEqJ>#w zcG``^CoFMAhdEXT9qt47g0IZkaU)4R7wkGs^Ax}usqJ5HfDYAV$!=6?>J6+Ha1I<5 z|6=9soU4>E))tW$<#>F 
ziZ$6>KJf0bPfbx_)7-}tMINlc=}|H+$uX)mhC6-Hz+XZxsKd^b?RFB6et}O#+>Wmw9Ec9) z{q}XFWp{3@qmyK*Jvzpyqv57LIR;hPXKsrh{G?&dRjF%Zt5&m20Ll?OyfUYC3WRn{cgQ?^V~UAv+5 z&_m#&nIwffgX1*Z2#5^Kl4DbE#NrD&Hi4|7SPqZ}(>_+JMz=s|k77aEL}<=0Zfb)a z%F(*L3zCA<=xO)2U3B|pcTqDbBoFp>QyAEU(jMu8(jLA61-H!ucI804+B!$E^cQQa z)_ERrW3g!B9iLb3nn3dlkvD7KsY?sRvls3QC0qPi>o<)GHx%4Xb$5a3GBTJ(k@`e@ z$RUa^%S15^1oLEmA=sayrP5;9qtf!Z1*?e$ORVPsXpL{jL<6E)0sj&swP3}NPmR%FM?O>SQgN5XfHE< zo(4#Cv11(%Nnw_{_Ro}r6=gKd{k?NebJ~<~Kv0r(r0qe4n3LFx$5%x(BKvrz$m?LG zjLIc;hbj0FMdb9aH9Lpsof#yG$(0sG2%RL;d(n>;#jb!R_+dad+K;Ccw!|RY?uS(a zj~?=&M!4C(5LnlH6k%aYvz@7?xRa^2gml%vn&eKl$R_lJ+e|xsNfXzr#xuh(>`}9g zLHSyiFwK^-p!;p$yt7$F|3*IfO3Mlu9e>Dpx8O`37?fA`cj`C0B-m9uRhJjs^mRp# zWB;Aj6|G^1V6`jg7#7V9UFvnB4((nIwG?k%c7h`?0tS8J3Bn0t#pb#SA}N-|45$-j z$R>%7cc2ebAClXc(&0UtHX<>pd)akR3Kx_cK+n<}FhzmTx!8e9^u2e4%x{>T6pQ`6 zO182bh$-W5A3^wos0SV_TgPmF4WUP-+D25KjbC{y_6W_9I2_vNKwU(^qSdn&>^=*t z&uvp*@c8#2*paD!ZMCi3;K{Na;I4Q35zw$YrW5U@Kk~)&rw;G?d7Q&c9|x<Hg|CNMsxovmfth*|E*GHezPTWa^Hd^F4!B3sF;)? z(NaPyAhocu1jUe(!5Cy|dh|W2=!@fNmuNOzxi^tE_jAtzNJ0JR-avc_H|ve#KO}#S z#a(8secu|^Tx553d4r@3#6^MHbH)vmiBpn0X^29xEv!Vuh1n(Sr5I0V&`jA2;WS|Y zbf0e}X|)wA-Pf5gBZ>r4YX3Mav1kKY(ulAJ0Q*jB)YhviHK)w!TJsi3^dMa$L@^{` z_De`fF4;M87vM3Ph9SzCoCi$#Fsd38u!^0#*sPful^p5oI(xGU?yeYjn;Hq1!wzFk zG&2w}W3`AX4bxoVm03y>ts{KaDf!}b&7$(P4KAMP=vK5?1In^-YYNtx1f#}+2QK@h zeSeAI@E6Z8a?)>sZ`fbq9_snl6LCu6g>o)rO;ijp3|$vig+4t} zylEo7$SEW<_U+qgVcaVhk+4k+C9THI5V10qV*dOV6pPtAI$)QN{!JRBKh-D zk2^{j@bZ}yqW?<#VVuI_27*cI-V~sJiqQv&m07+10XF+#ZnIJdr8t`9s_EE;T2V;B z4UnQUH9EdX%zwh-5&wflY#ve!IWt0UE-My3?L#^Bh%kcgP1q{&26eXLn zTkjJ*w+(|_>Pq0v8{%nX$QZbf)tbJaLY$03;MO=Ic-uqYUmUCuXD>J>o6BCRF=xa% z3R4SK9#t1!K4I_d>tZgE>&+kZ?Q}1qo4&h%U$GfY058s%*=!kac{0Z+4Hwm!)pFLR zJ+5*OpgWUrm0FPI2ib4NPJ+Sk07j(`diti^i#kh&f}i>P4~|d?RFb#!JN)~D@)beox}bw?4VCf^y*`2{4`-@%SFTry2h z>9VBc9#JxEs1+0i2^LR@B1J`B9Ac=#FW=(?2;5;#U$0E0UNag_!jY$&2diQk_n)bT zl5Me_SUvqUjwCqmVcyb`igygB_4YUB*m$h5oeKv3uIF0sk}~es!{D>4r%PC*F~FN3owq5e0|YeUTSG#Vq%&Gk7uwW z0lDo#_wvflqHeRm*}l?}o;EILszBt|EW*zNPmq#?4A+&i0xx^?9obLyY4xx=Y9&^G;xYXYPxG)DOpPg!i_Ccl#3L}6xAAZzNhPK1XaC_~ z!A|mlo?Be*8Nn=a+FhgpOj@G7yYs(Qk(8&|h@_>w8Y^r&5nCqe0V60rRz?b5%J;GYeBqSAjo|K692GxD4` zRZyM2FdI+-jK2}WAZTZ()w_)V{n5tEb@>+JYluDozCb$fA4H)$bzg(Ux{*hXurjO^ zwAxc+UXu=&JV*E59}h3kzQPG4M)X8E*}#_&}w*KEgtX)cU{vm9b$atHa;s>| z+L6&cn8xUL*OSjx4YGjf6{Eq+Q3{!ZyhrL&^6Vz@jGbI%cAM9GkmFlamTbcQGvOlL zmJ?(FI)c86=JEs|*;?h~o)88>12nXlpMR4@yh%qdwFNpct;vMlc=;{FSo*apJ;p}! zAX~t;3tb~VuP|ZW;z$=IHf->F@Ml)&-&Bnb{iQyE#;GZ@C$PzEf6~q}4D>9jic@mTO5x76ulDz@+XAcm35!VSu zT*Gs>;f0b2TNpjU_BjHZ&S6Sqk6V1370+!eppV2H+FY!q*n=GHQ!9Rn6MjY!Jc77A zG7Y!lFp8?TIHN!LXO?gCnsYM-gQxsm=Ek**VmZu7vnuufD7K~GIxfxbsQ@qv2T zPa`tvHB$fFCyZl>3oYg?_wW)C>^_iDOc^B7klnTOoytQH18WkOk)L2BSD0r%xgRSW zQS9elF^?O=_@|58zKLK;(f77l-Zzu}4{fXed2saq!5k#UZAoDBqYQS{sn@j@Vtp|$ zG%gnZ$U|9@u#w1@11Sjl8ze^Co=)7yS(}=;68a3~g;NDe_X^}yJj;~s8xq9ahQ5_r zxAlTMnep*)w1e(TG%tWsjo3RR;yVGPEO4V{Zp?=a_0R#=V^ioQu4YL=BO4r0$$XTX zZfnw#_$V}sDAIDrezGQ+h?q24St0QNug_?{s-pI(^jg`#JRxM1YBV;a@@JQvH8*>> zIJvku74E0NlXkYe_624>znU0J@L<-c=G#F3k4A_)*;ky!C(^uZfj%WB3-*{*B$?9+ zDm$WFp=0(xnt6`vDQV3Jl5f&R(Mp};;q8d3I%Kn>Kx=^;uSVCw0L=gw53%Bp==8Sw zxtx=cs!^-_+i{2OK`Q;913+AXc_&Z5$@z3<)So0CU3;JAv=H?@Zpi~riQ{z-zLtVL z!oF<}@IgJp)Iyz1zVJ42!SPHSkjYNS4%ulVVIXdRuiZ@5Mx8LJS}J#qD^Zi_xQ@>DKDr-_e#>5h3dtje*NcwH_h;i{Sx7}dkdpuW z(yUCjckQsagv*QGMSi9u1`Z|V^}Wjf7B@q%j2DQXyd0nOyqg%m{CK_lAoKlJ7#8M} z%IvR?Vh$6aDWK2W!=i?*<77q&B8O&3?zP(Cs@kapc)&p7En?J;t-TX9abGT#H?TW? 
ztO5(lPKRuC7fs}zwcUKbRh=7E8wzTsa#Z{a`WR}?UZ%!HohN}d&xJ=JQhpO1PI#>X zHkb>pW04pU%Bj_mf~U}1F1=wxdBZu1790>3Dm44bQ#F=T4V3&HlOLsGH)+AK$cHk6 zia$=$kog?)07HCL*PI6}DRhpM^*%I*kHM<#1Se+AQ!!xyhcy6j7`iDX7Z-2i73_n# zas*?7LkxS-XSqv;YBa zW_n*32D(HTYQ0$feV_Fru1ZxW0g&iwqixPX3=9t4o)o|kOo79V$?$uh?#8Q8e>4e)V6;_(x&ViUVxma+i25qea;d-oK7ouuDsB^ab{ zu1qjQ%`n56VtxBE#0qAzb7lph`Eb-}TYpXB!H-}3Ykqyp`otprp7{VEuW*^IR2n$Fb99*nAtqT&oOFIf z@w*6>YvOGw@Ja?Pp1=whZqydzx@9X4n^2!n83C5{C?G@|E?&$?p*g68)kNvUTJ)I6 z1Q|(#UuP6pj78GUxq11m-GSszc+)X{C2eo-?8ud9sB=3(D47v?`JAa{V(IF zPZQ_0AY*9M97>Jf<o%#O_%Wq}8>YM=q0|tGY+hlXcpE=Z4Od z`NT7Hu2hnvRoqOw@g1f=bv`+nba{GwA$Ak0INlqI1k<9!x_!sL()h?hEWoWrdU3w` zZ%%)VR+Bc@_v!C#koM1p-3v_^L6)_Ktj4HE>aUh%2XZE@JFMOn)J~c`_7VWNb9c-N z2b|SZMR4Z@E7j&q&9(6H3yjEu6HV7{2!1t0lgizD;mZ9$r(r7W5G$ky@w(T_dFnOD z*p#+z$@pKE+>o@%eT(2-p_C}wbQ5s(%Sn_{$HDN@MB+Ev?t@3dPy`%TZ!z}AThZSu zN<1i$siJhXFdjV zP*y|V<`V8t=h#XTRUR~5`c`Z9^-`*BZf?WAehGdg)E2Je)hqFa!k{V(u+(hTf^Yq& zoruUh2(^3pe)2{bvt4&4Y9CY3js)PUHtd4rVG57}uFJL)D(JfSIo^{P=7liFXG zq5yqgof0V8paQcP!gy+;^pp-DA5pj=gbMN0eW=-eY+N8~y+G>t+x}oa!5r>tW$xhI zPQSv=pi;~653Gvf6~*JcQ%t1xOrH2l3Zy@8AoJ+wz@daW@m7?%LXkr!bw9GY@ns3e zSfuWF_gkWnesv?s3I`@}NgE2xwgs&rj?kH-FEy82=O8`+szN ziHch`vvS`zNfap14!&#i9H@wF7}yIPm=UB%(o(}F{wsZ(wA0nJ2aD^@B41>>o-_U6 zUqD~vdo48S8~FTb^+%#zcbQiiYoDKYcj&$#^;Smmb+Ljp(L=1Kt_J!;0s%1|JK}Wi z;={~oL!foo5n8=}rs6MmUW~R&;SIJO3TL4Ky?kh+b2rT9B1Jl4>#Uh-Bec z`Hsp<==#UEW6pGPhNk8H!!DUQR~#F9jEMI6T*OWfN^Ze&X(4nV$wa8QUJ>oTkruH# zm~O<`J7Wxseo@FqaZMl#Y(mrFW9AHM9Kb|XBMqaZ2a)DvJgYipkDD_VUF_PKd~dT7 z#02}bBfPn9a!X!O#83=lbJSK#E}K&yx-HI#T6ua)6o0{|={*HFusCkHzs|Fn&|C3H zBck1cmfcWVUN&i>X$YU^Sn6k2H;r3zuXbJFz)r5~3$d$tUj(l1?o={MM){kjgqXRO zc5R*#{;V7AQh|G|)jLM@wGAK&rm2~@{Pewv#06pHbKn#wL0P6F1!^qw9g&cW3Z=9} zj)POhOlwsh@eF=>z?#sIs*C-Nl(yU!#DaiaxhEs#iJqQ8w%(?+6lU02MYSeDkr!B- zPjMv+on6OLXgGnAtl(ao>|X2Y8*Hb}GRW5}-IzXnoo-d0!m4Vy$GS!XOLy>3_+UGs z2D|YcQx@M#M|}TDOetGi{9lGo9m-=0-^+nKE^*?$^uHkxZh}I{#UTQd;X!L+W@jm( zDg@N4+lUqI92o_rNk{3P>1gxAL=&O;x)ZT=q1mk0kLlE$WeWuY_$0`0jY-Kkt zP*|m3AF}Ubd=`<>(Xg0har*_@x2YH}bn0Wk*OZz3*e5;Zc;2uBdnl8?&XjupbkOeNZsNh6pvsq_ydmJI+*z**{I{0K)-;p1~k8cpJXL$^t!-`E}=*4G^-E8>H!LjTPxSx zcF+cS`ommfKMhNSbas^@YbTpH1*RFrBuATUR zt{oFWSk^$xU&kbFQ;MCX22RAN5F6eq9UfR$ut`Jw--p2YX)A*J69m^!oYfj2y7NYcH6&r+0~_sH^c^nzeN1AU4Ga7=FlR{S|Mm~MpzY0$Z+p2W(a={b-pR9EO1Rs zB%KY|@wLcAA@)KXi!d2_BxrkhDn`DT1=Dec}V!okd{$+wK z4E{n8R*xKyci1(CnNdhf$Dp2(Jpof0-0%-38X=Dd9PQgT+w%Lshx9+loPS~MOm%ZT zt%2B2iL_KU_ita%N>xjB!#71_3=3c}o zgeW~^U_ZTJQ2!PqXulQd=3b=XOQhwATK$y(9$#1jOQ4}4?~l#&nek)H(04f(Sr=s| zWv7Lu1=%WGk4FSw^;;!8&YPM)pQDCY9DhU`hMty1@sq1=Tj7bFsOOBZOFlpR`W>-J$-(kezWJj;`?x-v>ev{*8V z8p|KXJPV$HyQr1A(9LVrM47u-XpcrIyO`yWvx1pVYc&?154aneRpLqgx)EMvRaa#|9?Wwqs2+W8n5~79G z(}iCiLk;?enn}ew`HzhG+tu+Ru@T+K5juvZN)wY;x6HjvqD!&!)$$;1VAh~7fg0K| zEha#aN=Yv|3^~YFH}cc38ovVb%L|g@9W6fo(JtT6$fa?zf@Ct88e}m?i)b*Jgc{fl zExfdvw-BYDmH6>(4QMt#p0;FUIQqkhD}aH?a7)_%JtA~soqj{ppP_82yi9kaxuK>~ ze_)Zt>1?q=ZH*kF{1iq9sr*tVuy=u>Zev}!gEZx@O6-fjyu9X00gpIl-fS_pzjpqJ z1yqBmf9NF!jaF<+YxgH6oXBdK)sH(>VZ)1siyA$P<#KDt;8NT*l_0{xit~5j1P)FN zI8hhYKhQ)i z37^aP13B~u65?sg+_@2Kr^iWHN=U;EDSZ@2W2!5ALhGNWXnFBY%7W?1 z=HI9JzQ-pLKZDYTv<0-lt|6c-RwhxZ)mU2Os{bsX_i^@*fKUj8*aDO5pks=qn3Dv6 zwggpKLuyRCTVPwmw1r}B#AS}?X7b837UlXwp~E2|PJw2SGVueL7){Y&z!jL!XN=0i zU^Eig`S2`{+gU$68aRdWx?BZ{sU_f=8sn~>s~M?GU~`fH5kCc; z8ICp+INM3(3{#k32RZdv6b9MQYdZXNuk7ed8;G?S2nT+NZBG=Tar^KFl2SvhW$bGW#kdWL-I)s_IqVnCDDM9fm8g;P;8 z7t4yZn3^*NQfx7SwmkzP$=fwdC}bafQSEF@pd&P8@H#`swGy_rz;Z?Ty5mkS%>m#% zp_!m9e<()sfKiY(nF<1zBz&&`ZlJf6QLvLhl`_``%RW&{+O>Xhp;lwSsyRqGf=RWd zpftiR`={2(siiPAS|p}@q=NhVc0ELprt%=fMXO3B)4ryC2LT(o=sLM7hJC!}T1@)E 
zA3^J$3&1*M6Xq>03FX`R&w*NkrZE?FwU+Muut;>qNhj@bX17ZJxnOlPSZ=Zeiz~T_ zOu#yc3t6ONHB;?|r4w+pI)~KGN;HOGC)txxiUN8#mexj+W(cz%9a4sx|IRG=}ia zuEBuba3AHsV2feqw-3MvuL`I+2|`Ud4~7ZkN=JZ;L20|Oxna5vx1qbIh#k2O4$RQF zo`tL()zxaqibg^GbB+BS5#U{@K;WWQj~GcB1zb}zJkPwH|5hZ9iH2308!>_;%msji zJHSL~s)YHBR=Koa1mLEOHos*`gp=s8KA-C zu0aE+W!#iJ*0xqKm3A`fUGy#O+X+5W36myS>Uh2!R*s$aCU^`K&KKLCCDkejX2p=5 z%o7-fl03x`gaSNyr?3_JLv?2RLS3F*8ub>Jd@^Cc17)v8vYEK4aqo?OS@W9mt%ITJ z9=S2%R8M){CugT@k~~0x`}Vl!svYqX=E)c_oU6o}#Hb^%G1l3BudxA{F*tbjG;W_>=xV73pKY53v%>I)@D36I_@&p$h|Aw zonQS`07z_F#@T-%@-Tb|)7;;anoD_WH>9ewFy(ZcEOM$#Y)8>qi7rCnsH9GO-_7zF zu*C87{Df1P4TEOsnzZ@H%&lvV(3V@;Q!%+OYRp`g05PjY^gL$^$-t0Y>H*CDDs?FZly*oZ&dxvsxaUWF!{em4{A>n@vpXg$dwvt@_rgmHF z-MER`ABa8R-t_H*kv>}CzOpz;!>p^^9ztHMsHL|SRnS<-y5Z*r(_}c4=fXF`l^-i}>e7v!qs_jv zqvWhX^F=2sDNWA9c@P0?lUlr6ecrTKM%pNQ^?*Lq?p-0~?_j50xV%^(+H>sMul#Tw zeciF*1=?a7cI(}352%>LO96pD+?9!fNyl^9v3^v&Y4L)mNGK0FN43&Xf8jUlxW1Bw zyiu2;qW-aGNhs=zbuoxnxiwZ3{PFZM#Kw)9H@(hgX23h(`Wm~m4&TvoZoYp{plb^> z_#?vXcxd>r7K+1HKJvhed>gtK`TAbJUazUWQY6T~t2af%#<+Veyr%7-#*A#@&*;@g58{i|E%6yC_InGXCOd{L0;$)z#?n7M`re zh!kO{6=>7I?*}czyF7_frt#)s1CFJ_XE&VrDA?Dp3XbvF{qsEJgb&OLSNz_5g?HpK z9)8rsr4JN!Af3G9!#Qn(6zaUDqLN(g2g8*M)Djap?WMK9NKlkC)E2|-g|#-rp%!Gz zAHd%`iq|81efi93m3yTBw3g0j#;Yb2X{mhRAI?&KDmbGqou(2xiRNb^sV}%%Wu0?< z?($L>(#BO*)^)rSgyNRni$i`R4v;GhlCZ8$@e^ROX(p=2_v6Y!%^As zu022)fHdv_-~Yu_H6WVPLpHQx!W%^6j)cBhS`O3QBW#x(eX54d&I22op(N59b*&$v zFiSRY6rOc^(dgSV1>a7-5C;(5S5MvKcM2Jm-LD9TGqDpP097%52V+0>Xqq!! zq4e3vj53SE6i8J`XcQB|MZPP8j;PAOnpGnllH6#Ku~vS42xP*Nz@~y%db7Xi8s09P z1)e%8ys6&M8D=Dt6&t`iKG_4X=!kgRQoh%Z`dc&mlOUqXk-k`jKv9@(a^2-Upw>?< zt5*^DV~6Zedbec4NVl($2T{&b)zA@b#dUyd>`2JC0=xa_fIm8{5um zr-!ApXZhC8@=vC2WyxO|!@0Km)h8ep*`^he92$@YwP>VcdoS5OC^s38e#7RPsg4j+ zbVGG}WRSET&ZfrcR(x~k8n1rTP%CnfUNKUonD$P?FtNFF#cn!wEIab-;jU=B1dHK@ z(;(yAQJ`O$sMn>h;pf^8{JISW%d+@v6@CnXh9n5TXGC}?FI9i-D0OMaIg&mAg=0Kn zNJ7oz5*ReJukD55fUsMuaP+H4tDN&V9zfqF@ zr=#ecUk9wu{0;!+gl;3Bw=Vn^)z$ahVhhw)io!na&9}LmWurLb0zubxK=UEnU*{5P z+SP}&*(iBKSO4{alBHaY^)5Q=mZ+2OwIooJ7*Q5XJ+2|q`9#f?6myq!&oz?klihLq z4C)$XP!BNS0G_Z1&TM>?Jk{S~{F3n83ioli=IO6f%wkvCl(RFFw~j0tb{GvXTx>*sB0McY0s&SNvj4+^h`9nJ_wM>F!Uc>X}9PifQekn0sKI2SAJP!a4h z5cyGTuCj3ZBM^&{dRelIlT^9zcfaAuL5Y~bl!ppSf`wZbK$z#6U~rdclk``e+!qhe z6Qspo*%<)eu6?C;Bp<^VuW6JI|Ncvyn+LlSl;Mp22Bl7ARQ0Xc24%29(ZrdsIPw&-=yHQ7_Vle|5h>AST0 zUGX2Zk34vp?U~IHT|;$U86T+UUHl_NE4m|}>E~6q``7hccCaT^#y+?wD##Q%HwPd8 zV3x4L4|qqu`B$4(LXqDJngNy-{&@aFBvVsywt@X^}iH7P%>bR?ciC$I^U-4Foa`YKI^qDyGK7k%E%c_P=yzAi`YnxGA%DeNd++j3*h^ z=rn>oBd0|~lZ<6YvmkKY*ZJlJ;Im0tqgWu&E92eqt;+NYdxx`eS(4Hw_Jb5|yVvBg z*tbdY^!AN;luEyN4VRhS@-_DC{({ziH{&Z}iGElSV~qvT>L-8G%+yEL zX#MFOhj{InyKG=mvW-<1B@c-}x$vA(nU?>S>0*eN#!SLzQ)Ex7fvQ)S4D<8|I#N$3 zT5Ei`Z?cxBODHX8(Xp73v`IsAYC@9b;t}z0wxVuQSY1J^GRwDPN@qbM-ZF48T$GZ< z8WU+;Pqo?{ghI-KZ-i*ydXu`Ep0Xw^McH_KE9J0S7G;x8Fe`DVG?j3Pv=0YzJ}yZR z%2=oqHiUjvuk0~Ca>Kol4CFi0_xQT~;_F?=u+!kIDl-9g`#ZNZ9HCy17Ga1v^Jv9# z{T4Kb1-AzUxq*MutfOWWZgD*HnFfyYg0&e9f(5tZ>krPF6{VikNeHoc{linPPt#Si z&*g>(c54V8rT_AX!J&bNm-!umPvOR}vDai#`CX___J#=zeB*{4<&2WpaDncZsOkp* zsg<%@@rbrMkR_ux9?LsQxzoBa1s%$BBn6vk#{&&zUwcfzeCBJUwFYSF$08qDsB;gWQN*g!p8pxjofWbqNSZOEKOaTx@+* zwdt5*Q47@EOZ~EZL9s?1o?A%9TJT=Ob_13yyugvPg*e&ZU(r6^k4=2+D-@n=Hv5vu zSXG|hM(>h9^zn=eQ=$6`JO&70&2|%V5Lsx>)(%#;pcOfu>*nk_3HB_BNaH$`jM<^S zcSftDU1?nL;jy)+sfonQN}(}gUW?d_ikr*3=^{G)=tjBtEPe>TO|0ddVB zTklrSHiW+!#26frPXQQ(YN8DG$PZo?(po(QUCCf_OJC`pw*uey00%gmH!`WJkrKXj2!#6?`T25mTu9OJp2L8z3! 
z=arrL$ZqxuE{%yV)14Kd>k}j7pxZ6#$Dz8$@WV5p8kTqN<-7W)Q7Gt2{KoOPK_tZ| zf2WG~O5@{qPI+W<4f_;reuFVdO^5`ADC1!JQE|N`s3cq@(0WB!n0uh@*c{=LAd;~} zyGK@hbF-Oo+!nN)@i*O(`@FA#u?o=~e{`4O#5}z&=UkU*50fOrzi11D^&FOqe>wii z?*k+2|EcUs;Gx{!@KBT~>PAwLrIDT7Th=Utu?~?np@t^gFs?zgX=D${RwOY^WGh-+ z+#4$066ISh8eYW#FXWp~S`<*%O^ZuItL1Tyqt8#tZ zY120E;^VG`!lZn&3sPd$RkdHpU#|w+bYV)pJC|SH9g%|5IkxVTQcBA4CL0}$&}ef@ zW^Vtj%M;;_1xxP9x#ex17&4N*{ksO*_4O}xYu(p*JkL#yr}@7b)t5X?%CY<+s5_MJ zuiqt+N_;A(_)%lumoyRFixWa-M7qK_9s6<1X?JDa9fP!+_6u~~M$5L=ipB=7(j#f< zZ34J%=bs549%~_mA(|={uZNs_0?o7;-LBP(ZRnkd{-^|2|=4vUTmtByHL8 zEph`(LSEzQj68a+`d$V<45J7cyv^#|^|%fD#si1Nx!4NW*`l*{->HEWNh6-|g>-=r zXmQ|-i}Ku$ndUeHQ^&ieT!Lf}vf6GaqW9$DJ2NWrqwPY%%4nip$@vK$nRp*_C-v<| zuKz~ZyN&<%!NS26&x?jhy+@awJipMQ-8(X4#Ae5??U<1QMt1l9R=w9fAnEF}NYu$2 z>6}Vkc zIb*A?G*z8^IvibmBKn_u^5&T_1oey0gZS2~obf(#xk=erZGTEdQnt3DMGM+0oPwss zj5zXD;(oWhB_T@~Ig#9@v)AKtXu3>Inmgf@A|-lD-1U>cNyl3h?ADD9)GG4}zUGPk zZzaXe!~Kf?<~@$G?Uql3t8jy9{2!doq4=J}j9ktTxss{p6!9UdjyDERlA*xZ!=Q)KDs5O)phz>Vq3BNGoM(H|=1*Q4$^2fTZw z(%nq1P|5Rt81}SYJpEEzMPl5VJsV5&4e)ZWKDyoZ>1EwpkHx-AQVQc8%JMz;{H~p{=FXV>jIxvm4X*qv52e?Y-f%DJ zxEA165GikEASQ^fH6K#d!Tpu2HP{sFs%E=e$gYd$aj$+xue6N+Wc(rAz~wUsk2`(b z8Kvmyz%bKQxpP}~baG-rwYcYCvkHOi zlkR<=>ZBTU*8RF_d#Bl@zZsRIhx<%~Z@Z=ik z>adw3!DK(8R|q$vy{FTxw%#xliD~6qXmY^7_9kthVPTF~Xy1CfBqbU~?1QmxmU=+k z(ggxvEuA;0e&+ci-zQR{-f7aO{O(Pz_OsEjLh_K>MbvoZ4nxtk5u{g@nPv)cgW_R} z9}EA4K4@z0?7ue}Z(o~R(X&FjejUI2g~08PH1E4w>9o{)S(?1>Z0XMvTb|;&EuyOE zGvWNpYX)Nv<8|a^;1>bh#&znEcl-r!T#pn= z4$?Yudha6F%4b>*8@=BdtXXY4N+`U4Dmx$}>HeVJk-QdTG@t!tVT#0(LeV0gvqyyw z2sEp^9eY0N`u10Tm4n8No&A=)IeEC|gnmEXoNSzu!1<4R<%-9kY_8~5Ej?zRegMn78wuMs#;i&eUA0Zk_RXQ3b&TT} z;SCI=7-FUB@*&;8|n>(_g^HGf3@QODE3LpmX~ELnymQm{Sx9xrKS zK29p~?v@R$0=v6Dr5aW>-!{+h@?Q58|Kz8{{W`%J+lDAdb&M5VHrX_mDY;1-JLnf)ezmPau$)1;=`-FU=-r-83tX=C`S#}GZufju zQ>sXNT0Ny=k@nc%cFnvA_i4SC)?_ORXHq8B4D%el1uPX`c~uG#S1M7C+*MMqLw78E zhY2dI8@+N^qrMI1+;TUda(vGqGSRyU{Fnm`aqrr7bz42c5xsOO-~oZpkzorD1g}Y<6rk&3>PsSGy}W?MtqFky@A(X# zIuNZK0cK?^=;PUAu>j0#HtjbHCV*6?jzA&OoE$*Jlga*}LF`SF?WLhv1O|zqC<>*> zYB;#lsYKx0&kH@BFpW8n*yDcc6?;_zaJs<-jPSkCsSX-!aV=P5kUgF@Nu<{a%#K*F z134Q{9|YX7X(v$62_cY3^G%t~rD>Q0z@)1|zs)vjJ6Jq9;7#Ki`w+eS**En?7;n&7 zu==V3T&eFboN3ZiMx3D8qYc;VjFUk_H-WWCau(VFXSQf~viH0L$gwD$UfFHqNcgN`x}M+YQ6RnN<+@t>JUp#)9YOkqst-Ga?{FsDpEeX0(5v{0J~SEbWiL zXC2}M4?UH@u&|;%0y`eb33ldo4~z-x8zY!oVmV=c+f$m?RfDC35mdQ2E>Pze7KWP- z>!Bh<&57I+O_^s}9Tg^k)h7{xx@0a0IA~GAOt2yy!X%Q$1rt~LbTB6@Du!_0%HV>N zlf)QI1&gvERKwso23mJ!Ou6ZS#zCS5W`gxE5T>C#E|{i<1D35C222I33?Njaz`On7 zi<+VWFP6D{e-{yiN#M|Jgk<44u1TiMI78S5W`Sdb5f+{zu34s{CfWN7a3Cf^@L%!& zN$?|!!9j2c)j$~+R6n#891w-z8(!oBpL2K=+%a$r2|~8-(vQj5_XT`<0Ksf;oP+tz z9CObS!0m)Tgg`K#xBM8B(|Z)Wb&DYL{WTYv`;A=q6~Nnx2+!lTIXtj8J7dZE!P_{z z#f8w6F}^!?^KE#+ZDv+xd5O&3EmomZzsv?>E-~ygGum45fk!SBN&|eo1rKw^?aZJ4 E2O(~oYXATM literal 0 HcmV?d00001 diff --git a/extension/android/benchmark/gradle/wrapper/gradle-wrapper.properties b/extension/android/benchmark/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000000..ba68b6be2f --- /dev/null +++ b/extension/android/benchmark/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Thu Aug 29 23:29:08 PDT 2024 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/extension/android/benchmark/gradlew b/extension/android/benchmark/gradlew new file mode 100755 index 0000000000..4f906e0c81 --- /dev/null +++ b/extension/android/benchmark/gradlew @@ -0,0 +1,185 @@ +#!/usr/bin/env sh + 
+# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? 
-ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/extension/android/benchmark/gradlew.bat b/extension/android/benchmark/gradlew.bat new file mode 100644 index 0000000000..ac1b06f938 --- /dev/null +++ b/extension/android/benchmark/gradlew.bat @@ -0,0 +1,89 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/extension/android/benchmark/settings.gradle.kts b/extension/android/benchmark/settings.gradle.kts new file mode 100644 index 0000000000..f2f5ac42a2 --- /dev/null +++ b/extension/android/benchmark/settings.gradle.kts @@ -0,0 +1,17 @@ +pluginManagement { + repositories { + google() + mavenCentral() + gradlePluginPortal() + } +} +dependencyResolutionManagement { + repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS) + repositories { + google() + mavenCentral() + } +} + +rootProject.name = "MiniBench" +include(":app") diff --git a/extension/android/jni/jni_layer.cpp b/extension/android/jni/jni_layer.cpp index c70912a545..79c6ebc516 100644 --- a/extension/android/jni/jni_layer.cpp +++ b/extension/android/jni/jni_layer.cpp @@ -18,6 +18,7 @@ #include "jni_layer_constants.h" #include +#include #include #include #include @@ -56,7 +57,7 @@ void et_pal_emit_log_message( using namespace torch::executor; -namespace executorch_jni { +namespace executorch::extension { class TensorHybrid : public facebook::jni::HybridClass { public: constexpr static const char* kJavaDescriptor = @@ -352,19 +353,26 @@ class ExecuTorchJni : public facebook::jni::HybridClass { return jresult; } + jint forward_ones() { + auto&& load_result = module_->load_method("forward"); + auto&& buf = prepare_input_tensors(*(module_->methods_["forward"].method)); + auto&& result = module_->methods_["forward"].method->execute(); + return (jint)result; + } + static void registerNatives() { registerHybrid({ makeNativeMethod("initHybrid", ExecuTorchJni::initHybrid), makeNativeMethod("forward", ExecuTorchJni::forward), makeNativeMethod("execute", ExecuTorchJni::execute), makeNativeMethod("loadMethod", ExecuTorchJni::load_method), + makeNativeMethod("forwardOnes", ExecuTorchJni::forward_ones), }); } }; - -} // namespace executorch_jni +} // namespace executorch::extension JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void*) { return facebook::jni::initialize( - vm, [] { executorch_jni::ExecuTorchJni::registerNatives(); }); + vm, [] { executorch::extension::ExecuTorchJni::registerNatives(); }); } diff --git a/extension/android/jni/jni_layer_constants.h b/extension/android/jni/jni_layer_constants.h index ac52b3a650..43946ffab6 100644 --- a/extension/android/jni/jni_layer_constants.h +++ b/extension/android/jni/jni_layer_constants.h @@ -10,7 +10,7 @@ #include -namespace executorch_jni { +namespace executorch::extension { constexpr static int kTensorDTypeUInt8 = 0; constexpr static int kTensorDTypeInt8 = 1; @@ -93,4 +93,4 @@ const std::unordered_map java_dtype_to_scalar_type = { {kTensorDTypeBits16, ScalarType::Bits16}, }; -} // namespace executorch_jni +} // namespace executorch::extension diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp index 4f67d04396..dda9ece589 100644 --- a/extension/android/jni/jni_layer_llama.cpp +++ b/extension/android/jni/jni_layer_llama.cpp @@ -165,7 +165,7 @@ class ExecuTorchLlamaJni images.push_back(image_runner); } multi_modal_runner_->generate( - images, + std::move(images), prompt->toStdString(), seq_len, [callback](std::string result) { callback->onResult(result); }, diff --git a/extension/android/src/main/java/org/pytorch/executorch/Module.java b/extension/android/src/main/java/org/pytorch/executorch/Module.java index 5e57174114..981cfcd8c6 100644 --- a/extension/android/src/main/java/org/pytorch/executorch/Module.java +++ 
b/extension/android/src/main/java/org/pytorch/executorch/Module.java @@ -79,6 +79,12 @@ public static Module load(final String modelPath) { * @return return value from the 'forward' method. */ public EValue[] forward(EValue... inputs) { + if (inputs.length == 0) { + // forward default args (ones) + mNativePeer.forwardOnes(); + // discard the return value + return null; + } return mNativePeer.forward(inputs); } diff --git a/extension/android/src/main/java/org/pytorch/executorch/NativePeer.java b/extension/android/src/main/java/org/pytorch/executorch/NativePeer.java index 865c503765..6eadbf0509 100644 --- a/extension/android/src/main/java/org/pytorch/executorch/NativePeer.java +++ b/extension/android/src/main/java/org/pytorch/executorch/NativePeer.java @@ -13,6 +13,7 @@ import com.facebook.soloader.nativeloader.NativeLoader; import java.util.Map; +/** Interface for the native peer object for entry points to the Module */ class NativePeer { static { // Loads libexecutorch.so from jniLibs @@ -29,16 +30,33 @@ private static native HybridData initHybrid( mHybridData = initHybrid(moduleAbsolutePath, extraFiles, loadMode); } + /** Clean up the native resources associated with this instance */ public void resetNative() { mHybridData.resetNative(); } + /** Run a "forward" call with the given inputs */ @DoNotStrip public native EValue[] forward(EValue... inputs); + /** + * Run a "forward" call with the sample inputs (ones) to test a module + * + * @return the outputs of the forward call + * @apiNote This is experimental and test-only API + */ + @DoNotStrip + public native int forwardOnes(); + + /** Run an arbitrary method on the module */ @DoNotStrip public native EValue[] execute(String methodName, EValue... inputs); + /** + * Load a method on this module. + * + * @return the Error code if there was an error loading the method + */ @DoNotStrip public native int loadMethod(String methodName); } diff --git a/extension/llm/custom_ops/TARGETS b/extension/llm/custom_ops/TARGETS index ff3fde6e2c..8fe776ab09 100644 --- a/extension/llm/custom_ops/TARGETS +++ b/extension/llm/custom_ops/TARGETS @@ -14,7 +14,7 @@ runtime.python_test( "test_sdpa_with_kv_cache.py", ], preload_deps = [ - ":custom_ops_aot_lib", + ":custom_ops_aot_lib_mkl_noomp", ":custom_ops_aot_py", ], deps = [ diff --git a/extension/llm/custom_ops/targets.bzl b/extension/llm/custom_ops/targets.bzl index b90b636f7c..ded25054ac 100644 --- a/extension/llm/custom_ops/targets.bzl +++ b/extension/llm/custom_ops/targets.bzl @@ -6,54 +6,58 @@ def define_common_targets(): The directory containing this targets.bzl file should also contain both TARGETS and BUCK files that call this function. 
""" - runtime.cxx_library( - name = "custom_ops", - srcs = ["op_sdpa.cpp", "op_fallback.cpp"], - exported_headers = ["op_sdpa.h", "op_fallback.h"], - exported_deps = [ - "//executorch/runtime/kernel:kernel_includes", - "//executorch/kernels/portable/cpu:scalar_utils", - "//executorch/kernels/optimized:libblas", - "//executorch/kernels/optimized:libvec", - "//executorch/extension/kernel_util:kernel_util", - "//executorch/extension/parallel:thread_parallel", - "//executorch/extension/threadpool:threadpool", - ], - compiler_flags = ["-Wno-missing-prototypes", "-Wno-global-constructors"], - visibility = [ - "//executorch/...", - "//executorch/extension/llm/custom_ops/...", - "@EXECUTORCH_CLIENTS", - ], - # @lint-ignore BUCKLINT link_whole - link_whole = True, - force_static = True, - ) + for mkl_dep in ["", "_mkl_noomp"]: + runtime.cxx_library( + name = "custom_ops" + mkl_dep, + srcs = ["op_sdpa.cpp", "op_fallback.cpp"], + exported_headers = ["op_sdpa.h", "op_fallback.h"], + exported_deps = [ + "//executorch/runtime/kernel:kernel_includes", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/kernels/optimized:libblas{}".format(mkl_dep), + "//executorch/kernels/optimized:libvec", + "//executorch/extension/kernel_util:kernel_util", + "//executorch/extension/parallel:thread_parallel", + "//executorch/extension/threadpool:threadpool", + ], + compiler_flags = ["-Wno-missing-prototypes", "-Wno-global-constructors"], + visibility = [ + "//executorch/...", + "//executorch/extension/llm/custom_ops/...", + "@EXECUTORCH_CLIENTS", + ], + # @lint-ignore BUCKLINT link_whole + link_whole = True, + force_static = True, + ) - runtime.cxx_library( - name = "custom_ops_aot_lib", - srcs = [ - "op_sdpa_aot.cpp", - ], - visibility = [ - "//executorch/...", - "@EXECUTORCH_CLIENTS", - ], - external_deps = [ - "libtorch", - ], - deps = [ - ":custom_ops", - "//executorch/extension/aten_util:aten_bridge", - ], - ) + runtime.cxx_library( + name = "custom_ops_aot_lib" + mkl_dep, + srcs = [ + "op_sdpa_aot.cpp", + ], + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + external_deps = [ + "libtorch", + ], + deps = [ + ":custom_ops" + mkl_dep, + "//executorch/extension/aten_util:aten_bridge", + ], + ) runtime.python_library( name = "custom_ops_aot_py", srcs = [ "sdpa_with_kv_cache.py", ], - visibility = ["//executorch/..."], + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], deps = [ "//caffe2:torch", ], diff --git a/extension/llm/custom_ops/test_sdpa_with_kv_cache.py b/extension/llm/custom_ops/test_sdpa_with_kv_cache.py index a1b36e688f..dd63c68f13 100644 --- a/extension/llm/custom_ops/test_sdpa_with_kv_cache.py +++ b/extension/llm/custom_ops/test_sdpa_with_kv_cache.py @@ -392,17 +392,50 @@ def setUp(self): self.max_seq_len = 2048 self.setup_caches() + def _scale_tensor(self, tensor, min_value, max_value, scale=True): + normalized_tensor = (tensor - tensor.min()) / (tensor.max() - tensor.min()) + + scaled_tensor = normalized_tensor * (max_value - min_value) + min_value + + return scaled_tensor if scale else tensor + def _test_sdpa_common( - self, n_heads_kv, n_heads_q, head_dim, max_seq_len, seq_len, next_iter_seq_len=1 + self, + n_heads_kv, + n_heads_q, + head_dim, + max_seq_len, + seq_len, + next_iter_seq_len=1, + scale_tensors=False, ): + # Range arbitrarily chosen to reproduce a numerical error on x86 in some of the long context tests + tensor_scale_max = 20 + tensor_scale_min = -20 self.n_heads_kv = n_heads_kv self.n_heads_q = n_heads_q self.head_dim = head_dim 
self.max_seq_len = max_seq_len self.setup_caches() - q = torch.rand((1, seq_len, self.n_heads_kv, self.head_dim)) - k = torch.rand((1, seq_len, self.n_heads_kv, self.head_dim)) - v = torch.rand((1, seq_len, self.n_heads_kv, self.head_dim)) + q = self._scale_tensor( + torch.rand((1, seq_len, self.n_heads_kv, self.head_dim)), + tensor_scale_max, + tensor_scale_min, + scale_tensors, + ) + k = self._scale_tensor( + torch.rand((1, seq_len, self.n_heads_kv, self.head_dim)), + tensor_scale_max, + tensor_scale_min, + scale_tensors, + ) + v = self._scale_tensor( + torch.rand((1, seq_len, self.n_heads_kv, self.head_dim)), + tensor_scale_max, + tensor_scale_min, + scale_tensors, + ) + start_pos = 0 attn_mask = self.mask[start_pos : start_pos + seq_len, :] attn_mask = attn_mask[:, : start_pos + seq_len] @@ -412,11 +445,27 @@ def _test_sdpa_common( op_output = torch.ops.llama.sdpa_with_kv_cache( q, k, v, self.k_cache, self.v_cache, start_pos, seq_len, None, 0, True ) - self.assertTrue(torch.allclose(ref_output, op_output)) + self.assertTrue(torch.allclose(ref_output, op_output, atol=1e-6)) + + q = self._scale_tensor( + torch.rand((1, next_iter_seq_len, self.n_heads_kv, self.head_dim)), + tensor_scale_max, + tensor_scale_min, + scale_tensors, + ) + k = self._scale_tensor( + torch.rand((1, next_iter_seq_len, self.n_heads_kv, self.head_dim)), + tensor_scale_max, + tensor_scale_min, + scale_tensors, + ) + v = self._scale_tensor( + torch.rand((1, next_iter_seq_len, self.n_heads_kv, self.head_dim)), + tensor_scale_max, + tensor_scale_min, + scale_tensors, + ) - q = torch.rand((1, next_iter_seq_len, self.n_heads_kv, self.head_dim)) - k = torch.rand((1, next_iter_seq_len, self.n_heads_kv, self.head_dim)) - v = torch.rand((1, next_iter_seq_len, self.n_heads_kv, self.head_dim)) start_pos = seq_len seq_len = q.size(1) attn_mask = self.mask[start_pos : start_pos + seq_len, :] @@ -427,7 +476,7 @@ def _test_sdpa_common( op_output = torch.ops.llama.sdpa_with_kv_cache( q, k, v, self.k_cache, self.v_cache, start_pos, seq_len, None, 0, True ) - self.assertTrue(torch.allclose(ref_output, op_output)) + self.assertTrue(torch.allclose(ref_output, op_output, atol=1e-6)) class SDPATestForLargeSeqLength(SDPATestCommon): @@ -438,7 +487,9 @@ def test_sdpa_with_cache_seq_len_130(self): head_dim = 128 max_seq_len = 2048 seq_len = 130 - self._test_sdpa_common(n_heads_kv, n_heads_q, head_dim, max_seq_len, seq_len) + self._test_sdpa_common( + n_heads_kv, n_heads_q, head_dim, max_seq_len, seq_len, True + ) def test_sdpa_with_cache_seq_len_small(self): n_heads_kv = 4 @@ -462,7 +513,9 @@ def test_sdpa_with_cache_seq_len_130_gqa(self): head_dim = 128 max_seq_len = 2048 seq_len = 130 - self._test_sdpa_common(n_heads_kv, n_heads_q, head_dim, max_seq_len, seq_len) + self._test_sdpa_common( + n_heads_kv, n_heads_q, head_dim, max_seq_len, seq_len, True + ) def test_sdpa_with_cache_seq_len_llava_example_gqa(self): n_heads_kv = 16 @@ -483,7 +536,13 @@ def test_sdpa_with_cache_seq_len_130(self): seq_len = 130 next_iter_seq_len = 17 self._test_sdpa_common( - n_heads_kv, n_heads_q, head_dim, max_seq_len, seq_len, next_iter_seq_len + n_heads_kv, + n_heads_q, + head_dim, + max_seq_len, + seq_len, + next_iter_seq_len, + True, ) def test_sdpa_with_cache_seq_len_llava_example(self): @@ -505,7 +564,13 @@ def test_sdpa_with_cache_seq_len_130_gqa(self): seq_len = 130 next_iter_seq_len = 33 self._test_sdpa_common( - n_heads_kv, n_heads_q, head_dim, max_seq_len, seq_len, next_iter_seq_len + n_heads_kv, + n_heads_q, + head_dim, + max_seq_len, + seq_len, + 
next_iter_seq_len, + True, ) def test_sdpa_with_cache_seq_len_llava_example_gqa(self): diff --git a/extension/llm/runner/multimodal_runner.h b/extension/llm/runner/multimodal_runner.h index 745f086f80..dbffac46fc 100644 --- a/extension/llm/runner/multimodal_runner.h +++ b/extension/llm/runner/multimodal_runner.h @@ -56,7 +56,7 @@ class MultimodalRunner { virtual bool is_loaded() = 0; virtual ::executorch::runtime::Error load() = 0; virtual ::executorch::runtime::Error generate( - std::vector& images, + std::vector images, const std::string& prompt, int32_t seq_len = 1024, std::function token_callback = {}, diff --git a/extension/module/module.h b/extension/module/module.h index 689fef5cd2..8ae7e43655 100644 --- a/extension/module/module.h +++ b/extension/module/module.h @@ -358,6 +358,8 @@ class Module final { std::unique_ptr<::executorch::runtime::MemoryAllocator> temp_allocator_; std::unique_ptr<::executorch::runtime::EventTracer> event_tracer_; std::unordered_map methods_; + + friend class ExecuTorchJni; }; } // namespace extension diff --git a/kernels/optimized/lib_defs.bzl b/kernels/optimized/lib_defs.bzl index 5af9b423ad..16ce446df4 100644 --- a/kernels/optimized/lib_defs.bzl +++ b/kernels/optimized/lib_defs.bzl @@ -1,4 +1,5 @@ load("@fbsource//tools/build_defs:default_platform_defs.bzl", "DEVSERVER_PLATFORM_REGEX") +load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native") load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") # Because vec exists as a collection of header files, compile and preprocessor @@ -99,44 +100,64 @@ def define_libs(): ], ) - runtime.cxx_library( - name = "libblas", - srcs = native.glob([ - "blas/**/*.cpp", - ]), - exported_headers = native.glob([ - "blas/**/*.h", - ]), - header_namespace = "executorch/kernels/optimized", - visibility = [ - "//executorch/...", - "@EXECUTORCH_CLIENTS", - ], - fbandroid_platform_preprocessor_flags = [ - ( - "^android-arm64.*$", - [ - "-DET_BUILD_WITH_BLAS", - ], - ), - ], - fbandroid_platform_deps = [ - ( - "^android-arm64.*$", - [ - "fbsource//third-party/openblas:openblas", - ], - ), - ], - fbobjc_exported_preprocessor_flags = [ - "-DET_BUILD_WITH_BLAS", - "-DET_BUILD_FOR_APPLE", - ], - fbobjc_frameworks = [ - "Accelerate", - ], - exported_deps = [ - "//executorch/kernels/optimized:libutils", - "//executorch/runtime/core/exec_aten:lib", + # OSS doesn't have ovr_config//os:linux-x86_64 + fb_native.config_setting( + name = "linux-x86_64", + constraint_values = [ + "ovr_config//os/constraints:linux", + "ovr_config//cpu/constraints:x86_64", ], ) + + for libblas_name, mkl_dep in [("libblas", "fbsource//third-party/mkl:mkl_lp64_omp"), ("libblas_mkl_noomp", "fbsource//third-party/mkl:mkl")]: + runtime.cxx_library( + name = libblas_name, + srcs = native.glob([ + "blas/**/*.cpp", + ]), + exported_headers = native.glob([ + "blas/**/*.h", + ]), + header_namespace = "executorch/kernels/optimized", + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + preprocessor_flags = select({ + ":linux-x86_64": [ + "-DET_BUILD_WITH_BLAS", + ] if not runtime.is_oss else [], + "DEFAULT": [], + }), + fbandroid_platform_preprocessor_flags = [ + ( + "^android-arm64.*$", + [ + "-DET_BUILD_WITH_BLAS", + ], + ), + ], + fbandroid_platform_deps = [ + ( + "^android-arm64.*$", + [ + "fbsource//third-party/openblas:openblas", + ], + ), + ], + fbobjc_exported_preprocessor_flags = [ + "-DET_BUILD_WITH_BLAS", + "-DET_BUILD_FOR_APPLE", + ], + fbobjc_frameworks = [ + "Accelerate", + ], + deps = select({ + 
":linux-x86_64": [mkl_dep] if not runtime.is_oss else [], + "DEFAULT": [], + }), + exported_deps = [ + "//executorch/kernels/optimized:libutils", + "//executorch/runtime/core/exec_aten:lib", + ], + ) diff --git a/kernels/portable/CMakeLists.txt b/kernels/portable/CMakeLists.txt index eb3cedd5b3..885c509246 100644 --- a/kernels/portable/CMakeLists.txt +++ b/kernels/portable/CMakeLists.txt @@ -38,12 +38,11 @@ list(FILTER _portable_kernels__srcs EXCLUDE REGEX "test/*.cpp") list(FILTER _portable_kernels__srcs EXCLUDE REGEX "codegen") # Generate C++ bindings to register kernels into both PyTorch (for AOT) and # Executorch (for runtime). Here select all ops in functions.yaml -set(_yaml "${CMAKE_CURRENT_LIST_DIR}/functions.yaml") +set(_yaml "${CMAKE_CURRENT_SOURCE_DIR}/functions.yaml") gen_selected_ops(LIB_NAME "portable_ops_lib" OPS_SCHEMA_YAML "${_yaml}") # Expect gen_selected_ops output file to be selected_operators.yaml generate_bindings_for_kernels( - LIB_NAME "portable_ops_lib" FUNCTIONS_YAML - ${CMAKE_CURRENT_SOURCE_DIR}/functions.yaml + LIB_NAME "portable_ops_lib" FUNCTIONS_YAML "${_yaml}" ) message("Generated files ${gen_command_sources}") diff --git a/kernels/portable/cpu/op_abs.cpp b/kernels/portable/cpu/op_abs.cpp index 0dd925a0e2..9c2c219832 100644 --- a/kernels/portable/cpu/op_abs.cpp +++ b/kernels/portable/cpu/op_abs.cpp @@ -28,6 +28,8 @@ Tensor& abs_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { "Failed to resize output tensor."); ET_KERNEL_CHECK(ctx, tensors_have_same_dtype(in, out), InvalidArgument, out); + ET_KERNEL_CHECK( + ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out); ET_SWITCH_REAL_TYPES(in.scalar_type(), ctx, "abs.out", CTYPE, [&] { apply_unary_map_fn( diff --git a/kernels/test/TestUtil.h b/kernels/test/TestUtil.h index ed72dbc412..8d782d3c2a 100644 --- a/kernels/test/TestUtil.h +++ b/kernels/test/TestUtil.h @@ -30,6 +30,22 @@ #define ET_EXPECT_KERNEL_FAILURE_WITH_MSG(_context, _statement, _matcher) \ EXPECT_ANY_THROW(_statement) +#define ET_TEST_OP_SUPPORTS_MEMORY_FORMATS( \ + tf, op, input_contiguous, expected_contiguous, channels_last_support) \ + Tensor input_channels_last = tf.channels_last_like(input_contiguous); \ + Tensor expected_channel_last = tf.channels_last_like(expected_contiguous); \ + \ + Tensor output_contiguous = tf.zeros_like(expected_contiguous); \ + Tensor output_channels_last = tf.channels_last_like(output_contiguous); \ + \ + Tensor ret = op(input_channels_last, output_channels_last); \ + if (channels_last_support) { \ + EXPECT_TENSOR_EQ(output_channels_last, expected_channel_last); \ + } else { \ + EXPECT_TENSOR_NE(output_channels_last, expected_channel_last); \ + } \ + EXPECT_TENSOR_EQ(output_channels_last, ret); + #else #define ET_EXPECT_KERNEL_FAILURE(_context, _statement) \ @@ -52,6 +68,26 @@ } \ } while (false) +#define ET_TEST_OP_SUPPORTS_MEMORY_FORMATS( \ + tf, op, input_contiguous, expected_contiguous, channels_last_support) \ + Tensor input_channels_last = tf.channels_last_like(input_contiguous); \ + Tensor expected_channel_last = tf.channels_last_like(expected_contiguous); \ + \ + Tensor output_contiguous = tf.zeros_like(expected_contiguous); \ + Tensor output_channels_last = tf.channels_last_like(output_contiguous); \ + \ + Tensor ret = op(input_channels_last, output_channels_last); \ + if (channels_last_support) { \ + EXPECT_TENSOR_EQ(output_channels_last, expected_channel_last); \ + } else { \ + EXPECT_TENSOR_NE(output_channels_last, expected_channel_last); \ + } \ + EXPECT_TENSOR_EQ(output_channels_last, 
ret); \ + ET_EXPECT_KERNEL_FAILURE( \ + context_, op(input_channels_last, output_contiguous)); \ + ET_EXPECT_KERNEL_FAILURE( \ + context_, op(input_contiguous, output_channels_last)); + #endif // USE_ATEN_LIB /* diff --git a/kernels/test/op_abs_test.cpp b/kernels/test/op_abs_test.cpp index b54cd97156..f596d586d9 100644 --- a/kernels/test/op_abs_test.cpp +++ b/kernels/test/op_abs_test.cpp @@ -38,3 +38,28 @@ TEST_F(OpAbsTest, SanityCheck) { EXPECT_TENSOR_EQ(out, ret); EXPECT_TENSOR_EQ(out, expected); } + +TEST_F(OpAbsTest, MemoryFormatCheck) { + TensorFactory tf; + + std::vector sizes = {2, 3, 1, 5}; + + Tensor input_contiguous = + tf.make(sizes, {0.8737, 0.5359, 0.3743, -0.3040, -0.7800, -0.2306, + -0.7684, -0.5364, 0.3478, -0.3289, 0.0829, 0.2939, + -0.8211, 0.8572, -0.0802, 0.9252, -0.2093, 0.9013, + -0.4197, 0.3987, -0.5291, -0.5567, 0.2691, 0.7819, + -0.8009, -0.4286, -0.9299, 0.2143, 0.2565, -0.5701}); + Tensor expected_contiguous = tf.make( + sizes, {0.8737, 0.5359, 0.3743, 0.3040, 0.7800, 0.2306, 0.7684, 0.5364, + 0.3478, 0.3289, 0.0829, 0.2939, 0.8211, 0.8572, 0.0802, 0.9252, + 0.2093, 0.9013, 0.4197, 0.3987, 0.5291, 0.5567, 0.2691, 0.7819, + 0.8009, 0.4286, 0.9299, 0.2143, 0.2565, 0.5701}); + + ET_TEST_OP_SUPPORTS_MEMORY_FORMATS( + tf, + op_abs_out, + input_contiguous, + expected_contiguous, + /*channels_last_support=*/true); +} diff --git a/runtime/core/exec_aten/testing_util/tensor_factory.h b/runtime/core/exec_aten/testing_util/tensor_factory.h index 8f39cc9911..3045af5581 100644 --- a/runtime/core/exec_aten/testing_util/tensor_factory.h +++ b/runtime/core/exec_aten/testing_util/tensor_factory.h @@ -3,8 +3,10 @@ #pragma once #include +#include #include +#include #include #include #include @@ -54,7 +56,7 @@ inline size_t sizes_to_numel(const std::vector& sizes) { inline bool check_strides( const std::vector sizes, - const std::vector strides) { + const std::vector strides) { if (sizes.size() != strides.size()) { // The length of stride vector shall equal to size vector. return false; @@ -147,14 +149,14 @@ inline bool check_dim_order( return true; } -inline std::vector strides_from_dim_order( +inline std::vector strides_from_dim_order( const std::vector& sizes, const std::vector& dim_order) { bool legal = check_dim_order(sizes, dim_order); ET_CHECK_MSG(legal, "The input dim_order variable is illegal."); size_t ndim = sizes.size(); - std::vector strides(ndim); + std::vector strides(ndim); strides[dim_order[ndim - 1]] = 1; for (int i = ndim - 2; i >= 0; --i) { uint8_t cur_dim = dim_order[i]; @@ -258,7 +260,7 @@ class TensorFactory { at::Tensor make( const std::vector& sizes, const std::vector& data, - const std::vector strides = {}, + const std::vector strides = {}, ET_UNUSED TensorShapeDynamism dynamism = TensorShapeDynamism::DYNAMIC_UNBOUND) { auto expected_numel = internal::sizes_to_numel(sizes); @@ -344,6 +346,72 @@ class TensorFactory { sizes, data, internal::channels_last_dim_order(sizes.size()), dynamism); } + /** + * Given data in contiguous memory format, returns a new Tensor with the + * specified shape and the same data but in channels last memory format. + * + * @param[in] sizes The sizes of the dimensions of the Tensor. + * @param[in] data The data in contiguous memory format that the Tensor should + * be initialized with. The size of this vector must be equal to the product + * of the elements of `sizes`. + * + * @return A new Tensor with the specified shape and data in channls last + * memory format. 
+ */ + at::Tensor channels_last_like( + const at::Tensor& input, + TensorShapeDynamism dynamism = TensorShapeDynamism::STATIC) { + ET_CHECK_MSG( + input.sizes().size() == 4, "Only 4D tensors can be channels last"); + + const std::vector sizes( + input.sizes().begin(), input.sizes().end()); + + std::vector contiguous_dim_order(sizes.size()); + for (uint8_t i = 0; i < sizes.size(); i++) { + contiguous_dim_order[i] = i; + } + std::vector contiguous_strides = + internal::strides_from_dim_order(sizes, contiguous_dim_order); + + for (int32_t i = 0; i < input.dim(); i++) { + ET_CHECK_MSG( + input.strides()[i] == contiguous_strides[i], + "Input tensor is not contiguous"); + } + + int32_t N = sizes[0]; + int32_t C = sizes[1]; + int32_t H = sizes[2]; + int32_t W = sizes[3]; + + std::vector contiguous_data( + input.data_ptr(), input.data_ptr() + input.numel()); + std::vector channels_last_data( + N * C * H * W); // Create a new blob with the same total size to contain + // channels_last data + for (int32_t n = 0; n < N; ++n) { + for (int32_t c = 0; c < C; ++c) { + for (int32_t h = 0; h < H; ++h) { + for (int32_t w = 0; w < W; ++w) { + // Calculate the index in the original blob + int32_t old_index = ((n * C + c) * H + h) * W + w; + // Calculate the index in the new blob + int32_t new_index = ((n * H + h) * W + w) * C + c; + // Copy the data + channels_last_data[new_index] = contiguous_data[old_index]; + } + } + } + } + + return make_with_dimorder( + sizes, + channels_last_data, + internal::channels_last_dim_order(sizes.size()), + dynamism); + } + /** * Returns a new Tensor with the specified shape, containing contiguous * data will all elements set to `value`. @@ -459,14 +527,13 @@ class TensorFactory { */ at::Tensor empty_strided( const std::vector& sizes, - const std::vector& strides, + const std::vector& strides, ET_UNUSED TensorShapeDynamism dynamism = TensorShapeDynamism::DYNAMIC_UNBOUND) { auto sizes64 = vec_32_to_64(sizes); - auto strides64 = vec_32_to_64(strides); return at::empty_strided( sizes64, - strides64, + strides, DTYPE, /*layout_opt=*/at::Layout::Strided, /*device_opt=*/at::Device(at::DeviceType::CPU), @@ -666,7 +733,7 @@ class TensorFactory { torch::executor::Tensor make( const std::vector& sizes, const std::vector& data, - const std::vector strides = {}, + const std::vector strides = {}, TensorShapeDynamism dynamism = TensorShapeDynamism::STATIC) { std::vector default_strides; // Generate strides from the tensor dimensions, assuming contiguous data if @@ -746,7 +813,7 @@ class TensorFactory { /** * Returns a new Tensor with the specified shape and data in channels last - * memory layout. + * memory format. * * @param[in] sizes The sizes of the dimensions of the Tensor. * @param[in] data The data that the Tensor should be initialized with. The @@ -764,6 +831,60 @@ class TensorFactory { sizes, data, internal::channels_last_dim_order(sizes.size()), dynamism); } + /** + * Given data in contiguous memory format, returns a new Tensor with the + * specified shape and the same data but in channels last memory format. + * + * @param[in] sizes The sizes of the dimensions of the Tensor. + * @param[in] data The data in contiguous memory format that the Tensor should + * be initialized with. The size of this vector must be equal to the product + * of the elements of `sizes`. + * + * @return A new Tensor with the specified shape and data in channls last + * memory format. 
+ */ + torch::executor::Tensor channels_last_like( + const torch::executor::Tensor& input, + TensorShapeDynamism dynamism = TensorShapeDynamism::STATIC) { + const std::vector sizes( + input.sizes().begin(), input.sizes().end()); + + ET_CHECK_MSG(sizes.size() == 4, "Only 4D tensors can be channels last"); + ET_CHECK_MSG( + is_contiguous_dim_order(input.dim_order().data(), input.dim()) == true, + "Input tensor is not contiguous"); + int32_t N = sizes[0]; + int32_t C = sizes[1]; + int32_t H = sizes[2]; + int32_t W = sizes[3]; + + std::vector contiguous_data( + input.data_ptr(), input.data_ptr() + input.numel()); + std::vector channels_last_data( + N * C * H * W); // Create a new blob with the same total size to contain + // channels_last data + for (int32_t n = 0; n < N; ++n) { + for (int32_t c = 0; c < C; ++c) { + for (int32_t h = 0; h < H; ++h) { + for (int32_t w = 0; w < W; ++w) { + // Calculate the index in the original blob + int32_t old_index = ((n * C + c) * H + h) * W + w; + // Calculate the index in the new blob + int32_t new_index = ((n * H + h) * W + w) * C + c; + // Copy the data + channels_last_data[new_index] = contiguous_data[old_index]; + } + } + } + } + + return make_with_dimorder( + sizes, + channels_last_data, + internal::channels_last_dim_order(sizes.size()), + dynamism); + } + /** * Returns a new Tensor with the specified shape, containing contiguous data * will all elements set to `value`. @@ -799,7 +920,20 @@ class TensorFactory { /** * Returns a new Tensor with the specified shape, containing contiguous data - * with all `0` elements. + * in channels last memory format with all `0` elements. + * + * @param[in] sizes The sizes of the dimensions of the Tensor. + * @return A new Tensor with the specified shape. + */ + torch::executor::Tensor zeros_channels_last( + const std::vector& sizes, + TensorShapeDynamism dynamism = TensorShapeDynamism::STATIC) { + return full_channels_last(sizes, 0, dynamism); + } + + /** + * Returns a new Tensor with the specified shape, containing contiguous data + * in contiguous memory format with all `0` elements. * * @param[in] sizes The sizes of the dimensions of the Tensor. * @return A new Tensor with the specified shape. @@ -878,7 +1012,7 @@ class TensorFactory { std::vector sizes_; std::vector data_; std::vector dim_order_; - std::vector strides_; + std::vector strides_; torch::executor::TensorImpl impl_; }; diff --git a/runtime/core/exec_aten/testing_util/test/tensor_factory_test.cpp b/runtime/core/exec_aten/testing_util/test/tensor_factory_test.cpp index a2bc36f481..8681e9553a 100644 --- a/runtime/core/exec_aten/testing_util/test/tensor_factory_test.cpp +++ b/runtime/core/exec_aten/testing_util/test/tensor_factory_test.cpp @@ -449,7 +449,7 @@ TEST_F(TensorFactoryTest, MakeStridedDataIsCopied) { // Create two tensors using the same input data and strided vector. 
std::vector data = {1, 2, 3, 4}; - std::vector strides = {1, 2}; + std::vector strides = {1, 2}; Tensor t1 = tf.make(/*sizes=*/{2, 2}, data, strides); Tensor t2 = tf.make(/*sizes=*/{2, 2}, data, strides); diff --git a/runtime/core/exec_aten/util/tensor_util.h b/runtime/core/exec_aten/util/tensor_util.h index b18cd349a6..4dcb0ef9f6 100644 --- a/runtime/core/exec_aten/util/tensor_util.h +++ b/runtime/core/exec_aten/util/tensor_util.h @@ -235,8 +235,9 @@ */ #define ET_CHECK_CONTIGUOUS(a__) \ ({ \ - const ::exec_aten::ArrayRef strides = a__.strides(); \ - const ::exec_aten::ArrayRef sizes = a__.sizes(); \ + const ::exec_aten::ArrayRef strides = \ + a__.strides(); \ + const ::exec_aten::ArrayRef sizes = a__.sizes(); \ ET_CHECK_MSG( \ strides[strides.size() - 1] == 1, \ "The stride of the last dimension shall be 1 for contiguous tensor, " \ @@ -267,8 +268,10 @@ "Two tensors shall have same number of strides, but not %zu and %zu.", \ a__.dim(), \ b__.dim()); \ - const ::exec_aten::ArrayRef a_strides = a__.strides(); \ - const ::exec_aten::ArrayRef b_strides = b__.strides(); \ + const ::exec_aten::ArrayRef a_strides = \ + a__.strides(); \ + const ::exec_aten::ArrayRef b_strides = \ + b__.strides(); \ for (size_t i = 0; i < a__.dim(); i++) { \ ET_CHECK_MSG( \ a_strides[i] == b_strides[i], \ @@ -276,8 +279,8 @@ "but now is %d and %d.", \ i, \ i, \ - a_strides[i], \ - b_strides[i]); \ + (int32_t)a_strides[i], \ + (int32_t)b_strides[i]); \ } \ }) @@ -295,9 +298,12 @@ a__.dim(), \ b__.dim(), \ c__.dim()); \ - const ::exec_aten::ArrayRef a_strides = a__.strides(); \ - const ::exec_aten::ArrayRef b_strides = b__.strides(); \ - const ::exec_aten::ArrayRef c_strides = c__.strides(); \ + const ::exec_aten::ArrayRef a_strides = \ + a__.strides(); \ + const ::exec_aten::ArrayRef b_strides = \ + b__.strides(); \ + const ::exec_aten::ArrayRef c_strides = \ + c__.strides(); \ for (size_t i = 0; i < a__.dim(); i++) { \ ET_CHECK_MSG( \ a_strides[i] == b_strides[i] && b_strides[i] == c_strides[i], \ @@ -306,9 +312,9 @@ i, \ i, \ i, \ - a_strides[i], \ - b_strides[i], \ - c_strides[i]); \ + (int32_t)a_strides[i], \ + (int32_t)b_strides[i], \ + (int32_t)c_strides[i]); \ } \ }) @@ -848,11 +854,11 @@ inline bool tensor_is_scalar(exec_aten::Tensor t) { /** * The expected output size may not be the existing size of any inputs and - * outputs if the operator supports both broadcast and dynamic shape. Therefore - * such operators needs extra space to store the calculated expected output - * size. such dynamic allocation is troublesome in executorch so we can just - * hard code a static value of a relatively small value because users don't - * create high dimensional tensors. + * outputs if the operator supports both broadcast and dynamic shape. + * Therefore such operators needs extra space to store the calculated expected + * output size. such dynamic allocation is troublesome in executorch so we can + * just hard code a static value of a relatively small value because users + * don't create high dimensional tensors. */ constexpr size_t kTensorDimensionLimit = 16; @@ -893,8 +899,8 @@ inline size_t getTrailingDims(const exec_aten::Tensor& tensor, int64_t dim) { * @param[in] tensor The tensor that will be indexed * @param[in] coordinate A n-dimensional array representing the coordinate to * index. It is assumed that the array has kTensorDimensionLimit elements. - * @param[out] index The linear index to element at the specified coordinate in - * the tensor. 
+ * @param[out] index The linear index to element at the specified coordinate + * in the tensor. */ inline size_t coordinateToIndex( const exec_aten::Tensor& tensor, @@ -935,10 +941,10 @@ inline void indexToCoordinate( * * @param[in] tensor The source of the value to extract. * @param[out] out_val The extracted value, on success. - * @returns `true` if a value was extracted, and sets `*out_val` to that value. - * `false` if a value could not be extracted: either it was not an integer - * Scalar Tensor, or the value of that Scalar Tensor could not be represented - * by INT_T. + * @returns `true` if a value was extracted, and sets `*out_val` to that + * value. `false` if a value could not be extracted: either it was not an + * integer Scalar Tensor, or the value of that Scalar Tensor could not be + * represented by INT_T. */ template < typename INT_T, @@ -973,10 +979,10 @@ bool extract_scalar_tensor(exec_aten::Tensor tensor, INT_T* out_val) { * * @param[in] tensor The source of the value to extract. * @param[out] out_val The extracted value, on success. - * @returns `true` if a value was extracted, and sets `*out_val` to that value. - * `false` if a value could not be extracted: either it was not a floating - * point Scalar Tensor, or the value of that Scalar Tensor could not be - * represented by FLOAT_T. + * @returns `true` if a value was extracted, and sets `*out_val` to that + * value. `false` if a value could not be extracted: either it was not a + * floating point Scalar Tensor, or the value of that Scalar Tensor could not + * be represented by FLOAT_T. */ template < typename FLOAT_T, @@ -1076,9 +1082,9 @@ ET_NODISCARD Error resize_tensor_impl( * expand the tensor if new size exceeds the current capacity. Currently * fails an ET_CHECK if the tensor cannot be resized. * - * WARNING: Placeholder API until discussion around runtime context is settled, - * will likely move to be a class method on a TensorResizer object passed in - * through runtimeContext. + * WARNING: Placeholder API until discussion around runtime context is + * settled, will likely move to be a class method on a TensorResizer object + * passed in through runtimeContext. */ ET_NODISCARD inline Error resize_tensor( exec_aten::Tensor t, @@ -1091,9 +1097,9 @@ ET_NODISCARD inline Error resize_tensor( * expand the tensor if new size exceeds the current capacity. Currently * fails an ET_CHECK if the tensor cannot be resized. * - * WARNING: Placeholder API until discussion around runtime context is settled, - * will likely move to be a class method on a TensorResizer object passed in - * through runtimeContext. + * WARNING: Placeholder API until discussion around runtime context is + * settled, will likely move to be a class method on a TensorResizer object + * passed in through runtimeContext. */ template < typename T, @@ -1124,8 +1130,8 @@ ET_DEPRECATED inline void resize( /** * Get dim_order of a Tensor and write it to out_dim_order. * @param tensor The tensor where we want to get dim order from. - * @param out_dim_order Pointing to an array of DimOrderType where we write dim - * order into it. + * @param out_dim_order Pointing to an array of DimOrderType where we write + * dim order into it. * @param out_dim_order_size Size of the DimOrderType array. */ ET_NODISCARD Error get_dim_order( @@ -1134,18 +1140,47 @@ ET_NODISCARD Error get_dim_order( size_t out_dim_order_size); /** - * Checks whether a tensor has a valid dim order. If the dim order could not be - * determined, then this function returns false by default. 
+ * Checks whether a tensor has a valid dim order. If the dim order could not + * be determined, then this function returns false by default. */ bool tensor_has_valid_dim_order(exec_aten::Tensor t); /** - * Checks whether a tensor has either the default of channels last dim order. If - * the dim order could not be determined, then this function returns false by - * default. + * Checks whether a tensor has either the default of channels last dim order. + * If the dim order could not be determined, then this function returns false + * by default. */ bool tensor_is_default_or_channels_last_dim_order(exec_aten::Tensor t); +/** + * Asserts that two tensors have the same dim_order + * + * Note that this macro only tests dim order, but not others like actual data, + * sizes, etc. Also this macro does not support ATen mode since we do not + * support dim order in ATen mode. + * + * TODO(T183094318): Add dim order and related function support for ATen mode. + */ + +bool tensors_have_same_dim_order( + const exec_aten::Tensor& a, + const exec_aten::Tensor& b); + +/** + * Asserts that three tensors have the same dim_order + * + * Note that this macro only tests dim order, but not others like actual data, + * sizes, etc. Also this macro does not support ATen mode since we do not + * support dim order in ATen mode. + * + * TODO(T183094318): Add dim order and related function support for ATen mode. + */ + +bool tensors_have_same_dim_order( + const exec_aten::Tensor& a, + const exec_aten::Tensor& b, + const exec_aten::Tensor& c); + /** * Given an n-dimensional coordinate array and an array of tensor strides, * calculates the linear index that can be used to retrieve the value at the @@ -1205,6 +1240,7 @@ using ::executorch::runtime::tensor_is_real_type; using ::executorch::runtime::tensor_is_realh_type; using ::executorch::runtime::tensor_is_realhb_type; using ::executorch::runtime::tensor_is_scalar; +using ::executorch::runtime::tensors_have_same_dim_order; using ::executorch::runtime::tensors_have_same_dtype; using ::executorch::runtime::tensors_have_same_rank; using ::executorch::runtime::tensors_have_same_shape; diff --git a/runtime/core/exec_aten/util/tensor_util_aten.cpp b/runtime/core/exec_aten/util/tensor_util_aten.cpp index c5ff3b5223..91b75c0648 100644 --- a/runtime/core/exec_aten/util/tensor_util_aten.cpp +++ b/runtime/core/exec_aten/util/tensor_util_aten.cpp @@ -77,6 +77,64 @@ inline bool tensor_is_default_or_channels_last_dim_order(at::Tensor t) { return ret_val; } +bool tensors_have_same_dim_order( + const exec_aten::Tensor& a, + const exec_aten::Tensor& b) { + exec_aten::DimOrderType a_dim_order[kTensorDimensionLimit]; + exec_aten::DimOrderType b_dim_order[kTensorDimensionLimit]; + + ET_LOG_MSG_AND_RETURN_IF_FALSE( + get_dim_order(a, a_dim_order, a.dim()) == Error::Ok, + "Failed to retrieve dim order from first input tensor!"); + ET_LOG_MSG_AND_RETURN_IF_FALSE( + get_dim_order(b, b_dim_order, b.dim()) == Error::Ok, + "Failed to retrieve dim order from second input tensor!"); + + bool all_contiguous = is_contiguous_dim_order(a_dim_order, a.dim()) && + is_contiguous_dim_order(b_dim_order, b.dim()); + + bool all_channels_last = is_channels_last_dim_order(a_dim_order, a.dim()) && + is_channels_last_dim_order(b_dim_order, b.dim()); + + ET_LOG_MSG_AND_RETURN_IF_FALSE( + all_contiguous || all_channels_last, + "Two input tensors have different dim orders"); + + return true; +} + +bool tensors_have_same_dim_order( + const exec_aten::Tensor& a, + const exec_aten::Tensor& b, + const exec_aten::Tensor& 
+  exec_aten::DimOrderType a_dim_order[kTensorDimensionLimit];
+  exec_aten::DimOrderType b_dim_order[kTensorDimensionLimit];
+  exec_aten::DimOrderType c_dim_order[kTensorDimensionLimit];
+
+  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+      get_dim_order(a, a_dim_order, a.dim()) == Error::Ok,
+      "Failed to retrieve dim order from first input tensor!");
+  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+      get_dim_order(b, b_dim_order, b.dim()) == Error::Ok,
+      "Failed to retrieve dim order from second input tensor!");
+  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+      get_dim_order(c, c_dim_order, c.dim()) == Error::Ok,
+      "Failed to retrieve dim order from third input tensor!");
+
+  bool all_contiguous = is_contiguous_dim_order(a_dim_order, a.dim()) &&
+      is_contiguous_dim_order(b_dim_order, b.dim()) &&
+      is_contiguous_dim_order(c_dim_order, c.dim());
+
+  bool all_channels_last = is_channels_last_dim_order(a_dim_order, a.dim()) &&
+      is_channels_last_dim_order(b_dim_order, b.dim()) &&
+      is_channels_last_dim_order(c_dim_order, c.dim());
+
+  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+      all_contiguous || all_channels_last,
+      "Three input tensors have different dim orders");
+
+  return true;
+}
+
 namespace internal {
 
 Error share_tensor_data(const at::Tensor& t_dst, const at::Tensor& t_src) {
diff --git a/runtime/core/exec_aten/util/tensor_util_portable.cpp b/runtime/core/exec_aten/util/tensor_util_portable.cpp
index c7872d1499..7e9a15f09a 100644
--- a/runtime/core/exec_aten/util/tensor_util_portable.cpp
+++ b/runtime/core/exec_aten/util/tensor_util_portable.cpp
@@ -73,6 +73,40 @@ bool tensor_is_default_or_channels_last_dim_order(torch::executor::Tensor t) {
   return ret_val;
 }
 
+bool tensors_have_same_dim_order(
+    const exec_aten::Tensor& a,
+    const exec_aten::Tensor& b) {
+  bool all_contiguous =
+      is_contiguous_dim_order(a.dim_order().data(), a.dim_order().size()) &&
+      is_contiguous_dim_order(b.dim_order().data(), b.dim_order().size());
+  bool all_channels_last =
+      is_channels_last_dim_order(a.dim_order().data(), a.dim_order().size()) &&
+      is_channels_last_dim_order(b.dim_order().data(), b.dim_order().size());
+
+  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+      all_contiguous || all_channels_last,
+      "Two input tensors have different dim orders");
+
+  return true;
+}
+
+bool tensors_have_same_dim_order(
+    const exec_aten::Tensor& a,
+    const exec_aten::Tensor& b,
+    const exec_aten::Tensor& c) {
+  bool all_contiguous =
+      is_contiguous_dim_order(a.dim_order().data(), a.dim_order().size()) &&
+      is_contiguous_dim_order(b.dim_order().data(), b.dim_order().size()) &&
+      is_contiguous_dim_order(c.dim_order().data(), c.dim_order().size());
+  bool all_channels_last =
+      is_channels_last_dim_order(a.dim_order().data(), a.dim_order().size()) &&
+      is_channels_last_dim_order(b.dim_order().data(), b.dim_order().size()) &&
+      is_channels_last_dim_order(c.dim_order().data(), c.dim_order().size());
+  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+      all_contiguous || all_channels_last,
+      "Three input tensors have different dim orders");
+  return true;
+}
 
 namespace internal {
 
 Error share_tensor_data(
diff --git a/runtime/core/exec_aten/util/test/targets.bzl b/runtime/core/exec_aten/util/test/targets.bzl
index cbd31013b5..615b7c99a4 100644
--- a/runtime/core/exec_aten/util/test/targets.bzl
+++ b/runtime/core/exec_aten/util/test/targets.bzl
@@ -16,16 +16,6 @@ def define_common_targets():
         ],
     )
 
-    runtime.cxx_test(
-        name = "tensor_util_test",
-        srcs = ["tensor_util_test.cpp"],
-        deps = [
-            "//executorch/runtime/core/exec_aten/testing_util:tensor_util",
"//executorch/runtime/core/exec_aten/util:scalar_type_util", - "//executorch/runtime/core/exec_aten/util:tensor_util", - ], - ) - runtime.cxx_test( name = "operator_impl_example_test", srcs = ["operator_impl_example_test.cpp"], @@ -44,3 +34,15 @@ def define_common_targets(): "//executorch/runtime/core/exec_aten/util:tensor_util", ], ) + + for aten_mode in (True, False): + aten_suffix = "_aten" if aten_mode else "" + runtime.cxx_test( + name = "tensor_util_test" + aten_suffix, + srcs = ["tensor_util_test.cpp"], + deps = [ + "//executorch/runtime/core/exec_aten/testing_util:tensor_util", + "//executorch/runtime/core/exec_aten/util:scalar_type_util", + "//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix, + ], + ) diff --git a/runtime/core/exec_aten/util/test/tensor_util_test.cpp b/runtime/core/exec_aten/util/test/tensor_util_test.cpp index 53ff06966c..88588dade6 100644 --- a/runtime/core/exec_aten/util/test/tensor_util_test.cpp +++ b/runtime/core/exec_aten/util/test/tensor_util_test.cpp @@ -14,8 +14,6 @@ #include #include -#include - using namespace ::testing; using exec_aten::ScalarType; using exec_aten::Tensor; @@ -553,3 +551,57 @@ TEST_F(TensorUtilTest, ResizeZeroDimTensor) { executorch::runtime::Error::Ok); EXPECT_EQ(a.dim(), 0); } + +TEST_F(TensorUtilTest, SameDimOrderContiguous) { + using namespace torch::executor; + // Three different tensors with the same shape and same dim order + // ([0, 1, 2, 3]), but different dtypes and contents. + std::vector sizes = {3, 5, 2, 1}; + Tensor a = tf_byte_.ones(sizes); + Tensor b = tf_int_.zeros(sizes); + Tensor c = tf_float_.full(sizes, 0.1); + + // The tensors have the same dim order, should pass the following checks. + EXPECT_TRUE(tensors_have_same_dim_order(a, b)); + EXPECT_TRUE(tensors_have_same_dim_order(b, a)); + EXPECT_TRUE(tensors_have_same_dim_order(a, b, c)); + EXPECT_TRUE(tensors_have_same_dim_order(b, c, a)); + EXPECT_TRUE(tensors_have_same_dim_order(c, a, b)); +} + +TEST_F(TensorUtilTest, SameDimOrderChannelsLast) { + using namespace torch::executor; + // Three different tensors with the same shape and same dim order + // ([0, 2, 3, 1]), but different dtypes and contents. + std::vector sizes = {3, 5, 2, 1}; + Tensor a = tf_byte_.full_channels_last(sizes, 1); + Tensor b = tf_int_.full_channels_last(sizes, 0); + Tensor c = tf_float_.full_channels_last(sizes, 0.1); + + // The tensors have the same dim order, should pass the following checks. + EXPECT_TRUE(tensors_have_same_dim_order(a, b)); + EXPECT_TRUE(tensors_have_same_dim_order(b, a)); + EXPECT_TRUE(tensors_have_same_dim_order(a, b, c)); + EXPECT_TRUE(tensors_have_same_dim_order(b, c, a)); + EXPECT_TRUE(tensors_have_same_dim_order(c, a, b)); +} + +TEST_F(TensorUtilTest, SameShapesDifferentDimOrder) { + using namespace torch::executor; + // Three different tensors with the same shape but different dtypes and + // contents, where b and c have the same dim order ([0, 2, 3, 1]) while a is + // different ([0, 1, 2, 3]). + std::vector sizes = {3, 5, 2, 1}; + Tensor a = tf_byte_.ones(sizes); + Tensor b = tf_int_.full_channels_last(sizes, 0); + Tensor c = tf_float_.full_channels_last(sizes, 0.1); + + // Not the same dim order. Chec + EXPECT_FALSE(tensors_have_same_dim_order(a, b)); + EXPECT_FALSE(tensors_have_same_dim_order(b, a)); + + // Test with a mismatching tensor in all positions, where the other two agree. 
+  EXPECT_FALSE(tensors_have_same_dim_order(a, b, c));
+  EXPECT_FALSE(tensors_have_same_dim_order(a, c, b));
+  EXPECT_FALSE(tensors_have_same_dim_order(c, b, a));
+}
diff --git a/shim/tools/build_defs/fb_native_wrapper.bzl b/shim/tools/build_defs/fb_native_wrapper.bzl
new file mode 100644
index 0000000000..d67b9384fe
--- /dev/null
+++ b/shim/tools/build_defs/fb_native_wrapper.bzl
@@ -0,0 +1,10 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under both the MIT license found in the
+# LICENSE-MIT file in the root directory of this source tree and the Apache
+# License, Version 2.0 found in the LICENSE-APACHE file in the root directory
+# of this source tree.
+
+fb_native = struct(
+    config_setting = native.config_setting,
+)
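
For context on how the new dim-order helpers are meant to be called, below is a minimal standalone sketch. It is illustrative only and not part of the patch: the main()/printf scaffolding and the Float TensorFactory instance are assumptions, while the headers and the tensors_have_same_dim_order calls mirror the code added above.

#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>

#include <cstdio>

int main() {
  using executorch::runtime::tensors_have_same_dim_order;
  torch::executor::testing::TensorFactory<exec_aten::ScalarType::Float> tf;

  // Two contiguous tensors share the default dim order [0, 1, 2, 3].
  auto a = tf.ones({2, 3, 4, 5});
  auto b = tf.zeros({2, 3, 4, 5});

  // A channels-last tensor uses dim order [0, 2, 3, 1].
  auto c = tf.full_channels_last({2, 3, 4, 5}, 1.0f);

  // Matching orders return true; mixing orders logs a message and returns
  // false, so a kernel can turn the result into an invalid-argument error.
  std::printf("a vs b: %d\n", tensors_have_same_dim_order(a, b));     // 1
  std::printf("a, b, c: %d\n", tensors_have_same_dim_order(a, b, c)); // 0
  return 0;
}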