Compute Library v24.11
Jenkins committed Nov 11, 2024
1 parent c61bd33 commit f44f09d
Showing 137 changed files with 4,412 additions and 1,659 deletions.
3 changes: 3 additions & 0 deletions Android.bp
@@ -1025,11 +1025,14 @@ cc_library_static {
"src/runtime/experimental/operators/CpuActivation.cpp",
"src/runtime/experimental/operators/CpuAdd.cpp",
"src/runtime/experimental/operators/CpuDepthwiseConv2d.cpp",
"src/runtime/experimental/operators/CpuDequantize.cpp",
"src/runtime/experimental/operators/CpuElementwise.cpp",
"src/runtime/experimental/operators/CpuGEMMLowp.cpp",
"src/runtime/experimental/operators/CpuGemm.cpp",
"src/runtime/experimental/operators/CpuGemmConv2d.cpp",
"src/runtime/experimental/operators/CpuGemmDirectConv2d.cpp",
"src/runtime/experimental/operators/CpuMul.cpp",
"src/runtime/experimental/operators/CpuQuantize.cpp",
"src/runtime/experimental/operators/CpuSoftmax.cpp",
"src/runtime/experimental/operators/CpuSub.cpp",
"src/runtime/experimental/operators/CpuTranspose.cpp",
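The new sources above add CpuDequantize, CpuMul and CpuQuantize to the experimental operator API. As a rough idea of how such an operator is driven, here is a minimal sketch; the header path, namespace and configure/run(ITensorPack) signatures are assumptions based on the pattern of the operators already listed (CpuActivation, CpuAdd, …), not something taken from this commit:

```cpp
// Hypothetical sketch (not from this commit): dequantize a QASYMM8 tensor to F32 with the
// stateless experimental operator, assuming it follows the configure(ITensorInfo...) /
// run(ITensorPack) pattern of the other experimental operators.
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/experimental/operators/CpuDequantize.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));
    dst.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));

    experimental::op::CpuDequantize deq;
    deq.configure(src.info(), dst.info()); // configured from tensor metadata only

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // Tensors are supplied at run time through a tensor pack (stateless execution).
    ITensorPack pack{{ACL_SRC, &src}, {ACL_DST, &dst}};
    deq.run(pack);
    return 0;
}
```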
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
project(
ArmCompute
VERSION 42.0.0
VERSION 43.0.0
DESCRIPTION
"The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
LANGUAGES C CXX ASM)
15 changes: 15 additions & 0 deletions LICENSES/Apache-2.0.txt
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: 2008-2023 The Khronos Group Inc.
#
# SPDX-License-Identifier: Apache-2.0

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
9 changes: 7 additions & 2 deletions LICENSE → LICENSES/MIT.txt
@@ -1,6 +1,11 @@
MIT License
# SPDX-FileCopyrightText: 2012-2017 Christian Rau
# SPDX-FileCopyrightText: 2017 Leon Merten Lohse
# SPDX-FileCopyrightText: 2017 Sean Barrett
# SPDX-FileCopyrightText: 2017-2024 Arm Limited
#
# SPDX-License-Identifier: MIT

Copyright (c) 2017-2024 Arm Limited
MIT License

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
24 changes: 12 additions & 12 deletions README.md
@@ -9,7 +9,7 @@
<img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
</div>

# Compute Library ![](https://img.shields.io/badge/latest_release-24.09-green)
# Compute Library ![](https://img.shields.io/badge/latest_release-24.11-green)


The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPU architectures.<br>
@@ -37,7 +37,7 @@ Key Features:
<br>

## Documentation
[![Documentation](https://img.shields.io/badge/documentation-24.09-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/index.xhtml)
[![Documentation](https://img.shields.io/badge/documentation-24.11-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11/index.xhtml)

> Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
@@ -50,22 +50,22 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C

| Platform | Operating System | Release archive (Download) |
| -------------- | ---------------- | -------------------------- |
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-bin.tar.gz) |
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) |
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-armv7a-cpu-bin.tar.gz) |
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) |
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-gpu-bin.tar.gz) |
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-gpu-bin.tar.gz) |

<br>

| Architecture | Operating System | Release archive (Download) |
| ------------ | ---------------- | -------------------------- |
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-gpu-bin.tar.gz) |
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-android-aarch64-cpu-gpu-bin.tar.gz) |
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-armv7a-cpu-gpu-bin.tar.gz) |
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-android-aarch64-cpu-gpu-bin.tar.gz) |
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-gpu-bin.tar.gz) |

<br>

Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.09-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.09)
Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.11-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.11)

Pre-built binaries are generated with the following security / good coding practice flags:
> -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
@@ -107,13 +107,13 @@ Pre-build binaries are generated with the following security / good coding pract

## Experimental builds

**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/how_to_build.xhtml) for more details.
**⚠ Important** Bazel and CMake builds are experimental CPU-only builds; please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11/how_to_build.xhtml) for more details.

<br>

## How to contribute

Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/contribution_guidelines.xhtml).
Contributions to the Compute Library are more than welcome. If you are interested in contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11/contribution_guidelines.xhtml).

### Developer Certificate of Origin (DCO)
Before the Compute Library accepts your contribution, you need to certify its origin and give us your permission. To manage this process we use the Developer Certificate of Origin (DCO) V1.1 (https://developercertificate.org/)
10 changes: 3 additions & 7 deletions SConscript
@@ -33,8 +33,8 @@ import codecs
import platform
import SCons

VERSION = "v24.09"
LIBRARY_VERSION_MAJOR = 42
VERSION = "v24.11"
LIBRARY_VERSION_MAJOR = 43
LIBRARY_VERSION_MINOR = 0
LIBRARY_VERSION_PATCH = 0
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)
@@ -627,12 +627,8 @@ custom_operators = []
custom_types = []
custom_layouts = []

use_custom_ops = env['high_priority'] or env['build_config']
use_custom_ops = env['build_config']

if env['high_priority']:
custom_operators = filelist['high_priority']
custom_types = ['all']
custom_layouts = ['all']

if env['build_config']:
custom_operators, custom_types, custom_layouts = read_build_config_json(env['build_config'])
21 changes: 6 additions & 15 deletions SConstruct
@@ -116,7 +116,6 @@ vars.AddVariables(
PathVariable("build_dir", "Specify sub-folder for the build", ".", PathVariable.PathAccept),
PathVariable("install_dir", "Specify sub-folder for the install", "", PathVariable.PathAccept),
BoolVariable("exceptions", "Enable/disable C++ exception support", True),
BoolVariable("high_priority", "Generate a library containing only the high priority operators", False),
PathVariable("linker_script", "Use an external linker script", "", PathVariable.PathAccept),
PathVariable("external_tests_dir", """Add examples, benchmarks and tests to the tests suite from an external path. In order to use this option, the external tests directory must have the following structure:
EXTERNAL_TESTS_DIR:
@@ -519,21 +518,11 @@ if not GetOption("help"):
# Thus for backward compatibility, we include this flag only for NDK < r23
env.Append(CXXFLAGS = ['-no-integrated-as'])

if env['high_priority'] and env['build_config']:
print("The high priority library cannot be built in conjunction with a user-specified build configuration")
Exit(1)

if not env['high_priority'] and not env['build_config']:
env.Append(CPPDEFINES = ['ARM_COMPUTE_GRAPH_ENABLED'])

data_types = []
data_layouts = []

# Set correct data types / layouts to build
if env['high_priority']:
data_types = ['all']
data_layouts = ['all']
elif env['build_config']:
if env['build_config']:
data_types, data_layouts = read_build_config_json(env['build_config'])
else:
data_types = env['data_type_support']
@@ -613,7 +602,9 @@ else:
env.Append(CXXFLAGS = ['-O3'])
else:
# on windows we use clang-cl which does not support the option -O3
env.Append(CXXFLAGS = ['-O2'])
if not version_at_least(compiler_ver, '17.0.0'):
# Disable optimizations in clang 17 or later because the compiler crashes with -O2
env.Append(CXXFLAGS = ['-O2'])

if env['asserts']:
env.Append(CPPDEFINES = ['ARM_COMPUTE_ASSERTS_ENABLED'])
@@ -653,7 +644,7 @@ Export('version_at_least')

SConscript('./SConscript', variant_dir=build_path, duplicate=0)

if env['examples'] and (env['build_config'] or env['high_priority']):
if env['examples'] and env['build_config']:
print("WARNING: Building examples for selected operators not supported. Use examples=0")
Return()

@@ -664,7 +655,7 @@ if env['examples'] and env['exceptions']:
SConscript('./examples/SConscript', variant_dir='%s/examples' % build_path, duplicate=0)

if env['exceptions']:
if env['build_config'] or env['high_priority']:
if env['build_config']:
print("WARNING: Building tests for selected operators not supported")
Return()
if env['os'] == 'bare_metal' and env['arch'] == 'armv7a':
8 changes: 7 additions & 1 deletion arm_compute/core/CPP/CPPTypes.h
@@ -26,6 +26,7 @@

#include "arm_compute/core/Error.h"

#include <cstdint>
#include <memory>

namespace arm_compute
@@ -180,7 +181,12 @@ class CPUInfo final
*
* @return Vector length if sme2 is enabled, otherwise returns 0.
*/
uint64_t get_sme2_vector_length() const;
uint64_t get_sme2_vector_length_in_bytes() const;
/** Return the vector length in bits for sme2
*
* @return Vector length if sme2 is enabled, otherwise returns 0.
*/
uint64_t get_sme2_vector_length_in_bits() const;

private:
struct Impl;
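The rename above splits the SME2 vector-length query into an explicit bytes variant and a new bits variant. A small illustrative helper, assuming only the two accessors declared above and a CPUInfo obtained elsewhere in the application, shows how the two relate:

```cpp
// Illustration only (not part of this commit): the two getters describe the same streaming
// vector length, so bits == 8 * bytes whenever SME2 is available, and both return 0 otherwise.
#include <cstdint>

#include "arm_compute/core/CPP/CPPTypes.h"

// `cpu` is assumed to be a CPUInfo reference obtained elsewhere (e.g. from the active scheduler).
uint64_t sme2_vector_elements_f32(const arm_compute::CPUInfo &cpu)
{
    const uint64_t svl_bytes = cpu.get_sme2_vector_length_in_bytes();
    const uint64_t svl_bits  = cpu.get_sme2_vector_length_in_bits(); // expected: 8 * svl_bytes
    (void)svl_bits;
    // Number of FP32 lanes in one SME2 vector, or 0 if SME2 is not enabled.
    return svl_bytes / sizeof(float);
}
```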
78 changes: 76 additions & 2 deletions arm_compute/core/QuantizationInfo.h
@@ -63,6 +63,31 @@ struct UniformQuantizationInfo
int32_t offset;
};

/** Requantization info when assuming per-layer quantization */
struct UniformRequantizationInfo
{
/** Default constructor */
UniformRequantizationInfo() : scale(0.f), offset(0.f)
{
}
/** Constructor
*
* @param[in] scale Quantization scale
* @param[in] offset Quantization offset
*/
UniformRequantizationInfo(float scale, float offset) : scale(scale), offset(offset)
{
}
/** Checks if the scale and offset are both zero */
bool empty() const
{
return (scale == 0) && (offset == 0);
}

float scale;
float offset;
};

/** Quantization information */
class QuantizationInfo
{
@@ -232,6 +257,13 @@ struct Qasymm8QuantizationHelper
return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
}

static inline QUANTIZED_TYPE quantize(float value, const UniformRequantizationInfo &qinfo)
{
ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);
const int quantized = support::cpp11::lround(value / qinfo.scale + qinfo.offset);
return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
}

/** Quantize a value given a 8-bit asymmetric quantization scheme using a specific rounding policy
*
* @param[in] value Value to quantize
@@ -253,6 +285,21 @@
return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
}

static inline QUANTIZED_TYPE
quantize(float value, const UniformRequantizationInfo &qinfo, RoundingPolicy rounding_policy)
{
if (rounding_policy == RoundingPolicy::TO_NEAREST_UP)
{
return quantize(value, qinfo);
}

ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);

// We round after adding the offset, because the offset is also float
const int quantized = arm_compute::round(value / qinfo.scale + qinfo.offset, rounding_policy);
return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
}

/** Quantize a value given a 8-bit asymmetric quantization scheme
*
* @param[in] value Value to quantize
@@ -588,7 +635,11 @@ inline float dequantize_s32(int32_t value, const QuantizationInfo &qinfo)
return dequantize_s32(value, qinfo.uniform());
}

/*
/** Compute the requantization offset and scale
*
* @deprecated because requantization using integer offsets creates rounding issues.
* Please use @ref arm_compute::compute_requantization_scale_float_offset() instead.
*
* In case of requantization of a quantized input tensor to an output tensor with another quantization
* instead of applying a dequantization and then a quantization function, we just compute a new scale and
* offset.
@@ -628,9 +679,32 @@ inline UniformQuantizationInfo compute_requantization_scale_offset(const Uniform
// In order to minimize flooring we convert the offset to a float,
// then compute the new offset in the float domain,
// finally we convert it back as int32_t
offset_to_apply -= static_cast<int32_t>(static_cast<float>(uqinfo_in.offset) * uqinfo_in.scale / uqinfo_out.scale);

#ifdef __aarch64__
constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
#else //__aarch64__
constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP;
#endif //__aarch64__

offset_to_apply -=
arm_compute::round(static_cast<float>(uqinfo_in.offset) * uqinfo_in.scale / uqinfo_out.scale, rounding_policy);
return UniformQuantizationInfo(scale_to_apply, offset_to_apply);
}

/** Similar to @ref arm_compute::compute_requantization_scale_offset()
* but returning offset as float instead of integer
*/
inline UniformRequantizationInfo compute_requantization_scale_float_offset(const UniformQuantizationInfo &uqinfo_in,
const UniformQuantizationInfo &uqinfo_out)
{
float scale_to_apply = uqinfo_out.scale;
float offset_to_apply = static_cast<float>(uqinfo_out.offset);

scale_to_apply /= uqinfo_in.scale;
offset_to_apply -= static_cast<float>(uqinfo_in.offset) * uqinfo_in.scale / uqinfo_out.scale;

return UniformRequantizationInfo(scale_to_apply, offset_to_apply);
}

} // namespace arm_compute
#endif // ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
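The float-offset requantization added above boils down to two formulas — `scale = out.scale / in.scale` and `offset = out.offset - in.offset * in.scale / out.scale` — with rounding deferred to the quantize step. The following standalone sketch (plain C++ with local stand-in structs rather than the ACL headers) walks one value through that path and checks it against a dequantize-then-quantize reference:

```cpp
// Standalone sketch of the arithmetic above; the struct and function names are local
// stand-ins, not the library's. Requantizes a QASYMM8 value from one (scale, offset)
// pair to another using the float-offset path, rounding only once at the end.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

struct UniformQuant   { float scale; int32_t offset; }; // mirrors UniformQuantizationInfo
struct UniformRequant { float scale; float   offset; }; // mirrors UniformRequantizationInfo

UniformRequant requant_scale_float_offset(UniformQuant in, UniformQuant out)
{
    // scale  = out.scale / in.scale
    // offset = out.offset - in.offset * in.scale / out.scale   (kept as float, no rounding yet)
    return {out.scale / in.scale,
            static_cast<float>(out.offset) - static_cast<float>(in.offset) * in.scale / out.scale};
}

uint8_t quantize_qasymm8(float value, UniformRequant q)
{
    // Rounding happens once, after the float offset has been added.
    const int quantized = static_cast<int>(std::lround(value / q.scale + q.offset));
    return static_cast<uint8_t>(std::clamp(quantized, 0, 255));
}

int main()
{
    const UniformQuant in{0.5f, 10};   // input tensor quantization
    const UniformQuant out{0.25f, 3};  // output tensor quantization
    const UniformRequant rq = requant_scale_float_offset(in, out); // scale 0.5, offset -17

    const uint8_t q_in  = 42;                                      // represents (42 - 10) * 0.5 = 16.0
    const uint8_t q_out = quantize_qasymm8(static_cast<float>(q_in), rq);

    // Reference: dequantize, then quantize with the output parameters.
    const float   real = (q_in - in.offset) * in.scale;
    const uint8_t ref  = quantize_qasymm8(real, {out.scale, static_cast<float>(out.offset)});

    std::printf("requantized: %u, reference: %u\n", q_out, ref); // both print 67
    return 0;
}
```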
3 changes: 3 additions & 0 deletions arm_compute/core/TensorInfo.h
@@ -327,6 +327,9 @@ class TensorInfo final : public ITensorInfo

private:
/** Calculates strides, offset and total size resulting from the specified padding around the XY plane.
*
* @note When interpreting the required_strides in the return value, only the entries up to the corresponding dimension of the tensor are
* valid. For example, a 1D tensor should only refer to the first entry of required_strides, a 2D tensor to the first two entries, and so on.
*
* @param[in] padding Padding around the XY plane in elements.
*/