From a88ea27e7a03aaa59e65589bb071ec01f4983be3 Mon Sep 17 00:00:00 2001 From: chenqiudu Date: Thu, 18 Apr 2024 19:37:41 +0800 Subject: [PATCH] simd wasm --- ...make_ktm_action.yml => cmake_ktm_test.yml} | 9 +- .github/workflows/cmake_ktm_wasm_test.yml | 59 ++++ CMakeLists.txt | 48 ++- README.md | 17 +- ktm/detail/function/common_simd.inl | 27 +- ktm/detail/function/geometric_simd.inl | 4 +- ktm/detail/function/matrix_simd.inl | 8 +- ktm/detail/matrix/mat_calc_simd.inl | 12 +- ktm/detail/quaternion/quat_calc_simd.inl | 4 +- ktm/detail/vector/vec_calc_simd.inl | 4 +- ktm/detail/vector/vec_data_simd.inl | 8 +- ktm/simd/arch_def.h | 6 + ktm/simd/intrin_api.h | 9 +- ktm/simd/skv.h | 23 +- ktm/simd/wasm_intrin.h | 280 ++++++++++++++++++ src/ktm.cpp | 11 + test/CMakeLists.txt | 19 ++ test/quaternion_test.cpp | 4 +- 18 files changed, 464 insertions(+), 88 deletions(-) rename .github/workflows/{cmake_ktm_action.yml => cmake_ktm_test.yml} (94%) create mode 100644 .github/workflows/cmake_ktm_wasm_test.yml create mode 100644 ktm/simd/wasm_intrin.h create mode 100644 src/ktm.cpp create mode 100644 test/CMakeLists.txt diff --git a/.github/workflows/cmake_ktm_action.yml b/.github/workflows/cmake_ktm_test.yml similarity index 94% rename from .github/workflows/cmake_ktm_action.yml rename to .github/workflows/cmake_ktm_test.yml index de69fb6..260eafb 100644 --- a/.github/workflows/cmake_ktm_action.yml +++ b/.github/workflows/cmake_ktm_test.yml @@ -1,6 +1,6 @@ # This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform. # See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml -name: cmake ktm action +name: cmake ktm test on: push: @@ -67,6 +67,7 @@ jobs: shell: bash run: | echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" + echo "build-output-test-dir=${{ github.workspace }}/build/test" >> "$GITHUB_OUTPUT" - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. @@ -76,6 +77,7 @@ jobs: -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DKTM_BUILD_TESTING=ON -S ${{ github.workspace }} - name: Build @@ -83,12 +85,11 @@ jobs: run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} - name: Test - working-directory: ${{ steps.strings.outputs.build-output-dir }} + working-directory: ${{ steps.strings.outputs.build-output-test-dir }} # Execute tests defined by the CMake configuration. Note that --build-config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail run: ctest --build-config ${{ matrix.build_type }} - name: Install # Install your program with the given configuration. - run: ${{ matrix.root }} cmake --install ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} - + run: ${{ matrix.root }} cmake --install ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} \ No newline at end of file diff --git a/.github/workflows/cmake_ktm_wasm_test.yml b/.github/workflows/cmake_ktm_wasm_test.yml new file mode 100644 index 0000000..d298a10 --- /dev/null +++ b/.github/workflows/cmake_ktm_wasm_test.yml @@ -0,0 +1,59 @@ +# This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform. +# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml +name: cmake ktm wasm test + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + build: + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + + matrix: + os: [macos-latest, ubuntu-latest] + build_type: [Release] + include: + - os: macos-latest + - os: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Setup emsdk + uses: mymindstorm/setup-emsdk@v14 + with: + version: "latest" + actions-cache-folder: 'emsdk-cache' + + - name: Set reusable strings + id: strings + shell: bash + run: | + echo $EMSDK + echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" + echo "build-output-test-dir=${{ github.workspace }}/build/test" >> "$GITHUB_OUTPUT" + + - name: Configure CMake + run: > + cmake -B ${{ steps.strings.outputs.build-output-dir }} + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DKTM_BUILD_TESTING=ON + -DKTM_BUILD_WASM=ON + -DCMAKE_TOOLCHAIN_FILE="$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake" + -S ${{ github.workspace }} + + - name: Build + run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} + + - name: Test + working-directory: ${{ steps.strings.outputs.build-output-test-dir }} + run: ctest --build-config ${{ matrix.build_type }} + + - name: Install + run: cmake --install ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 10e22ff..42708d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,43 +1,37 @@ cmake_minimum_required(VERSION 3.20) -project(ktm) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") - add_compile_options(/std:c++17) +project(ktm) + +if(KTM_BUILD_WASM) + add_compile_options(-msimd128) +elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") add_compile_options(/source-charset:utf-8) add_compile_options(/execution-charset:utf-8) -elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - add_compile_options(-std=gnu++17) - add_compile_options(-fvisibility=hidden) - add_compile_options(-march=native) -elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") - add_compile_options(-std=gnu++17) - add_compile_options(-Wc++17-extensions) - add_compile_options(-fvisibility=hidden) +elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR + "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR + "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") add_compile_options(-march=native) endif() -add_executable(geometry_test ${CMAKE_CURRENT_SOURCE_DIR}/test/geometry_test.cpp) -add_executable(matrix_test ${CMAKE_CURRENT_SOURCE_DIR}/test/matrix_test.cpp) -add_executable(quaternion_test ${CMAKE_CURRENT_SOURCE_DIR}/test/quaternion_test.cpp) -add_executable(vector_test ${CMAKE_CURRENT_SOURCE_DIR}/test/vector_test.cpp) - -enable_testing() +if(KTM_BUILD_TESTING) + add_subdirectory(test) +endif() -add_test(geometry_test geometry_test) -add_test(matrix_test matrix_test) -add_test(quaternion_test quaternion_test) -add_test(vector_test vector_test) +add_executable(ktm ${CMAKE_CURRENT_SOURCE_DIR}/src/ktm.cpp) install(DIRECTORY ktm DESTINATION include) if(NOT TARGET uninstall) - configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in" - "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" - IMMEDIATE @ONLY) + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" + IMMEDIATE @ONLY) - add_custom_target(uninstall - COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) + add_custom_target(uninstall + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) endif() diff --git a/README.md b/README.md index 04f76d9..5e7bce0 100644 --- a/README.md +++ b/README.md @@ -14,13 +14,26 @@ **特点** - head-only,引入头文件即可使用 -- 支持Simd指令集加速,SSE,SSE2,SSE3,SSE4.1,SSE4.2,Neon +- 支持Simd指令集加速,SSE,SSE2,SSE3,SSE4.1,SSE4.2,Neon,Wasm - 代码结构清晰,类利用模板实现组件化 +**构建和安装** + +```shell +# unix +mkdir build && cd build +cmake .. +sudo make install + +# windows +cmake -S . -B ./build +cmake --install ./build --config Release +``` + **示例** ```c++ -#include "ktm/ktm.h" +#include using namespace ktm; using namespace std; diff --git a/ktm/detail/function/common_simd.inl b/ktm/detail/function/common_simd.inl index 30ef043..0f20d7f 100644 --- a/ktm/detail/function/common_simd.inl +++ b/ktm/detail/function/common_simd.inl @@ -11,7 +11,7 @@ #include "common_fwd.h" #include "../../simd/skv.h" -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) template<> struct ktm::detail::common_implement::reduce_add<4, float> @@ -278,9 +278,9 @@ struct ktm::detail::common_implement::fast_recip struct ktm::detail::common_implement::reduce_add<4, int> @@ -304,9 +304,9 @@ struct ktm::detail::common_implement::abs struct ktm::detail::common_implement::reduce_min<4, int> @@ -364,23 +364,10 @@ struct ktm::detail::common_implement::clamp -// struct ktm::detail::common_implement::elem_move -// { -// static_assert(L == 1); -// using V = vec<2, float>; -// static KTM_INLINE V call(const V& x) noexcept -// { -// V ret; -// ret.st = _shuffo64_f32(x.st, x.st, 0, L); -// return ret; -// } -// }; - template<> struct ktm::detail::common_implement::abs<2, float> { @@ -664,6 +651,6 @@ struct ktm::detail::common_implement::clamp<2, int> } }; -#endif +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON) #endif \ No newline at end of file diff --git a/ktm/detail/function/geometric_simd.inl b/ktm/detail/function/geometric_simd.inl index db3cf72..1f3b090 100644 --- a/ktm/detail/function/geometric_simd.inl +++ b/ktm/detail/function/geometric_simd.inl @@ -11,7 +11,7 @@ #include "geometric_fwd.h" #include "../../simd/skv.h" -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) template struct ktm::detail::geometric_implement::dot> @@ -252,7 +252,7 @@ struct ktm::detail::geometric_implement::fast_normalize struct ktm::detail::matrix_implement::transpose<2, 2, float> @@ -345,9 +345,9 @@ struct ktm::detail::matrix_implement::inverse<4, float> } }; -#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1 | KTM_SIMD_WASM) template<> struct ktm::detail::matrix_implement::determinant<3, int> @@ -405,6 +405,6 @@ struct ktm::detail::matrix_implement::determinant<4, int> } }; -#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1) +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1 | KTM_SIMD_WASM) #endif \ No newline at end of file diff --git a/ktm/detail/matrix/mat_calc_simd.inl b/ktm/detail/matrix/mat_calc_simd.inl index 139a18e..05b62e8 100644 --- a/ktm/detail/matrix/mat_calc_simd.inl +++ b/ktm/detail/matrix/mat_calc_simd.inl @@ -11,7 +11,7 @@ #include "mat_calc_fwd.h" #include "../../simd/skv.h" -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) template struct ktm::detail::mat_opt_implement::mat_mul_vec> @@ -100,9 +100,9 @@ private: } }; -#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2 | KTM_SIMD_WASM) template struct ktm::detail::mat_opt_implement::add> @@ -142,9 +142,9 @@ private: } }; -#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2) +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2 | KTM_SIMD_WASM) -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1 | KTM_SIMD_WASM) template struct ktm::detail::mat_opt_implement::mat_mul_vec> @@ -195,7 +195,7 @@ private: } }; -#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1) +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1 | KTM_SIMD_WASM) #if KTM_SIMD_ENABLE(KTM_SIMD_NEON) diff --git a/ktm/detail/quaternion/quat_calc_simd.inl b/ktm/detail/quaternion/quat_calc_simd.inl index 6d9fd74..52b5f90 100644 --- a/ktm/detail/quaternion/quat_calc_simd.inl +++ b/ktm/detail/quaternion/quat_calc_simd.inl @@ -11,7 +11,7 @@ #include "quat_calc_fwd.h" #include "../../simd/skv.h" -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) namespace ktm { @@ -81,6 +81,6 @@ struct ktm::detail::quat_calc_implement::act } }; -#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) #endif \ No newline at end of file diff --git a/ktm/detail/vector/vec_calc_simd.inl b/ktm/detail/vector/vec_calc_simd.inl index 685e19e..4007db7 100644 --- a/ktm/detail/vector/vec_calc_simd.inl +++ b/ktm/detail/vector/vec_calc_simd.inl @@ -11,7 +11,7 @@ #include "vec_calc_fwd.h" #include "../../simd/skv.h" -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) template struct ktm::detail::vec_calc_implement::add> @@ -354,7 +354,7 @@ struct ktm::detail::vec_calc_implement::mul_scalar_to_self struct ktm::detail::vec_data_implement::vec_storage<2, float> @@ -95,9 +95,9 @@ struct ktm::detail::vec_data_implement::vec_storage<2, int> typedef skv::sv2 type; }; -#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2 | KTM_SIMD_WASM) template<> struct ktm::detail::vec_data_implement::vec_storage<3, int> @@ -111,7 +111,7 @@ struct ktm::detail::vec_data_implement::vec_storage<4, int> typedef skv::sv4 type; }; -#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2) +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2 | KTM_SIMD_WASM) #if KTM_SIMD_ENABLE(KTM_SIMD_NEON) diff --git a/ktm/simd/arch_def.h b/ktm/simd/arch_def.h index 99e4503..7baa8f7 100644 --- a/ktm/simd/arch_def.h +++ b/ktm/simd/arch_def.h @@ -18,6 +18,7 @@ #define KTM_SIMD_SSSE3 0x00000020 #define KTM_SIMD_SSE4_1 0x00000040 #define KTM_SIMD_SSE4_2 0x00000080 +#define KTM_SIMD_WASM 0x00000100 #define KTM_SIMD_ENABLE(flags) (KTM_SIMD_SUPPORT & (flags)) #if defined(KTM_COMPILER_MSVC) @@ -85,4 +86,9 @@ #endif #endif +#if defined(__wasm__) && defined (__wasm_simd128__) + #define KTM_SIMD_SUPPORT KTM_SIMD_WASM + #include +#endif + #endif \ No newline at end of file diff --git a/ktm/simd/intrin_api.h b/ktm/simd/intrin_api.h index c5f8ce6..043b091 100644 --- a/ktm/simd/intrin_api.h +++ b/ktm/simd/intrin_api.h @@ -11,6 +11,7 @@ #include "arch_def.h" #include "arm_intrin.h" #include "intel_intrin.h" +#include "wasm_intrin.h" #if KTM_SIMD_ENABLE(KTM_SIMD_NEON) @@ -107,7 +108,7 @@ #endif -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) #define _dup128_f32(a) ::intrin::dup128_f32(a) #define _dupzero128_f32() ::intrin::dupzero128_f32() @@ -143,7 +144,7 @@ #endif -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2 | KTM_SIMD_WASM) #define _dup128_s32(a) ::intrin::dup128_s32(a) #define _dupzero128_s32() _cast128_s32_f32(_dupzero128_f32()) @@ -177,7 +178,7 @@ #endif -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1 | KTM_SIMD_WASM) #define _mul128_s32(a, b) ::intrin::mul128_s32(a, b) #define _madd128_s32(a, b, c) ::intrin::madd128_s32(a, b, c) @@ -187,7 +188,7 @@ #endif -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON64 | KTM_SIMD_SSE4_1) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON64 | KTM_SIMD_SSE4_1 | KTM_SIMD_WASM) #define _round128_f32(a) ::intrin::round128_f32(a) #define _floor128_f32(a) ::intrin::floor128_f32(a) diff --git a/ktm/simd/skv.h b/ktm/simd/skv.h index 4cb0df9..de7d70b 100644 --- a/ktm/simd/skv.h +++ b/ktm/simd/skv.h @@ -26,6 +26,11 @@ namespace skv #if KTM_SIMD_ENABLE(KTM_SIMD_SSE2) typedef __m128i sv4; #endif +#elif KTM_SIMD_ENABLE(KTM_SIMD_WASM) + typedef uint64_t fv2; + typedef uint64_t sv2; + typedef v128_t fv4; + typedef v128_t sv4; #endif #if KTM_SIMD_ENABLE(KTM_SIMD_NEON) @@ -117,11 +122,11 @@ KTM_FUNC fv2 dot1_fv2(fv2 x, fv2 y) noexcept #endif -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) KTM_FUNC fv4 round_fv4(fv4 a) noexcept { -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON64 | KTM_SIMD_SSE4_1) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON64 | KTM_SIMD_SSE4_1 | KTM_SIMD_WASM) return _round128_f32(a); #else constexpr union { unsigned int i; float f; } mask1 { 0x80000000 }; @@ -135,7 +140,7 @@ KTM_FUNC fv4 round_fv4(fv4 a) noexcept KTM_FUNC fv4 floor_fv4(fv4 a) noexcept { -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON64 | KTM_SIMD_SSE4_1) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON64 | KTM_SIMD_SSE4_1 | KTM_SIMD_WASM) return _floor128_f32(a); #else constexpr union { unsigned int i; float f; } mask = { 0x3f800000 }; @@ -149,7 +154,7 @@ KTM_FUNC fv4 floor_fv4(fv4 a) noexcept KTM_FUNC fv4 ceil_fv4(fv4 a) noexcept { -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON64 | KTM_SIMD_SSE4_1) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON64 | KTM_SIMD_SSE4_1 | KTM_SIMD_WASM) return _ceil128_f32(a); #else constexpr union { unsigned int i; float f; } mask = { 0x3f800000 }; @@ -284,9 +289,9 @@ KTM_FUNC fv4 dot1_fv4(fv4 x, fv4 y) noexcept return dot; } -#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE) +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE | KTM_SIMD_WASM) -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2 | KTM_SIMD_WASM) KTM_FUNC int radd_sv3(sv4 a) noexcept { @@ -332,9 +337,9 @@ KTM_FUNC int rsub_sv4(sv4 a) noexcept return ret.i; } -#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2) +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE2 | KTM_SIMD_WASM) -#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1) +#if KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1 | KTM_SIMD_WASM) KTM_FUNC int rmax_sv4(sv4 a) noexcept { @@ -364,7 +369,7 @@ KTM_FUNC int rmin_sv4(sv4 a) noexcept #endif } -#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1) +#endif // KTM_SIMD_ENABLE(KTM_SIMD_NEON | KTM_SIMD_SSE4_1 | KTM_SIMD_WASM) } diff --git a/ktm/simd/wasm_intrin.h b/ktm/simd/wasm_intrin.h new file mode 100644 index 0000000..ae46a1f --- /dev/null +++ b/ktm/simd/wasm_intrin.h @@ -0,0 +1,280 @@ +// MIT License +// +// Copyright (c) 2023 有个小小杜 +// +// Created by 有个小小杜 +// + +#ifndef _KTM_WASM_INTRIN_H_ +#define _KTM_WASM_INTRIN_H_ + +#include "arch_def.h" +#include + +namespace intrin +{ + +#if KTM_SIMD_ENABLE(KTM_SIMD_WASM) + +KTM_FUNC float cast128to32_f32(v128_t a) noexcept +{ + return wasm_f32x4_extract_lane(a, 0); +} + +KTM_FUNC v128_t dup128_f32(float a) noexcept +{ + return wasm_f32x4_splat(a); +} + +KTM_FUNC v128_t dupzero128_f32() noexcept +{ + return wasm_f32x4_const_splat(0.f); +} + +KTM_FUNC v128_t set128_f32(float a, float b, float c, float d) noexcept +{ + return wasm_f32x4_make(d, c, b, a); +} + +template +KTM_FUNC v128_t shuffle128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_i32x4_shuffle(a, b, N0, N1, N2 + 4, N3 + 4); +} + +template +KTM_FUNC v128_t shuffle128_f32(v128_t a) noexcept +{ + return wasm_i32x4_shuffle(a, a, N0, N1, N2 + 4, N3 + 4); +} + +KTM_FUNC v128_t and128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_v128_and(a, b); +} + +KTM_FUNC v128_t or128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_v128_or(a, b); +} + +KTM_FUNC v128_t xor128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_v128_xor(a, b); +} + +KTM_FUNC v128_t add128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_f32x4_add(a, b); +} + +KTM_FUNC v128_t sub128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_f32x4_sub(a, b); +} + +KTM_FUNC v128_t mul128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_f32x4_mul(a, b); +} + +KTM_FUNC v128_t div128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_f32x4_div(a, b); +} + +KTM_FUNC v128_t madd128_f32(v128_t a, v128_t b, v128_t c) noexcept +{ + return wasm_f32x4_add(a, wasm_f32x4_mul(b, c)); +} + +KTM_FUNC v128_t neg128_f32(v128_t a) noexcept +{ + return wasm_f32x4_neg(a); +} + +KTM_FUNC v128_t abs128_f32(v128_t a) noexcept +{ + return wasm_f32x4_abs(a); +} + +KTM_FUNC v128_t max128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_f32x4_max(a, b); +} + +KTM_FUNC v128_t min128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_f32x4_min(a, b); +} + +KTM_FUNC v128_t cmpeq128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_f32x4_eq(a, b); +} + +KTM_FUNC v128_t cmplt128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_f32x4_lt(a, b); +} + +KTM_FUNC v128_t cmpgt128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_f32x4_gt(a, b); +} + +KTM_FUNC v128_t cmple128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_f32x4_le(a, b); +} + +KTM_FUNC v128_t cmpge128_f32(v128_t a, v128_t b) noexcept +{ + return wasm_f32x4_ge(a, b); +} + +KTM_FUNC v128_t recipl128_f32(v128_t a) noexcept +{ + v128_t ret = wasm_i32x4_sub(wasm_i32x4_splat(0x7ef477d5), a); + v128_t sub = wasm_f32x4_sub(wasm_f32x4_splat(2.f), wasm_f32x4_mul(a, ret)); + ret = wasm_f32x4_mul(ret, sub); + return ret; +} + +KTM_FUNC v128_t reciph128_f32(v128_t a) noexcept +{ + return wasm_f32x4_div(wasm_f32x4_splat(1.f), a); +} + +KTM_FUNC v128_t rsqrtl128_f32(v128_t a) noexcept +{ + v128_t ret = wasm_i32x4_sub(wasm_i32x4_splat(0x5f3759df), wasm_i32x4_shr(a, 1)); + v128_t mul = wasm_f32x4_mul(wasm_f32x4_splat(0.5f), wasm_f32x4_mul(a, wasm_f32x4_mul(ret, ret))); + v128_t sub = wasm_f32x4_sub(wasm_f32x4_splat(1.5f), mul); + ret = wasm_f32x4_mul(ret, sub); + return ret; +} + +KTM_FUNC v128_t rsqrth128_f32(v128_t a) noexcept +{ + return wasm_f32x4_div(wasm_f32x4_splat(1.f), wasm_f32x4_sqrt(a)); +} + +KTM_FUNC v128_t sqrtl128_f32(v128_t a) noexcept +{ + v128_t ret = wasm_i32x4_add(wasm_i32x4_splat(0x1fbd1df5), wasm_i32x4_shr(a, 1)); + v128_t mul = wasm_f32x4_add(ret, wasm_f32x4_div(a, ret)); + ret = wasm_f32x4_mul(mul, wasm_f32x4_splat(0.5f)); + return ret; +} + +KTM_FUNC v128_t sqrth128_f32(v128_t a) noexcept +{ + return wasm_f32x4_sqrt(a); +} + +KTM_FUNC v128_t round128_f32(v128_t a) noexcept +{ + return wasm_f32x4_nearest(a); +} + +KTM_FUNC v128_t floor128_f32(v128_t a) noexcept +{ + return wasm_f32x4_floor(a); +} + +KTM_FUNC v128_t ceil128_f32(v128_t a) noexcept +{ + return wasm_f32x4_ceil(a); +} + +KTM_FUNC v128_t cast128_s32_f32(v128_t a) noexcept +{ + return a; +} + +KTM_FUNC v128_t cast128_f32_s32(v128_t a) noexcept +{ + return a; +} + +KTM_FUNC v128_t dup128_s32(int a) noexcept +{ + return wasm_i32x4_splat(a); +} + +KTM_FUNC v128_t set128_s32(int a, int b, int c, int d) noexcept +{ + return wasm_i32x4_make(d, c, b, a); +} + +KTM_FUNC v128_t add128_s32(v128_t a, v128_t b) noexcept +{ + return wasm_i32x4_add(a, b); +} + +KTM_FUNC v128_t sub128_s32(v128_t a, v128_t b) noexcept +{ + return wasm_i32x4_sub(a, b); +} + +KTM_FUNC v128_t mul128_s32(v128_t a, v128_t b) noexcept +{ + return wasm_i32x4_mul(a, b); +} + +KTM_FUNC v128_t neg128_s32(v128_t a) noexcept +{ + return wasm_i32x4_neg(a); +} + +KTM_FUNC v128_t abs128_s32(v128_t a) noexcept +{ + return wasm_i32x4_abs(a); +} + +KTM_FUNC v128_t cmpeq128_s32(v128_t a, v128_t b) noexcept +{ + return wasm_i32x4_eq(a, b); +} + +KTM_FUNC v128_t cmplt128_s32(v128_t a, v128_t b) noexcept +{ + return wasm_i32x4_lt(a, b); +} + +KTM_FUNC v128_t cmpgt128_s32(v128_t a, v128_t b) noexcept +{ + return wasm_i32x4_gt(a, b); +} + +KTM_FUNC v128_t cmple128_s32(v128_t a, v128_t b) noexcept +{ + return wasm_i32x4_le(a, b); +} + +KTM_FUNC v128_t cmpge128_s32(v128_t a, v128_t b) noexcept +{ + return wasm_i32x4_ge(a, b); +} + +KTM_FUNC v128_t madd128_s32(v128_t a, v128_t b, v128_t c) noexcept +{ + return wasm_i32x4_add(a, wasm_i32x4_mul(b, c)); +} + +KTM_FUNC v128_t max128_s32(v128_t a, v128_t b) noexcept +{ + return wasm_i32x4_max(a, b); +} + +KTM_FUNC v128_t min128_s32(v128_t a, v128_t b) noexcept +{ + return wasm_i32x4_min(a, b); +} + +#endif + +} + +#endif \ No newline at end of file diff --git a/src/ktm.cpp b/src/ktm.cpp new file mode 100644 index 0000000..72333f7 --- /dev/null +++ b/src/ktm.cpp @@ -0,0 +1,11 @@ +#include "../ktm/ktm.h" + +using namespace ktm; +using namespace std; + +int main(int argv, char* argc[]) +{ + cout << "hello kutori math" << endl; + cout << "rsqrt approximation of 4 is " << fast::rsqrt(4.f) << endl; + return 0; +} \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 0000000..f3dbfc0 --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,19 @@ +enable_testing() + +add_executable(geometry_test geometry_test.cpp) +add_executable(matrix_test matrix_test.cpp) +add_executable(quaternion_test quaternion_test.cpp) +add_executable(vector_test vector_test.cpp) + +if(KTM_BUILD_WASM) + find_program(NODE node) + add_test(geometry_test ${NODE} geometry_test.js) + add_test(matrix_test ${NODE} matrix_test.js) + add_test(quaternion_test ${NODE} quaternion_test.js) + add_test(vector_test ${NODE} vector_test.js) +else() + add_test(geometry_test geometry_test) + add_test(matrix_test matrix_test) + add_test(quaternion_test quaternion_test) + add_test(vector_test vector_test) +endif() \ No newline at end of file diff --git a/test/quaternion_test.cpp b/test/quaternion_test.cpp index 29b32b5..e1950aa 100644 --- a/test/quaternion_test.cpp +++ b/test/quaternion_test.cpp @@ -41,8 +41,8 @@ int main(int argc, char* argv[]) std::cout << "inverse: " << ktm::inverse(q5) << std::endl; std::cout << "negate: " << ktm::negate(q5) << std::endl; std::cout << "normalize: " << ktm::normalize(q5) << std::endl; - std::cout << "slerp: " << ktm::slerp(q5, q4, 0.5f) << std::endl; - std::cout << "slerp_longest: " << ktm::slerp_longest(q5, q4, 0.5f) << std::endl; + std::cout << "slerp: " << ktm::slerp(q5, q3, 0.5f) << std::endl; + std::cout << "slerp_longest: " << ktm::slerp_longest(q5, q3, 0.5f) << std::endl; return 0; } \ No newline at end of file