From 9efeb6eabb161d0834478dabc32b8109901767ff Mon Sep 17 00:00:00 2001 From: chenqiudu Date: Wed, 12 Jun 2024 22:14:29 +0800 Subject: [PATCH 1/8] fix ceil func --- ktm/function/arithmetic.h | 2 +- test/vector_test.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ktm/function/arithmetic.h b/ktm/function/arithmetic.h index ad88352..96f5ef4 100644 --- a/ktm/function/arithmetic.h +++ b/ktm/function/arithmetic.h @@ -57,7 +57,7 @@ KTM_INLINE std::enable_if_t, T> floor(T x) noexcept template KTM_INLINE std::enable_if_t, T> ceil(T x) noexcept { - return std::ceil; + return std::ceil(x); } template diff --git a/test/vector_test.cpp b/test/vector_test.cpp index 580c71d..7bcffe5 100644 --- a/test/vector_test.cpp +++ b/test/vector_test.cpp @@ -33,7 +33,7 @@ int main(int argc, char* argv[]) std::cout << "floor: " << ktm::floor(t3) << std::endl; std::cout << "ceil: " << ktm::ceil(t3) << std::endl; std::cout << "round: " << ktm::round(t3) << std::endl; - std::cout << "mod" << ktm::mod(t3, {2.f, 3.f, 3.f}) << std::endl; + std::cout << "mod: " << ktm::mod(t3, {2.f, 3.f, 3.f}) << std::endl; std::cout << "sqrt: " << ktm::sqrt(t5) << std::endl; std::cout << "rsqrt: " << ktm::rsqrt(t5) << std::endl; std::cout << "recip: " << ktm::recip(t3) << std::endl; From c879f1b9e6b9c0d605d7cbc430b8599222fc9017 Mon Sep 17 00:00:00 2001 From: chenqiudu Date: Thu, 13 Jun 2024 11:46:45 +0800 Subject: [PATCH 2/8] use loop_util --- ktm/detail/function/common.inl | 361 +++------------------------- ktm/detail/loop_util.h | 151 ++++++++++++ ktm/detail/matrix/mat_calc.inl | 117 ++------- ktm/detail/matrix/mat_calc_simd.inl | 294 ++++++++++------------ ktm/detail/vector/vec_calc.inl | 257 ++------------------ 5 files changed, 351 insertions(+), 829 deletions(-) create mode 100644 ktm/detail/loop_util.h diff --git a/ktm/detail/function/common.inl b/ktm/detail/function/common.inl index f3e46c9..4d89009 100644 --- a/ktm/detail/function/common.inl +++ b/ktm/detail/function/common.inl @@ -8,9 +8,8 @@ #ifndef _KTM_COMMON_INL_ #define _KTM_COMMON_INL_ -#include #include "common_fwd.h" -#include "../../setup.h" +#include "../loop_util.h" #include "../../type/vec_fwd.h" #include "../../function/arithmetic.h" #include "../../function/exponential.h" @@ -21,21 +20,9 @@ struct ktm::detail::common_implement::reduce_add using V = vec; static KTM_INLINE T call(const V& x) noexcept { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - T ret = zero; - for(int i = 0; i < N; ++i) - ret += x[i]; - return ret; - } - } -private: - template - static KTM_INLINE T call(const V& x, std::index_sequence) noexcept - { - return (x[Ns] + ...); + T ret; + loop_reduce(ret, x, x[0], std::plus()); + return ret; } }; @@ -45,22 +32,8 @@ struct ktm::detail::common_implement::reduce_min using V = vec; static KTM_INLINE T call(const V& x) noexcept { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - T ret = x[0]; - for(int i = 0; i < N - 1; ++i) - ret = ktm::min(ret, x[i + 1]); - return ret; - } - } -private: - template - static KTM_INLINE T call(const V& x, std::index_sequence) noexcept - { - T ret = x[0]; - ((ret = ktm::min(ret, x[Ns + 1])), ...); + T ret; + loop_reduce(ret, x, x[0], ktm::min); return ret; } }; @@ -71,22 +44,8 @@ struct ktm::detail::common_implement::reduce_max using V = vec; static KTM_INLINE T call(const V& x) noexcept { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - T ret = x[0]; - for(int i = 0; i < N - 1; ++i) - ret = ktm::max(ret, 
x[i + 1]); - return ret; - } - } -private: - template - static KTM_INLINE T call(const V& x, std::index_sequence) noexcept - { - T ret = x[0]; - ((ret = ktm::max(ret, x[Ns + 1])), ...); + T ret; + loop_reduce(ret, x, x[0], ktm::max); return ret; } }; @@ -96,23 +55,9 @@ struct ktm::detail::common_implement::abs { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::abs(x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::abs(x[Ns])), ...); + loop_op(ret, x, ktm::abs); return ret; } }; @@ -122,23 +67,9 @@ struct ktm::detail::common_implement::min { using V = vec; static KTM_INLINE V call(const V& x, const V& y) noexcept - { - if constexpr(N <= 4) - return call(x, y, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::min(x[i], y[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, const V& y, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::min(x[Ns], y[Ns])), ...); + loop_op(ret, x, y, ktm::min); return ret; } }; @@ -148,23 +79,9 @@ struct ktm::detail::common_implement::max { using V = vec; static KTM_INLINE V call(const V& x, const V& y) noexcept - { - if constexpr(N <= 4) - return call(x, y, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::max(x[i], y[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, const V& y, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::max(x[Ns], y[Ns])), ...); + loop_op(ret, x, y, ktm::max); return ret; } }; @@ -174,23 +91,9 @@ struct ktm::detail::common_implement::clamp { using V = vec; static KTM_INLINE V call(const V& v, const V& min, const V& max) noexcept - { - if constexpr(N <= 4) - return call(v, min, max, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::clamp(v[i], min[i], max[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& v, const V& min, const V& max, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::clamp(v[Ns], min[Ns], max[Ns])), ...); + loop_op(ret, v, min, max, ktm::clamp); return ret; } }; @@ -200,23 +103,9 @@ struct ktm::detail::common_implement::floor { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::floor(x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::floor(x[Ns])), ...); + loop_op(ret, x, ktm::floor); return ret; } }; @@ -226,23 +115,9 @@ struct ktm::detail::common_implement::ceil { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::ceil(x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::ceil(x[Ns])), ...); + loop_op(ret, x, ktm::ceil); return ret; } }; @@ -252,23 +127,9 @@ struct ktm::detail::common_implement::round { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { 
- V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::round(x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::round(x[Ns])), ...); + loop_op(ret, x, ktm::round); return ret; } }; @@ -278,23 +139,9 @@ struct ktm::detail::common_implement::fract { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::fract(x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::fract(x[Ns])), ...); + loop_op(ret, x, ktm::fract); return ret; } }; @@ -304,23 +151,9 @@ struct ktm::detail::common_implement::mod { using V = vec; static KTM_INLINE V call(const V& x, const V& y) noexcept - { - if constexpr(N <= 4) - return call(x, y, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::mod(x[i], y[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, const V& y, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::mod(x[Ns], y[Ns])), ...); + loop_op(ret, x, y, ktm::mod); return ret; } }; @@ -330,23 +163,9 @@ struct ktm::detail::common_implement::lerp { using V = vec; static KTM_INLINE V call(const V& x, const V& y, T t) noexcept - { - if constexpr(N <= 4) - return call(x, y, t, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::lerp(x[i], y[i], t); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, const V& y, T t, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::lerp(x[Ns], y[Ns], t)), ...); + loop_scalar(ret, x, y, t, ktm::lerp); return ret; } }; @@ -356,23 +175,9 @@ struct ktm::detail::common_implement::mix { using V = vec; static KTM_INLINE V call(const V& x, const V& y, const V& t) noexcept - { - if constexpr(N <= 4) - return call(x, y, t, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::lerp(x[i], y[i], t[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, const V& y, const V& t, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::lerp(x[Ns], y[Ns], t[Ns])), ...); + loop_op(ret, x, y, t, ktm::lerp); return ret; } }; @@ -382,23 +187,9 @@ struct ktm::detail::common_implement::step { using V = vec; static KTM_INLINE V call(const V& edge, const V& x) noexcept - { - if constexpr(N <= 4) - return call(edge, x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::step(edge[i], x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& edge, const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::step(edge[Ns], x[Ns])), ...); + loop_op(ret, edge, x, ktm::step); return ret; } }; @@ -408,23 +199,9 @@ struct ktm::detail::common_implement::smoothstep { using V = vec; static KTM_INLINE V call(const V& edge0, const V& edge1, const V& x) noexcept - { - if constexpr(N <= 4) - return call(edge0, edge1, x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::smoothstep(edge0[i], edge1[i], x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& edge0, const V& edge1, const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::smoothstep(edge0[Ns], edge1[Ns], x[Ns])), ...); + loop_op(ret, edge0, 
edge1, x, ktm::smoothstep); return ret; } }; @@ -434,23 +211,9 @@ struct ktm::detail::common_implement::sqrt { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::sqrt(x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::sqrt(x[Ns])), ...); + loop_op(ret, x, ktm::sqrt); return ret; } }; @@ -460,23 +223,9 @@ struct ktm::detail::common_implement::rsqrt { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::rsqrt(x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::rsqrt(x[Ns])), ...); + loop_op(ret, x, ktm::rsqrt); return ret; } }; @@ -486,23 +235,9 @@ struct ktm::detail::common_implement::recip { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::recip(x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::recip(x[Ns])), ...); + loop_op(ret, x, ktm::recip); return ret; } }; @@ -512,23 +247,9 @@ struct ktm::detail::common_implement::fast_sqrt { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::fast::sqrt(x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::fast::sqrt(x[Ns])), ...); + loop_op(ret, x, ktm::fast::sqrt); return ret; } }; @@ -538,23 +259,9 @@ struct ktm::detail::common_implement::fast_rsqrt { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::fast::rsqrt(x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::fast::rsqrt(x[Ns])), ...); + loop_op(ret, x, ktm::fast::rsqrt); return ret; } }; @@ -564,23 +271,9 @@ struct ktm::detail::common_implement::fast_recip { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = ktm::fast::recip(x[i]); - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = ktm::fast::recip(x[Ns])), ...); + loop_op(ret, x, ktm::fast::recip); return ret; } }; diff --git a/ktm/detail/loop_util.h b/ktm/detail/loop_util.h new file mode 100644 index 0000000..793628a --- /dev/null +++ b/ktm/detail/loop_util.h @@ -0,0 +1,151 @@ +// MIT License +// +// Copyright (c) 2024 有个小小杜 +// +// Created by 有个小小杜 +// + +#ifndef _KTM_LOOP_UTIL_H_ +#define _KTM_LOOP_UTIL_H_ + +#include +#include +#include "../setup.h" + +namespace ktm +{ +namespace detail +{ + +template +KTM_FUNC void loop_op(R& self, A1&& l1, OP&& op, std::index_sequence) +{ + ((self[Ns] = op(l1[Ns])), ...); +} + +template 
+KTM_FUNC void loop_op(R& self, A1&& l1, OP&& op) +{ + if constexpr(LoopN <= 4) + return loop_op(self, std::forward(l1), std::forward(op), std::make_index_sequence()); + else + { + for(int i = 0; i < LoopN; ++i) + self[i] = op(l1[i]); + } +} + +template +KTM_FUNC void loop_op(R& self, A1&& l1, A2&& l2, OP&& op, std::index_sequence) +{ + ((self[Ns] = op(l1[Ns], l2[Ns])), ...); +} + +template +KTM_FUNC void loop_op(R& self, A1&& l1, A2&& l2, OP&& op) +{ + if constexpr(LoopN <= 4) + return loop_op(self, std::forward(l1), std::forward(l2), std::forward(op), std::make_index_sequence()); + else + { + for(int i = 0; i < LoopN; ++i) + self[i] = op(l1[i], l2[i]); + } +} + +template +KTM_FUNC void loop_op(R& self, A1&& l1, A2&& l2, A3&& l3, OP&& op, std::index_sequence) +{ + ((self[Ns] = op(l1[Ns], l2[Ns], l3[Ns])), ...); +} + +template +KTM_FUNC void loop_op(R& self, A1&& l1, A2&& l2, A3&& l3, OP&& op) +{ + if constexpr(LoopN <= 4) + return loop_op(self, std::forward(l1), std::forward(l2), std::forward(l3), std::forward(op), std::make_index_sequence()); + else + { + for(int i = 0; i < LoopN; ++i) + self[i] = op(l1[i], l2[i], l3[i]); + } +} + +template +KTM_FUNC void loop_scalar(R& self, A1&& l1, const T& scalar, OP&& op, std::index_sequence) +{ + ((self[Ns] = op(l1[Ns], scalar)), ...); +} + +template +KTM_FUNC void loop_scalar(R& self, A1&& l1, const T& scalar, OP&& op) +{ + if constexpr(LoopN <= 4) + return loop_scalar(self, std::forward(l1), scalar, std::forward(op), std::make_index_sequence()); + else + { + for(int i = 0; i < LoopN; ++i) + self[i] = op(l1[i], scalar); + } +} + +template +KTM_FUNC void loop_scalar(R& self, A1&& l1, A2&& l2, const T& scalar, OP&& op, std::index_sequence) +{ + ((self[Ns] = op(l1[Ns], l2[Ns], scalar)), ...); +} + +template +KTM_FUNC void loop_scalar(R& self, A1&& l1, A2&& l2, const T& scalar, OP&& op) +{ + if constexpr(LoopN <= 4) + return loop_scalar(self, std::forward(l1), std::forward(l2), scalar, std::forward(op), std::make_index_sequence()); + else + { + for(int i = 0; i < LoopN; ++i) + self[i] = op(l1[i], l2[i], scalar); + } +} + +template +KTM_FUNC void loop_reduce(T& self, const A& l0, OP&& op, std::index_sequence) +{ + ((self = op(self, l0[Ns + 1])), ...); +} + +template +KTM_FUNC void loop_reduce(T& self, const A& l0, const T& init, OP&& op) +{ + self = init; + if constexpr(LoopN <= 4) + return loop_reduce(self, l0, std::forward(op), std::make_index_sequence()); + else + { + for(int i = 1; i < LoopN; ++i) + self = op(self, l0[i]); + } +} + +template +KTM_FUNC void loop_reduce(T& self, const A& l0, const U& l1, OP&& op, std::index_sequence) +{ + ((self = op(self, l0[Ns + 1], l1[Ns + 1])), ...); +} + +template +KTM_FUNC void loop_reduce(T& self, const A& l0, const U& l1, const T& init, OP&& op) +{ + self = init; + if constexpr(LoopN <= 4) + return loop_reduce(self, l0, l1, std::forward(op), std::make_index_sequence()); + else + { + for(int i = 1; i < LoopN; ++i) + self = op(self, l0[i], l1[i]); + } +} + +} +} + +#endif \ No newline at end of file diff --git a/ktm/detail/matrix/mat_calc.inl b/ktm/detail/matrix/mat_calc.inl index fc9e5ca..319cb65 100644 --- a/ktm/detail/matrix/mat_calc.inl +++ b/ktm/detail/matrix/mat_calc.inl @@ -9,7 +9,7 @@ #define _KTM_MAT_CALC_INL_ #include "mat_calc_fwd.h" -#include "../../setup.h" +#include "../loop_util.h" #include "../../type/vec_fwd.h" #include "../../type/mat_fwd.h" #include "../../function/common.h" @@ -21,23 +21,13 @@ struct ktm::detail::mat_opt_implement::mat_mul_vec using ColV = vec; using RowV = vec; static 
KTM_INLINE ColV call(const M& m, const RowV& v) noexcept - { - if constexpr(Row <= 4) - return call(m, v, std::make_index_sequence()); - else - { - ColV ret { }; - for(int i = 0; i < Row; ++i) - ret += (m[i] * v[i]); - return ret; - } - } -private: - template - static KTM_INLINE ColV call(const M& m, const RowV& v, std::index_sequence) noexcept { ColV ret; - ret = ((m[Ns] * v[Ns]) + ...); + loop_reduce(ret, m, v, m[0] * v[0], + [](const ColV& ret, const ColV& m_col, const T& v_val) -> ColV + { + return ret + m_col * v_val; + }); return ret; } }; @@ -49,23 +39,13 @@ struct ktm::detail::mat_opt_implement::vec_mul_mat using ColV = vec; using RowV = vec; static KTM_INLINE RowV call(const ColV& v, const M& m) noexcept - { - if constexpr(Row <= 4) - return call(v, m, std::make_index_sequence()); - else - { - RowV ret; - for(int i = 0; i < Row; ++i) - ret[i] = ktm::reduce_add(m[i] * v); - return ret; - } - } -private: - template - static KTM_INLINE RowV call(const ColV& v, const M& m, std::index_sequence) noexcept { RowV ret; - ((ret[Ns] = ktm::reduce_add(m[Ns] * v)), ...); + loop_scalar(ret, m, v, + [](const ColV& m_col, const ColV& v) -> T + { + return ktm::reduce_add(m_col * v); + }); return ret; } }; @@ -74,30 +54,24 @@ template struct ktm::detail::mat_opt_implement::mat_mul_mat { using M = mat; + using ColV = vec; + using RowV = vec; + template using M2 = mat; + template using RetM = mat; template static KTM_INLINE RetM call(const M& m1 , const M2& m2) noexcept - { - if constexpr(U <= 4) - return call(m1, m2, std::make_index_sequence()); - else - { - RetM ret; - for(int i = 0; i < U; ++i) - ret[i] = mat_mul_vec::call(m1, m2[i]); - return ret; - } - } -private: - template - static KTM_INLINE RetM call(const M& m1 , const M2& m2, std::index_sequence) noexcept { RetM ret; - ((ret[Ns] = mat_mul_vec::call(m1, m2[Ns])), ...); + loop_scalar(ret, m2, m1, + [](const RowV& m2_col, const M& m1) -> ColV + { + return mat_mul_vec::call(m1, m2_col); + }); return ret; } }; @@ -106,24 +80,11 @@ template struct ktm::detail::mat_opt_implement::add { using M = mat; + using ColV = vec; static KTM_INLINE M call(const M& m1, const M& m2) noexcept - { - if constexpr(Row <= 4) - return call(m1, m2, std::make_index_sequence()); - else - { - M ret; - for(int i = 0; i < Row; ++i) - ret[i] = m1[i] + m2[i]; - return ret; - } - } -private: - template - static KTM_INLINE M call(const M& m1, const M& m2, std::index_sequence) noexcept { M ret; - (((ret[Ns] = m1[Ns] + m2[Ns])), ...); + loop_op(ret, m1, m2, std::plus()); return ret; } }; @@ -132,24 +93,11 @@ template struct ktm::detail::mat_opt_implement::minus { using M = mat; + using ColV = vec; static KTM_INLINE M call(const M& m1, const M& m2) noexcept - { - if constexpr(Row <= 4) - return call(m1, m2, std::make_index_sequence()); - else - { - M ret; - for(int i = 0; i < Row; ++i) - ret[i] = m1[i] - m2[i]; - return ret; - } - } -private: - template - static KTM_INLINE M call(const M& m1, const M& m2, std::index_sequence) noexcept { M ret; - (((ret[Ns] = m1[Ns] - m2[Ns])), ...); + loop_op(ret, m1, m2, std::minus()); return ret; } }; @@ -158,24 +106,11 @@ template struct ktm::detail::mat_opt_implement::opposite { using M = mat; + using ColV = vec; static KTM_INLINE M call(const M& m) noexcept - { - if constexpr(Row <= 4) - return call(m, std::make_index_sequence()); - else - { - M ret; - for(int i = 0; i < Row; ++i) - ret[i] = -m[i]; - return ret; - } - } -private: - template - static KTM_INLINE M call(const M& m, std::index_sequence) noexcept { M ret; - (((ret[Ns] = 
-m[Ns])), ...); + loop_op(ret, m, std::negate()); return ret; } }; diff --git a/ktm/detail/matrix/mat_calc_simd.inl b/ktm/detail/matrix/mat_calc_simd.inl index 86c0387..214aa66 100644 --- a/ktm/detail/matrix/mat_calc_simd.inl +++ b/ktm/detail/matrix/mat_calc_simd.inl @@ -21,17 +21,14 @@ struct ktm::detail::mat_opt_implement::mat_mul_vec; static KTM_INLINE ColV call(const M& m, const RowV& v) noexcept { - return call(m, v, std::make_index_sequence()); - } -private: - - template - static KTM_INLINE ColV call(const M& m, const RowV& v, std::index_sequence) noexcept - { - ColV ret; - ret.st = _mul128_f32(m[0].st, _dup128_f32(v[0])); - ((ret.st = _madd128_f32(ret.st, m[Ns + 1].st, _dup128_f32(v[Ns + 1]))), ...); - return ret; + ColV ret; skv::fv4 s_ret; + loop_reduce(s_ret, m, v, _mul128_f32(m[0].st, _dup128_f32(v[0])), + [](const skv::fv4& s_ret, const ColV& m_col, const float& v_val) -> skv::fv4 + { + return _madd128_f32(s_ret, m_col.st, _dup128_f32(v_val)); + }); + ret.st = s_ret; + return ret; } }; @@ -42,22 +39,16 @@ struct ktm::detail::mat_opt_implement::vec_mul_mat; using RowV = vec; static KTM_INLINE RowV call(const ColV& v, const M& m) noexcept - { - return call(v, m, std::make_index_sequence()); - } -private: - template - static KTM_INLINE RowV call(const ColV& v, const M& m, std::index_sequence) noexcept { RowV ret; - if constexpr(Col == 3) + loop_scalar(ret, m, v, + [](const ColV& m_col, const ColV& v) -> float { - ((ret[Ns] = skv::radd_fv3(_mul128_f32(v.st, m[Ns].st))), ...); - } - else - { - ((ret[Ns] = skv::radd_fv4(_mul128_f32(v.st, m[Ns].st))), ...); - } + if constexpr(Col == 3) + return skv::radd_fv3(_mul128_f32(m_col.st, v.st)); + else + return skv::radd_fv4(_mul128_f32(m_col.st, v.st)); + }); return ret; } }; @@ -68,15 +59,14 @@ struct ktm::detail::mat_opt_implement::add; using ColV = vec; static KTM_INLINE M call(const M& m1, const M& m2) noexcept - { - return call(m1, m2, std::make_index_sequence()); - } -private: - template - static KTM_INLINE M call(const M& m1, const M& m2, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _add128_f32(m1[Ns].st, m2[Ns].st)), ...); + skv::fv4* p_fv4 = &ret[0].st; + loop_op(p_fv4, m1, m2, + [](const ColV& m1_col, const ColV& m2_col) -> skv::fv4 + { + return _add128_f32(m1_col.st, m2_col.st); + }); return ret; } }; @@ -87,15 +77,14 @@ struct ktm::detail::mat_opt_implement::minus; using ColV = vec; static KTM_INLINE M call(const M& m1, const M& m2) noexcept - { - return call(m1, m2, std::make_index_sequence()); - } -private: - template - static KTM_INLINE M call(const M& m1, const M& m2, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _sub128_f32(m1[Ns].st, m2[Ns].st)), ...); + skv::fv4* p_fv4 = &ret[0].st; + loop_op(p_fv4, m1, m2, + [](const ColV& m1_col, const ColV& m2_col) -> skv::fv4 + { + return _sub128_f32(m1_col.st, m2_col.st); + }); return ret; } }; @@ -106,15 +95,14 @@ struct ktm::detail::mat_opt_implement::opposite; using ColV = vec; static KTM_INLINE M call(const M& m) noexcept - { - return call(m, std::make_index_sequence()); - } -private: - template - static KTM_INLINE M call(const M& m, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _neg128_f32(m[Ns].st)), ...); + skv::fv4* p_fv4 = &ret[0].st; + loop_op(p_fv4, m, + [](const ColV& m_col) -> skv::fv4 + { + return _neg128_f32(m_col.st); + }); return ret; } }; @@ -129,15 +117,14 @@ struct ktm::detail::mat_opt_implement::add; using ColV = vec; static KTM_INLINE M call(const M& m1, const M& m2) noexcept - { - return call(m1, m2, std::make_index_sequence()); - 
} -private: - template - static KTM_INLINE M call(const M& m1, const M& m2, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _add128_s32(m1[Ns].st, m2[Ns].st)), ...); + skv::sv4* p_sv4 = &ret[0].st; + loop_op(p_sv4, m1, m2, + [](const ColV& m1_col, const ColV& m2_col) -> skv::sv4 + { + return _add128_s32(m1_col.st, m2_col.st); + }); return ret; } }; @@ -148,15 +135,14 @@ struct ktm::detail::mat_opt_implement::minus; using ColV = vec; static KTM_INLINE M call(const M& m1, const M& m2) noexcept - { - return call(m1, m2, std::make_index_sequence()); - } -private: - template - static KTM_INLINE M call(const M& m1, const M& m2, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _sub128_s32(m1[Ns].st, m2[Ns].st)), ...); + skv::sv4* p_sv4 = &ret[0].st; + loop_op(p_sv4, m1, m2, + [](const ColV& m1_col, const ColV& m2_col) -> skv::sv4 + { + return _sub128_s32(m1_col.st, m2_col.st); + }); return ret; } }; @@ -167,15 +153,14 @@ struct ktm::detail::mat_opt_implement::opposite; using ColV = vec; static KTM_INLINE M call(const M& m) noexcept - { - return call(m, std::make_index_sequence()); - } -private: - template - static KTM_INLINE M call(const M& m, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _neg128_s32(m[Ns].st)), ...); + skv::sv4* p_sv4 = &ret[0].st; + loop_op(p_sv4, m, + [](const ColV& m_col) -> skv::sv4 + { + return _neg128_s32(m_col.st); + }); return ret; } }; @@ -192,17 +177,14 @@ struct ktm::detail::mat_opt_implement::mat_mul_vec; static KTM_INLINE ColV call(const M& m, const RowV& v) noexcept { - return call(m, v, std::make_index_sequence()); - } -private: - - template - static KTM_INLINE ColV call(const M& m, const RowV& v, std::index_sequence) noexcept - { - ColV ret; - ret.st = _mul128_s32(m[0].st, _dup128_s32(v[0])); - ((ret.st = _madd128_s32(ret.st, m[Ns + 1].st, _dup128_s32(v[Ns + 1]))), ...); - return ret; + ColV ret; skv::sv4 s_ret; + loop_reduce(s_ret, m, v, _mul128_s32(m[0].st, _dup128_s32(v[0])), + [](const skv::sv4& s_ret, const ColV& m_col, const int& v_val) -> skv::sv4 + { + return _madd128_s32(s_ret, m_col.st, _dup128_s32(v_val)); + }); + ret.st = s_ret; + return ret; } }; @@ -213,22 +195,16 @@ struct ktm::detail::mat_opt_implement::vec_mul_mat; using RowV = vec; static KTM_INLINE RowV call(const ColV& v, const M& m) noexcept - { - return call(v, m, std::make_index_sequence()); - } -private: - template - static KTM_INLINE RowV call(const ColV& v, const M& m, std::index_sequence) noexcept { RowV ret; - if constexpr(Col == 3) + loop_scalar(ret, m, v, + [](const ColV& m_col, const ColV& v) -> int { - ((ret[Ns] = skv::radd_sv3(_mul128_s32(v.st, m[Ns].st))), ...); - } - else - { - ((ret[Ns] = skv::radd_sv4(_mul128_s32(v.st, m[Ns].st))), ...); - } + if constexpr(Col == 3) + return skv::radd_sv3(_mul128_s32(m_col.st, v.st)); + else + return skv::radd_sv4(_mul128_s32(m_col.st, v.st)); + }); return ret; } }; @@ -245,16 +221,14 @@ struct ktm::detail::mat_opt_implement::mat_mul_vec using RowV = vec; static KTM_INLINE ColV call(const M& m, const RowV& v) noexcept { - return call(m, v, std::make_index_sequence()); - } -private: - template - static KTM_INLINE ColV call(const M& m, const RowV& v, std::index_sequence) noexcept - { - ColV ret; - ret.st = _mul64_f32(m[0].st, _dup64_f32(v[0])); - ((ret.st = _madd64_f32(ret.st, m[Ns + 1].st, _dup64_f32(v[Ns + 1]))), ...); - return ret; + ColV ret; skv::fv2 s_ret; + loop_reduce(s_ret, m, v, _mul64_f32(m[0].st, _dup64_f32(v[0])), + [](const skv::fv2& s_ret, const ColV& m_col, const float& v_val) -> skv::fv2 + { + return 
_madd64_f32(s_ret, m_col.st, _dup64_f32(v_val)); + }); + ret.st = s_ret; + return ret; } }; @@ -265,15 +239,13 @@ struct ktm::detail::mat_opt_implement::vec_mul_mat using ColV = vec<2, float>; using RowV = vec; static KTM_INLINE RowV call(const ColV& v, const M& m) noexcept - { - return call(v, m, std::make_index_sequence()); - } -private: - template - static KTM_INLINE RowV call(const ColV& v, const M& m, std::index_sequence) noexcept { RowV ret; - ((ret[Ns] = skv::radd_fv2(_mul64_f32(v.st, m[Ns].st))), ...); + loop_scalar(ret, m, v, + [](const ColV& m_col, const ColV& v) -> float + { + return skv::radd_fv2(_mul64_f32(m_col.st, v.st)); + }); return ret; } }; @@ -284,15 +256,14 @@ struct ktm::detail::mat_opt_implement::add using M = mat; using ColV = vec<2, float>; static KTM_INLINE M call(const M& m1, const M& m2) noexcept - { - return call(m1, m2, std::make_index_sequence()); - } -private: - template - static KTM_INLINE M call(const M& m1, const M& m2, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _add64_f32(m1[Ns].st, m2[Ns].st)), ...); + skv::fv2* p_fv2 = &ret[0].st; + loop_op(p_fv2, m1, m2, + [](const ColV& m1_col, const ColV& m2_col) -> skv::fv2 + { + return _add64_f32(m1_col.st, m2_col.st); + }); return ret; } }; @@ -303,15 +274,14 @@ struct ktm::detail::mat_opt_implement::minus using M = mat; using ColV = vec<2, float>; static KTM_INLINE M call(const M& m1, const M& m2) noexcept - { - return call(m1, m2, std::make_index_sequence()); - } -private: - template - static KTM_INLINE M call(const M& m1, const M& m2, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _sub64_f32(m1[Ns].st, m2[Ns].st)), ...); + skv::fv2* p_fv2 = &ret[0].st; + loop_op(p_fv2, m1, m2, + [](const ColV& m1_col, const ColV& m2_col) -> skv::fv2 + { + return _sub64_f32(m1_col.st, m2_col.st); + }); return ret; } }; @@ -322,15 +292,14 @@ struct ktm::detail::mat_opt_implement::opposite using M = mat; using ColV = vec<2, float>; static KTM_INLINE M call(const M& m) noexcept - { - return call(m, std::make_index_sequence()); - } -private: - template - static KTM_INLINE M call(const M& m, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _neg64_f32(m[Ns].st)), ...); + skv::fv2* p_fv2 = &ret[0].st; + loop_op(p_fv2, m, + [](const ColV& m_col) -> skv::fv2 + { + return _neg64_f32(m_col.st); + }); return ret; } }; @@ -343,16 +312,14 @@ struct ktm::detail::mat_opt_implement::mat_mul_vec using RowV = vec; static KTM_INLINE ColV call(const M& m, const RowV& v) noexcept { - return call(m, v, std::make_index_sequence()); - } -private: - template - static KTM_INLINE ColV call(const M& m, const RowV& v, std::index_sequence) noexcept - { - ColV ret; - ret.st = _mul64_s32(m[0].st, _dup64_s32(v[0])); - ((ret.st = _madd64_s32(ret.st, m[Ns + 1].st, _dup64_s32(v[Ns + 1]))), ...); - return ret; + ColV ret; skv::sv2 s_ret; + loop_reduce(s_ret, m, v, _mul64_s32(m[0].st, _dup64_s32(v[0])), + [](const skv::sv2& s_ret, const ColV& m_col, const int& v_val) -> skv::sv2 + { + return _madd64_s32(s_ret, m_col.st, _dup64_s32(v_val)); + }); + ret.st = s_ret; + return ret; } }; @@ -363,15 +330,13 @@ struct ktm::detail::mat_opt_implement::vec_mul_mat using ColV = vec<2, int>; using RowV = vec; static KTM_INLINE RowV call(const ColV& v, const M& m) noexcept - { - return call(v, m, std::make_index_sequence()); - } -private: - template - static KTM_INLINE RowV call(const ColV& v, const M& m, std::index_sequence) noexcept { RowV ret; - ((ret[Ns] = skv::radd_sv2(_mul64_s32(v.st, m[Ns].st))), ...); + loop_scalar(ret, m, v, + [](const ColV& 
m_col, const ColV& v) -> int + { + return skv::radd_sv2(_mul64_s32(m_col.st, v.st)); + }); return ret; } }; @@ -382,15 +347,14 @@ struct ktm::detail::mat_opt_implement::add using M = mat; using ColV = vec<2, int>; static KTM_INLINE M call(const M& m1, const M& m2) noexcept - { - return call(m1, m2, std::make_index_sequence()); - } -private: - template - static KTM_INLINE M call(const M& m1, const M& m2, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _add64_s32(m1[Ns].st, m2[Ns].st)), ...); + skv::sv2* p_sv2 = &ret[0].st; + loop_op(p_sv2, m1, m2, + [](const ColV& m1_col, const ColV& m2_col) -> skv::sv2 + { + return _add64_s32(m1_col.st, m2_col.st); + }); return ret; } }; @@ -401,15 +365,14 @@ struct ktm::detail::mat_opt_implement::minus using M = mat; using ColV = vec<2, int>; static KTM_INLINE M call(const M& m1, const M& m2) noexcept - { - return call(m1, m2, std::make_index_sequence()); - } -private: - template - static KTM_INLINE M call(const M& m1, const M& m2, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _sub64_s32(m1[Ns].st, m2[Ns].st)), ...); + skv::sv2* p_sv2 = &ret[0].st; + loop_op(p_sv2, m1, m2, + [](const ColV& m1_col, const ColV& m2_col) -> skv::sv2 + { + return _sub64_s32(m1_col.st, m2_col.st); + }); return ret; } }; @@ -420,15 +383,14 @@ struct ktm::detail::mat_opt_implement::opposite using M = mat; using ColV = vec<2, int>; static KTM_INLINE M call(const M& m) noexcept - { - return call(m, std::make_index_sequence()); - } -private: - template - static KTM_INLINE M call(const M& m, std::index_sequence) noexcept { M ret; - ((ret[Ns].st = _neg64_s32(m[Ns].st)), ...); + skv::sv2* p_sv2 = &ret[0].st; + loop_op(p_sv2, m, + [](const ColV& m_col) -> skv::sv2 + { + return _neg64_s32(m_col.st); + }); return ret; } }; diff --git a/ktm/detail/vector/vec_calc.inl b/ktm/detail/vector/vec_calc.inl index f08201e..80d1940 100644 --- a/ktm/detail/vector/vec_calc.inl +++ b/ktm/detail/vector/vec_calc.inl @@ -8,9 +8,8 @@ #ifndef _KTM_VEC_CALC_INL_ #define _KTM_VEC_CALC_INL_ -#include #include "vec_calc_fwd.h" -#include "../../setup.h" +#include "../loop_util.h" #include "../../type/basic.h" #include "../../type/vec_fwd.h" @@ -19,23 +18,9 @@ struct ktm::detail::vec_calc_implement::add { using V = vec; static KTM_INLINE V call(const V& x, const V& y) noexcept - { - if constexpr(N <= 4) - return call(x, y, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = x[i] + y[i]; - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, const V& y, std::index_sequence) noexcept { V ret; - ((ret[Ns] = x[Ns] + y[Ns]), ...); + loop_op(ret, x, y, std::plus()); return ret; } }; @@ -46,19 +31,7 @@ struct ktm::detail::vec_calc_implement::add_to_self using V = vec; static KTM_INLINE void call(V& x, const V& y) noexcept { - if constexpr(N <= 4) - call(x, y, std::make_index_sequence()); - else - { - for(int i = 0; i < N; ++i) - x[i] += y[i]; - } - } -private: - template - static KTM_INLINE void call(V& x, const V& y, std::index_sequence) noexcept - { - ((x[Ns] += y[Ns]), ...); + loop_op(x, x, y, std::plus()); } }; @@ -67,23 +40,9 @@ struct ktm::detail::vec_calc_implement::minus { using V = vec; static KTM_INLINE V call(const V& x, const V& y) noexcept - { - if constexpr(N <= 4) - return call(x, y, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = x[i] - y[i]; - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, const V& y, std::index_sequence) noexcept { V ret; - 
((ret[Ns] = x[Ns] - y[Ns]), ...); + loop_op(ret, x, y, std::minus()); return ret; } }; @@ -94,19 +53,7 @@ struct ktm::detail::vec_calc_implement::minus_to_self using V = vec; static KTM_INLINE void call(V& x, const V& y) noexcept { - if constexpr(N <= 4) - call(x, y, std::make_index_sequence()); - else - { - for(int i = 0; i < N; ++i) - x[i] -= y[i]; - } - } -private: - template - static KTM_INLINE void call(V& x, const V& y, std::index_sequence) noexcept - { - ((x[Ns] -= y[Ns]), ...); + loop_op(x, x, y, std::minus()); } }; @@ -115,23 +62,9 @@ struct ktm::detail::vec_calc_implement::mul { using V = vec; static KTM_INLINE V call(const V& x, const V& y) noexcept - { - if constexpr(N <= 4) - return call(x, y, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = x[i] * y[i]; - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, const V& y, std::index_sequence) noexcept { V ret; - ((ret[Ns] = x[Ns] * y[Ns]), ...); + loop_op(ret, x, y, std::multiplies()); return ret; } }; @@ -142,19 +75,7 @@ struct ktm::detail::vec_calc_implement::mul_to_self using V = vec; static KTM_INLINE void call(V& x, const V& y) noexcept { - if constexpr(N <= 4) - call(x, y, std::make_index_sequence()); - else - { - for(int i = 0; i < N; ++i) - x[i] *= y[i]; - } - } -private: - template - static KTM_INLINE void call(V& x, const V& y, std::index_sequence) noexcept - { - ((x[Ns] *= y[Ns]), ...); + loop_op(x, x, y, std::multiplies()); } }; @@ -163,23 +84,9 @@ struct ktm::detail::vec_calc_implement::div { using V = vec; static KTM_INLINE V call(const V& x, const V& y) noexcept - { - if constexpr(N <= 4) - return call(x, y, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = x[i] / y[i]; - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, const V& y, std::index_sequence) noexcept { V ret; - ((ret[Ns] = x[Ns] / y[Ns]), ...); + loop_op(ret, x, y, std::divides()); return ret; } }; @@ -190,19 +97,7 @@ struct ktm::detail::vec_calc_implement::div_to_self using V = vec; static KTM_INLINE void call(V& x, const V& y) noexcept { - if constexpr(N <= 4) - call(x, y, std::make_index_sequence()); - else - { - for(int i = 0; i < N; ++i) - x[i] /= y[i]; - } - } -private: - template - static KTM_INLINE void call(V& x, const V& y, std::index_sequence) noexcept - { - ((x[Ns] /= y[Ns]), ...); + loop_op(x, x, y, std::divides()); } }; @@ -211,23 +106,9 @@ struct ktm::detail::vec_calc_implement::opposite { using V = vec; static KTM_INLINE V call(const V& x) noexcept - { - if constexpr(N <= 4) - return call(x, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = -x[i]; - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, std::index_sequence) noexcept { V ret; - ((ret[Ns] = -x[Ns]), ...); + loop_op(ret, x, std::negate()); return ret; } }; @@ -237,23 +118,9 @@ struct ktm::detail::vec_calc_implement::add_scalar { using V = vec; static KTM_INLINE V call(const V& x, T scalar) noexcept - { - if constexpr(N <= 4) - return call(x, scalar, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = x[i] + scalar; - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, T scalar, std::index_sequence) noexcept { V ret; - ((ret[Ns] = x[Ns] + scalar), ...); + loop_scalar(ret, x, scalar, std::plus()); return ret; } }; @@ -264,19 +131,7 @@ struct ktm::detail::vec_calc_implement::add_scalar_to_self using V = vec; static 
KTM_INLINE void call(V& x, T scalar) noexcept { - if constexpr(N <= 4) - call(x, scalar, std::make_index_sequence()); - else - { - for(int i = 0; i < N; ++i) - x[i] += scalar; - } - } -private: - template - static KTM_INLINE void call(V& x, T scalar, std::index_sequence) noexcept - { - ((x[Ns] += scalar), ...); + loop_scalar(x, x, scalar, std::plus()); } }; @@ -285,23 +140,9 @@ struct ktm::detail::vec_calc_implement::minus_scalar { using V = vec; static KTM_INLINE V call(const V& x, T scalar) noexcept - { - if constexpr(N <= 4) - return call(x, scalar, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = x[i] - scalar; - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, T scalar, std::index_sequence) noexcept { V ret; - ((ret[Ns] = x[Ns] - scalar), ...); + loop_scalar(ret, x, scalar, std::minus()); return ret; } }; @@ -312,19 +153,7 @@ struct ktm::detail::vec_calc_implement::minus_scalar_to_self using V = vec; static KTM_INLINE void call(V& x, T scalar) noexcept { - if constexpr(N <= 4) - call(x, scalar, std::make_index_sequence()); - else - { - for(int i = 0; i < N; ++i) - x[i] -= scalar; - } - } -private: - template - static KTM_INLINE void call(V& x, T scalar, std::index_sequence) noexcept - { - ((x[Ns] -= scalar), ...); + loop_scalar(x, x, scalar, std::minus()); } }; @@ -333,24 +162,10 @@ struct ktm::detail::vec_calc_implement::mul_scalar { using V = vec; static KTM_INLINE V call(const V& x, T scalar) noexcept - { - if constexpr(N <= 4) - return call(x, scalar, std::make_index_sequence()); - else - { - V ret; - for(int i = 0; i < N; ++i) - ret[i] = x[i] * scalar; - return ret; - } - } -private: - template - static KTM_INLINE V call(const V& x, T scalar, std::index_sequence) noexcept { V ret; - ((ret[Ns] = x[Ns] * scalar), ...); - return ret; + loop_scalar(ret, x, scalar, std::multiplies()); + return ret; } }; @@ -360,19 +175,7 @@ struct ktm::detail::vec_calc_implement::mul_scalar_to_self using V = vec; static KTM_INLINE void call(V& x, T scalar) noexcept { - if constexpr(N <= 4) - call(x, scalar, std::make_index_sequence()); - else - { - for(int i = 0; i < N; ++i) - x[i] *= scalar; - } - } -private: - template - static KTM_INLINE void call(V& x, T scalar, std::index_sequence) noexcept - { - ((x[Ns] *= scalar), ...); + loop_scalar(x, x, scalar, std::multiplies()); } }; @@ -384,24 +187,13 @@ struct ktm::detail::vec_calc_implement::div_scalar { if constexpr(std::is_floating_point_v) return mul_scalar::call(x, one / scalar); - else if constexpr(N <= 4) - return call(x, scalar, std::make_index_sequence()); else { V ret; - for(int i = 0; i < N; ++i) - ret[i] = x[i] / scalar; + loop_scalar(ret, x, scalar, std::divides()); return ret; } } -private: - template - static KTM_INLINE V call(const V& x, T scalar, std::index_sequence) noexcept - { - V ret; - ((ret[Ns] = x[Ns] / scalar), ...); - return ret; - } }; template @@ -412,19 +204,8 @@ struct ktm::detail::vec_calc_implement::div_scalar_to_self { if constexpr(std::is_floating_point_v) mul_scalar_to_self::call(x, one / scalar); - else if constexpr(N <= 4) - call(x, scalar, std::make_index_sequence()); else - { - for(int i = 0; i < N; ++i) - x[i] /= scalar; - } - } -private: - template - static KTM_INLINE void call(V& x, T scalar, std::index_sequence) noexcept - { - ((x[Ns] /= scalar), ...); + loop_scalar(x, x, scalar, std::divides()); } }; From 2a5be6bdb2e85e889692eb0aae0c348f9097342d Mon Sep 17 00:00:00 2001 From: chenqiudu Date: Thu, 13 Jun 2024 13:44:33 +0800 Subject: [PATCH 
3/8] update loop_util --- ktm/detail/loop_util.h | 50 +++++++++++------------ ktm/detail/matrix/mat_calc_simd.inl | 63 ++++++++++++----------------- 2 files changed, 50 insertions(+), 63 deletions(-) diff --git a/ktm/detail/loop_util.h b/ktm/detail/loop_util.h index 793628a..b7785f7 100644 --- a/ktm/detail/loop_util.h +++ b/ktm/detail/loop_util.h @@ -18,92 +18,92 @@ namespace detail { template -KTM_FUNC void loop_op(R& self, A1&& l1, OP&& op, std::index_sequence) +KTM_FUNC void loop_op(R&& self, A1&& l0, OP&& op, std::index_sequence) { - ((self[Ns] = op(l1[Ns])), ...); + ((self[Ns] = op(l0[Ns])), ...); } template -KTM_FUNC void loop_op(R& self, A1&& l1, OP&& op) +KTM_FUNC void loop_op(R&& self, A1&& l0, OP&& op) { if constexpr(LoopN <= 4) - return loop_op(self, std::forward(l1), std::forward(op), std::make_index_sequence()); + return loop_op(self, std::forward(l0), std::forward(op), std::make_index_sequence()); else { for(int i = 0; i < LoopN; ++i) - self[i] = op(l1[i]); + self[i] = op(l0[i]); } } template -KTM_FUNC void loop_op(R& self, A1&& l1, A2&& l2, OP&& op, std::index_sequence) +KTM_FUNC void loop_op(R&& self, A1&& l0, A2&& l1, OP&& op, std::index_sequence) { - ((self[Ns] = op(l1[Ns], l2[Ns])), ...); + ((self[Ns] = op(l0[Ns], l1[Ns])), ...); } template -KTM_FUNC void loop_op(R& self, A1&& l1, A2&& l2, OP&& op) +KTM_FUNC void loop_op(R&& self, A1&& l0, A2&& l1, OP&& op) { if constexpr(LoopN <= 4) - return loop_op(self, std::forward(l1), std::forward(l2), std::forward(op), std::make_index_sequence()); + return loop_op(self, std::forward(l0), std::forward(l1), std::forward(op), std::make_index_sequence()); else { for(int i = 0; i < LoopN; ++i) - self[i] = op(l1[i], l2[i]); + self[i] = op(l0[i], l1[i]); } } template -KTM_FUNC void loop_op(R& self, A1&& l1, A2&& l2, A3&& l3, OP&& op, std::index_sequence) +KTM_FUNC void loop_op(R&& self, A1&& l0, A2&& l1, A3&& l2, OP&& op, std::index_sequence) { - ((self[Ns] = op(l1[Ns], l2[Ns], l3[Ns])), ...); + ((self[Ns] = op(l0[Ns], l1[Ns], l2[Ns])), ...); } template -KTM_FUNC void loop_op(R& self, A1&& l1, A2&& l2, A3&& l3, OP&& op) +KTM_FUNC void loop_op(R&& self, A1&& l0, A2&& l1, A3&& l2, OP&& op) { if constexpr(LoopN <= 4) - return loop_op(self, std::forward(l1), std::forward(l2), std::forward(l3), std::forward(op), std::make_index_sequence()); + return loop_op(self, std::forward(l0), std::forward(l1), std::forward(l2), std::forward(op), std::make_index_sequence()); else { for(int i = 0; i < LoopN; ++i) - self[i] = op(l1[i], l2[i], l3[i]); + self[i] = op(l0[i], l1[i], l2[i]); } } template -KTM_FUNC void loop_scalar(R& self, A1&& l1, const T& scalar, OP&& op, std::index_sequence) +KTM_FUNC void loop_scalar(R&& self, A1&& l0, const T& scalar, OP&& op, std::index_sequence) { - ((self[Ns] = op(l1[Ns], scalar)), ...); + ((self[Ns] = op(l0[Ns], scalar)), ...); } template -KTM_FUNC void loop_scalar(R& self, A1&& l1, const T& scalar, OP&& op) +KTM_FUNC void loop_scalar(R&& self, A1&& l0, const T& scalar, OP&& op) { if constexpr(LoopN <= 4) - return loop_scalar(self, std::forward(l1), scalar, std::forward(op), std::make_index_sequence()); + return loop_scalar(self, std::forward(l0), scalar, std::forward(op), std::make_index_sequence()); else { for(int i = 0; i < LoopN; ++i) - self[i] = op(l1[i], scalar); + self[i] = op(l0[i], scalar); } } template -KTM_FUNC void loop_scalar(R& self, A1&& l1, A2&& l2, const T& scalar, OP&& op, std::index_sequence) +KTM_FUNC void loop_scalar(R&& self, A1&& l0, A2&& l1, const T& scalar, OP&& op, std::index_sequence) { - 
((self[Ns] = op(l1[Ns], l2[Ns], scalar)), ...); + ((self[Ns] = op(l0[Ns], l1[Ns], scalar)), ...); } template -KTM_FUNC void loop_scalar(R& self, A1&& l1, A2&& l2, const T& scalar, OP&& op) +KTM_FUNC void loop_scalar(R&& self, A1&& l0, A2&& l1, const T& scalar, OP&& op) { if constexpr(LoopN <= 4) - return loop_scalar(self, std::forward(l1), std::forward(l2), scalar, std::forward(op), std::make_index_sequence()); + return loop_scalar(self, std::forward(l0), std::forward(l1), scalar, std::forward(op), std::make_index_sequence()); else { for(int i = 0; i < LoopN; ++i) - self[i] = op(l1[i], l2[i], scalar); + self[i] = op(l0[i], l1[i], scalar); } } diff --git a/ktm/detail/matrix/mat_calc_simd.inl b/ktm/detail/matrix/mat_calc_simd.inl index 214aa66..eb3f086 100644 --- a/ktm/detail/matrix/mat_calc_simd.inl +++ b/ktm/detail/matrix/mat_calc_simd.inl @@ -21,13 +21,12 @@ struct ktm::detail::mat_opt_implement::mat_mul_vec; static KTM_INLINE ColV call(const M& m, const RowV& v) noexcept { - ColV ret; skv::fv4 s_ret; - loop_reduce(s_ret, m, v, _mul128_f32(m[0].st, _dup128_f32(v[0])), - [](const skv::fv4& s_ret, const ColV& m_col, const float& v_val) -> skv::fv4 + ColV ret; + loop_reduce(ret.st, m, v, _mul128_f32(m[0].st, _dup128_f32(v[0])), + [](const skv::fv4& ret_st, const ColV& m_col, const float& v_val) -> skv::fv4 { - return _madd128_f32(s_ret, m_col.st, _dup128_f32(v_val)); + return _madd128_f32(ret_st, m_col.st, _dup128_f32(v_val)); }); - ret.st = s_ret; return ret; } }; @@ -119,8 +118,7 @@ struct ktm::detail::mat_opt_implement::add(p_sv4, m1, m2, + loop_op(&ret[0].st, m1, m2, [](const ColV& m1_col, const ColV& m2_col) -> skv::sv4 { return _add128_s32(m1_col.st, m2_col.st); @@ -137,8 +135,7 @@ struct ktm::detail::mat_opt_implement::minus(p_sv4, m1, m2, + loop_op(&ret[0].st, m1, m2, [](const ColV& m1_col, const ColV& m2_col) -> skv::sv4 { return _sub128_s32(m1_col.st, m2_col.st); @@ -155,8 +152,7 @@ struct ktm::detail::mat_opt_implement::opposite(p_sv4, m, + loop_op(&ret[0].st, m, [](const ColV& m_col) -> skv::sv4 { return _neg128_s32(m_col.st); @@ -177,13 +173,12 @@ struct ktm::detail::mat_opt_implement::mat_mul_vec; static KTM_INLINE ColV call(const M& m, const RowV& v) noexcept { - ColV ret; skv::sv4 s_ret; - loop_reduce(s_ret, m, v, _mul128_s32(m[0].st, _dup128_s32(v[0])), - [](const skv::sv4& s_ret, const ColV& m_col, const int& v_val) -> skv::sv4 + ColV ret; + loop_reduce(ret.st, m, v, _mul128_s32(m[0].st, _dup128_s32(v[0])), + [](const skv::sv4& ret_st, const ColV& m_col, const int& v_val) -> skv::sv4 { - return _madd128_s32(s_ret, m_col.st, _dup128_s32(v_val)); + return _madd128_s32(ret_st, m_col.st, _dup128_s32(v_val)); }); - ret.st = s_ret; return ret; } }; @@ -221,13 +216,12 @@ struct ktm::detail::mat_opt_implement::mat_mul_vec using RowV = vec; static KTM_INLINE ColV call(const M& m, const RowV& v) noexcept { - ColV ret; skv::fv2 s_ret; - loop_reduce(s_ret, m, v, _mul64_f32(m[0].st, _dup64_f32(v[0])), - [](const skv::fv2& s_ret, const ColV& m_col, const float& v_val) -> skv::fv2 + ColV ret; + loop_reduce(ret.st, m, v, _mul64_f32(m[0].st, _dup64_f32(v[0])), + [](const skv::fv2& ret_st, const ColV& m_col, const float& v_val) -> skv::fv2 { - return _madd64_f32(s_ret, m_col.st, _dup64_f32(v_val)); + return _madd64_f32(ret_st, m_col.st, _dup64_f32(v_val)); }); - ret.st = s_ret; return ret; } }; @@ -258,8 +252,7 @@ struct ktm::detail::mat_opt_implement::add static KTM_INLINE M call(const M& m1, const M& m2) noexcept { M ret; - skv::fv2* p_fv2 = &ret[0].st; - loop_op(p_fv2, m1, m2, + 
loop_op(&ret[0].st, m1, m2, [](const ColV& m1_col, const ColV& m2_col) -> skv::fv2 { return _add64_f32(m1_col.st, m2_col.st); @@ -276,8 +269,7 @@ struct ktm::detail::mat_opt_implement::minus static KTM_INLINE M call(const M& m1, const M& m2) noexcept { M ret; - skv::fv2* p_fv2 = &ret[0].st; - loop_op(p_fv2, m1, m2, + loop_op(&ret[0].st, m1, m2, [](const ColV& m1_col, const ColV& m2_col) -> skv::fv2 { return _sub64_f32(m1_col.st, m2_col.st); @@ -294,8 +286,7 @@ struct ktm::detail::mat_opt_implement::opposite static KTM_INLINE M call(const M& m) noexcept { M ret; - skv::fv2* p_fv2 = &ret[0].st; - loop_op(p_fv2, m, + loop_op(&ret[0].st, m, [](const ColV& m_col) -> skv::fv2 { return _neg64_f32(m_col.st); @@ -312,13 +303,12 @@ struct ktm::detail::mat_opt_implement::mat_mul_vec using RowV = vec; static KTM_INLINE ColV call(const M& m, const RowV& v) noexcept { - ColV ret; skv::sv2 s_ret; - loop_reduce(s_ret, m, v, _mul64_s32(m[0].st, _dup64_s32(v[0])), - [](const skv::sv2& s_ret, const ColV& m_col, const int& v_val) -> skv::sv2 + ColV ret; + loop_reduce(ret.st, m, v, _mul64_s32(m[0].st, _dup64_s32(v[0])), + [](const skv::sv2& ret_st, const ColV& m_col, const int& v_val) -> skv::sv2 { - return _madd64_s32(s_ret, m_col.st, _dup64_s32(v_val)); + return _madd64_s32(ret_st, m_col.st, _dup64_s32(v_val)); }); - ret.st = s_ret; return ret; } }; @@ -349,8 +339,7 @@ struct ktm::detail::mat_opt_implement::add static KTM_INLINE M call(const M& m1, const M& m2) noexcept { M ret; - skv::sv2* p_sv2 = &ret[0].st; - loop_op(p_sv2, m1, m2, + loop_op(&ret[0].st, m1, m2, [](const ColV& m1_col, const ColV& m2_col) -> skv::sv2 { return _add64_s32(m1_col.st, m2_col.st); @@ -367,8 +356,7 @@ struct ktm::detail::mat_opt_implement::minus static KTM_INLINE M call(const M& m1, const M& m2) noexcept { M ret; - skv::sv2* p_sv2 = &ret[0].st; - loop_op(p_sv2, m1, m2, + loop_op(&ret[0].st, m1, m2, [](const ColV& m1_col, const ColV& m2_col) -> skv::sv2 { return _sub64_s32(m1_col.st, m2_col.st); @@ -385,8 +373,7 @@ struct ktm::detail::mat_opt_implement::opposite static KTM_INLINE M call(const M& m) noexcept { M ret; - skv::sv2* p_sv2 = &ret[0].st; - loop_op(p_sv2, m, + loop_op(&ret[0].st, m, [](const ColV& m_col) -> skv::sv2 { return _neg64_s32(m_col.st); From ab9fe4612096ccffbe72cfc131a6749d38a9c460 Mon Sep 17 00:00:00 2001 From: chenqiudu Date: Thu, 13 Jun 2024 13:49:42 +0800 Subject: [PATCH 4/8] fix reduce_hessenberg --- ktm/function/matrix_decompose.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ktm/function/matrix_decompose.h b/ktm/function/matrix_decompose.h index 8b385d1..82b7913 100644 --- a/ktm/function/matrix_decompose.h +++ b/ktm/function/matrix_decompose.h @@ -50,18 +50,14 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< for(int k = i + 1; k < N; ++k) { - T tt = zero; T ta = zero; for(int j = v_start; j < N; ++j) { - tt += a[i][j] * trans[j][k]; ta += a[i][j] * a[k][j]; } - tt *= r_h; ta *= r_h; for(int j = v_start; j < N; ++j) { - trans[j][k] -= tt * a[i][j]; a[k][j] -= ta * a[i][j]; } } From bcbe42cbf4e2a0c2e10f41251a4fbe4244a66c0a Mon Sep 17 00:00:00 2001 From: chenqiudu Date: Thu, 13 Jun 2024 14:32:22 +0800 Subject: [PATCH 5/8] update make construct --- ktm/detail/function/matrix.inl | 25 ++++++--------- ktm/interface/complex/icomp_make.h | 3 +- ktm/interface/matrix/imat_make.h | 45 ++++++++++----------------- ktm/interface/quaternion/iquat_make.h | 3 +- 4 files changed, 29 insertions(+), 47 deletions(-) diff --git a/ktm/detail/function/matrix.inl b/ktm/detail/function/matrix.inl index 
417c58a..ec046bc 100644 --- a/ktm/detail/function/matrix.inl +++ b/ktm/detail/function/matrix.inl @@ -24,7 +24,7 @@ struct ktm::detail::matrix_implement::transpose static KTM_INLINE RetM call(const M& m) noexcept { if constexpr(Row <= 4 && Col <= 4) - return call(m, std::make_index_sequence(), std::make_index_sequence()); + return call(m, std::make_index_sequence()); else { RetM ret; @@ -35,13 +35,10 @@ struct ktm::detail::matrix_implement::transpose } } private: - template - static KTM_INLINE RetM call(const M& m, std::index_sequence, std::index_sequence) noexcept + template + static KTM_INLINE RetM call(const M& m, std::index_sequence) noexcept { - RetM ret; - size_t row_index; - ((row_index = Rs, ret[Rs] = RowV(m[Cs][row_index]...)), ...); - return ret; + return RetM::from_row(m[Ns]...); } }; @@ -55,8 +52,8 @@ struct ktm::detail::matrix_implement::trace return call(m, std::make_index_sequence()); else { - T ret = zero; - for(int i = 0; i < N; ++i) + T ret = m[0][0]; + for(int i = 1; i < N; ++i) ret += m[i][i]; return ret; } @@ -76,23 +73,21 @@ struct ktm::detail::matrix_implement::diagonal using ColV = vec; static KTM_INLINE ColV call(const M& m) noexcept { + ColV ret; if constexpr(N <= 4) - return call(m, std::make_index_sequence()); + call(ret, m, std::make_index_sequence()); else { - ColV ret; for(int i = 0; i < N; ++i) ret[i] = m[i][i]; - return ret; } + return ret; } private: template - static KTM_INLINE ColV call(const M& m, std::index_sequence) noexcept + static KTM_INLINE void call(ColV& ret, const M& m, std::index_sequence) noexcept { - ColV ret; ((ret[Ns] = m[Ns][Ns]), ...); - return ret; } }; diff --git a/ktm/interface/complex/icomp_make.h b/ktm/interface/complex/icomp_make.h index 06d30e3..aaf552f 100644 --- a/ktm/interface/complex/icomp_make.h +++ b/ktm/interface/complex/icomp_make.h @@ -27,8 +27,7 @@ struct icomp_make> : Father static KTM_INLINE comp identity() noexcept { - static comp iden = comp(zero, one); - return iden; + return comp(zero, one); } static KTM_INLINE comp real_imag(T real, T imag) noexcept diff --git a/ktm/interface/matrix/imat_make.h b/ktm/interface/matrix/imat_make.h index 4224676..17ac8ff 100644 --- a/ktm/interface/matrix/imat_make.h +++ b/ktm/interface/matrix/imat_make.h @@ -29,24 +29,22 @@ struct imat_make>> : Fathe static KTM_INLINE std::enable_if_t, std::remove_const_t>...>, mat> from_row(RowVs&&... rows) noexcept { + mat ret; if constexpr(Row <= 4) - return from_row(std::make_index_sequence(), std::forward(rows)...); + from_row(ret, std::make_index_sequence(), std::forward(rows)...); else { - mat ret; for(int i = 0; i < Row; ++i) ret[i] = vec(rows[i]...); - return ret; } + return ret; } private: template - static KTM_INLINE mat from_row(std::index_sequence, RowVs&&... rows) noexcept + static KTM_INLINE void from_row(mat& ret, std::index_sequence, RowVs&&... rows) noexcept { - mat ret; size_t row_index; ((row_index = Ns, ret[row_index] = vec(rows[row_index]...)), ...); - return ret; } }; @@ -59,69 +57,60 @@ struct imat_make> : Father static KTM_INLINE std::enable_if_t, std::remove_const_t>...>, mat> from_row(RowVs&&... 
rows) noexcept { + mat ret; if constexpr(N <= 4) - return from_row(std::make_index_sequence(), std::forward(rows)...); + from_row(ret, std::make_index_sequence(), std::forward(rows)...); else { - mat ret; for(int i = 0; i < N; ++i) ret[i] = vec(rows[i]...); - return ret; } + return ret; } static KTM_INLINE mat from_diag(const vec& diag) noexcept { + mat ret { }; if constexpr(N <= 4) - return from_diag(diag, std::make_index_sequence()); + from_diag(ret, diag, std::make_index_sequence()); else { - mat ret { }; for(int i = 0; i < N; ++i) ret[i][i] = diag[i]; - return ret; } + return ret; } static KTM_INLINE mat from_eye() noexcept { + mat eye { }; if constexpr(N <= 4) - { - static mat eye = from_eye(std::make_index_sequence()); - return eye; - } + from_eye(eye, std::make_index_sequence()); else { - mat eye { }; for(int i = 0; i < N; ++i) eye[i][i] = one; - return eye; - } + } + return eye; } private: template - static KTM_INLINE mat from_row(std::index_sequence, RowVs&&... rows) noexcept + static KTM_INLINE void from_row(mat& ret, std::index_sequence, RowVs&&... rows) noexcept { - mat ret; size_t row_index; ((row_index = Ns, ret[row_index] = vec(rows[row_index]...)), ...); - return ret; } template - static KTM_INLINE mat from_diag(const vec& diag, std::index_sequence) noexcept + static KTM_INLINE void from_diag(mat& ret, const vec& diag, std::index_sequence) noexcept { - mat ret { }; ((ret[Ns][Ns] = diag[Ns]), ...); - return ret; } template - static KTM_INLINE mat from_eye(std::index_sequence) noexcept + static KTM_INLINE void from_eye(mat& ret, std::index_sequence) noexcept { - mat ret { }; ((ret[Ns][Ns] = one), ...); - return ret; } }; diff --git a/ktm/interface/quaternion/iquat_make.h b/ktm/interface/quaternion/iquat_make.h index 3af0c50..bbb597b 100644 --- a/ktm/interface/quaternion/iquat_make.h +++ b/ktm/interface/quaternion/iquat_make.h @@ -30,8 +30,7 @@ struct iquat_make> : Father static KTM_INLINE quat identity() noexcept { - static quat iden = quat(zero, zero, zero, one); - return iden; + return quat(zero, zero, zero, one); } static KTM_INLINE quat real_imag(T real, const vec<3, T>& imag) noexcept From 092816b9f054961375932f2fadbcf5897ff3505b Mon Sep 17 00:00:00 2001 From: chenqiudu Date: Thu, 13 Jun 2024 17:56:14 +0800 Subject: [PATCH 6/8] fix iarray memcpy --- ktm/interface/shared/iarray.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ktm/interface/shared/iarray.h b/ktm/interface/shared/iarray.h index 33cabe6..95d5365 100644 --- a/ktm/interface/shared/iarray.h +++ b/ktm/interface/shared/iarray.h @@ -34,9 +34,9 @@ struct iarray : Father #endif }; KTM_FUNC iarray(const iarray& copy) noexcept { std::memcpy(data(), copy.data(), sizeof(array_type)); } - KTM_FUNC iarray(iarray&& copy) noexcept { std::memmove(data(), copy.data(), sizeof(array_type)); }; + KTM_FUNC iarray(iarray&& copy) noexcept { std::memcpy(data(), copy.data(), sizeof(array_type)); }; KTM_FUNC iarray& operator=(const iarray& copy) noexcept { std::memcpy(data(), copy.data(), sizeof(array_type)); return *this; } - KTM_FUNC iarray& operator=(iarray&& copy) noexcept { std::memmove(data(), copy.data(), sizeof(array_type)); return *this; } + KTM_FUNC iarray& operator=(iarray&& copy) noexcept { std::memcpy(data(), copy.data(), sizeof(array_type)); return *this; } template KTM_FUNC typename array_type::value_type get() const noexcept From 13da8d37217d5f2abd28e4463cfc64efe9e384fe Mon Sep 17 00:00:00 2001 From: chenqiudu Date: Thu, 13 Jun 2024 18:23:25 +0800 Subject: [PATCH 7/8] one_over replace recip --- 
ktm/detail/function/matrix.inl | 76 ++++++++++++++--------------- ktm/detail/function/matrix_simd.inl | 10 ++-- ktm/function/matrix_decompose.h | 46 ++++++++--------- ktm/function/trigonometric.h | 2 +- ktm/type/basic.h | 8 ++- 5 files changed, 74 insertions(+), 68 deletions(-) diff --git a/ktm/detail/function/matrix.inl b/ktm/detail/function/matrix.inl index ec046bc..ec8310f 100644 --- a/ktm/detail/function/matrix.inl +++ b/ktm/detail/function/matrix.inl @@ -147,10 +147,10 @@ struct ktm::detail::matrix_implement::determinant det = one; M a { m }; for(int i = 0; i < N - 1; ++i) { - T one_over_diag = recip(a[i][i]); + T recip_diag = recip(a[i][i]); for(int j = i + 1; j < N; ++j) { - T factor = a[j][i] * one_over_diag; + T factor = a[j][i] * recip_diag; for(int k = i + 1; k < N; ++k) { a[j][k] -= a[i][k] * factor; @@ -190,12 +190,12 @@ struct ktm::detail::matrix_implement::inverse<2, T> using M = mat<2, 2, T>; static KTM_INLINE M call(const M& m) noexcept { - T one_over_det = one / determinant<2, T>::call(m); + T recip_det = one / determinant<2, T>::call(m); M ret; - ret[0][0] = m[1][1] * one_over_det; - ret[0][1] = - m[0][1] * one_over_det; - ret[1][0] = - m[1][0] * one_over_det; - ret[1][1] = m[0][0] * one_over_det; + ret[0][0] = m[1][1] * recip_det; + ret[0][1] = - m[0][1] * recip_det; + ret[1][0] = - m[1][0] * recip_det; + ret[1][1] = m[0][0] * recip_det; return ret; } }; @@ -206,17 +206,17 @@ struct ktm::detail::matrix_implement::inverse<3, T> using M = mat<3, 3, T>; static KTM_INLINE M call(const M& m) noexcept { - T one_over_det = one / determinant<3, T>::call(m); + T recip_det = one / determinant<3, T>::call(m); M ret; - ret[0][0] = one_over_det * (m[1][1] * m[2][2] - m[2][1] * m[1][2]); - ret[0][1] = one_over_det * (m[2][1] * m[0][2] - m[0][1] * m[2][2]); - ret[0][2] = one_over_det * (m[0][1] * m[1][2] - m[1][1] * m[0][2]); - ret[1][0] = one_over_det * (m[2][0] * m[1][2] - m[1][0] * m[2][2]); - ret[1][1] = one_over_det * (m[0][0] * m[2][2] - m[2][0] * m[0][2]); - ret[1][2] = one_over_det * (m[1][0] * m[0][2] - m[0][0] * m[1][2]); - ret[2][0] = one_over_det * (m[1][0] * m[2][1] - m[2][0] * m[1][1]); - ret[2][1] = one_over_det * (m[2][0] * m[0][1] - m[0][0] * m[2][1]); - ret[2][2] = one_over_det * (m[0][0] * m[1][1] - m[1][0] * m[0][1]); + ret[0][0] = recip_det * (m[1][1] * m[2][2] - m[2][1] * m[1][2]); + ret[0][1] = recip_det * (m[2][1] * m[0][2] - m[0][1] * m[2][2]); + ret[0][2] = recip_det * (m[0][1] * m[1][2] - m[1][1] * m[0][2]); + ret[1][0] = recip_det * (m[2][0] * m[1][2] - m[1][0] * m[2][2]); + ret[1][1] = recip_det * (m[0][0] * m[2][2] - m[2][0] * m[0][2]); + ret[1][2] = recip_det * (m[1][0] * m[0][2] - m[0][0] * m[1][2]); + ret[2][0] = recip_det * (m[1][0] * m[2][1] - m[2][0] * m[1][1]); + ret[2][1] = recip_det * (m[2][0] * m[0][1] - m[0][0] * m[2][1]); + ret[2][2] = recip_det * (m[0][0] * m[1][1] - m[1][0] * m[0][1]); return ret; } }; @@ -227,55 +227,55 @@ struct ktm::detail::matrix_implement::inverse<4, T> using M = mat<4, 4, T>; static KTM_INLINE M call(const M& m) noexcept { - T one_over_det = one / determinant<4, T>::call(m); + T recip_det = one / determinant<4, T>::call(m); M ret; ret[0][0] = - one_over_det * (m[1][1] * m[2][2] * m[3][3] - m[1][1] * m[2][3] * m[3][2] - m[2][1] * m[1][2] * m[3][3] + + recip_det * (m[1][1] * m[2][2] * m[3][3] - m[1][1] * m[2][3] * m[3][2] - m[2][1] * m[1][2] * m[3][3] + m[2][1] * m[1][3] * m[3][2] + m[3][1] * m[1][2] * m[2][3] - m[3][1] * m[1][3] * m[2][2]); ret[0][1] = - one_over_det * (m[3][1] * m[0][3] * m[2][2] - m[3][1] * m[0][2] * 
m[2][3] - m[2][1] * m[0][3] * m[3][2] + + recip_det * (m[3][1] * m[0][3] * m[2][2] - m[3][1] * m[0][2] * m[2][3] - m[2][1] * m[0][3] * m[3][2] + m[2][1] * m[0][2] * m[3][3] + m[0][1] * m[2][3] * m[3][2] - m[0][1] * m[2][2] * m[3][3]); ret[0][2] = - one_over_det * (m[0][1] * m[1][2] * m[3][3] - m[0][1] * m[1][3] * m[3][2] - m[1][1] * m[0][2] * m[3][3] + + recip_det * (m[0][1] * m[1][2] * m[3][3] - m[0][1] * m[1][3] * m[3][2] - m[1][1] * m[0][2] * m[3][3] + m[1][1] * m[0][3] * m[3][2] + m[3][1] * m[0][2] * m[1][3] - m[3][1] * m[0][3] * m[1][2]); ret[0][3] = - one_over_det * (m[2][1] * m[0][3] * m[1][2] - m[2][1] * m[0][2] * m[1][3] - m[1][1] * m[0][3] * m[2][2] + + recip_det * (m[2][1] * m[0][3] * m[1][2] - m[2][1] * m[0][2] * m[1][3] - m[1][1] * m[0][3] * m[2][2] + m[1][1] * m[0][2] * m[2][3] + m[0][1] * m[1][3] * m[2][2] - m[0][1] * m[1][2] * m[2][3]); ret[1][0] = - one_over_det * (m[3][0] * m[1][3] * m[2][2] - m[3][0] * m[1][2] * m[2][3] - m[2][0] * m[1][3] * m[3][2] + + recip_det * (m[3][0] * m[1][3] * m[2][2] - m[3][0] * m[1][2] * m[2][3] - m[2][0] * m[1][3] * m[3][2] + m[2][0] * m[1][2] * m[3][3] + m[1][0] * m[2][3] * m[3][2] - m[1][0] * m[2][2] * m[3][3]); ret[1][1] = - one_over_det * (m[0][0] * m[2][2] * m[3][3] - m[0][0] * m[2][3] * m[3][2] - m[2][0] * m[0][2] * m[3][3] + + recip_det * (m[0][0] * m[2][2] * m[3][3] - m[0][0] * m[2][3] * m[3][2] - m[2][0] * m[0][2] * m[3][3] + m[2][0] * m[0][3] * m[3][2] + m[3][0] * m[0][2] * m[2][3] - m[3][0] * m[0][3] * m[2][2]); ret[1][2] = - one_over_det * (m[3][0] * m[0][3] * m[1][2] - m[3][0] * m[0][2] * m[1][3] - m[1][0] * m[0][3] * m[3][2] + + recip_det * (m[3][0] * m[0][3] * m[1][2] - m[3][0] * m[0][2] * m[1][3] - m[1][0] * m[0][3] * m[3][2] + m[1][0] * m[0][2] * m[3][3] + m[0][0] * m[1][3] * m[3][2] - m[0][0] * m[1][2] * m[3][3]); ret[1][3] = - one_over_det * (m[0][0] * m[1][2] * m[2][3] - m[0][0] * m[1][3] * m[2][2] - m[1][0] * m[0][2] * m[2][3] + + recip_det * (m[0][0] * m[1][2] * m[2][3] - m[0][0] * m[1][3] * m[2][2] - m[1][0] * m[0][2] * m[2][3] + m[1][0] * m[0][3] * m[2][2] + m[2][0] * m[0][2] * m[1][3] - m[2][0] * m[0][3] * m[1][2]); ret[2][0] = - one_over_det * (m[1][0] * m[2][1] * m[3][3] - m[1][0] * m[2][3] * m[3][1] - m[2][0] * m[1][1] * m[3][3] + + recip_det * (m[1][0] * m[2][1] * m[3][3] - m[1][0] * m[2][3] * m[3][1] - m[2][0] * m[1][1] * m[3][3] + m[2][0] * m[1][3] * m[3][1] + m[3][0] * m[1][1] * m[2][3] - m[3][0] * m[1][3] * m[2][1]); ret[2][1] = - one_over_det * (m[3][0] * m[0][3] * m[2][1] - m[3][0] * m[0][1] * m[2][3] - m[2][0] * m[0][3] * m[3][1] + + recip_det * (m[3][0] * m[0][3] * m[2][1] - m[3][0] * m[0][1] * m[2][3] - m[2][0] * m[0][3] * m[3][1] + m[2][0] * m[0][1] * m[3][3] + m[0][0] * m[2][3] * m[3][1] - m[0][0] * m[2][1] * m[3][3]); ret[2][2] = - one_over_det * (m[0][0] * m[1][1] * m[3][3] - m[0][0] * m[1][3] * m[3][1] - m[1][0] * m[0][1] * m[3][3] + + recip_det * (m[0][0] * m[1][1] * m[3][3] - m[0][0] * m[1][3] * m[3][1] - m[1][0] * m[0][1] * m[3][3] + m[1][0] * m[0][3] * m[3][1] + m[3][0] * m[0][1] * m[1][3] - m[3][0] * m[0][3] * m[1][1]); ret[2][3] = - one_over_det * (m[2][0] * m[0][3] * m[1][1] - m[2][0] * m[0][1] * m[1][3] - m[1][0] * m[0][3] * m[2][1] + + recip_det * (m[2][0] * m[0][3] * m[1][1] - m[2][0] * m[0][1] * m[1][3] - m[1][0] * m[0][3] * m[2][1] + m[1][0] * m[0][1] * m[2][3] + m[0][0] * m[1][3] * m[2][1] - m[0][0] * m[1][1] * m[2][3]); ret[3][0] = - one_over_det * (m[3][0] * m[1][2] * m[2][1] - m[3][0] * m[1][1] * m[2][2] - m[2][0] * m[1][2] * m[3][1] + + recip_det * (m[3][0] * m[1][2] * m[2][1] - 
m[3][0] * m[1][1] * m[2][2] - m[2][0] * m[1][2] * m[3][1] + m[2][0] * m[1][1] * m[3][2] + m[1][0] * m[2][2] * m[3][1] - m[1][0] * m[2][1] * m[3][2]); ret[3][1] = - one_over_det * (m[0][0] * m[2][1] * m[3][2] - m[0][0] * m[2][2] * m[3][1] - m[2][0] * m[0][1] * m[3][2] + + recip_det * (m[0][0] * m[2][1] * m[3][2] - m[0][0] * m[2][2] * m[3][1] - m[2][0] * m[0][1] * m[3][2] + m[2][0] * m[0][2] * m[3][1] + m[3][0] * m[0][1] * m[2][2] - m[3][0] * m[0][2] * m[2][1]); ret[3][2] = - one_over_det * (m[3][0] * m[0][2] * m[1][1] - m[3][0] * m[0][1] * m[1][2] - m[1][0] * m[0][2] * m[3][1] + + recip_det * (m[3][0] * m[0][2] * m[1][1] - m[3][0] * m[0][1] * m[1][2] - m[1][0] * m[0][2] * m[3][1] + m[1][0] * m[0][1] * m[3][2] + m[0][0] * m[1][2] * m[3][1] - m[0][0] * m[1][1] * m[3][2]); ret[3][3] = - one_over_det * (m[0][0] * m[1][1] * m[2][2] - m[0][0] * m[1][2] * m[2][1] - m[1][0] * m[0][1] * m[2][2] + + recip_det * (m[0][0] * m[1][1] * m[2][2] - m[0][0] * m[1][2] * m[2][1] - m[1][0] * m[0][1] * m[2][2] + m[1][0] * m[0][2] * m[2][1] + m[2][0] * m[0][1] * m[1][2] - m[2][0] * m[0][2] * m[1][1]); return ret; } @@ -292,12 +292,12 @@ struct ktm::detail::matrix_implement::inverse for(int i = 0; i < N; ++i) { - T one_over_diag = recip(left[i][i]); + T recip_diag = recip(left[i][i]); for(int j = 0; j < N; ++j) { if(i != j) { - T factor = left[i][j] * one_over_diag; + T factor = left[i][j] * recip_diag; for(int k = 0; k < N; ++k) { if(k >= i) @@ -309,8 +309,8 @@ struct ktm::detail::matrix_implement::inverse for(int k = 0; k < N; ++k) { if(k >= i) - left[k][i] *= one_over_diag; - right[k][i] *= one_over_diag; + left[k][i] *= recip_diag; + right[k][i] *= recip_diag; } } return right; diff --git a/ktm/detail/function/matrix_simd.inl b/ktm/detail/function/matrix_simd.inl index 80c60fb..3813983 100644 --- a/ktm/detail/function/matrix_simd.inl +++ b/ktm/detail/function/matrix_simd.inl @@ -334,13 +334,13 @@ struct ktm::detail::matrix_implement::inverse<4, float> skv::fv4 i_tmp_1 = _shufft128_f32(inv_2, inv_3, 0, 0, 0, 0); skv::fv4 i_row_0 = _shufft128_f32(i_tmp_0, i_tmp_1, 3, 1, 3, 1); skv::fv4 i_dot = skv::dot_fv4(c_0, i_row_0); - skv::fv4 one_over_det = _reciph128_f32(i_dot); + skv::fv4 recip_det = _reciph128_f32(i_dot); M ret; - ret[0].st = _mul128_f32(inv_0, one_over_det); - ret[1].st = _mul128_f32(inv_1, one_over_det); - ret[2].st = _mul128_f32(inv_2, one_over_det); - ret[3].st = _mul128_f32(inv_3, one_over_det); + ret[0].st = _mul128_f32(inv_0, recip_det); + ret[1].st = _mul128_f32(inv_1, recip_det); + ret[2].st = _mul128_f32(inv_2, recip_det); + ret[3].st = _mul128_f32(inv_3, recip_det); return ret; } }; diff --git a/ktm/function/matrix_decompose.h b/ktm/function/matrix_decompose.h index 82b7913..23229b5 100644 --- a/ktm/function/matrix_decompose.h +++ b/ktm/function/matrix_decompose.h @@ -45,7 +45,7 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< continue; length = std::copysign(sqrt(length), -a[i][v_start]); - T r_h = recip(length * (length - a[i][v_start])); + T recip_h = recip(length * (length - a[i][v_start])); a[i][v_start] = (a[i][v_start] - length); for(int k = i + 1; k < N; ++k) @@ -55,7 +55,7 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< { ta += a[i][j] * a[k][j]; } - ta *= r_h; + ta *= recip_h; for(int j = v_start; j < N; ++j) { a[k][j] -= ta * a[i][j]; @@ -70,8 +70,8 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< ta += a[i][j] * a[j][k]; tt += a[i][j] * trans[j][k]; } - ta *= r_h; - tt *= r_h; + ta *= recip_h; + tt *= recip_h; for(int j = v_start; j < N; 
++j) { a[j][k] -= ta * a[i][j]; @@ -116,20 +116,20 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< u[j] = a[i][j]; } - T r_h = recip(length * (length - a[i][v_start])); + T recip_h = recip(length * (length - a[i][v_start])); vec q { }; for(int j = v_start; j < N; ++j) { q += a[j] * u[j]; } - q *= r_h; + q *= recip_h; T dot_qu = zero; for(int j = v_start; j < N; ++j) { dot_qu += q[j] * u[j]; } - q -= dot_qu * static_cast(0.5) * r_h * u; + q -= dot_qu * static_cast(0.5) * recip_h * u; a[i][v_start] = length; a[v_start][i] = length; @@ -155,7 +155,7 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< { tt += u[j] * trans[j][k]; } - tt *= r_h; + tt *= recip_h; for(int j = v_start; j < N; ++j) { trans[j][k] -= tt * u[j]; @@ -177,10 +177,10 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< for(int i = 0; i < N - 1; ++i) { - T r_uii = recip(u[i][i]); + T recip_uii = recip(u[i][i]); for(int j = i + 1; j < N; ++j) { - l[i][j] = u[i][j] * r_uii; + l[i][j] = u[i][j] * recip_uii; u[i][j] = zero; for(int k = i + 1; k < N; ++k) { @@ -203,10 +203,10 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< for(int i = 0; i < N - 1; ++i) { - T r_lii = recip(l[i][i]); + T recip_lii = recip(l[i][i]); for(int j = i + 1; j < N; ++j) { - u[j][i] = l[j][i] * r_lii; + u[j][i] = l[j][i] * recip_lii; l[j][i] = zero; for(int k = i + 1; k < N; ++k) { @@ -271,7 +271,7 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< continue; length = std::copysign(sqrt(length), -r[i][i]); - T r_h = recip(length * (length - r[i][i])); + T recip_h = recip(length * (length - r[i][i])); r[i][i] = (r[i][i] - length); for(int k = 0; k <= i; ++k) @@ -281,7 +281,7 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< { tq += r[i][j] * q[j][k]; } - tq *= r_h; + tq *= recip_h; for(int j = i; j < N; ++j) { q[j][k] -= tq * r[i][j]; @@ -296,8 +296,8 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< tq += r[i][j] * q[j][k]; tr += r[i][j] * r[k][j]; } - tq *= r_h; - tr *= r_h; + tq *= recip_h; + tr *= recip_h; for(int j = i; j < N; ++j) { q[j][k] -= tq * r[i][j]; @@ -385,18 +385,18 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< continue; length = std::copysign(sqrt(length), -r[i][i]); - T r_h = recip(length * (length - r[i][i])); + T recip_h = recip(length * (length - r[i][i])); r[i][i] = (r[i][i] - length); for(int k = 0; k <= i + 1; ++k) { - T tq = r_h * (r[i][i] * q[i][k] + r[i][i + 1] * q[i + 1][k]); + T tq = recip_h * (r[i][i] * q[i][k] + r[i][i + 1] * q[i + 1][k]); q[i][k] -= tq * r[i][i]; q[i + 1][k] -= tq * r[i][i + 1]; } for(int k = i + 1; k < N; ++k) { - T tr = r_h * (r[i][i] * r[k][i] + r[i][i + 1] * r[k][i + 1]); + T tr = recip_h * (r[i][i] * r[k][i] + r[i][i + 1] * r[k][i + 1]); r[k][i] -= tr * r[i][i]; r[k][i + 1] -= tr * r[i][i + 1]; } @@ -521,15 +521,15 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< { if(acr < 0) { - sin_theta = -rsqrt(static_cast(2)); - cos_theta = rsqrt(static_cast(2)); + sin_theta = -rsqrt_tow; + cos_theta = rsqrt_tow; sin_two_theta = -one; cos_two_theta = zero; } else { - sin_theta = rsqrt(static_cast(2)); - cos_theta = rsqrt(static_cast(2)); + sin_theta = rsqrt_tow; + cos_theta = rsqrt_tow; sin_two_theta = one; cos_two_theta = zero; } diff --git a/ktm/function/trigonometric.h b/ktm/function/trigonometric.h index ec8b9a7..9d7329e 100644 --- a/ktm/function/trigonometric.h +++ b/ktm/function/trigonometric.h @@ -25,7 +25,7 @@ KTM_INLINE std::enable_if_t, T> radians(T degrees) n template KTM_INLINE 
std::enable_if_t, T> degrees(T radians) noexcept { - constexpr T radians_to_degrees = static_cast(180) * one_over_pi; + constexpr T radians_to_degrees = static_cast(180) * recip_pi; return radians * radians_to_degrees; } diff --git a/ktm/type/basic.h b/ktm/type/basic.h index 63a465c..7249d3b 100644 --- a/ktm/type/basic.h +++ b/ktm/type/basic.h @@ -36,7 +36,13 @@ template inline constexpr std::enable_if_t, T> half_pi = static_cast(0.5) * pi; template -inline constexpr std::enable_if_t, T> one_over_pi = one / pi; +inline constexpr std::enable_if_t, T> recip_pi = one / pi; + +template +inline constexpr std::enable_if_t, T> sqrt_tow = static_cast(1.41421356237309504880168872420969807856967187537695); + +template +inline constexpr std::enable_if_t, T> rsqrt_tow = one / sqrt_tow; } From 7429487ae1e1cb03093513b4323b2169359b7954 Mon Sep 17 00:00:00 2001 From: chenqiudu Date: Fri, 14 Jun 2024 22:24:48 +0800 Subject: [PATCH 8/8] add random --- ktm/function/arithmetic.h | 2 +- ktm/function/exponential.h | 6 +++ ktm/function/random.h | 75 ++++++++++++++++++++++++++++++++++++++ ktm/ktm.h | 1 + 4 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 ktm/function/random.h diff --git a/ktm/function/arithmetic.h b/ktm/function/arithmetic.h index 96f5ef4..71a54cf 100644 --- a/ktm/function/arithmetic.h +++ b/ktm/function/arithmetic.h @@ -16,7 +16,7 @@ namespace ktm { template -KTM_INLINE std::enable_if_t<(std::is_arithmetic_v && !std::is_unsigned_v), T> abs(T x) noexcept +KTM_INLINE std::enable_if_t && !std::is_unsigned_v, T> abs(T x) noexcept { if constexpr(std::is_floating_point_v) return std::copysign(x, zero); diff --git a/ktm/function/exponential.h b/ktm/function/exponential.h index 667f3ee..23496c5 100644 --- a/ktm/function/exponential.h +++ b/ktm/function/exponential.h @@ -56,6 +56,12 @@ KTM_INLINE std::enable_if_t, T> recip(T x) noexcept return detail::exponential_implement::recip::call(x); } +template +KTM_INLINE std::enable_if_t, T> cbrt(T x) noexcept +{ + return std::cbrt(x); +} + template KTM_INLINE std::enable_if_t, T> pow(T x, T y) noexcept { diff --git a/ktm/function/random.h b/ktm/function/random.h new file mode 100644 index 0000000..4e75650 --- /dev/null +++ b/ktm/function/random.h @@ -0,0 +1,75 @@ +// MIT License +// +// Copyright (c) 2024 有个小小杜 +// +// Created by 有个小小杜 +// + +#ifndef _KTM_RANDOM_H_ +#define _KTM_RANDOM_H_ + +#include +#include "../setup.h" +#include "../type/basic.h" +#include "arithmetic.h" +#include "exponential.h" +#include "trigonometric.h" + +namespace ktm +{ + +template +KTM_INLINE std::enable_if_t, T> lerp_rand(T min, T max) noexcept +{ + constexpr unsigned int rand_max = std::is_same_v && 0xffffff < RAND_MAX ? 
0xffffff : RAND_MAX; + constexpr T recip_rand_max = one / static_cast(rand_max + 1); + return lerp(min, max, static_cast(std::rand() & rand_max) * recip_rand_max); +} + +template +KTM_INLINE std::enable_if_t, T> gauss_rand(T mean, T deviation) noexcept +{ + T unit = sqrt(lerp_rand(epsilon, one)); + T theta = lerp_rand(zero, tow_pi); + return unit * theta * deviation * sqrt(static_cast(-2) * log(unit) * recip(unit)) + mean; +} + +template +KTM_INLINE std::enable_if_t, T> exp_rand(T lambda) noexcept +{ + return -log(one - lerp_rand(zero, one)) * recip(lambda); +} + +template +KTM_INLINE std::enable_if_t, vec<2, T>> circur_rand(T radius) noexcept +{ + T theta = lerp_rand(zero, tow_pi); + return radius * vec<2, T>(cos(theta), sin(theta)); +} + +template +KTM_INLINE std::enable_if_t, vec<2, T>> disk_rand(T radius) noexcept +{ + T unit = sqrt(lerp_rand(zero, one)); + return unit * circur_rand(radius); +} + +template +KTM_INLINE std::enable_if_t, vec<3, T>> sphere_rand(T radius) noexcept +{ + T theta = lerp_rand(zero, tow_pi); + T phi = acos(lerp_rand(-one, one)); + T sin_phi = sin(phi); + return radius * vec<3, T>(sin_phi * cos(theta), sin_phi * sin(theta), cos(phi)); +} + +template +KTM_INLINE std::enable_if_t, vec<3, T>> ball_rand(T radius) noexcept +{ + T unit = cbrt(lerp_rand(zero, one)); + return unit * sphere_rand(radius); +} + +} + +#endif \ No newline at end of file diff --git a/ktm/ktm.h b/ktm/ktm.h index 77c0c15..4d247fa 100644 --- a/ktm/ktm.h +++ b/ktm/ktm.h @@ -21,6 +21,7 @@ #include "function/common.h" #include "function/geometric.h" #include "function/compare.h" +#include "function/random.h" #include "function/matrix.h" #include "function/matrix_decompose.h" #include "function/matrix_transform2d.h"
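
Series note (not part of the patch): a minimal usage sketch of the helpers random.h adds, for reviewers who want to try them. The srand seeding and the sample parameters below are assumptions made for this sketch; the header itself only relies on std::rand().

#include <cstdlib>
#include <ctime>
#include <iostream>
#include "ktm/ktm.h"

int main()
{
    std::srand(static_cast<unsigned int>(std::time(nullptr)));   // the helpers draw from std::rand()

    float u = ktm::lerp_rand(0.f, 1.f);           // uniform sample in [min, max)
    float g = ktm::gauss_rand(0.f, 1.f);          // mean 0, deviation 1
    float e = ktm::exp_rand(2.f);                 // exponential sample, lambda = 2
    ktm::vec<2, float> d = ktm::disk_rand(1.f);   // point inside the unit disk
    ktm::vec<3, float> b = ktm::ball_rand(1.f);   // point inside the unit ball

    std::cout << u << " " << g << " " << e << "\n"
              << d[0] << " " << d[1] << "\n"
              << b[0] << " " << b[1] << " " << b[2] << std::endl;
    return 0;
}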