From 13da8d37217d5f2abd28e4463cfc64efe9e384fe Mon Sep 17 00:00:00 2001 From: chenqiudu Date: Thu, 13 Jun 2024 18:23:25 +0800 Subject: [PATCH] one_over replace recip --- ktm/detail/function/matrix.inl | 76 ++++++++++++++--------------- ktm/detail/function/matrix_simd.inl | 10 ++-- ktm/function/matrix_decompose.h | 46 ++++++++--------- ktm/function/trigonometric.h | 2 +- ktm/type/basic.h | 8 ++- 5 files changed, 74 insertions(+), 68 deletions(-) diff --git a/ktm/detail/function/matrix.inl b/ktm/detail/function/matrix.inl index ec046bc..ec8310f 100644 --- a/ktm/detail/function/matrix.inl +++ b/ktm/detail/function/matrix.inl @@ -147,10 +147,10 @@ struct ktm::detail::matrix_implement::determinant det = one; M a { m }; for(int i = 0; i < N - 1; ++i) { - T one_over_diag = recip(a[i][i]); + T recip_diag = recip(a[i][i]); for(int j = i + 1; j < N; ++j) { - T factor = a[j][i] * one_over_diag; + T factor = a[j][i] * recip_diag; for(int k = i + 1; k < N; ++k) { a[j][k] -= a[i][k] * factor; @@ -190,12 +190,12 @@ struct ktm::detail::matrix_implement::inverse<2, T> using M = mat<2, 2, T>; static KTM_INLINE M call(const M& m) noexcept { - T one_over_det = one / determinant<2, T>::call(m); + T recip_det = one / determinant<2, T>::call(m); M ret; - ret[0][0] = m[1][1] * one_over_det; - ret[0][1] = - m[0][1] * one_over_det; - ret[1][0] = - m[1][0] * one_over_det; - ret[1][1] = m[0][0] * one_over_det; + ret[0][0] = m[1][1] * recip_det; + ret[0][1] = - m[0][1] * recip_det; + ret[1][0] = - m[1][0] * recip_det; + ret[1][1] = m[0][0] * recip_det; return ret; } }; @@ -206,17 +206,17 @@ struct ktm::detail::matrix_implement::inverse<3, T> using M = mat<3, 3, T>; static KTM_INLINE M call(const M& m) noexcept { - T one_over_det = one / determinant<3, T>::call(m); + T recip_det = one / determinant<3, T>::call(m); M ret; - ret[0][0] = one_over_det * (m[1][1] * m[2][2] - m[2][1] * m[1][2]); - ret[0][1] = one_over_det * (m[2][1] * m[0][2] - m[0][1] * m[2][2]); - ret[0][2] = one_over_det * (m[0][1] * m[1][2] - m[1][1] * m[0][2]); - ret[1][0] = one_over_det * (m[2][0] * m[1][2] - m[1][0] * m[2][2]); - ret[1][1] = one_over_det * (m[0][0] * m[2][2] - m[2][0] * m[0][2]); - ret[1][2] = one_over_det * (m[1][0] * m[0][2] - m[0][0] * m[1][2]); - ret[2][0] = one_over_det * (m[1][0] * m[2][1] - m[2][0] * m[1][1]); - ret[2][1] = one_over_det * (m[2][0] * m[0][1] - m[0][0] * m[2][1]); - ret[2][2] = one_over_det * (m[0][0] * m[1][1] - m[1][0] * m[0][1]); + ret[0][0] = recip_det * (m[1][1] * m[2][2] - m[2][1] * m[1][2]); + ret[0][1] = recip_det * (m[2][1] * m[0][2] - m[0][1] * m[2][2]); + ret[0][2] = recip_det * (m[0][1] * m[1][2] - m[1][1] * m[0][2]); + ret[1][0] = recip_det * (m[2][0] * m[1][2] - m[1][0] * m[2][2]); + ret[1][1] = recip_det * (m[0][0] * m[2][2] - m[2][0] * m[0][2]); + ret[1][2] = recip_det * (m[1][0] * m[0][2] - m[0][0] * m[1][2]); + ret[2][0] = recip_det * (m[1][0] * m[2][1] - m[2][0] * m[1][1]); + ret[2][1] = recip_det * (m[2][0] * m[0][1] - m[0][0] * m[2][1]); + ret[2][2] = recip_det * (m[0][0] * m[1][1] - m[1][0] * m[0][1]); return ret; } }; @@ -227,55 +227,55 @@ struct ktm::detail::matrix_implement::inverse<4, T> using M = mat<4, 4, T>; static KTM_INLINE M call(const M& m) noexcept { - T one_over_det = one / determinant<4, T>::call(m); + T recip_det = one / determinant<4, T>::call(m); M ret; ret[0][0] = - one_over_det * (m[1][1] * m[2][2] * m[3][3] - m[1][1] * m[2][3] * m[3][2] - m[2][1] * m[1][2] * m[3][3] + + recip_det * (m[1][1] * m[2][2] * m[3][3] - m[1][1] * m[2][3] * m[3][2] - m[2][1] * m[1][2] * m[3][3] + m[2][1] * m[1][3] * m[3][2] + m[3][1] * m[1][2] * m[2][3] - m[3][1] * m[1][3] * m[2][2]); ret[0][1] = - one_over_det * (m[3][1] * m[0][3] * m[2][2] - m[3][1] * m[0][2] * m[2][3] - m[2][1] * m[0][3] * m[3][2] + + recip_det * (m[3][1] * m[0][3] * m[2][2] - m[3][1] * m[0][2] * m[2][3] - m[2][1] * m[0][3] * m[3][2] + m[2][1] * m[0][2] * m[3][3] + m[0][1] * m[2][3] * m[3][2] - m[0][1] * m[2][2] * m[3][3]); ret[0][2] = - one_over_det * (m[0][1] * m[1][2] * m[3][3] - m[0][1] * m[1][3] * m[3][2] - m[1][1] * m[0][2] * m[3][3] + + recip_det * (m[0][1] * m[1][2] * m[3][3] - m[0][1] * m[1][3] * m[3][2] - m[1][1] * m[0][2] * m[3][3] + m[1][1] * m[0][3] * m[3][2] + m[3][1] * m[0][2] * m[1][3] - m[3][1] * m[0][3] * m[1][2]); ret[0][3] = - one_over_det * (m[2][1] * m[0][3] * m[1][2] - m[2][1] * m[0][2] * m[1][3] - m[1][1] * m[0][3] * m[2][2] + + recip_det * (m[2][1] * m[0][3] * m[1][2] - m[2][1] * m[0][2] * m[1][3] - m[1][1] * m[0][3] * m[2][2] + m[1][1] * m[0][2] * m[2][3] + m[0][1] * m[1][3] * m[2][2] - m[0][1] * m[1][2] * m[2][3]); ret[1][0] = - one_over_det * (m[3][0] * m[1][3] * m[2][2] - m[3][0] * m[1][2] * m[2][3] - m[2][0] * m[1][3] * m[3][2] + + recip_det * (m[3][0] * m[1][3] * m[2][2] - m[3][0] * m[1][2] * m[2][3] - m[2][0] * m[1][3] * m[3][2] + m[2][0] * m[1][2] * m[3][3] + m[1][0] * m[2][3] * m[3][2] - m[1][0] * m[2][2] * m[3][3]); ret[1][1] = - one_over_det * (m[0][0] * m[2][2] * m[3][3] - m[0][0] * m[2][3] * m[3][2] - m[2][0] * m[0][2] * m[3][3] + + recip_det * (m[0][0] * m[2][2] * m[3][3] - m[0][0] * m[2][3] * m[3][2] - m[2][0] * m[0][2] * m[3][3] + m[2][0] * m[0][3] * m[3][2] + m[3][0] * m[0][2] * m[2][3] - m[3][0] * m[0][3] * m[2][2]); ret[1][2] = - one_over_det * (m[3][0] * m[0][3] * m[1][2] - m[3][0] * m[0][2] * m[1][3] - m[1][0] * m[0][3] * m[3][2] + + recip_det * (m[3][0] * m[0][3] * m[1][2] - m[3][0] * m[0][2] * m[1][3] - m[1][0] * m[0][3] * m[3][2] + m[1][0] * m[0][2] * m[3][3] + m[0][0] * m[1][3] * m[3][2] - m[0][0] * m[1][2] * m[3][3]); ret[1][3] = - one_over_det * (m[0][0] * m[1][2] * m[2][3] - m[0][0] * m[1][3] * m[2][2] - m[1][0] * m[0][2] * m[2][3] + + recip_det * (m[0][0] * m[1][2] * m[2][3] - m[0][0] * m[1][3] * m[2][2] - m[1][0] * m[0][2] * m[2][3] + m[1][0] * m[0][3] * m[2][2] + m[2][0] * m[0][2] * m[1][3] - m[2][0] * m[0][3] * m[1][2]); ret[2][0] = - one_over_det * (m[1][0] * m[2][1] * m[3][3] - m[1][0] * m[2][3] * m[3][1] - m[2][0] * m[1][1] * m[3][3] + + recip_det * (m[1][0] * m[2][1] * m[3][3] - m[1][0] * m[2][3] * m[3][1] - m[2][0] * m[1][1] * m[3][3] + m[2][0] * m[1][3] * m[3][1] + m[3][0] * m[1][1] * m[2][3] - m[3][0] * m[1][3] * m[2][1]); ret[2][1] = - one_over_det * (m[3][0] * m[0][3] * m[2][1] - m[3][0] * m[0][1] * m[2][3] - m[2][0] * m[0][3] * m[3][1] + + recip_det * (m[3][0] * m[0][3] * m[2][1] - m[3][0] * m[0][1] * m[2][3] - m[2][0] * m[0][3] * m[3][1] + m[2][0] * m[0][1] * m[3][3] + m[0][0] * m[2][3] * m[3][1] - m[0][0] * m[2][1] * m[3][3]); ret[2][2] = - one_over_det * (m[0][0] * m[1][1] * m[3][3] - m[0][0] * m[1][3] * m[3][1] - m[1][0] * m[0][1] * m[3][3] + + recip_det * (m[0][0] * m[1][1] * m[3][3] - m[0][0] * m[1][3] * m[3][1] - m[1][0] * m[0][1] * m[3][3] + m[1][0] * m[0][3] * m[3][1] + m[3][0] * m[0][1] * m[1][3] - m[3][0] * m[0][3] * m[1][1]); ret[2][3] = - one_over_det * (m[2][0] * m[0][3] * m[1][1] - m[2][0] * m[0][1] * m[1][3] - m[1][0] * m[0][3] * m[2][1] + + recip_det * (m[2][0] * m[0][3] * m[1][1] - m[2][0] * m[0][1] * m[1][3] - m[1][0] * m[0][3] * m[2][1] + m[1][0] * m[0][1] * m[2][3] + m[0][0] * m[1][3] * m[2][1] - m[0][0] * m[1][1] * m[2][3]); ret[3][0] = - one_over_det * (m[3][0] * m[1][2] * m[2][1] - m[3][0] * m[1][1] * m[2][2] - m[2][0] * m[1][2] * m[3][1] + + recip_det * (m[3][0] * m[1][2] * m[2][1] - m[3][0] * m[1][1] * m[2][2] - m[2][0] * m[1][2] * m[3][1] + m[2][0] * m[1][1] * m[3][2] + m[1][0] * m[2][2] * m[3][1] - m[1][0] * m[2][1] * m[3][2]); ret[3][1] = - one_over_det * (m[0][0] * m[2][1] * m[3][2] - m[0][0] * m[2][2] * m[3][1] - m[2][0] * m[0][1] * m[3][2] + + recip_det * (m[0][0] * m[2][1] * m[3][2] - m[0][0] * m[2][2] * m[3][1] - m[2][0] * m[0][1] * m[3][2] + m[2][0] * m[0][2] * m[3][1] + m[3][0] * m[0][1] * m[2][2] - m[3][0] * m[0][2] * m[2][1]); ret[3][2] = - one_over_det * (m[3][0] * m[0][2] * m[1][1] - m[3][0] * m[0][1] * m[1][2] - m[1][0] * m[0][2] * m[3][1] + + recip_det * (m[3][0] * m[0][2] * m[1][1] - m[3][0] * m[0][1] * m[1][2] - m[1][0] * m[0][2] * m[3][1] + m[1][0] * m[0][1] * m[3][2] + m[0][0] * m[1][2] * m[3][1] - m[0][0] * m[1][1] * m[3][2]); ret[3][3] = - one_over_det * (m[0][0] * m[1][1] * m[2][2] - m[0][0] * m[1][2] * m[2][1] - m[1][0] * m[0][1] * m[2][2] + + recip_det * (m[0][0] * m[1][1] * m[2][2] - m[0][0] * m[1][2] * m[2][1] - m[1][0] * m[0][1] * m[2][2] + m[1][0] * m[0][2] * m[2][1] + m[2][0] * m[0][1] * m[1][2] - m[2][0] * m[0][2] * m[1][1]); return ret; } @@ -292,12 +292,12 @@ struct ktm::detail::matrix_implement::inverse for(int i = 0; i < N; ++i) { - T one_over_diag = recip(left[i][i]); + T recip_diag = recip(left[i][i]); for(int j = 0; j < N; ++j) { if(i != j) { - T factor = left[i][j] * one_over_diag; + T factor = left[i][j] * recip_diag; for(int k = 0; k < N; ++k) { if(k >= i) @@ -309,8 +309,8 @@ struct ktm::detail::matrix_implement::inverse for(int k = 0; k < N; ++k) { if(k >= i) - left[k][i] *= one_over_diag; - right[k][i] *= one_over_diag; + left[k][i] *= recip_diag; + right[k][i] *= recip_diag; } } return right; diff --git a/ktm/detail/function/matrix_simd.inl b/ktm/detail/function/matrix_simd.inl index 80c60fb..3813983 100644 --- a/ktm/detail/function/matrix_simd.inl +++ b/ktm/detail/function/matrix_simd.inl @@ -334,13 +334,13 @@ struct ktm::detail::matrix_implement::inverse<4, float> skv::fv4 i_tmp_1 = _shufft128_f32(inv_2, inv_3, 0, 0, 0, 0); skv::fv4 i_row_0 = _shufft128_f32(i_tmp_0, i_tmp_1, 3, 1, 3, 1); skv::fv4 i_dot = skv::dot_fv4(c_0, i_row_0); - skv::fv4 one_over_det = _reciph128_f32(i_dot); + skv::fv4 recip_det = _reciph128_f32(i_dot); M ret; - ret[0].st = _mul128_f32(inv_0, one_over_det); - ret[1].st = _mul128_f32(inv_1, one_over_det); - ret[2].st = _mul128_f32(inv_2, one_over_det); - ret[3].st = _mul128_f32(inv_3, one_over_det); + ret[0].st = _mul128_f32(inv_0, recip_det); + ret[1].st = _mul128_f32(inv_1, recip_det); + ret[2].st = _mul128_f32(inv_2, recip_det); + ret[3].st = _mul128_f32(inv_3, recip_det); return ret; } }; diff --git a/ktm/function/matrix_decompose.h b/ktm/function/matrix_decompose.h index 82b7913..23229b5 100644 --- a/ktm/function/matrix_decompose.h +++ b/ktm/function/matrix_decompose.h @@ -45,7 +45,7 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< continue; length = std::copysign(sqrt(length), -a[i][v_start]); - T r_h = recip(length * (length - a[i][v_start])); + T recip_h = recip(length * (length - a[i][v_start])); a[i][v_start] = (a[i][v_start] - length); for(int k = i + 1; k < N; ++k) @@ -55,7 +55,7 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< { ta += a[i][j] * a[k][j]; } - ta *= r_h; + ta *= recip_h; for(int j = v_start; j < N; ++j) { a[k][j] -= ta * a[i][j]; @@ -70,8 +70,8 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< ta += a[i][j] * a[j][k]; tt += a[i][j] * trans[j][k]; } - ta *= r_h; - tt *= r_h; + ta *= recip_h; + tt *= recip_h; for(int j = v_start; j < N; ++j) { a[j][k] -= ta * a[i][j]; @@ -116,20 +116,20 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< u[j] = a[i][j]; } - T r_h = recip(length * (length - a[i][v_start])); + T recip_h = recip(length * (length - a[i][v_start])); vec q { }; for(int j = v_start; j < N; ++j) { q += a[j] * u[j]; } - q *= r_h; + q *= recip_h; T dot_qu = zero; for(int j = v_start; j < N; ++j) { dot_qu += q[j] * u[j]; } - q -= dot_qu * static_cast(0.5) * r_h * u; + q -= dot_qu * static_cast(0.5) * recip_h * u; a[i][v_start] = length; a[v_start][i] = length; @@ -155,7 +155,7 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< { tt += u[j] * trans[j][k]; } - tt *= r_h; + tt *= recip_h; for(int j = v_start; j < N; ++j) { trans[j][k] -= tt * u[j]; @@ -177,10 +177,10 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< for(int i = 0; i < N - 1; ++i) { - T r_uii = recip(u[i][i]); + T recip_uii = recip(u[i][i]); for(int j = i + 1; j < N; ++j) { - l[i][j] = u[i][j] * r_uii; + l[i][j] = u[i][j] * recip_uii; u[i][j] = zero; for(int k = i + 1; k < N; ++k) { @@ -203,10 +203,10 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< for(int i = 0; i < N - 1; ++i) { - T r_lii = recip(l[i][i]); + T recip_lii = recip(l[i][i]); for(int j = i + 1; j < N; ++j) { - u[j][i] = l[j][i] * r_lii; + u[j][i] = l[j][i] * recip_lii; l[j][i] = zero; for(int k = i + 1; k < N; ++k) { @@ -271,7 +271,7 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< continue; length = std::copysign(sqrt(length), -r[i][i]); - T r_h = recip(length * (length - r[i][i])); + T recip_h = recip(length * (length - r[i][i])); r[i][i] = (r[i][i] - length); for(int k = 0; k <= i; ++k) @@ -281,7 +281,7 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< { tq += r[i][j] * q[j][k]; } - tq *= r_h; + tq *= recip_h; for(int j = i; j < N; ++j) { q[j][k] -= tq * r[i][j]; @@ -296,8 +296,8 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< tq += r[i][j] * q[j][k]; tr += r[i][j] * r[k][j]; } - tq *= r_h; - tr *= r_h; + tq *= recip_h; + tr *= recip_h; for(int j = i; j < N; ++j) { q[j][k] -= tq * r[i][j]; @@ -385,18 +385,18 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< continue; length = std::copysign(sqrt(length), -r[i][i]); - T r_h = recip(length * (length - r[i][i])); + T recip_h = recip(length * (length - r[i][i])); r[i][i] = (r[i][i] - length); for(int k = 0; k <= i + 1; ++k) { - T tq = r_h * (r[i][i] * q[i][k] + r[i][i + 1] * q[i + 1][k]); + T tq = recip_h * (r[i][i] * q[i][k] + r[i][i + 1] * q[i + 1][k]); q[i][k] -= tq * r[i][i]; q[i + 1][k] -= tq * r[i][i + 1]; } for(int k = i + 1; k < N; ++k) { - T tr = r_h * (r[i][i] * r[k][i] + r[i][i + 1] * r[k][i + 1]); + T tr = recip_h * (r[i][i] * r[k][i] + r[i][i + 1] * r[k][i + 1]); r[k][i] -= tr * r[i][i]; r[k][i + 1] -= tr * r[i][i + 1]; } @@ -521,15 +521,15 @@ KTM_NOINLINE std::enable_if_t && is_floating_point_base_v< { if(acr < 0) { - sin_theta = -rsqrt(static_cast(2)); - cos_theta = rsqrt(static_cast(2)); + sin_theta = -rsqrt_tow; + cos_theta = rsqrt_tow; sin_two_theta = -one; cos_two_theta = zero; } else { - sin_theta = rsqrt(static_cast(2)); - cos_theta = rsqrt(static_cast(2)); + sin_theta = rsqrt_tow; + cos_theta = rsqrt_tow; sin_two_theta = one; cos_two_theta = zero; } diff --git a/ktm/function/trigonometric.h b/ktm/function/trigonometric.h index ec8b9a7..9d7329e 100644 --- a/ktm/function/trigonometric.h +++ b/ktm/function/trigonometric.h @@ -25,7 +25,7 @@ KTM_INLINE std::enable_if_t, T> radians(T degrees) n template KTM_INLINE std::enable_if_t, T> degrees(T radians) noexcept { - constexpr T radians_to_degrees = static_cast(180) * one_over_pi; + constexpr T radians_to_degrees = static_cast(180) * recip_pi; return radians * radians_to_degrees; } diff --git a/ktm/type/basic.h b/ktm/type/basic.h index 63a465c..7249d3b 100644 --- a/ktm/type/basic.h +++ b/ktm/type/basic.h @@ -36,7 +36,13 @@ template inline constexpr std::enable_if_t, T> half_pi = static_cast(0.5) * pi; template -inline constexpr std::enable_if_t, T> one_over_pi = one / pi; +inline constexpr std::enable_if_t, T> recip_pi = one / pi; + +template +inline constexpr std::enable_if_t, T> sqrt_tow = static_cast(1.41421356237309504880168872420969807856967187537695); + +template +inline constexpr std::enable_if_t, T> rsqrt_tow = one / sqrt_tow; }