Skip to content

Commit

Permalink
one_over replace recip
Browse files Browse the repository at this point in the history
  • Loading branch information
YGXXD committed Jun 13, 2024
1 parent 092816b commit 13da8d3
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 68 deletions.
76 changes: 38 additions & 38 deletions ktm/detail/function/matrix.inl
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,10 @@ struct ktm::detail::matrix_implement::determinant
det = one<T>; M a { m };
for(int i = 0; i < N - 1; ++i)
{
T one_over_diag = recip(a[i][i]);
T recip_diag = recip(a[i][i]);
for(int j = i + 1; j < N; ++j)
{
T factor = a[j][i] * one_over_diag;
T factor = a[j][i] * recip_diag;
for(int k = i + 1; k < N; ++k)
{
a[j][k] -= a[i][k] * factor;
Expand Down Expand Up @@ -190,12 +190,12 @@ struct ktm::detail::matrix_implement::inverse<2, T>
using M = mat<2, 2, T>;
static KTM_INLINE M call(const M& m) noexcept
{
T one_over_det = one<T> / determinant<2, T>::call(m);
T recip_det = one<T> / determinant<2, T>::call(m);
M ret;
ret[0][0] = m[1][1] * one_over_det;
ret[0][1] = - m[0][1] * one_over_det;
ret[1][0] = - m[1][0] * one_over_det;
ret[1][1] = m[0][0] * one_over_det;
ret[0][0] = m[1][1] * recip_det;
ret[0][1] = - m[0][1] * recip_det;
ret[1][0] = - m[1][0] * recip_det;
ret[1][1] = m[0][0] * recip_det;
return ret;
}
};
Expand All @@ -206,17 +206,17 @@ struct ktm::detail::matrix_implement::inverse<3, T>
using M = mat<3, 3, T>;
static KTM_INLINE M call(const M& m) noexcept
{
T one_over_det = one<T> / determinant<3, T>::call(m);
T recip_det = one<T> / determinant<3, T>::call(m);
M ret;
ret[0][0] = one_over_det * (m[1][1] * m[2][2] - m[2][1] * m[1][2]);
ret[0][1] = one_over_det * (m[2][1] * m[0][2] - m[0][1] * m[2][2]);
ret[0][2] = one_over_det * (m[0][1] * m[1][2] - m[1][1] * m[0][2]);
ret[1][0] = one_over_det * (m[2][0] * m[1][2] - m[1][0] * m[2][2]);
ret[1][1] = one_over_det * (m[0][0] * m[2][2] - m[2][0] * m[0][2]);
ret[1][2] = one_over_det * (m[1][0] * m[0][2] - m[0][0] * m[1][2]);
ret[2][0] = one_over_det * (m[1][0] * m[2][1] - m[2][0] * m[1][1]);
ret[2][1] = one_over_det * (m[2][0] * m[0][1] - m[0][0] * m[2][1]);
ret[2][2] = one_over_det * (m[0][0] * m[1][1] - m[1][0] * m[0][1]);
ret[0][0] = recip_det * (m[1][1] * m[2][2] - m[2][1] * m[1][2]);
ret[0][1] = recip_det * (m[2][1] * m[0][2] - m[0][1] * m[2][2]);
ret[0][2] = recip_det * (m[0][1] * m[1][2] - m[1][1] * m[0][2]);
ret[1][0] = recip_det * (m[2][0] * m[1][2] - m[1][0] * m[2][2]);
ret[1][1] = recip_det * (m[0][0] * m[2][2] - m[2][0] * m[0][2]);
ret[1][2] = recip_det * (m[1][0] * m[0][2] - m[0][0] * m[1][2]);
ret[2][0] = recip_det * (m[1][0] * m[2][1] - m[2][0] * m[1][1]);
ret[2][1] = recip_det * (m[2][0] * m[0][1] - m[0][0] * m[2][1]);
ret[2][2] = recip_det * (m[0][0] * m[1][1] - m[1][0] * m[0][1]);
return ret;
}
};
Expand All @@ -227,55 +227,55 @@ struct ktm::detail::matrix_implement::inverse<4, T>
using M = mat<4, 4, T>;
static KTM_INLINE M call(const M& m) noexcept
{
T one_over_det = one<T> / determinant<4, T>::call(m);
T recip_det = one<T> / determinant<4, T>::call(m);
M ret;
ret[0][0] =
one_over_det * (m[1][1] * m[2][2] * m[3][3] - m[1][1] * m[2][3] * m[3][2] - m[2][1] * m[1][2] * m[3][3] +
recip_det * (m[1][1] * m[2][2] * m[3][3] - m[1][1] * m[2][3] * m[3][2] - m[2][1] * m[1][2] * m[3][3] +
m[2][1] * m[1][3] * m[3][2] + m[3][1] * m[1][2] * m[2][3] - m[3][1] * m[1][3] * m[2][2]);
ret[0][1] =
one_over_det * (m[3][1] * m[0][3] * m[2][2] - m[3][1] * m[0][2] * m[2][3] - m[2][1] * m[0][3] * m[3][2] +
recip_det * (m[3][1] * m[0][3] * m[2][2] - m[3][1] * m[0][2] * m[2][3] - m[2][1] * m[0][3] * m[3][2] +
m[2][1] * m[0][2] * m[3][3] + m[0][1] * m[2][3] * m[3][2] - m[0][1] * m[2][2] * m[3][3]);
ret[0][2] =
one_over_det * (m[0][1] * m[1][2] * m[3][3] - m[0][1] * m[1][3] * m[3][2] - m[1][1] * m[0][2] * m[3][3] +
recip_det * (m[0][1] * m[1][2] * m[3][3] - m[0][1] * m[1][3] * m[3][2] - m[1][1] * m[0][2] * m[3][3] +
m[1][1] * m[0][3] * m[3][2] + m[3][1] * m[0][2] * m[1][3] - m[3][1] * m[0][3] * m[1][2]);
ret[0][3] =
one_over_det * (m[2][1] * m[0][3] * m[1][2] - m[2][1] * m[0][2] * m[1][3] - m[1][1] * m[0][3] * m[2][2] +
recip_det * (m[2][1] * m[0][3] * m[1][2] - m[2][1] * m[0][2] * m[1][3] - m[1][1] * m[0][3] * m[2][2] +
m[1][1] * m[0][2] * m[2][3] + m[0][1] * m[1][3] * m[2][2] - m[0][1] * m[1][2] * m[2][3]);
ret[1][0] =
one_over_det * (m[3][0] * m[1][3] * m[2][2] - m[3][0] * m[1][2] * m[2][3] - m[2][0] * m[1][3] * m[3][2] +
recip_det * (m[3][0] * m[1][3] * m[2][2] - m[3][0] * m[1][2] * m[2][3] - m[2][0] * m[1][3] * m[3][2] +
m[2][0] * m[1][2] * m[3][3] + m[1][0] * m[2][3] * m[3][2] - m[1][0] * m[2][2] * m[3][3]);
ret[1][1] =
one_over_det * (m[0][0] * m[2][2] * m[3][3] - m[0][0] * m[2][3] * m[3][2] - m[2][0] * m[0][2] * m[3][3] +
recip_det * (m[0][0] * m[2][2] * m[3][3] - m[0][0] * m[2][3] * m[3][2] - m[2][0] * m[0][2] * m[3][3] +
m[2][0] * m[0][3] * m[3][2] + m[3][0] * m[0][2] * m[2][3] - m[3][0] * m[0][3] * m[2][2]);
ret[1][2] =
one_over_det * (m[3][0] * m[0][3] * m[1][2] - m[3][0] * m[0][2] * m[1][3] - m[1][0] * m[0][3] * m[3][2] +
recip_det * (m[3][0] * m[0][3] * m[1][2] - m[3][0] * m[0][2] * m[1][3] - m[1][0] * m[0][3] * m[3][2] +
m[1][0] * m[0][2] * m[3][3] + m[0][0] * m[1][3] * m[3][2] - m[0][0] * m[1][2] * m[3][3]);
ret[1][3] =
one_over_det * (m[0][0] * m[1][2] * m[2][3] - m[0][0] * m[1][3] * m[2][2] - m[1][0] * m[0][2] * m[2][3] +
recip_det * (m[0][0] * m[1][2] * m[2][3] - m[0][0] * m[1][3] * m[2][2] - m[1][0] * m[0][2] * m[2][3] +
m[1][0] * m[0][3] * m[2][2] + m[2][0] * m[0][2] * m[1][3] - m[2][0] * m[0][3] * m[1][2]);
ret[2][0] =
one_over_det * (m[1][0] * m[2][1] * m[3][3] - m[1][0] * m[2][3] * m[3][1] - m[2][0] * m[1][1] * m[3][3] +
recip_det * (m[1][0] * m[2][1] * m[3][3] - m[1][0] * m[2][3] * m[3][1] - m[2][0] * m[1][1] * m[3][3] +
m[2][0] * m[1][3] * m[3][1] + m[3][0] * m[1][1] * m[2][3] - m[3][0] * m[1][3] * m[2][1]);
ret[2][1] =
one_over_det * (m[3][0] * m[0][3] * m[2][1] - m[3][0] * m[0][1] * m[2][3] - m[2][0] * m[0][3] * m[3][1] +
recip_det * (m[3][0] * m[0][3] * m[2][1] - m[3][0] * m[0][1] * m[2][3] - m[2][0] * m[0][3] * m[3][1] +
m[2][0] * m[0][1] * m[3][3] + m[0][0] * m[2][3] * m[3][1] - m[0][0] * m[2][1] * m[3][3]);
ret[2][2] =
one_over_det * (m[0][0] * m[1][1] * m[3][3] - m[0][0] * m[1][3] * m[3][1] - m[1][0] * m[0][1] * m[3][3] +
recip_det * (m[0][0] * m[1][1] * m[3][3] - m[0][0] * m[1][3] * m[3][1] - m[1][0] * m[0][1] * m[3][3] +
m[1][0] * m[0][3] * m[3][1] + m[3][0] * m[0][1] * m[1][3] - m[3][0] * m[0][3] * m[1][1]);
ret[2][3] =
one_over_det * (m[2][0] * m[0][3] * m[1][1] - m[2][0] * m[0][1] * m[1][3] - m[1][0] * m[0][3] * m[2][1] +
recip_det * (m[2][0] * m[0][3] * m[1][1] - m[2][0] * m[0][1] * m[1][3] - m[1][0] * m[0][3] * m[2][1] +
m[1][0] * m[0][1] * m[2][3] + m[0][0] * m[1][3] * m[2][1] - m[0][0] * m[1][1] * m[2][3]);
ret[3][0] =
one_over_det * (m[3][0] * m[1][2] * m[2][1] - m[3][0] * m[1][1] * m[2][2] - m[2][0] * m[1][2] * m[3][1] +
recip_det * (m[3][0] * m[1][2] * m[2][1] - m[3][0] * m[1][1] * m[2][2] - m[2][0] * m[1][2] * m[3][1] +
m[2][0] * m[1][1] * m[3][2] + m[1][0] * m[2][2] * m[3][1] - m[1][0] * m[2][1] * m[3][2]);
ret[3][1] =
one_over_det * (m[0][0] * m[2][1] * m[3][2] - m[0][0] * m[2][2] * m[3][1] - m[2][0] * m[0][1] * m[3][2] +
recip_det * (m[0][0] * m[2][1] * m[3][2] - m[0][0] * m[2][2] * m[3][1] - m[2][0] * m[0][1] * m[3][2] +
m[2][0] * m[0][2] * m[3][1] + m[3][0] * m[0][1] * m[2][2] - m[3][0] * m[0][2] * m[2][1]);
ret[3][2] =
one_over_det * (m[3][0] * m[0][2] * m[1][1] - m[3][0] * m[0][1] * m[1][2] - m[1][0] * m[0][2] * m[3][1] +
recip_det * (m[3][0] * m[0][2] * m[1][1] - m[3][0] * m[0][1] * m[1][2] - m[1][0] * m[0][2] * m[3][1] +
m[1][0] * m[0][1] * m[3][2] + m[0][0] * m[1][2] * m[3][1] - m[0][0] * m[1][1] * m[3][2]);
ret[3][3] =
one_over_det * (m[0][0] * m[1][1] * m[2][2] - m[0][0] * m[1][2] * m[2][1] - m[1][0] * m[0][1] * m[2][2] +
recip_det * (m[0][0] * m[1][1] * m[2][2] - m[0][0] * m[1][2] * m[2][1] - m[1][0] * m[0][1] * m[2][2] +
m[1][0] * m[0][2] * m[2][1] + m[2][0] * m[0][1] * m[1][2] - m[2][0] * m[0][2] * m[1][1]);
return ret;
}
Expand All @@ -292,12 +292,12 @@ struct ktm::detail::matrix_implement::inverse

for(int i = 0; i < N; ++i)
{
T one_over_diag = recip(left[i][i]);
T recip_diag = recip(left[i][i]);
for(int j = 0; j < N; ++j)
{
if(i != j)
{
T factor = left[i][j] * one_over_diag;
T factor = left[i][j] * recip_diag;
for(int k = 0; k < N; ++k)
{
if(k >= i)
Expand All @@ -309,8 +309,8 @@ struct ktm::detail::matrix_implement::inverse
for(int k = 0; k < N; ++k)
{
if(k >= i)
left[k][i] *= one_over_diag;
right[k][i] *= one_over_diag;
left[k][i] *= recip_diag;
right[k][i] *= recip_diag;
}
}
return right;
Expand Down
10 changes: 5 additions & 5 deletions ktm/detail/function/matrix_simd.inl
Original file line number Diff line number Diff line change
Expand Up @@ -334,13 +334,13 @@ struct ktm::detail::matrix_implement::inverse<4, float>
skv::fv4 i_tmp_1 = _shufft128_f32(inv_2, inv_3, 0, 0, 0, 0);
skv::fv4 i_row_0 = _shufft128_f32(i_tmp_0, i_tmp_1, 3, 1, 3, 1);
skv::fv4 i_dot = skv::dot_fv4(c_0, i_row_0);
skv::fv4 one_over_det = _reciph128_f32(i_dot);
skv::fv4 recip_det = _reciph128_f32(i_dot);

M ret;
ret[0].st = _mul128_f32(inv_0, one_over_det);
ret[1].st = _mul128_f32(inv_1, one_over_det);
ret[2].st = _mul128_f32(inv_2, one_over_det);
ret[3].st = _mul128_f32(inv_3, one_over_det);
ret[0].st = _mul128_f32(inv_0, recip_det);
ret[1].st = _mul128_f32(inv_1, recip_det);
ret[2].st = _mul128_f32(inv_2, recip_det);
ret[3].st = _mul128_f32(inv_3, recip_det);
return ret;
}
};
Expand Down
46 changes: 23 additions & 23 deletions ktm/function/matrix_decompose.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<
continue;
length = std::copysign(sqrt(length), -a[i][v_start]);

T r_h = recip(length * (length - a[i][v_start]));
T recip_h = recip(length * (length - a[i][v_start]));
a[i][v_start] = (a[i][v_start] - length);

for(int k = i + 1; k < N; ++k)
Expand All @@ -55,7 +55,7 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<
{
ta += a[i][j] * a[k][j];
}
ta *= r_h;
ta *= recip_h;
for(int j = v_start; j < N; ++j)
{
a[k][j] -= ta * a[i][j];
Expand All @@ -70,8 +70,8 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<
ta += a[i][j] * a[j][k];
tt += a[i][j] * trans[j][k];
}
ta *= r_h;
tt *= r_h;
ta *= recip_h;
tt *= recip_h;
for(int j = v_start; j < N; ++j)
{
a[j][k] -= ta * a[i][j];
Expand Down Expand Up @@ -116,20 +116,20 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<
u[j] = a[i][j];
}

T r_h = recip(length * (length - a[i][v_start]));
T recip_h = recip(length * (length - a[i][v_start]));
vec<N, T> q { };
for(int j = v_start; j < N; ++j)
{
q += a[j] * u[j];
}
q *= r_h;
q *= recip_h;

T dot_qu = zero<T>;
for(int j = v_start; j < N; ++j)
{
dot_qu += q[j] * u[j];
}
q -= dot_qu * static_cast<T>(0.5) * r_h * u;
q -= dot_qu * static_cast<T>(0.5) * recip_h * u;

a[i][v_start] = length;
a[v_start][i] = length;
Expand All @@ -155,7 +155,7 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<
{
tt += u[j] * trans[j][k];
}
tt *= r_h;
tt *= recip_h;
for(int j = v_start; j < N; ++j)
{
trans[j][k] -= tt * u[j];
Expand All @@ -177,10 +177,10 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<

for(int i = 0; i < N - 1; ++i)
{
T r_uii = recip(u[i][i]);
T recip_uii = recip(u[i][i]);
for(int j = i + 1; j < N; ++j)
{
l[i][j] = u[i][j] * r_uii;
l[i][j] = u[i][j] * recip_uii;
u[i][j] = zero<T>;
for(int k = i + 1; k < N; ++k)
{
Expand All @@ -203,10 +203,10 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<

for(int i = 0; i < N - 1; ++i)
{
T r_lii = recip(l[i][i]);
T recip_lii = recip(l[i][i]);
for(int j = i + 1; j < N; ++j)
{
u[j][i] = l[j][i] * r_lii;
u[j][i] = l[j][i] * recip_lii;
l[j][i] = zero<T>;
for(int k = i + 1; k < N; ++k)
{
Expand Down Expand Up @@ -271,7 +271,7 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<
continue;
length = std::copysign(sqrt(length), -r[i][i]);

T r_h = recip(length * (length - r[i][i]));
T recip_h = recip(length * (length - r[i][i]));
r[i][i] = (r[i][i] - length);

for(int k = 0; k <= i; ++k)
Expand All @@ -281,7 +281,7 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<
{
tq += r[i][j] * q[j][k];
}
tq *= r_h;
tq *= recip_h;
for(int j = i; j < N; ++j)
{
q[j][k] -= tq * r[i][j];
Expand All @@ -296,8 +296,8 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<
tq += r[i][j] * q[j][k];
tr += r[i][j] * r[k][j];
}
tq *= r_h;
tr *= r_h;
tq *= recip_h;
tr *= recip_h;
for(int j = i; j < N; ++j)
{
q[j][k] -= tq * r[i][j];
Expand Down Expand Up @@ -385,18 +385,18 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<
continue;
length = std::copysign(sqrt(length), -r[i][i]);

T r_h = recip(length * (length - r[i][i]));
T recip_h = recip(length * (length - r[i][i]));
r[i][i] = (r[i][i] - length);

for(int k = 0; k <= i + 1; ++k)
{
T tq = r_h * (r[i][i] * q[i][k] + r[i][i + 1] * q[i + 1][k]);
T tq = recip_h * (r[i][i] * q[i][k] + r[i][i + 1] * q[i + 1][k]);
q[i][k] -= tq * r[i][i];
q[i + 1][k] -= tq * r[i][i + 1];
}
for(int k = i + 1; k < N; ++k)
{
T tr = r_h * (r[i][i] * r[k][i] + r[i][i + 1] * r[k][i + 1]);
T tr = recip_h * (r[i][i] * r[k][i] + r[i][i + 1] * r[k][i + 1]);
r[k][i] -= tr * r[i][i];
r[k][i + 1] -= tr * r[i][i + 1];
}
Expand Down Expand Up @@ -521,15 +521,15 @@ KTM_NOINLINE std::enable_if_t<is_square_matrix_v<M> && is_floating_point_base_v<
{
if(acr < 0)
{
sin_theta = -rsqrt(static_cast<T>(2));
cos_theta = rsqrt(static_cast<T>(2));
sin_theta = -rsqrt_tow<T>;
cos_theta = rsqrt_tow<T>;
sin_two_theta = -one<T>;
cos_two_theta = zero<T>;
}
else
{
sin_theta = rsqrt(static_cast<T>(2));
cos_theta = rsqrt(static_cast<T>(2));
sin_theta = rsqrt_tow<T>;
cos_theta = rsqrt_tow<T>;
sin_two_theta = one<T>;
cos_two_theta = zero<T>;
}
Expand Down
2 changes: 1 addition & 1 deletion ktm/function/trigonometric.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ KTM_INLINE std::enable_if_t<std::is_floating_point_v<T>, T> radians(T degrees) n
template<typename T>
KTM_INLINE std::enable_if_t<std::is_floating_point_v<T>, T> degrees(T radians) noexcept
{
constexpr T radians_to_degrees = static_cast<T>(180) * one_over_pi<T>;
constexpr T radians_to_degrees = static_cast<T>(180) * recip_pi<T>;
return radians * radians_to_degrees;
}

Expand Down
8 changes: 7 additions & 1 deletion ktm/type/basic.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,13 @@ template<typename T>
inline constexpr std::enable_if_t<std::is_floating_point_v<T>, T> half_pi = static_cast<T>(0.5) * pi<T>;

template<typename T>
inline constexpr std::enable_if_t<std::is_floating_point_v<T>, T> one_over_pi = one<T> / pi<T>;
inline constexpr std::enable_if_t<std::is_floating_point_v<T>, T> recip_pi = one<T> / pi<T>;

template<typename T>
inline constexpr std::enable_if_t<std::is_floating_point_v<T>, T> sqrt_tow = static_cast<T>(1.41421356237309504880168872420969807856967187537695);

template<typename T>
inline constexpr std::enable_if_t<std::is_floating_point_v<T>, T> rsqrt_tow = one<T> / sqrt_tow<T>;

}

Expand Down

0 comments on commit 13da8d3

Please sign in to comment.