Skip to content

Commit

Permalink
v0.2.6 version pull requests (#6)
Browse files Browse the repository at this point in the history
update detail framework
fix some calculate bug
add random function
  • Loading branch information
YGXXD authored Jun 14, 2024
2 parents 47bcdf4 + 7429487 commit eb75c86
Show file tree
Hide file tree
Showing 19 changed files with 524 additions and 949 deletions.
361 changes: 27 additions & 334 deletions ktm/detail/function/common.inl

Large diffs are not rendered by default.

101 changes: 48 additions & 53 deletions ktm/detail/function/matrix.inl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ struct ktm::detail::matrix_implement::transpose
static KTM_INLINE RetM call(const M& m) noexcept
{
if constexpr(Row <= 4 && Col <= 4)
return call(m, std::make_index_sequence<Col>(), std::make_index_sequence<Row>());
return call(m, std::make_index_sequence<Row>());
else
{
RetM ret;
Expand All @@ -35,13 +35,10 @@ struct ktm::detail::matrix_implement::transpose
}
}
private:
template<size_t ...Rs, size_t ...Cs>
static KTM_INLINE RetM call(const M& m, std::index_sequence<Rs...>, std::index_sequence<Cs...>) noexcept
template<size_t ...Ns>
static KTM_INLINE RetM call(const M& m, std::index_sequence<Ns...>) noexcept
{
RetM ret;
size_t row_index;
((row_index = Rs, ret[Rs] = RowV(m[Cs][row_index]...)), ...);
return ret;
return RetM::from_row(m[Ns]...);
}
};

Expand All @@ -55,8 +52,8 @@ struct ktm::detail::matrix_implement::trace
return call(m, std::make_index_sequence<N>());
else
{
T ret = zero<T>;
for(int i = 0; i < N; ++i)
T ret = m[0][0];
for(int i = 1; i < N; ++i)
ret += m[i][i];
return ret;
}
Expand All @@ -76,23 +73,21 @@ struct ktm::detail::matrix_implement::diagonal
using ColV = vec<N, T>;
static KTM_INLINE ColV call(const M& m) noexcept
{
ColV ret;
if constexpr(N <= 4)
return call(m, std::make_index_sequence<N>());
call(ret, m, std::make_index_sequence<N>());
else
{
ColV ret;
for(int i = 0; i < N; ++i)
ret[i] = m[i][i];
return ret;
}
return ret;
}
private:
template<size_t ...Ns>
static KTM_INLINE ColV call(const M& m, std::index_sequence<Ns...>) noexcept
static KTM_INLINE void call(ColV& ret, const M& m, std::index_sequence<Ns...>) noexcept
{
ColV ret;
((ret[Ns] = m[Ns][Ns]), ...);
return ret;
}
};

Expand Down Expand Up @@ -152,10 +147,10 @@ struct ktm::detail::matrix_implement::determinant
det = one<T>; M a { m };
for(int i = 0; i < N - 1; ++i)
{
T one_over_diag = recip(a[i][i]);
T recip_diag = recip(a[i][i]);
for(int j = i + 1; j < N; ++j)
{
T factor = a[j][i] * one_over_diag;
T factor = a[j][i] * recip_diag;
for(int k = i + 1; k < N; ++k)
{
a[j][k] -= a[i][k] * factor;
Expand Down Expand Up @@ -195,12 +190,12 @@ struct ktm::detail::matrix_implement::inverse<2, T>
using M = mat<2, 2, T>;
static KTM_INLINE M call(const M& m) noexcept
{
T one_over_det = one<T> / determinant<2, T>::call(m);
T recip_det = one<T> / determinant<2, T>::call(m);
M ret;
ret[0][0] = m[1][1] * one_over_det;
ret[0][1] = - m[0][1] * one_over_det;
ret[1][0] = - m[1][0] * one_over_det;
ret[1][1] = m[0][0] * one_over_det;
ret[0][0] = m[1][1] * recip_det;
ret[0][1] = - m[0][1] * recip_det;
ret[1][0] = - m[1][0] * recip_det;
ret[1][1] = m[0][0] * recip_det;
return ret;
}
};
Expand All @@ -211,17 +206,17 @@ struct ktm::detail::matrix_implement::inverse<3, T>
using M = mat<3, 3, T>;
static KTM_INLINE M call(const M& m) noexcept
{
T one_over_det = one<T> / determinant<3, T>::call(m);
T recip_det = one<T> / determinant<3, T>::call(m);
M ret;
ret[0][0] = one_over_det * (m[1][1] * m[2][2] - m[2][1] * m[1][2]);
ret[0][1] = one_over_det * (m[2][1] * m[0][2] - m[0][1] * m[2][2]);
ret[0][2] = one_over_det * (m[0][1] * m[1][2] - m[1][1] * m[0][2]);
ret[1][0] = one_over_det * (m[2][0] * m[1][2] - m[1][0] * m[2][2]);
ret[1][1] = one_over_det * (m[0][0] * m[2][2] - m[2][0] * m[0][2]);
ret[1][2] = one_over_det * (m[1][0] * m[0][2] - m[0][0] * m[1][2]);
ret[2][0] = one_over_det * (m[1][0] * m[2][1] - m[2][0] * m[1][1]);
ret[2][1] = one_over_det * (m[2][0] * m[0][1] - m[0][0] * m[2][1]);
ret[2][2] = one_over_det * (m[0][0] * m[1][1] - m[1][0] * m[0][1]);
ret[0][0] = recip_det * (m[1][1] * m[2][2] - m[2][1] * m[1][2]);
ret[0][1] = recip_det * (m[2][1] * m[0][2] - m[0][1] * m[2][2]);
ret[0][2] = recip_det * (m[0][1] * m[1][2] - m[1][1] * m[0][2]);
ret[1][0] = recip_det * (m[2][0] * m[1][2] - m[1][0] * m[2][2]);
ret[1][1] = recip_det * (m[0][0] * m[2][2] - m[2][0] * m[0][2]);
ret[1][2] = recip_det * (m[1][0] * m[0][2] - m[0][0] * m[1][2]);
ret[2][0] = recip_det * (m[1][0] * m[2][1] - m[2][0] * m[1][1]);
ret[2][1] = recip_det * (m[2][0] * m[0][1] - m[0][0] * m[2][1]);
ret[2][2] = recip_det * (m[0][0] * m[1][1] - m[1][0] * m[0][1]);
return ret;
}
};
Expand All @@ -232,55 +227,55 @@ struct ktm::detail::matrix_implement::inverse<4, T>
using M = mat<4, 4, T>;
static KTM_INLINE M call(const M& m) noexcept
{
T one_over_det = one<T> / determinant<4, T>::call(m);
T recip_det = one<T> / determinant<4, T>::call(m);
M ret;
ret[0][0] =
one_over_det * (m[1][1] * m[2][2] * m[3][3] - m[1][1] * m[2][3] * m[3][2] - m[2][1] * m[1][2] * m[3][3] +
recip_det * (m[1][1] * m[2][2] * m[3][3] - m[1][1] * m[2][3] * m[3][2] - m[2][1] * m[1][2] * m[3][3] +
m[2][1] * m[1][3] * m[3][2] + m[3][1] * m[1][2] * m[2][3] - m[3][1] * m[1][3] * m[2][2]);
ret[0][1] =
one_over_det * (m[3][1] * m[0][3] * m[2][2] - m[3][1] * m[0][2] * m[2][3] - m[2][1] * m[0][3] * m[3][2] +
recip_det * (m[3][1] * m[0][3] * m[2][2] - m[3][1] * m[0][2] * m[2][3] - m[2][1] * m[0][3] * m[3][2] +
m[2][1] * m[0][2] * m[3][3] + m[0][1] * m[2][3] * m[3][2] - m[0][1] * m[2][2] * m[3][3]);
ret[0][2] =
one_over_det * (m[0][1] * m[1][2] * m[3][3] - m[0][1] * m[1][3] * m[3][2] - m[1][1] * m[0][2] * m[3][3] +
recip_det * (m[0][1] * m[1][2] * m[3][3] - m[0][1] * m[1][3] * m[3][2] - m[1][1] * m[0][2] * m[3][3] +
m[1][1] * m[0][3] * m[3][2] + m[3][1] * m[0][2] * m[1][3] - m[3][1] * m[0][3] * m[1][2]);
ret[0][3] =
one_over_det * (m[2][1] * m[0][3] * m[1][2] - m[2][1] * m[0][2] * m[1][3] - m[1][1] * m[0][3] * m[2][2] +
recip_det * (m[2][1] * m[0][3] * m[1][2] - m[2][1] * m[0][2] * m[1][3] - m[1][1] * m[0][3] * m[2][2] +
m[1][1] * m[0][2] * m[2][3] + m[0][1] * m[1][3] * m[2][2] - m[0][1] * m[1][2] * m[2][3]);
ret[1][0] =
one_over_det * (m[3][0] * m[1][3] * m[2][2] - m[3][0] * m[1][2] * m[2][3] - m[2][0] * m[1][3] * m[3][2] +
recip_det * (m[3][0] * m[1][3] * m[2][2] - m[3][0] * m[1][2] * m[2][3] - m[2][0] * m[1][3] * m[3][2] +
m[2][0] * m[1][2] * m[3][3] + m[1][0] * m[2][3] * m[3][2] - m[1][0] * m[2][2] * m[3][3]);
ret[1][1] =
one_over_det * (m[0][0] * m[2][2] * m[3][3] - m[0][0] * m[2][3] * m[3][2] - m[2][0] * m[0][2] * m[3][3] +
recip_det * (m[0][0] * m[2][2] * m[3][3] - m[0][0] * m[2][3] * m[3][2] - m[2][0] * m[0][2] * m[3][3] +
m[2][0] * m[0][3] * m[3][2] + m[3][0] * m[0][2] * m[2][3] - m[3][0] * m[0][3] * m[2][2]);
ret[1][2] =
one_over_det * (m[3][0] * m[0][3] * m[1][2] - m[3][0] * m[0][2] * m[1][3] - m[1][0] * m[0][3] * m[3][2] +
recip_det * (m[3][0] * m[0][3] * m[1][2] - m[3][0] * m[0][2] * m[1][3] - m[1][0] * m[0][3] * m[3][2] +
m[1][0] * m[0][2] * m[3][3] + m[0][0] * m[1][3] * m[3][2] - m[0][0] * m[1][2] * m[3][3]);
ret[1][3] =
one_over_det * (m[0][0] * m[1][2] * m[2][3] - m[0][0] * m[1][3] * m[2][2] - m[1][0] * m[0][2] * m[2][3] +
recip_det * (m[0][0] * m[1][2] * m[2][3] - m[0][0] * m[1][3] * m[2][2] - m[1][0] * m[0][2] * m[2][3] +
m[1][0] * m[0][3] * m[2][2] + m[2][0] * m[0][2] * m[1][3] - m[2][0] * m[0][3] * m[1][2]);
ret[2][0] =
one_over_det * (m[1][0] * m[2][1] * m[3][3] - m[1][0] * m[2][3] * m[3][1] - m[2][0] * m[1][1] * m[3][3] +
recip_det * (m[1][0] * m[2][1] * m[3][3] - m[1][0] * m[2][3] * m[3][1] - m[2][0] * m[1][1] * m[3][3] +
m[2][0] * m[1][3] * m[3][1] + m[3][0] * m[1][1] * m[2][3] - m[3][0] * m[1][3] * m[2][1]);
ret[2][1] =
one_over_det * (m[3][0] * m[0][3] * m[2][1] - m[3][0] * m[0][1] * m[2][3] - m[2][0] * m[0][3] * m[3][1] +
recip_det * (m[3][0] * m[0][3] * m[2][1] - m[3][0] * m[0][1] * m[2][3] - m[2][0] * m[0][3] * m[3][1] +
m[2][0] * m[0][1] * m[3][3] + m[0][0] * m[2][3] * m[3][1] - m[0][0] * m[2][1] * m[3][3]);
ret[2][2] =
one_over_det * (m[0][0] * m[1][1] * m[3][3] - m[0][0] * m[1][3] * m[3][1] - m[1][0] * m[0][1] * m[3][3] +
recip_det * (m[0][0] * m[1][1] * m[3][3] - m[0][0] * m[1][3] * m[3][1] - m[1][0] * m[0][1] * m[3][3] +
m[1][0] * m[0][3] * m[3][1] + m[3][0] * m[0][1] * m[1][3] - m[3][0] * m[0][3] * m[1][1]);
ret[2][3] =
one_over_det * (m[2][0] * m[0][3] * m[1][1] - m[2][0] * m[0][1] * m[1][3] - m[1][0] * m[0][3] * m[2][1] +
recip_det * (m[2][0] * m[0][3] * m[1][1] - m[2][0] * m[0][1] * m[1][3] - m[1][0] * m[0][3] * m[2][1] +
m[1][0] * m[0][1] * m[2][3] + m[0][0] * m[1][3] * m[2][1] - m[0][0] * m[1][1] * m[2][3]);
ret[3][0] =
one_over_det * (m[3][0] * m[1][2] * m[2][1] - m[3][0] * m[1][1] * m[2][2] - m[2][0] * m[1][2] * m[3][1] +
recip_det * (m[3][0] * m[1][2] * m[2][1] - m[3][0] * m[1][1] * m[2][2] - m[2][0] * m[1][2] * m[3][1] +
m[2][0] * m[1][1] * m[3][2] + m[1][0] * m[2][2] * m[3][1] - m[1][0] * m[2][1] * m[3][2]);
ret[3][1] =
one_over_det * (m[0][0] * m[2][1] * m[3][2] - m[0][0] * m[2][2] * m[3][1] - m[2][0] * m[0][1] * m[3][2] +
recip_det * (m[0][0] * m[2][1] * m[3][2] - m[0][0] * m[2][2] * m[3][1] - m[2][0] * m[0][1] * m[3][2] +
m[2][0] * m[0][2] * m[3][1] + m[3][0] * m[0][1] * m[2][2] - m[3][0] * m[0][2] * m[2][1]);
ret[3][2] =
one_over_det * (m[3][0] * m[0][2] * m[1][1] - m[3][0] * m[0][1] * m[1][2] - m[1][0] * m[0][2] * m[3][1] +
recip_det * (m[3][0] * m[0][2] * m[1][1] - m[3][0] * m[0][1] * m[1][2] - m[1][0] * m[0][2] * m[3][1] +
m[1][0] * m[0][1] * m[3][2] + m[0][0] * m[1][2] * m[3][1] - m[0][0] * m[1][1] * m[3][2]);
ret[3][3] =
one_over_det * (m[0][0] * m[1][1] * m[2][2] - m[0][0] * m[1][2] * m[2][1] - m[1][0] * m[0][1] * m[2][2] +
recip_det * (m[0][0] * m[1][1] * m[2][2] - m[0][0] * m[1][2] * m[2][1] - m[1][0] * m[0][1] * m[2][2] +
m[1][0] * m[0][2] * m[2][1] + m[2][0] * m[0][1] * m[1][2] - m[2][0] * m[0][2] * m[1][1]);
return ret;
}
Expand All @@ -297,12 +292,12 @@ struct ktm::detail::matrix_implement::inverse

for(int i = 0; i < N; ++i)
{
T one_over_diag = recip(left[i][i]);
T recip_diag = recip(left[i][i]);
for(int j = 0; j < N; ++j)
{
if(i != j)
{
T factor = left[i][j] * one_over_diag;
T factor = left[i][j] * recip_diag;
for(int k = 0; k < N; ++k)
{
if(k >= i)
Expand All @@ -314,8 +309,8 @@ struct ktm::detail::matrix_implement::inverse
for(int k = 0; k < N; ++k)
{
if(k >= i)
left[k][i] *= one_over_diag;
right[k][i] *= one_over_diag;
left[k][i] *= recip_diag;
right[k][i] *= recip_diag;
}
}
return right;
Expand Down
10 changes: 5 additions & 5 deletions ktm/detail/function/matrix_simd.inl
Original file line number Diff line number Diff line change
Expand Up @@ -334,13 +334,13 @@ struct ktm::detail::matrix_implement::inverse<4, float>
skv::fv4 i_tmp_1 = _shufft128_f32(inv_2, inv_3, 0, 0, 0, 0);
skv::fv4 i_row_0 = _shufft128_f32(i_tmp_0, i_tmp_1, 3, 1, 3, 1);
skv::fv4 i_dot = skv::dot_fv4(c_0, i_row_0);
skv::fv4 one_over_det = _reciph128_f32(i_dot);
skv::fv4 recip_det = _reciph128_f32(i_dot);

M ret;
ret[0].st = _mul128_f32(inv_0, one_over_det);
ret[1].st = _mul128_f32(inv_1, one_over_det);
ret[2].st = _mul128_f32(inv_2, one_over_det);
ret[3].st = _mul128_f32(inv_3, one_over_det);
ret[0].st = _mul128_f32(inv_0, recip_det);
ret[1].st = _mul128_f32(inv_1, recip_det);
ret[2].st = _mul128_f32(inv_2, recip_det);
ret[3].st = _mul128_f32(inv_3, recip_det);
return ret;
}
};
Expand Down
151 changes: 151 additions & 0 deletions ktm/detail/loop_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// MIT License
//
// Copyright (c) 2024 有个小小杜
//
// Created by 有个小小杜
//

#ifndef _KTM_LOOP_UTIL_H_
#define _KTM_LOOP_UTIL_H_

#include <utility>
#include <functional>
#include "../setup.h"

namespace ktm
{
namespace detail
{

template<typename R, typename A1, typename OP, size_t ...Ns>
KTM_FUNC void loop_op(R&& self, A1&& l0, OP&& op, std::index_sequence<Ns...>)
{
((self[Ns] = op(l0[Ns])), ...);
}

template<size_t LoopN, typename R, typename A1, typename OP>
KTM_FUNC void loop_op(R&& self, A1&& l0, OP&& op)
{
if constexpr(LoopN <= 4)
return loop_op(self, std::forward<A1>(l0), std::forward<OP>(op), std::make_index_sequence<LoopN>());
else
{
for(int i = 0; i < LoopN; ++i)
self[i] = op(l0[i]);
}
}

template<typename R, typename A1, typename A2, typename OP, size_t ...Ns>
KTM_FUNC void loop_op(R&& self, A1&& l0, A2&& l1, OP&& op, std::index_sequence<Ns...>)
{
((self[Ns] = op(l0[Ns], l1[Ns])), ...);
}

template<size_t LoopN, typename R, typename A1, typename A2, typename OP>
KTM_FUNC void loop_op(R&& self, A1&& l0, A2&& l1, OP&& op)
{
if constexpr(LoopN <= 4)
return loop_op(self, std::forward<A1>(l0), std::forward<A2>(l1), std::forward<OP>(op), std::make_index_sequence<LoopN>());
else
{
for(int i = 0; i < LoopN; ++i)
self[i] = op(l0[i], l1[i]);
}
}

template<typename R, typename A1, typename A2, typename A3, typename OP, size_t ...Ns>
KTM_FUNC void loop_op(R&& self, A1&& l0, A2&& l1, A3&& l2, OP&& op, std::index_sequence<Ns...>)
{
((self[Ns] = op(l0[Ns], l1[Ns], l2[Ns])), ...);
}

template<size_t LoopN, typename R, typename A1, typename A2, typename A3, typename OP>
KTM_FUNC void loop_op(R&& self, A1&& l0, A2&& l1, A3&& l2, OP&& op)
{
if constexpr(LoopN <= 4)
return loop_op(self, std::forward<A1>(l0), std::forward<A2>(l1), std::forward<A3>(l2), std::forward<OP>(op), std::make_index_sequence<LoopN>());
else
{
for(int i = 0; i < LoopN; ++i)
self[i] = op(l0[i], l1[i], l2[i]);
}
}

template<typename R, typename A1, typename T, typename OP, size_t ...Ns>
KTM_FUNC void loop_scalar(R&& self, A1&& l0, const T& scalar, OP&& op, std::index_sequence<Ns...>)
{
((self[Ns] = op(l0[Ns], scalar)), ...);
}

template<size_t LoopN, typename R, typename A1, typename T, typename OP>
KTM_FUNC void loop_scalar(R&& self, A1&& l0, const T& scalar, OP&& op)
{
if constexpr(LoopN <= 4)
return loop_scalar(self, std::forward<A1>(l0), scalar, std::forward<OP>(op), std::make_index_sequence<LoopN>());
else
{
for(int i = 0; i < LoopN; ++i)
self[i] = op(l0[i], scalar);
}
}

template<typename R, typename A1, typename A2, typename T, typename OP, size_t ...Ns>
KTM_FUNC void loop_scalar(R&& self, A1&& l0, A2&& l1, const T& scalar, OP&& op, std::index_sequence<Ns...>)
{
((self[Ns] = op(l0[Ns], l1[Ns], scalar)), ...);
}

template<size_t LoopN, typename R, typename A1, typename A2, typename T, typename OP>
KTM_FUNC void loop_scalar(R&& self, A1&& l0, A2&& l1, const T& scalar, OP&& op)
{
if constexpr(LoopN <= 4)
return loop_scalar(self, std::forward<A1>(l0), std::forward<A2>(l1), scalar, std::forward<OP>(op), std::make_index_sequence<LoopN>());
else
{
for(int i = 0; i < LoopN; ++i)
self[i] = op(l0[i], l1[i], scalar);
}
}

template<typename T, typename A, typename OP, size_t ...Ns>
KTM_FUNC void loop_reduce(T& self, const A& l0, OP&& op, std::index_sequence<Ns...>)
{
((self = op(self, l0[Ns + 1])), ...);
}

template<size_t LoopN, typename T, typename A, typename OP>
KTM_FUNC void loop_reduce(T& self, const A& l0, const T& init, OP&& op)
{
self = init;
if constexpr(LoopN <= 4)
return loop_reduce(self, l0, std::forward<OP>(op), std::make_index_sequence<LoopN - 1>());
else
{
for(int i = 1; i < LoopN; ++i)
self = op(self, l0[i]);
}
}

template<typename T, typename A, typename U, typename OP, size_t ...Ns>
KTM_FUNC void loop_reduce(T& self, const A& l0, const U& l1, OP&& op, std::index_sequence<Ns...>)
{
((self = op(self, l0[Ns + 1], l1[Ns + 1])), ...);
}

template<size_t LoopN, typename T, typename A, typename U, typename OP>
KTM_FUNC void loop_reduce(T& self, const A& l0, const U& l1, const T& init, OP&& op)
{
self = init;
if constexpr(LoopN <= 4)
return loop_reduce(self, l0, l1, std::forward<OP>(op), std::make_index_sequence<LoopN - 1>());
else
{
for(int i = 1; i < LoopN; ++i)
self = op(self, l0[i], l1[i]);
}
}

}
}

#endif
Loading

0 comments on commit eb75c86

Please sign in to comment.