Skip to content

Commit

Permalink
rename files
Browse files Browse the repository at this point in the history
  • Loading branch information
upsj committed May 19, 2024
1 parent 7f1d5be commit 7fd6afb
Show file tree
Hide file tree
Showing 23 changed files with 86 additions and 102 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -206,4 +206,72 @@ __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_init(
}


template <int subwarp_size, typename ValueType, typename IndexType>
__global__ __launch_bounds__(default_block_size) void ict_sweep(
const IndexType* __restrict__ a_row_ptrs,
const IndexType* __restrict__ a_col_idxs,
const ValueType* __restrict__ a_vals,
const IndexType* __restrict__ l_row_ptrs,
const IndexType* __restrict__ l_row_idxs,
const IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_vals,
IndexType l_nnz)
{
auto l_nz = thread::get_subwarp_id_flat<subwarp_size, IndexType>();
if (l_nz >= l_nnz) {
return;
}
auto row = l_row_idxs[l_nz];
auto col = l_col_idxs[l_nz];
auto subwarp =
group::tiled_partition<subwarp_size>(group::this_thread_block());
// find entry of A at (row, col)
auto a_row_begin = a_row_ptrs[row];
auto a_row_end = a_row_ptrs[row + 1];
auto a_row_size = a_row_end - a_row_begin;
auto a_idx =
group_wide_search(a_row_begin, a_row_size, subwarp,
[&](IndexType i) { return a_col_idxs[i] >= col; });
bool has_a = a_idx < a_row_end && a_col_idxs[a_idx] == col;
auto a_val = has_a ? a_vals[a_idx] : zero<ValueType>();
auto l_row_begin = l_row_ptrs[row];
auto l_row_size = l_row_ptrs[row + 1] - l_row_begin;
auto lh_col_begin = l_row_ptrs[col];
auto lh_col_size = l_row_ptrs[col + 1] - lh_col_begin;
ValueType sum{};
IndexType lh_nz{};
auto last_entry = col;
group_merge<subwarp_size>(
l_col_idxs + l_row_begin, l_row_size, l_col_idxs + lh_col_begin,
lh_col_size, subwarp,
[&](IndexType l_idx, IndexType l_col, IndexType lh_idx,
IndexType lh_row, IndexType, bool) {
// we don't need to use the `bool valid` because last_entry is
// already a smaller sentinel value than the one used in group_merge
if (l_col == lh_row && l_col < last_entry) {
sum += load_relaxed(l_vals + (l_idx + l_row_begin)) *
conj(load_relaxed(l_vals + (lh_idx + lh_col_begin)));
}
// remember the transposed element
auto found_transp = subwarp.ballot(lh_row == row);
if (found_transp) {
lh_nz =
subwarp.shfl(lh_idx + lh_col_begin, ffs(found_transp) - 1);
}
return true;
});
// accumulate result from all threads
sum = reduce(subwarp, sum, [](ValueType a, ValueType b) { return a + b; });

if (subwarp.thread_rank() == 0) {
auto to_write =
row == col ? sqrt(a_val - sum)
: (a_val - sum) /
load_relaxed(l_vals + (l_row_ptrs[col + 1] - 1));
if (is_finite(to_write)) {
store_relaxed(l_vals + l_nz, to_write);
}
}
}


} // namespace kernel
76 changes: 0 additions & 76 deletions common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc

This file was deleted.

4 changes: 2 additions & 2 deletions cuda/factorization/par_ict_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <ginkgo/core/matrix/dense.hpp>


#include "common/cuda_hip/base/runtime.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "core/matrix/coo_builder.hpp"
#include "core/matrix/csr_builder.hpp"
Expand Down Expand Up @@ -46,8 +47,7 @@ using compiled_kernels =
syn::value_list<int, 1, 2, 4, 8, 16, 32, config::warp_size>;


#include "common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc"
#include "common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc"
#include "common/cuda_hip/factorization/par_ict_kernels.hpp.inc"


namespace {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <ginkgo/core/matrix/dense.hpp>


#include "common/cuda_hip/base/runtime.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "core/matrix/coo_builder.hpp"
#include "core/matrix/csr_builder.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <ginkgo/core/matrix/csr.hpp>


#include "common/cuda_hip/base/runtime.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "cuda/base/math.hpp"
#include "cuda/components/atomic.cuh"
Expand Down Expand Up @@ -147,7 +148,7 @@ void threshold_select(std::shared_ptr<const DefaultExecutor> exec,
auto out_ptr = reinterpret_cast<AbsType*>(tmp1.get_data());
kernel::basecase_select<<<1, kernel::basecase_block_size, 0,
exec->get_stream()>>>(
as_cuda_type(tmp22), bucket.size, rank, as_cuda_type(out_ptr));
as_device_type(tmp22), bucket.size, rank, as_device_type(out_ptr));
threshold = exec->copy_val_to_host(out_ptr);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <ginkgo/core/matrix/dense.hpp>


#include "common/cuda_hip/base/runtime.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "core/matrix/coo_builder.hpp"
#include "core/matrix/csr_builder.hpp"
Expand Down Expand Up @@ -80,8 +81,8 @@ void add_candidates(syn::value_list<int, subwarp_size>,
auto u_vals = u->get_const_values();
auto l_new_row_ptrs = l_new->get_row_ptrs();
auto u_new_row_ptrs = u_new->get_row_ptrs();
// count non-zeros per row
if (num_blocks > 0) {
// count non-zeros per row
kernel::tri_spgeam_nnz<subwarp_size>
<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
lu_row_ptrs, lu_col_idxs, a_row_ptrs, a_col_idxs,
Expand All @@ -105,8 +106,8 @@ void add_candidates(syn::value_list<int, subwarp_size>,
auto u_new_col_idxs = u_new->get_col_idxs();
auto u_new_vals = u_new->get_values();
// fill columns and values
if (num_blocks > 0) {
// fill columns and values
kernel::tri_spgeam_init<subwarp_size>
<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
lu_row_ptrs, lu_col_idxs, as_device_type(lu_vals), a_row_ptrs,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <ginkgo/core/matrix/dense.hpp>


#include "common/cuda_hip/base/runtime.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "core/matrix/coo_builder.hpp"
#include "core/matrix/csr_builder.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace cuda {
namespace jacobi {


#include "common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc"
#include <common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc>


// clang-format off
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ namespace cuda {
namespace jacobi {


#include "common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc"
#include <common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc>


// clang-format off
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace cuda {
namespace jacobi {


#include "common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc"
#include <common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc>


// clang-format off
Expand Down
7 changes: 2 additions & 5 deletions hip/factorization/par_ict_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
#include "core/factorization/par_ict_kernels.hpp"


#include <hip/hip_runtime.h>


#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/matrix/coo.hpp>
#include <ginkgo/core/matrix/csr.hpp>
#include <ginkgo/core/matrix/dense.hpp>


#include "common/cuda_hip/base/runtime.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "core/matrix/coo_builder.hpp"
#include "core/matrix/csr_builder.hpp"
Expand Down Expand Up @@ -49,8 +47,7 @@ using compiled_kernels =
syn::value_list<int, 1, 2, 4, 8, 16, 32, config::warp_size>;


#include "common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc"
#include "common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc"
#include "common/cuda_hip/factorization/par_ict_kernels.hpp.inc"


namespace {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
#include "core/factorization/par_ilut_kernels.hpp"


#include <hip/hip_runtime.h>


#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/matrix/coo.hpp>
#include <ginkgo/core/matrix/csr.hpp>
#include <ginkgo/core/matrix/dense.hpp>


#include "common/cuda_hip/base/runtime.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "core/matrix/coo_builder.hpp"
#include "core/matrix/csr_builder.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@
#include <algorithm>


#include <hip/hip_runtime.h>


#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/matrix/csr.hpp>


#include "common/cuda_hip/base/runtime.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "hip/base/math.hip.hpp"
#include "hip/components/atomic.hip.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
#include "core/factorization/par_ilut_kernels.hpp"


#include <hip/hip_runtime.h>


#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/matrix/coo.hpp>
#include <ginkgo/core/matrix/csr.hpp>
#include <ginkgo/core/matrix/dense.hpp>


#include "common/cuda_hip/base/runtime.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "core/matrix/coo_builder.hpp"
#include "core/matrix/csr_builder.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
#include "core/factorization/par_ilut_kernels.hpp"


#include <hip/hip_runtime.h>


#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/matrix/coo.hpp>
#include <ginkgo/core/matrix/csr.hpp>
#include <ginkgo/core/matrix/dense.hpp>


#include "common/cuda_hip/base/runtime.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "core/matrix/coo_builder.hpp"
#include "core/matrix/csr_builder.hpp"
Expand Down Expand Up @@ -85,7 +83,6 @@ void compute_l_u_factors(syn::value_list<int, subwarp_size>,
}
}


GKO_ENABLE_IMPLEMENTATION_SELECTION(select_compute_l_u_factors,
compute_l_u_factors);

Expand Down

0 comments on commit 7fd6afb

Please sign in to comment.