From 7fd6afbbe0017a7b089dd4899821538d1fbf8a82 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 19 Apr 2024 12:55:56 +0200 Subject: [PATCH] rename files --- ...ernels.hpp.inc => par_ict_kernels.hpp.inc} | 68 +++++++++++++++++ .../par_ict_sweep_kernels.hpp.inc | 76 ------------------- ... => jacobi_advanced_apply_kernels.hpp.inc} | 0 ...pp.inc => jacobi_generate_kernels.hpp.inc} | 0 ...nc => jacobi_simple_apply_kernels.hpp.inc} | 0 cuda/factorization/par_ict_kernels.cu | 4 +- ...l.cu => par_ilut_approx_filter_kernels.cu} | 0 ...r_kernel.cu => par_ilut_filter_kernels.cu} | 1 + ...t_kernel.cu => par_ilut_select_kernels.cu} | 3 +- ...m_kernel.cu => par_ilut_spgeam_kernels.cu} | 5 +- ...ep_kernel.cu => par_ilut_sweep_kernels.cu} | 1 + ...el.cu => jacobi_advanced_apply_kernels.cu} | 0 ...obi_advanced_apply_kernels.instantiate.cu} | 2 +- ...e_kernel.cu => jacobi_generate_kernels.cu} | 0 ...=> jacobi_generate_kernels.instantiate.cu} | 2 +- ...rnel.cu => jacobi_simple_apply_kernels.cu} | 0 ...acobi_simple_apply_kernels.instantiate.cu} | 2 +- hip/factorization/par_ict_kernels.hip.cpp | 7 +- ...=> par_ilut_approx_filter_kernels.hip.cpp} | 0 ...ip.cpp => par_ilut_filter_kernels.hip.cpp} | 4 +- ...ip.cpp => par_ilut_select_kernels.hip.cpp} | 4 +- ...ip.cpp => par_ilut_spgeam_kernels.hip.cpp} | 4 +- ...hip.cpp => par_ilut_sweep_kernels.hip.cpp} | 5 +- 23 files changed, 86 insertions(+), 102 deletions(-) rename common/cuda_hip/factorization/{par_ict_spgeam_kernels.hpp.inc => par_ict_kernels.hpp.inc} (75%) delete mode 100644 common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc rename common/cuda_hip/preconditioner/{jacobi_advanced_apply_kernel.hpp.inc => jacobi_advanced_apply_kernels.hpp.inc} (100%) rename common/cuda_hip/preconditioner/{jacobi_generate_kernel.hpp.inc => jacobi_generate_kernels.hpp.inc} (100%) rename common/cuda_hip/preconditioner/{jacobi_simple_apply_kernel.hpp.inc => jacobi_simple_apply_kernels.hpp.inc} (100%) rename cuda/factorization/{par_ilut_approx_filter_kernel.cu => par_ilut_approx_filter_kernels.cu} (100%) rename cuda/factorization/{par_ilut_filter_kernel.cu => par_ilut_filter_kernels.cu} (99%) rename cuda/factorization/{par_ilut_select_kernel.cu => par_ilut_select_kernels.cu} (98%) rename cuda/factorization/{par_ilut_spgeam_kernel.cu => par_ilut_spgeam_kernels.cu} (98%) rename cuda/factorization/{par_ilut_sweep_kernel.cu => par_ilut_sweep_kernels.cu} (99%) rename cuda/preconditioner/{jacobi_advanced_apply_kernel.cu => jacobi_advanced_apply_kernels.cu} (100%) rename cuda/preconditioner/{jacobi_advanced_apply_instantiate.inc.cu => jacobi_advanced_apply_kernels.instantiate.cu} (97%) rename cuda/preconditioner/{jacobi_generate_kernel.cu => jacobi_generate_kernels.cu} (100%) rename cuda/preconditioner/{jacobi_generate_instantiate.inc.cu => jacobi_generate_kernels.instantiate.cu} (98%) rename cuda/preconditioner/{jacobi_simple_apply_kernel.cu => jacobi_simple_apply_kernels.cu} (100%) rename cuda/preconditioner/{jacobi_simple_apply_instantiate.inc.cu => jacobi_simple_apply_kernels.instantiate.cu} (97%) rename hip/factorization/{par_ilut_approx_filter_kernel.hip.cpp => par_ilut_approx_filter_kernels.hip.cpp} (100%) rename hip/factorization/{par_ilut_filter_kernel.hip.cpp => par_ilut_filter_kernels.hip.cpp} (99%) rename hip/factorization/{par_ilut_select_kernel.hip.cpp => par_ilut_select_kernels.hip.cpp} (99%) rename hip/factorization/{par_ilut_spgeam_kernel.hip.cpp => par_ilut_spgeam_kernels.hip.cpp} (99%) rename hip/factorization/{par_ilut_sweep_kernel.hip.cpp => par_ilut_sweep_kernels.hip.cpp} (99%) diff --git a/common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc b/common/cuda_hip/factorization/par_ict_kernels.hpp.inc similarity index 75% rename from common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc rename to common/cuda_hip/factorization/par_ict_kernels.hpp.inc index 93a49e56d21..87aa8297345 100644 --- a/common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ict_kernels.hpp.inc @@ -206,4 +206,72 @@ __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_init( } +template +__global__ __launch_bounds__(default_block_size) void ict_sweep( + const IndexType* __restrict__ a_row_ptrs, + const IndexType* __restrict__ a_col_idxs, + const ValueType* __restrict__ a_vals, + const IndexType* __restrict__ l_row_ptrs, + const IndexType* __restrict__ l_row_idxs, + const IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_vals, + IndexType l_nnz) +{ + auto l_nz = thread::get_subwarp_id_flat(); + if (l_nz >= l_nnz) { + return; + } + auto row = l_row_idxs[l_nz]; + auto col = l_col_idxs[l_nz]; + auto subwarp = + group::tiled_partition(group::this_thread_block()); + // find entry of A at (row, col) + auto a_row_begin = a_row_ptrs[row]; + auto a_row_end = a_row_ptrs[row + 1]; + auto a_row_size = a_row_end - a_row_begin; + auto a_idx = + group_wide_search(a_row_begin, a_row_size, subwarp, + [&](IndexType i) { return a_col_idxs[i] >= col; }); + bool has_a = a_idx < a_row_end && a_col_idxs[a_idx] == col; + auto a_val = has_a ? a_vals[a_idx] : zero(); + auto l_row_begin = l_row_ptrs[row]; + auto l_row_size = l_row_ptrs[row + 1] - l_row_begin; + auto lh_col_begin = l_row_ptrs[col]; + auto lh_col_size = l_row_ptrs[col + 1] - lh_col_begin; + ValueType sum{}; + IndexType lh_nz{}; + auto last_entry = col; + group_merge( + l_col_idxs + l_row_begin, l_row_size, l_col_idxs + lh_col_begin, + lh_col_size, subwarp, + [&](IndexType l_idx, IndexType l_col, IndexType lh_idx, + IndexType lh_row, IndexType, bool) { + // we don't need to use the `bool valid` because last_entry is + // already a smaller sentinel value than the one used in group_merge + if (l_col == lh_row && l_col < last_entry) { + sum += load_relaxed(l_vals + (l_idx + l_row_begin)) * + conj(load_relaxed(l_vals + (lh_idx + lh_col_begin))); + } + // remember the transposed element + auto found_transp = subwarp.ballot(lh_row == row); + if (found_transp) { + lh_nz = + subwarp.shfl(lh_idx + lh_col_begin, ffs(found_transp) - 1); + } + return true; + }); + // accumulate result from all threads + sum = reduce(subwarp, sum, [](ValueType a, ValueType b) { return a + b; }); + + if (subwarp.thread_rank() == 0) { + auto to_write = + row == col ? sqrt(a_val - sum) + : (a_val - sum) / + load_relaxed(l_vals + (l_row_ptrs[col + 1] - 1)); + if (is_finite(to_write)) { + store_relaxed(l_vals + l_nz, to_write); + } + } +} + + } // namespace kernel diff --git a/common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc b/common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc deleted file mode 100644 index bc58f0a9799..00000000000 --- a/common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors -// -// SPDX-License-Identifier: BSD-3-Clause - -namespace kernel { - - -template -__global__ __launch_bounds__(default_block_size) void ict_sweep( - const IndexType* __restrict__ a_row_ptrs, - const IndexType* __restrict__ a_col_idxs, - const ValueType* __restrict__ a_vals, - const IndexType* __restrict__ l_row_ptrs, - const IndexType* __restrict__ l_row_idxs, - const IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_vals, - IndexType l_nnz) -{ - auto l_nz = thread::get_subwarp_id_flat(); - if (l_nz >= l_nnz) { - return; - } - auto row = l_row_idxs[l_nz]; - auto col = l_col_idxs[l_nz]; - auto subwarp = - group::tiled_partition(group::this_thread_block()); - // find entry of A at (row, col) - auto a_row_begin = a_row_ptrs[row]; - auto a_row_end = a_row_ptrs[row + 1]; - auto a_row_size = a_row_end - a_row_begin; - auto a_idx = - group_wide_search(a_row_begin, a_row_size, subwarp, - [&](IndexType i) { return a_col_idxs[i] >= col; }); - bool has_a = a_idx < a_row_end && a_col_idxs[a_idx] == col; - auto a_val = has_a ? a_vals[a_idx] : zero(); - auto l_row_begin = l_row_ptrs[row]; - auto l_row_size = l_row_ptrs[row + 1] - l_row_begin; - auto lh_col_begin = l_row_ptrs[col]; - auto lh_col_size = l_row_ptrs[col + 1] - lh_col_begin; - ValueType sum{}; - IndexType lh_nz{}; - auto last_entry = col; - group_merge( - l_col_idxs + l_row_begin, l_row_size, l_col_idxs + lh_col_begin, - lh_col_size, subwarp, - [&](IndexType l_idx, IndexType l_col, IndexType lh_idx, - IndexType lh_row, IndexType, bool) { - // we don't need to use the `bool valid` because last_entry is - // already a smaller sentinel value than the one used in group_merge - if (l_col == lh_row && l_col < last_entry) { - sum += load_relaxed(l_vals + (l_idx + l_row_begin)) * - conj(load_relaxed(l_vals + (lh_idx + lh_col_begin))); - } - // remember the transposed element - auto found_transp = subwarp.ballot(lh_row == row); - if (found_transp) { - lh_nz = - subwarp.shfl(lh_idx + lh_col_begin, ffs(found_transp) - 1); - } - return true; - }); - // accumulate result from all threads - sum = reduce(subwarp, sum, [](ValueType a, ValueType b) { return a + b; }); - - if (subwarp.thread_rank() == 0) { - auto to_write = - row == col ? sqrt(a_val - sum) - : (a_val - sum) / - load_relaxed(l_vals + (l_row_ptrs[col + 1] - 1)); - if (is_finite(to_write)) { - store_relaxed(l_vals + l_nz, to_write); - } - } -} - - -} // namespace kernel diff --git a/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc b/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.hpp.inc similarity index 100% rename from common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc rename to common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.hpp.inc diff --git a/common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc b/common/cuda_hip/preconditioner/jacobi_generate_kernels.hpp.inc similarity index 100% rename from common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc rename to common/cuda_hip/preconditioner/jacobi_generate_kernels.hpp.inc diff --git a/common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc b/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.hpp.inc similarity index 100% rename from common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc rename to common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.hpp.inc diff --git a/cuda/factorization/par_ict_kernels.cu b/cuda/factorization/par_ict_kernels.cu index 5f48ceef2f8..9285e786adf 100644 --- a/cuda/factorization/par_ict_kernels.cu +++ b/cuda/factorization/par_ict_kernels.cu @@ -12,6 +12,7 @@ #include +#include "common/cuda_hip/base/runtime.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/coo_builder.hpp" #include "core/matrix/csr_builder.hpp" @@ -46,8 +47,7 @@ using compiled_kernels = syn::value_list; -#include "common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc" -#include "common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ict_kernels.hpp.inc" namespace { diff --git a/cuda/factorization/par_ilut_approx_filter_kernel.cu b/cuda/factorization/par_ilut_approx_filter_kernels.cu similarity index 100% rename from cuda/factorization/par_ilut_approx_filter_kernel.cu rename to cuda/factorization/par_ilut_approx_filter_kernels.cu diff --git a/cuda/factorization/par_ilut_filter_kernel.cu b/cuda/factorization/par_ilut_filter_kernels.cu similarity index 99% rename from cuda/factorization/par_ilut_filter_kernel.cu rename to cuda/factorization/par_ilut_filter_kernels.cu index 0e63f102b72..ddd4b428d55 100644 --- a/cuda/factorization/par_ilut_filter_kernel.cu +++ b/cuda/factorization/par_ilut_filter_kernels.cu @@ -12,6 +12,7 @@ #include +#include "common/cuda_hip/base/runtime.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/coo_builder.hpp" #include "core/matrix/csr_builder.hpp" diff --git a/cuda/factorization/par_ilut_select_kernel.cu b/cuda/factorization/par_ilut_select_kernels.cu similarity index 98% rename from cuda/factorization/par_ilut_select_kernel.cu rename to cuda/factorization/par_ilut_select_kernels.cu index ca8b55e504b..6a7bd53c1c4 100644 --- a/cuda/factorization/par_ilut_select_kernel.cu +++ b/cuda/factorization/par_ilut_select_kernels.cu @@ -13,6 +13,7 @@ #include +#include "common/cuda_hip/base/runtime.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "cuda/base/math.hpp" #include "cuda/components/atomic.cuh" @@ -147,7 +148,7 @@ void threshold_select(std::shared_ptr exec, auto out_ptr = reinterpret_cast(tmp1.get_data()); kernel::basecase_select<<<1, kernel::basecase_block_size, 0, exec->get_stream()>>>( - as_cuda_type(tmp22), bucket.size, rank, as_cuda_type(out_ptr)); + as_device_type(tmp22), bucket.size, rank, as_device_type(out_ptr)); threshold = exec->copy_val_to_host(out_ptr); } diff --git a/cuda/factorization/par_ilut_spgeam_kernel.cu b/cuda/factorization/par_ilut_spgeam_kernels.cu similarity index 98% rename from cuda/factorization/par_ilut_spgeam_kernel.cu rename to cuda/factorization/par_ilut_spgeam_kernels.cu index c4372f66219..7f59e4edc37 100644 --- a/cuda/factorization/par_ilut_spgeam_kernel.cu +++ b/cuda/factorization/par_ilut_spgeam_kernels.cu @@ -12,6 +12,7 @@ #include +#include "common/cuda_hip/base/runtime.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/coo_builder.hpp" #include "core/matrix/csr_builder.hpp" @@ -80,8 +81,8 @@ void add_candidates(syn::value_list, auto u_vals = u->get_const_values(); auto l_new_row_ptrs = l_new->get_row_ptrs(); auto u_new_row_ptrs = u_new->get_row_ptrs(); - // count non-zeros per row if (num_blocks > 0) { + // count non-zeros per row kernel::tri_spgeam_nnz <<get_stream()>>>( lu_row_ptrs, lu_col_idxs, a_row_ptrs, a_col_idxs, @@ -105,8 +106,8 @@ void add_candidates(syn::value_list, auto u_new_col_idxs = u_new->get_col_idxs(); auto u_new_vals = u_new->get_values(); - // fill columns and values if (num_blocks > 0) { + // fill columns and values kernel::tri_spgeam_init <<get_stream()>>>( lu_row_ptrs, lu_col_idxs, as_device_type(lu_vals), a_row_ptrs, diff --git a/cuda/factorization/par_ilut_sweep_kernel.cu b/cuda/factorization/par_ilut_sweep_kernels.cu similarity index 99% rename from cuda/factorization/par_ilut_sweep_kernel.cu rename to cuda/factorization/par_ilut_sweep_kernels.cu index 85fb3f26e21..5ec8dd81325 100644 --- a/cuda/factorization/par_ilut_sweep_kernel.cu +++ b/cuda/factorization/par_ilut_sweep_kernels.cu @@ -12,6 +12,7 @@ #include +#include "common/cuda_hip/base/runtime.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/coo_builder.hpp" #include "core/matrix/csr_builder.hpp" diff --git a/cuda/preconditioner/jacobi_advanced_apply_kernel.cu b/cuda/preconditioner/jacobi_advanced_apply_kernels.cu similarity index 100% rename from cuda/preconditioner/jacobi_advanced_apply_kernel.cu rename to cuda/preconditioner/jacobi_advanced_apply_kernels.cu diff --git a/cuda/preconditioner/jacobi_advanced_apply_instantiate.inc.cu b/cuda/preconditioner/jacobi_advanced_apply_kernels.instantiate.cu similarity index 97% rename from cuda/preconditioner/jacobi_advanced_apply_instantiate.inc.cu rename to cuda/preconditioner/jacobi_advanced_apply_kernels.instantiate.cu index 5633ad15a4b..ca7bf20372c 100644 --- a/cuda/preconditioner/jacobi_advanced_apply_instantiate.inc.cu +++ b/cuda/preconditioner/jacobi_advanced_apply_kernels.instantiate.cu @@ -32,7 +32,7 @@ namespace cuda { namespace jacobi { -#include "common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc" +#include // clang-format off diff --git a/cuda/preconditioner/jacobi_generate_kernel.cu b/cuda/preconditioner/jacobi_generate_kernels.cu similarity index 100% rename from cuda/preconditioner/jacobi_generate_kernel.cu rename to cuda/preconditioner/jacobi_generate_kernels.cu diff --git a/cuda/preconditioner/jacobi_generate_instantiate.inc.cu b/cuda/preconditioner/jacobi_generate_kernels.instantiate.cu similarity index 98% rename from cuda/preconditioner/jacobi_generate_instantiate.inc.cu rename to cuda/preconditioner/jacobi_generate_kernels.instantiate.cu index a76c4fba271..a3ad8890042 100644 --- a/cuda/preconditioner/jacobi_generate_instantiate.inc.cu +++ b/cuda/preconditioner/jacobi_generate_kernels.instantiate.cu @@ -35,7 +35,7 @@ namespace cuda { namespace jacobi { -#include "common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc" +#include // clang-format off diff --git a/cuda/preconditioner/jacobi_simple_apply_kernel.cu b/cuda/preconditioner/jacobi_simple_apply_kernels.cu similarity index 100% rename from cuda/preconditioner/jacobi_simple_apply_kernel.cu rename to cuda/preconditioner/jacobi_simple_apply_kernels.cu diff --git a/cuda/preconditioner/jacobi_simple_apply_instantiate.inc.cu b/cuda/preconditioner/jacobi_simple_apply_kernels.instantiate.cu similarity index 97% rename from cuda/preconditioner/jacobi_simple_apply_instantiate.inc.cu rename to cuda/preconditioner/jacobi_simple_apply_kernels.instantiate.cu index 07689daa815..a227adb701b 100644 --- a/cuda/preconditioner/jacobi_simple_apply_instantiate.inc.cu +++ b/cuda/preconditioner/jacobi_simple_apply_kernels.instantiate.cu @@ -32,7 +32,7 @@ namespace cuda { namespace jacobi { -#include "common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc" +#include // clang-format off diff --git a/hip/factorization/par_ict_kernels.hip.cpp b/hip/factorization/par_ict_kernels.hip.cpp index 4b27383bff5..1d5e412e9dd 100644 --- a/hip/factorization/par_ict_kernels.hip.cpp +++ b/hip/factorization/par_ict_kernels.hip.cpp @@ -5,9 +5,6 @@ #include "core/factorization/par_ict_kernels.hpp" -#include - - #include #include #include @@ -15,6 +12,7 @@ #include +#include "common/cuda_hip/base/runtime.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/coo_builder.hpp" #include "core/matrix/csr_builder.hpp" @@ -49,8 +47,7 @@ using compiled_kernels = syn::value_list; -#include "common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc" -#include "common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ict_kernels.hpp.inc" namespace { diff --git a/hip/factorization/par_ilut_approx_filter_kernel.hip.cpp b/hip/factorization/par_ilut_approx_filter_kernels.hip.cpp similarity index 100% rename from hip/factorization/par_ilut_approx_filter_kernel.hip.cpp rename to hip/factorization/par_ilut_approx_filter_kernels.hip.cpp diff --git a/hip/factorization/par_ilut_filter_kernel.hip.cpp b/hip/factorization/par_ilut_filter_kernels.hip.cpp similarity index 99% rename from hip/factorization/par_ilut_filter_kernel.hip.cpp rename to hip/factorization/par_ilut_filter_kernels.hip.cpp index eef1044878e..2777d218149 100644 --- a/hip/factorization/par_ilut_filter_kernel.hip.cpp +++ b/hip/factorization/par_ilut_filter_kernels.hip.cpp @@ -5,9 +5,6 @@ #include "core/factorization/par_ilut_kernels.hpp" -#include - - #include #include #include @@ -15,6 +12,7 @@ #include +#include "common/cuda_hip/base/runtime.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/coo_builder.hpp" #include "core/matrix/csr_builder.hpp" diff --git a/hip/factorization/par_ilut_select_kernel.hip.cpp b/hip/factorization/par_ilut_select_kernels.hip.cpp similarity index 99% rename from hip/factorization/par_ilut_select_kernel.hip.cpp rename to hip/factorization/par_ilut_select_kernels.hip.cpp index b6d93e65b24..b259133b95d 100644 --- a/hip/factorization/par_ilut_select_kernel.hip.cpp +++ b/hip/factorization/par_ilut_select_kernels.hip.cpp @@ -8,14 +8,12 @@ #include -#include - - #include #include #include +#include "common/cuda_hip/base/runtime.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "hip/base/math.hip.hpp" #include "hip/components/atomic.hip.hpp" diff --git a/hip/factorization/par_ilut_spgeam_kernel.hip.cpp b/hip/factorization/par_ilut_spgeam_kernels.hip.cpp similarity index 99% rename from hip/factorization/par_ilut_spgeam_kernel.hip.cpp rename to hip/factorization/par_ilut_spgeam_kernels.hip.cpp index ad102e49488..cd9d7b7124a 100644 --- a/hip/factorization/par_ilut_spgeam_kernel.hip.cpp +++ b/hip/factorization/par_ilut_spgeam_kernels.hip.cpp @@ -5,9 +5,6 @@ #include "core/factorization/par_ilut_kernels.hpp" -#include - - #include #include #include @@ -15,6 +12,7 @@ #include +#include "common/cuda_hip/base/runtime.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/coo_builder.hpp" #include "core/matrix/csr_builder.hpp" diff --git a/hip/factorization/par_ilut_sweep_kernel.hip.cpp b/hip/factorization/par_ilut_sweep_kernels.hip.cpp similarity index 99% rename from hip/factorization/par_ilut_sweep_kernel.hip.cpp rename to hip/factorization/par_ilut_sweep_kernels.hip.cpp index bdcecc609d5..26672fd2acb 100644 --- a/hip/factorization/par_ilut_sweep_kernel.hip.cpp +++ b/hip/factorization/par_ilut_sweep_kernels.hip.cpp @@ -5,9 +5,6 @@ #include "core/factorization/par_ilut_kernels.hpp" -#include - - #include #include #include @@ -15,6 +12,7 @@ #include +#include "common/cuda_hip/base/runtime.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/coo_builder.hpp" #include "core/matrix/csr_builder.hpp" @@ -85,7 +83,6 @@ void compute_l_u_factors(syn::value_list, } } - GKO_ENABLE_IMPLEMENTATION_SELECTION(select_compute_l_u_factors, compute_l_u_factors);