From 849f50256cf38b78174a3f1a9933847d7dfa61ee Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Thu, 11 Jul 2024 02:48:55 +0000 Subject: [PATCH 1/6] Add CGS and CGS2 orthogonalization options to GMRES. Change Hessenberg data layout to facilitate CGS communication in distributed solver. --- .../unified/solver/common_gmres_kernels.cpp | 32 ++- common/unified/solver/gmres_kernels.cpp | 25 ++ core/device_hooks/common_kernels.inc.cpp | 1 + core/solver/gmres.cpp | 253 ++++++++++++++++-- core/solver/gmres_kernels.hpp | 18 +- core/test/config/solver.cpp | 3 + include/ginkgo/core/solver/gmres.hpp | 51 +++- reference/solver/common_gmres_kernels.cpp | 46 ++-- reference/solver/gmres_kernels.cpp | 21 ++ reference/test/solver/gmres_kernels.cpp | 100 +++++-- test/mpi/solver/solver.cpp | 8 +- test/solver/gmres_kernels.cpp | 86 ++++-- 12 files changed, 520 insertions(+), 124 deletions(-) diff --git a/common/unified/solver/common_gmres_kernels.cpp b/common/unified/solver/common_gmres_kernels.cpp index 0e6ba18bb64..15637fe701e 100644 --- a/common/unified/solver/common_gmres_kernels.cpp +++ b/common/unified/solver/common_gmres_kernels.cpp @@ -69,28 +69,30 @@ void hessenberg_qr(std::shared_ptr exec, exec, [] GKO_KERNEL(auto rhs, auto givens_sin, auto givens_cos, auto residual_norm, auto residual_norm_collection, - auto hessenberg_iter, auto iter, auto final_iter_nums, - auto stop_status) { + auto hessenberg_iter, auto iter, auto num_rhs, + auto final_iter_nums, auto stop_status) { using value_type = std::decay_t; if (stop_status[rhs].has_stopped()) { return; } // increment iteration count final_iter_nums[rhs]++; - auto hess_this = hessenberg_iter(0, rhs); - auto hess_next = hessenberg_iter(1, rhs); + auto hess_this = + hessenberg_iter(0, rhs); // hessenberg_iter(0, rhs); + auto hess_next = + hessenberg_iter(0, num_rhs + rhs); // hessenberg_iter(1, rhs); // apply previous Givens rotations to column for (decltype(iter) j = 0; j < iter; ++j) { // in here: hess_this = hessenberg_iter(j, rhs); // hess_next = hessenberg_iter(j+1, rhs); - hess_next = hessenberg_iter(j + 1, rhs); + hess_next = hessenberg_iter(0, (j + 1) * num_rhs + rhs); const auto gc = givens_cos(j, rhs); const auto gs = givens_sin(j, rhs); const auto out1 = gc * hess_this + gs * hess_next; const auto out2 = -conj(gs) * hess_this + conj(gc) * hess_next; - hessenberg_iter(j, rhs) = out1; - hessenberg_iter(j + 1, rhs) = hess_this = out2; - hess_next = hessenberg_iter(j + 2, rhs); + hessenberg_iter(0, j * num_rhs + rhs) = out1; + hessenberg_iter(0, (j + 1) * num_rhs + rhs) = hess_this = out2; + hess_next = hessenberg_iter(0, (j + 2) * num_rhs + rhs); } // hess_this is hessenberg_iter(iter, rhs) and // hess_next is hessenberg_iter(iter + 1, rhs) @@ -110,8 +112,9 @@ void hessenberg_qr(std::shared_ptr exec, givens_sin(iter, rhs) = gs = conj(hess_next) / hypotenuse; } // apply new Givens rotation to column - hessenberg_iter(iter, rhs) = gc * hess_this + gs * hess_next; - hessenberg_iter(iter + 1, rhs) = zero(); + hessenberg_iter(0, iter * num_rhs + rhs) = + gc * hess_this + gs * hess_next; + hessenberg_iter(0, (iter + 1) * num_rhs + rhs) = zero(); // apply new Givens rotation to RHS of least-squares problem const auto rnc_new = -conj(gs) * residual_norm_collection(iter, rhs); @@ -120,9 +123,9 @@ void hessenberg_qr(std::shared_ptr exec, gc * residual_norm_collection(iter, rhs); residual_norm(0, rhs) = abs(rnc_new); }, - hessenberg_iter->get_size()[1], givens_sin, givens_cos, residual_norm, - residual_norm_collection, hessenberg_iter, iter, final_iter_nums, - stop_status); + residual_norm->get_size()[1], givens_sin, givens_cos, residual_norm, + residual_norm_collection, hessenberg_iter, iter, + residual_norm->get_size()[1], final_iter_nums, stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -146,7 +149,8 @@ void solve_krylov(std::shared_ptr exec, for (int64 i = sizes[col] - 1; i >= 0; i--) { auto value = rhs(i, col); for (int64 j = i + 1; j < sizes[col]; j++) { - value -= mtx(i, j * num_cols + col) * y(j, col); + // i is the Krylov vector, j is Arnoldi iter + value -= mtx(j, i * num_cols + col) * y(j, col); } // y(i) = (rhs(i) - U(i,i+1:) * y(i+1:)) / U(i, i) y(i, col) = value / mtx(i, i * num_cols + col); diff --git a/common/unified/solver/gmres_kernels.cpp b/common/unified/solver/gmres_kernels.cpp index 3997963f8d7..c10dc2562e5 100644 --- a/common/unified/solver/gmres_kernels.cpp +++ b/common/unified/solver/gmres_kernels.cpp @@ -8,6 +8,7 @@ #include #include "common/unified/base/kernel_launch.hpp" +#include "common/unified/base/kernel_launch_reduction.hpp" namespace gko { @@ -94,6 +95,30 @@ void multi_axpy(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL); +template +void multi_dot(std::shared_ptr exec, + const matrix::Dense* krylov_bases, + const matrix::Dense* next_krylov, + matrix::Dense* hessenberg_col) +{ + run_kernel_col_reduction( + exec, + [] GKO_KERNEL(auto row, auto col, auto bases, auto next_krylov, + auto num_rhs, auto num_rows) { + auto irhs = col % num_rhs; // which rhs + auto ivec = col / num_rhs; // which Krylov vector + return conj(bases(ivec * num_rows + row, irhs)) * + next_krylov(row, irhs); + }, + GKO_KERNEL_REDUCE_SUM(ValueType), hessenberg_col->get_values(), + gko::dim<2>{next_krylov->get_size()[0], + hessenberg_col->get_size()[1] - next_krylov->get_size()[1]}, + krylov_bases, next_krylov, next_krylov->get_size()[1], + next_krylov->get_size()[0]); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_DOT_KERNEL); + } // namespace gmres } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index f5dc92ce16e..1ba925e94e3 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -553,6 +553,7 @@ namespace gmres { GKO_STUB_VALUE_TYPE(GKO_DECLARE_GMRES_RESTART_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_DOT_KERNEL); } // namespace gmres diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index cd3d88a5c02..f6fb254cf94 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -33,9 +33,25 @@ GKO_REGISTER_OPERATION(restart, gmres::restart); GKO_REGISTER_OPERATION(hessenberg_qr, common_gmres::hessenberg_qr); GKO_REGISTER_OPERATION(solve_krylov, common_gmres::solve_krylov); GKO_REGISTER_OPERATION(multi_axpy, gmres::multi_axpy); +GKO_REGISTER_OPERATION(multi_dot, gmres::multi_dot); } // anonymous namespace + + +std::ostream& operator<<(std::ostream& stream, orthog_method orthog) +{ + switch (orthog) { + case orthog_method::mgs: + return stream << "mgs"; + case orthog_method::cgs: + return stream << "cgs"; + case orthog_method::cgs2: + return stream << "cgs2"; + } + return stream; +} + } // namespace gmres @@ -52,6 +68,20 @@ typename Gmres::parameters_type Gmres::parse( if (auto& obj = config.get("flexible")) { params.with_flexible(gko::config::get_value(obj)); } + if (auto& obj = config.get("orthog_method")) { + auto str = obj.get_string(); + gmres::orthog_method orthog; + if (str == "mgs") { + orthog = gmres::orthog_method::mgs; + } else if (str == "cgs") { + orthog = gmres::orthog_method::cgs; + } else if (str == "cgs2") { + orthog = gmres::orthog_method::cgs2; + } else { + GKO_INVALID_CONFIG_VALUE("orthog_method", str); + } + params.with_orthog_method(orthog); + } return params; } @@ -112,6 +142,155 @@ struct help_compute_norm { } }; +namespace { +// Orthogonalization helper functions +template +void orthogonalize_mgs(matrix::Dense* hessenberg_iter, + VectorType* krylov_bases, VectorType* next_krylov, + array& reduction_tmp, size_type restart_iter, + size_type num_rows, size_type num_rhs, + size_type local_num_rows) +{ + for (size_type i = 0; i <= restart_iter; i++) { + // orthogonalize against krylov_bases(:, i): + // hessenberg(i, restart_iter) = next_krylov' * krylov_bases(:, + // i) next_krylov -= hessenberg(i, restart_iter) * + // krylov_bases(:, i) + auto hessenberg_entry = hessenberg_iter->create_submatrix( + span{0, 1}, span{i * num_rhs, (i + 1) * num_rhs}); + auto krylov_basis = ::gko::detail::create_submatrix_helper( + krylov_bases, dim<2>{num_rows, num_rhs}, + span{local_num_rows * i, local_num_rows * (i + 1)}, + span{0, num_rhs}); + next_krylov->compute_conj_dot(krylov_basis, hessenberg_entry, + reduction_tmp); + next_krylov->sub_scaled(hessenberg_entry, krylov_basis); + } +} + +template +void finish_reduce(matrix::Dense* hessenberg_iter, + matrix::Dense* next_krylov, + const size_type num_rhs, const size_type restart_iter) +{ + return; +} + +#if GINKGO_BUILD_MPI +template +void finish_reduce(matrix::Dense* hessenberg_iter, + experimental::distributed::Vector* next_krylov, + const size_type num_rhs, const size_type restart_iter) +{ + auto exec = hessenberg_iter->get_executor(); + const auto comm = next_krylov->get_communicator(); + exec->synchronize(); + // hessenberg_iter is the size of all non-zeros for this iteration -- but we + // are not setting the last values for each rhs (values that would be below + // the diagonal in the "full" matrix. + auto hessenberg_reduce = hessenberg_iter->create_submatrix( + span{0, 1}, span{0, num_rhs * (restart_iter + 1)}); + if (experimental::mpi::requires_host_buffer(exec, comm)) { + ::gko::detail::DenseCache host_reduction_buffer; + host_reduction_buffer.init(exec->get_master(), + hessenberg_reduce->get_size()); + host_reduction_buffer->copy_from(hessenberg_reduce); + comm.all_reduce(exec->get_master(), host_reduction_buffer->get_values(), + static_cast(hessenberg_reduce->get_size()[1]), + MPI_SUM); + hessenberg_reduce->copy_from(host_reduction_buffer.get()); + } else { + comm.all_reduce(exec, hessenberg_reduce->get_values(), + static_cast(hessenberg_reduce->get_size()[1]), + MPI_SUM); + } +} +#endif + +template +void orthogonalize_cgs(matrix::Dense* hessenberg_iter, + VectorType* krylov_bases, VectorType* next_krylov, + size_type restart_iter, size_type num_rows, + size_type num_rhs, size_type local_num_rows) +{ + auto exec = hessenberg_iter->get_executor(); + // hessenberg(0:restart_iter, restart_iter) = krylov_basis' * + // next_krylov + auto krylov_basis_small = ::gko::detail::create_submatrix_helper( + krylov_bases, dim<2>{num_rows, num_rhs}, + span{0, local_num_rows * (restart_iter + 1)}, span{0, num_rhs}); + exec->run(gmres::make_multi_dot( + gko::detail::get_local(krylov_basis_small.get()), + gko::detail::get_local(next_krylov), hessenberg_iter)); + finish_reduce(hessenberg_iter, next_krylov, num_rhs, restart_iter); + for (size_type i = 0; i <= restart_iter; i++) { + // next_krylov -= hessenberg(i, restart_iter) * krylov_bases(:, + // i) + auto hessenberg_entry = hessenberg_iter->create_submatrix( + span{0, 1}, span{i * num_rhs, (i + 1) * num_rhs}); + auto krylov_col = ::gko::detail::create_submatrix_helper( + krylov_bases, dim<2>{num_rows, num_rhs}, + span{local_num_rows * i, local_num_rows * (i + 1)}, + span{0, num_rhs}); + next_krylov->sub_scaled(hessenberg_entry, krylov_col); + } +} + + +template +void orthogonalize_cgs2(matrix::Dense* hessenberg_iter, + VectorType* krylov_bases, VectorType* next_krylov, + matrix::Dense* hessenberg_aux, + matrix::Dense* one_op, + size_type restart_iter, size_type num_rows, + size_type num_rhs, size_type local_num_rows) +{ + auto exec = hessenberg_iter->get_executor(); + // hessenberg(0:restart_iter, restart_iter) = krylov_bases' * + // next_krylov + auto krylov_basis_small = ::gko::detail::create_submatrix_helper( + krylov_bases, dim<2>{num_rows, num_rhs}, + span{0, local_num_rows * (restart_iter + 1)}, span{0, num_rhs}); + exec->run(gmres::make_multi_dot( + gko::detail::get_local(krylov_basis_small.get()), + gko::detail::get_local(next_krylov), hessenberg_iter)); + finish_reduce(hessenberg_iter, next_krylov, num_rhs, restart_iter); + for (size_type i = 0; i <= restart_iter; i++) { + // next_krylov -= hessenberg(i, restart_iter) * krylov_bases(:, + // i) + auto hessenberg_entry = hessenberg_iter->create_submatrix( + span{0, 1}, span{i * num_rhs, (i + 1) * num_rhs}); + auto krylov_col = ::gko::detail::create_submatrix_helper( + krylov_bases, dim<2>{num_rows, num_rhs}, + span{local_num_rows * i, local_num_rows * (i + 1)}, + span{0, num_rhs}); + next_krylov->sub_scaled(hessenberg_entry, krylov_col); + } + // Re-orthogonalize + auto hessenberg_aux_iter = hessenberg_aux->create_submatrix( + span{0, 1}, span{0, (restart_iter + 2) * num_rhs}); + exec->run(gmres::make_multi_dot( + gko::detail::get_local(krylov_basis_small.get()), + gko::detail::get_local(next_krylov), hessenberg_aux_iter.get())); + finish_reduce(hessenberg_aux_iter.get(), next_krylov, num_rhs, + restart_iter); + + for (size_type i = 0; i <= restart_iter; i++) { + // next_krylov -= hessenberg(i, restart_iter) * krylov_bases(:, + // i) + auto hessenberg_entry = hessenberg_aux->create_submatrix( + span{0, 1}, span{i * num_rhs, (i + 1) * num_rhs}); + auto krylov_col = ::gko::detail::create_submatrix_helper( + krylov_bases, dim<2>{num_rows, num_rhs}, + span{local_num_rows * i, local_num_rows * (i + 1)}, + span{0, num_rhs}); + next_krylov->sub_scaled(hessenberg_entry, krylov_col); + } + // Add both Hessenberg columns + hessenberg_iter->add_scaled(one_op, hessenberg_aux_iter); +} +} // anonymous namespace + template struct help_compute_norm::value>> { @@ -127,7 +306,6 @@ struct help_compute_norm template void Gmres::apply_dense_impl(const VectorType* dense_b, @@ -161,9 +339,23 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, dim<2>{num_rows * (krylov_dim + 1), num_rhs}, dim<2>{local_num_rows * (krylov_dim + 1), num_rhs}); } - // rows: rows of Hessenberg matrix, columns: block for each entry + // The Hessenberg matrix formed by the Arnoldi process is of shape + // (krylov_dim + 1) x (krylov_dim) for a single RHS. The (i,j)th + // entry is associated with the ith Krylov basis vector and the jth + // iteration of Arnoldi. + // For ease of using the reduction kernels locally and for having + // contiguous memory for communicating in the distributed case, we + // will store the Hessenberg matrix in the shape + // (krylov_dim) x ((krylov_dim + 1) * num_rhs), where the (i,j)th + // entry is associated with the ith iteration and the (j/num_rhs)th + // Krylov basis vector, for the (j % num_rhs)th RHS vector. auto hessenberg = this->template create_workspace_op( - ws::hessenberg, dim<2>{krylov_dim + 1, krylov_dim * num_rhs}); + ws::hessenberg, dim<2>{krylov_dim, (krylov_dim + 1) * num_rhs}); + LocalVector* hessenberg_aux = nullptr; + if (this->parameters_.orthog_method == gmres::orthog_method::cgs2) { + hessenberg_aux = this->template create_workspace_op( + ws::hessenberg_aux, dim<2>{1, (krylov_dim + 1) * num_rhs}); + } auto givens_sin = this->template create_workspace_op( ws::givens_sin, dim<2>{krylov_dim, num_rhs}); auto givens_cos = this->template create_workspace_op( @@ -312,36 +504,39 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, this->get_preconditioner()->apply(this_krylov, preconditioned_krylov_vector); - // Create view of current column in the hessenberg matrix: - // hessenberg_iter = hessenberg(:, restart_iter); - auto hessenberg_iter = hessenberg->create_submatrix( - span{0, restart_iter + 2}, - span{num_rhs * restart_iter, num_rhs * (restart_iter + 1)}); + // Create view of current "column" in the hessenberg matrix: + // hessenberg_iter = hessenberg(:, restart_iter), which + // is actually stored as a row, hessenberg(restart_iter, :) + auto hessenberg_iter = + hessenberg->create_submatrix(span{restart_iter, restart_iter + 1}, + span{0, num_rhs * (restart_iter + 2)}); // Start of Arnoldi // next_krylov = A * preconditioned_krylov_vector this->get_system_matrix()->apply(preconditioned_krylov_vector, next_krylov); - - for (size_type i = 0; i <= restart_iter; i++) { - // orthogonalize against krylov_bases(:, i): - // hessenberg(i, restart_iter) = next_krylov' * krylov_bases(:, i) - // next_krylov -= hessenberg(i, restart_iter) * krylov_bases(:, i) - auto hessenberg_entry = hessenberg_iter->create_submatrix( - span{i, i + 1}, span{0, num_rhs}); - auto krylov_basis = ::gko::detail::create_submatrix_helper( - krylov_bases, dim<2>{num_rows, num_rhs}, - span{local_num_rows * i, local_num_rows * (i + 1)}, - span{0, num_rhs}); - next_krylov->compute_conj_dot(krylov_basis, hessenberg_entry, - reduction_tmp); - next_krylov->sub_scaled(hessenberg_entry, krylov_basis); + if (this->parameters_.orthog_method == gmres::orthog_method::mgs) { + orthogonalize_mgs(hessenberg_iter.get(), krylov_bases, + next_krylov.get(), reduction_tmp, restart_iter, + num_rows, num_rhs, local_num_rows); + } else if (this->parameters_.orthog_method == + gmres::orthog_method::cgs) { + orthogonalize_cgs(hessenberg_iter.get(), krylov_bases, + next_krylov.get(), restart_iter, num_rows, + num_rhs, local_num_rows); + } else if (this->parameters_.orthog_method == + gmres::orthog_method::cgs2) { + orthogonalize_cgs2(hessenberg_iter.get(), krylov_bases, + next_krylov.get(), hessenberg_aux, one_op, + restart_iter, num_rows, num_rhs, local_num_rows); } // normalize next_krylov: // hessenberg(restart_iter+1, restart_iter) = norm(next_krylov) + // (stored in hessenberg(restart_iter, (restart_iter + 1) * num_rhs)) // next_krylov /= hessenberg(restart_iter+1, restart_iter) auto hessenberg_norm_entry = hessenberg_iter->create_submatrix( - span{restart_iter + 1, restart_iter + 2}, span{0, num_rhs}); + span{0, 1}, + span{(restart_iter + 1) * num_rhs, (restart_iter + 2) * num_rhs}); help_compute_norm::compute_next_krylov_norm_into_hessenberg( next_krylov.get(), hessenberg_norm_entry.get(), next_krylov_norm_tmp, reduction_tmp); @@ -379,7 +574,7 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, } auto hessenberg_small = hessenberg->create_submatrix( - span{0, restart_iter}, span{0, num_rhs * (restart_iter)}); + span{0, restart_iter}, span{0, num_rhs * restart_iter}); // Solve upper triangular. // y = hessenberg \ residual_norm_collection @@ -443,7 +638,7 @@ int workspace_traits>::num_arrays(const Solver&) template int workspace_traits>::num_vectors(const Solver&) { - return 15; + return 16; } @@ -455,6 +650,7 @@ std::vector workspace_traits>::op_names( "preconditioned_vector", "krylov_bases", "hessenberg", + "hessenberg_aux", "givens_sin", "givens_cos", "residual_norm_collection", @@ -480,10 +676,9 @@ std::vector workspace_traits>::array_names( template std::vector workspace_traits>::scalars(const Solver&) { - return {hessenberg, givens_sin, - givens_cos, residual_norm_collection, - residual_norm, y, - next_krylov_norm_tmp}; + return {hessenberg, hessenberg_aux, givens_sin, + givens_cos, residual_norm_collection, residual_norm, + y, next_krylov_norm_tmp}; } diff --git a/core/solver/gmres_kernels.hpp b/core/solver/gmres_kernels.hpp index 196b0de3ab0..f9fbe76279b 100644 --- a/core/solver/gmres_kernels.hpp +++ b/core/solver/gmres_kernels.hpp @@ -38,11 +38,19 @@ namespace gmres { stopping_status* stop_status) -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_GMRES_RESTART_KERNEL(ValueType); \ - template \ - GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL(ValueType) +#define GKO_DECLARE_GMRES_MULTI_DOT_KERNEL(_type) \ + void multi_dot(std::shared_ptr exec, \ + const matrix::Dense<_type>* krylov_bases, \ + const matrix::Dense<_type>* next_krylov, \ + matrix::Dense<_type>* hessenberg_col) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_GMRES_RESTART_KERNEL(ValueType); \ + template \ + GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_GMRES_MULTI_DOT_KERNEL(ValueType) } // namespace gmres diff --git a/core/test/config/solver.cpp b/core/test/config/solver.cpp index 8a2f025d00a..78f1f7351f8 100644 --- a/core/test/config/solver.cpp +++ b/core/test/config/solver.cpp @@ -289,6 +289,8 @@ struct Gmres param.with_krylov_dim(3u); config_map["flexible"] = pnode{true}; param.with_flexible(true); + config_map["orthog_method"] = pnode{"cgs"}; + param.with_orthog_method(gko::solver::gmres::orthog_method::cgs); } template @@ -300,6 +302,7 @@ struct Gmres solver_config_test::template validate(result, answer); ASSERT_EQ(res_param.krylov_dim, ans_param.krylov_dim); ASSERT_EQ(res_param.flexible, ans_param.flexible); + ASSERT_EQ(res_param.orthog_method, ans_param.orthog_method); } }; diff --git a/include/ginkgo/core/solver/gmres.hpp b/include/ginkgo/core/solver/gmres.hpp index 57bbca0b529..308dadf5218 100644 --- a/include/ginkgo/core/solver/gmres.hpp +++ b/include/ginkgo/core/solver/gmres.hpp @@ -31,6 +31,29 @@ namespace solver { constexpr size_type gmres_default_krylov_dim = 100u; +namespace gmres { +/** + * Set the orthogonalization method for the Krylov subspace. + */ +enum class orthog_method { + /** + * Modified Gram-Schmidt (default) + */ + mgs, + /** + * Classical Gram-Schmidt + */ + cgs, + /** + * Classical Gram-Schmidt with re-orthogonalization + */ + cgs2 +}; + +/** Prints an orthogonalization method. */ +std::ostream& operator<<(std::ostream& stream, orthog_method orthog); + +} // namespace gmres /** * GMRES or the generalized minimal residual method is an iterative type Krylov @@ -93,6 +116,10 @@ class Gmres /** Flexible GMRES */ bool GKO_FACTORY_PARAMETER_SCALAR(flexible, false); + + /** Orthogonalization method */ + gmres::orthog_method GKO_FACTORY_PARAMETER_SCALAR( + orthog_method, gmres::orthog_method::mgs); }; GKO_ENABLE_LIN_OP_FACTORY(Gmres, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); @@ -167,28 +194,30 @@ struct workspace_traits> { constexpr static int krylov_bases = 2; // hessenberg matrix constexpr static int hessenberg = 3; + // auxiliary space for CGS2 + constexpr static int hessenberg_aux = 4; // givens sin parameters - constexpr static int givens_sin = 4; + constexpr static int givens_sin = 5; // givens cos parameters - constexpr static int givens_cos = 5; + constexpr static int givens_cos = 6; // coefficients of the residual in Krylov space - constexpr static int residual_norm_collection = 6; + constexpr static int residual_norm_collection = 7; // residual norm scalar - constexpr static int residual_norm = 7; + constexpr static int residual_norm = 8; // solution of the least-squares problem in Krylov space - constexpr static int y = 8; + constexpr static int y = 9; // solution of the least-squares problem mapped to the full space - constexpr static int before_preconditioner = 9; + constexpr static int before_preconditioner = 10; // preconditioned solution of the least-squares problem - constexpr static int after_preconditioner = 10; + constexpr static int after_preconditioner = 11; // constant 1.0 scalar - constexpr static int one = 11; + constexpr static int one = 12; // constant -1.0 scalar - constexpr static int minus_one = 12; + constexpr static int minus_one = 13; // temporary norm vector of next_krylov to copy into hessenberg matrix - constexpr static int next_krylov_norm_tmp = 13; + constexpr static int next_krylov_norm_tmp = 14; // preconditioned krylov basis multivector - constexpr static int preconditioned_krylov_bases = 14; + constexpr static int preconditioned_krylov_bases = 15; // stopping status array constexpr static int stop = 0; diff --git a/reference/solver/common_gmres_kernels.cpp b/reference/solver/common_gmres_kernels.cpp index 643c164b828..122c224d5c1 100644 --- a/reference/solver/common_gmres_kernels.cpp +++ b/reference/solver/common_gmres_kernels.cpp @@ -30,14 +30,15 @@ template void calculate_sin_and_cos(matrix::Dense* givens_sin, matrix::Dense* givens_cos, matrix::Dense* hessenberg_iter, - size_type iter, const size_type rhs) + size_type iter, const size_type num_rhs, + const size_type rhs) { - if (is_zero(hessenberg_iter->at(iter, rhs))) { + if (is_zero(hessenberg_iter->at(0, iter * num_rhs + rhs))) { givens_cos->at(iter, rhs) = zero(); givens_sin->at(iter, rhs) = one(); } else { - auto this_hess = hessenberg_iter->at(iter, rhs); - auto next_hess = hessenberg_iter->at(iter + 1, rhs); + auto this_hess = hessenberg_iter->at(0, iter * num_rhs + rhs); + auto next_hess = hessenberg_iter->at(0, (iter + 1) * num_rhs + rhs); const auto scale = abs(this_hess) + abs(next_hess); const auto hypotenuse = scale * sqrt(abs(this_hess / scale) * abs(this_hess / scale) + @@ -52,19 +53,24 @@ template void givens_rotation(matrix::Dense* givens_sin, matrix::Dense* givens_cos, matrix::Dense* hessenberg_iter, size_type iter, + const size_type num_rhs, const stopping_status* stop_status) { - for (size_type i = 0; i < hessenberg_iter->get_size()[1]; ++i) { + for (size_type i = 0; i < num_rhs; ++i) { if (stop_status[i].has_stopped()) { continue; } for (size_type j = 0; j < iter; ++j) { - auto temp = givens_cos->at(j, i) * hessenberg_iter->at(j, i) + - givens_sin->at(j, i) * hessenberg_iter->at(j + 1, i); - hessenberg_iter->at(j + 1, i) = - -conj(givens_sin->at(j, i)) * hessenberg_iter->at(j, i) + - conj(givens_cos->at(j, i)) * hessenberg_iter->at(j + 1, i); - hessenberg_iter->at(j, i) = temp; + auto temp = + givens_cos->at(j, i) * hessenberg_iter->at(0, j * num_rhs + i) + + givens_sin->at(j, i) * + hessenberg_iter->at(0, (j + 1) * num_rhs + i); + hessenberg_iter->at(0, (j + 1) * num_rhs + i) = + -conj(givens_sin->at(j, i)) * + hessenberg_iter->at(0, j * num_rhs + i) + + conj(givens_cos->at(j, i)) * + hessenberg_iter->at(0, (j + 1) * num_rhs + i); + hessenberg_iter->at(0, j * num_rhs + i) = temp; // temp = cos(j)*hessenberg(j) + // sin(j)*hessenberg(j+1) // hessenberg(j+1) = -conj(sin(j))*hessenberg(j) + @@ -72,12 +78,15 @@ void givens_rotation(matrix::Dense* givens_sin, // hessenberg(j) = temp; } - calculate_sin_and_cos(givens_sin, givens_cos, hessenberg_iter, iter, i); + calculate_sin_and_cos(givens_sin, givens_cos, hessenberg_iter, iter, + num_rhs, i); - hessenberg_iter->at(iter, i) = - givens_cos->at(iter, i) * hessenberg_iter->at(iter, i) + - givens_sin->at(iter, i) * hessenberg_iter->at(iter + 1, i); - hessenberg_iter->at(iter + 1, i) = zero(); + hessenberg_iter->at(0, iter * num_rhs + i) = + givens_cos->at(iter, i) * + hessenberg_iter->at(0, iter * num_rhs + i) + + givens_sin->at(iter, i) * + hessenberg_iter->at(0, (iter + 1) * num_rhs + i); + hessenberg_iter->at(0, (iter + 1) * num_rhs + i) = zero(); // hessenberg(iter) = cos(iter)*hessenberg(iter) + // sin(iter)*hessenberg(iter + 1) // hessenberg(iter+1) = 0 @@ -151,7 +160,8 @@ void hessenberg_qr(std::shared_ptr exec, } } - givens_rotation(givens_sin, givens_cos, hessenberg_iter, iter, stop_status); + givens_rotation(givens_sin, givens_cos, hessenberg_iter, iter, + residual_norm->get_size()[1], stop_status); calculate_next_residual_norm(givens_sin, givens_cos, residual_norm, residual_norm_collection, iter, stop_status); } @@ -176,7 +186,7 @@ void solve_krylov(std::shared_ptr exec, for (size_type j = i + 1; j < final_iter_nums[k]; ++j) { temp -= hessenberg->at( - i, j * residual_norm_collection->get_size()[1] + k) * + j, i * residual_norm_collection->get_size()[1] + k) * y->at(j, k); } y->at(i, k) = diff --git a/reference/solver/gmres_kernels.cpp b/reference/solver/gmres_kernels.cpp index a0b22862998..4c482632353 100644 --- a/reference/solver/gmres_kernels.cpp +++ b/reference/solver/gmres_kernels.cpp @@ -71,6 +71,27 @@ void multi_axpy(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL); +template +void multi_dot(std::shared_ptr exec, + const matrix::Dense* krylov_bases, + const matrix::Dense* next_krylov, + matrix::Dense* hessenberg_col) +{ + auto num_rhs = next_krylov->get_size()[1]; + auto krylov_bases_rowoffset = next_krylov->get_size()[0]; + for (size_type i = 0; i < hessenberg_col->get_size()[1]; ++i) { + auto ivec = i / num_rhs; + auto irhs = i % num_rhs; + hessenberg_col->at(0, i) = zero(); + for (size_type j = 0; j < krylov_bases_rowoffset; ++j) { + hessenberg_col->at(0, i) += + krylov_bases->at(ivec * krylov_bases_rowoffset + j, irhs) * + next_krylov->at(j, irhs); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_DOT_KERNEL); } // namespace gmres } // namespace reference diff --git a/reference/test/solver/gmres_kernels.cpp b/reference/test/solver/gmres_kernels.cpp index 00f7766179f..bc877e0ed76 100644 --- a/reference/test/solver/gmres_kernels.cpp +++ b/reference/test/solver/gmres_kernels.cpp @@ -102,7 +102,7 @@ class Gmres : public ::testing::Test { small_y = Mtx::create(exec, gko::dim<2>{small_restart, small_size[1]}); small_hessenberg = Mtx::create( exec, - gko::dim<2>{small_restart + 1, small_restart * small_size[1]}); + gko::dim<2>{small_restart, (small_restart + 1) * small_size[1]}); small_hessenberg->fill(gko::zero()); stopped.converge(1, true); @@ -222,8 +222,8 @@ TYPED_TEST(Gmres, KernelHessenbergQrIter0) this->small_residual_norm->fill(nan); this->small_residual_norm_collection = gko::initialize( {I{1.25, 1.5}, I{nan, nan}, I{95., 94.}}, this->exec); - this->small_hessenberg = gko::initialize( - {I{0.5, -0.75}, I{-0.5, 1}, I{97., 96.}}, this->exec); + this->small_hessenberg = + gko::initialize({I{0.5, -0.75, -0.5, 1, 97., 96.}}, this->exec); this->small_final_iter_nums.get_data()[0] = 0; this->small_final_iter_nums.get_data()[1] = 0; @@ -242,7 +242,7 @@ TYPED_TEST(Gmres, KernelHessenbergQrIter0) GKO_EXPECT_MTX_NEAR(this->small_givens_sin, l({{-0.5 * sqrt(2.), 0.8}, {-72., 73.}}), r::value); GKO_EXPECT_MTX_NEAR(this->small_hessenberg, - l({{0.5 * sqrt(2.), 1.25}, {0., 0.}, {97., 96.}}), + l({{0.5 * sqrt(2.), 1.25, 0., 0., 97., 96.}}), r::value); GKO_EXPECT_MTX_NEAR( this->small_residual_norm_collection, @@ -267,8 +267,8 @@ TYPED_TEST(Gmres, KernelHessenbergQrIter1) this->small_residual_norm->fill(nan); this->small_residual_norm_collection = gko::initialize( {I{95., 94.}, I{1.25, 1.5}, I{nan, nan}}, this->exec); - this->small_hessenberg = gko::initialize( - {I{-0.5, 4}, I{0.25, 0.5}, I{-0.5, 1}}, this->exec); + this->small_hessenberg = + gko::initialize({I{-0.5, 4, 0.25, 0.5, -0.5, 1}}, this->exec); this->small_final_iter_nums.get_data()[0] = 1; this->small_final_iter_nums.get_data()[1] = 1; @@ -287,7 +287,7 @@ TYPED_TEST(Gmres, KernelHessenbergQrIter1) GKO_EXPECT_MTX_NEAR(this->small_givens_sin, l({{0.5, 0.25}, {-0.5 * sqrt(2.), 0.8}}), r::value); GKO_EXPECT_MTX_NEAR(this->small_hessenberg, - l({{-0.375, 2.125}, {0.5 * sqrt(2.), 1.25}, {0., 0.}}), + l({{-0.375, 2.125, 0.5 * sqrt(2.), 1.25, 0., 0.}}), r::value); GKO_EXPECT_MTX_NEAR( this->small_residual_norm_collection, @@ -309,9 +309,8 @@ TYPED_TEST(Gmres, KernelSolveKrylov) this->small_final_iter_nums.get_data()[1] = restart; this->small_hessenberg = gko::initialize( // clang-format off - {{-1, 3, 2, -4}, - {0, 0, 1, 5}, - {nan, nan, nan, nan}}, + {{-1, 3, 0, 0, nan, nan}, + {2, -4, 1, 5, nan, nan}}, // clang-format on this->exec); this->small_residual_norm_collection = @@ -366,6 +365,40 @@ TYPED_TEST(Gmres, KernelMultiAxpy) r::value); } +TYPED_TEST(Gmres, KernelMultiDot) +{ + using T = typename TestFixture::value_type; + using Mtx = typename TestFixture::Mtx; + const T nan = std::numeric_limits>::quiet_NaN(); + const auto restart = this->small_givens_sin->get_size()[0]; + this->small_hessenberg->fill(gko::zero()); + auto hessenberg_iter = this->small_hessenberg->create_submatrix( + gko::span{0, 1}, + gko::span{0, (restart + 1) * this->small_x->get_size()[1]}); + this->small_x = gko::initialize( // next_krylov + {I{-1.0, 2.3}, I{-14.0, -22.0}, I{8.4, 14.2}}, this->exec); + + this->small_krylov_bases = gko::initialize( // restart+1 x rows x #rhs + { + I{1, 10}, // 0, 0, x + I{2, 11}, // 0, 1, x + I{3, 12}, // 0, 2, x + I{4, 13}, // 1, 0, x + I{5, 14}, // 1, 1, x + I{6, 15}, // 1, 2, x + I{7, 16}, // 2, 0, x + I{8, 17}, // 2, 1, x + I{9, 18}, // 2, 2, x + }, + this->exec); + gko::kernels::reference::gmres::multi_dot( + this->exec, this->small_krylov_bases.get(), this->small_x.get(), + hessenberg_iter.get()); + + GKO_ASSERT_MTX_NEAR(hessenberg_iter, + l({{-3.8, -48.6, -23.6, -65.1, -43.4, -81.6}}), + r::value); +} TYPED_TEST(Gmres, SolvesStencilSystem) { @@ -703,28 +736,37 @@ TYPED_TEST(Gmres, SolvesBigDenseSystem1WithRestart) TYPED_TEST(Gmres, SolvesWithPreconditioner) { + using gko::solver::gmres::orthog_method; + using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; using value_type = typename TestFixture::value_type; - auto gmres_factory_preconditioner = - Solver::build() - .with_criteria(gko::stop::Iteration::build().with_max_iters(100u), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value)) - .with_preconditioner( - gko::preconditioner::Jacobi::build() - .with_max_block_size(3u)) - .on(this->exec); - auto solver = gmres_factory_preconditioner->generate(this->mtx_big); - auto b = gko::initialize( - {175352.10, 313410.50, 131114.10, -134116.30, 179529.30, -43564.90}, - this->exec); - auto x = gko::initialize({0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, this->exec); - - solver->apply(b, x); - - GKO_ASSERT_MTX_NEAR(x, l({33.0, -56.0, 81.0, -30.0, 21.0, 40.0}), - r::value * 1e3); + for (auto orthog : + {orthog_method::mgs, orthog_method::cgs, orthog_method::cgs2}) { + SCOPED_TRACE(orthog); + auto gmres_factory_preconditioner = + Solver::build() + .with_orthog_method(orthog) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(100u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .with_preconditioner( + gko::preconditioner::Jacobi::build() + .with_max_block_size(3u)) + .on(this->exec); + auto solver = gmres_factory_preconditioner->generate(this->mtx_big); + auto b = gko::initialize( + {175352.10, 313410.50, 131114.10, -134116.30, 179529.30, -43564.90}, + this->exec); + auto x = + gko::initialize({0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, this->exec); + + solver->apply(b, x); + + GKO_ASSERT_MTX_NEAR(x, l({33.0, -56.0, 81.0, -30.0, 21.0, 40.0}), + r::value * 1e3); + } } diff --git a/test/mpi/solver/solver.cpp b/test/mpi/solver/solver.cpp index 589be91bcba..aaf61cb47ea 100644 --- a/test/mpi/solver/solver.cpp +++ b/test/mpi/solver/solver.cpp @@ -195,13 +195,14 @@ struct Ir : SimpleSolverTest> { }; -template +template struct Gmres : SimpleSolverTest> { static typename solver_type::parameters_type build( std::shared_ptr exec) { return SimpleSolverTest>::build( std::move(exec)) + .with_orthog_method(orthog) .with_krylov_dim(dimension); } }; @@ -531,7 +532,10 @@ class Solver : public CommonMpiTestFixture { using SolverTypes = ::testing::Types, Gcr<100u>, - Gmres<10u>, Gmres<100u>>; + Gmres<10u, gko::solver::gmres::orthog_method::mgs>, + Gmres<10u, gko::solver::gmres::orthog_method::cgs>, + Gmres<10u, gko::solver::gmres::orthog_method::cgs2>, + Gmres<100u, gko::solver::gmres::orthog_method::mgs>>; TYPED_TEST_SUITE(Solver, SolverTypes, TypenameNameGenerator); diff --git a/test/solver/gmres_kernels.cpp b/test/solver/gmres_kernels.cpp index a6c74bd45c0..fb2eab5c040 100644 --- a/test/solver/gmres_kernels.cpp +++ b/test/solver/gmres_kernels.cpp @@ -74,10 +74,11 @@ class Gmres : public CommonTestFixture { b = gen_mtx(m, nrhs); krylov_bases = gen_mtx(m * (gko::solver::gmres_default_krylov_dim + 1), nrhs); - hessenberg = gen_mtx(gko::solver::gmres_default_krylov_dim + 1, - gko::solver::gmres_default_krylov_dim * nrhs); + hessenberg = + gen_mtx(gko::solver::gmres_default_krylov_dim, + (gko::solver::gmres_default_krylov_dim + 1) * nrhs); hessenberg_iter = - gen_mtx(gko::solver::gmres_default_krylov_dim + 1, nrhs); + gen_mtx(1, (gko::solver::gmres_default_krylov_dim + 1) * nrhs); residual = gen_mtx(m, nrhs); residual_norm = gen_mtx(1, nrhs); residual_norm_collection = @@ -272,6 +273,50 @@ TEST_F(Gmres, GmresKernelMultiAxpyIsEquivalentToRef) GKO_ASSERT_ARRAY_EQ(stop_status, d_stop_status); } +TEST_F(Gmres, GmresKernelMultiDotIsEquivalentToRef) +{ + initialize_data(); + + auto krylov_basis = krylov_bases->create_submatrix( + gko::span{ + 0, x->get_size()[0] * (gko::solver::gmres_default_krylov_dim - 1)}, + gko::span{0, x->get_size()[1]}); + auto d_krylov_basis = d_krylov_bases->create_submatrix( + gko::span{0, d_x->get_size()[0] * + (gko::solver::gmres_default_krylov_dim - 1)}, + gko::span{0, d_x->get_size()[1]}); + auto next_krylov = krylov_bases->create_submatrix( + gko::span{ + x->get_size()[0] * (gko::solver::gmres_default_krylov_dim - 1), + x->get_size()[0] * gko::solver::gmres_default_krylov_dim}, + gko::span{0, x->get_size()[1]}); + auto d_next_krylov = d_krylov_bases->create_submatrix( + gko::span{ + d_x->get_size()[0] * (gko::solver::gmres_default_krylov_dim - 1), + d_x->get_size()[0] * gko::solver::gmres_default_krylov_dim}, + gko::span{0, d_x->get_size()[1]}); + gko::kernels::reference::gmres::multi_dot( + ref, krylov_basis.get(), next_krylov.get(), hessenberg_iter.get()); + gko::kernels::GKO_DEVICE_NAMESPACE::gmres::multi_dot( + exec, d_krylov_basis.get(), d_next_krylov.get(), + d_hessenberg_iter.get()); + + // The multidot computation does not set the value below the diagonal + // in the Hessenberg matrix column(s), as that is done after the + // orthogonalization of the next basis vector. In this test, we + // are checking the column(s) created on the last iteration before the + // solver's restart would be triggered, so it is only the final row of + // the Hessenberg column(s) that we ignore. + auto hessenberg_iter_small = hessenberg_iter->create_submatrix( + gko::span{0, 1}, + gko::span{0, gko::solver::gmres_default_krylov_dim * x->get_size()[1]}); + auto d_hessenberg_iter_small = d_hessenberg_iter->create_submatrix( + gko::span{0, 1}, + gko::span{0, gko::solver::gmres_default_krylov_dim * x->get_size()[1]}); + GKO_ASSERT_MTX_NEAR(d_hessenberg_iter_small, hessenberg_iter_small, + r::value); +} + TEST_F(Gmres, GmresApplyOneRHSIsEquivalentToRef) { @@ -294,18 +339,27 @@ TEST_F(Gmres, GmresApplyOneRHSIsEquivalentToRef) TEST_F(Gmres, GmresApplyMultipleRHSIsEquivalentToRef) { - int m = 123; - int n = 5; - auto ref_solver = ref_gmres_factory->generate(mtx); - auto exec_solver = exec_gmres_factory->generate(d_mtx); - auto b = gen_mtx(m, n); - auto x = gen_mtx(m, n); - auto d_b = gko::clone(exec, b); - auto d_x = gko::clone(exec, x); - - ref_solver->apply(b, x); - exec_solver->apply(d_b, d_x); + using gko::solver::gmres::orthog_method; + auto base_params = gko::clone(ref, ref_gmres_factory)->get_parameters(); - GKO_ASSERT_MTX_NEAR(d_b, b, 0); - GKO_ASSERT_MTX_NEAR(d_x, x, r::value * 1e3); + for (auto orthog : + {orthog_method::mgs, orthog_method::cgs, orthog_method::cgs2}) { + SCOPED_TRACE(orthog); + int m = 123; + int n = 5; + auto ref_solver = + base_params.with_orthog_method(orthog).on(ref)->generate(mtx); + auto exec_solver = + base_params.with_orthog_method(orthog).on(exec)->generate(d_mtx); + auto b = gen_mtx(m, n); + auto x = gen_mtx(m, n); + auto d_b = gko::clone(exec, b); + auto d_x = gko::clone(exec, x); + + ref_solver->apply(b, x); + exec_solver->apply(d_b, d_x); + + GKO_ASSERT_MTX_NEAR(d_b, b, 0); + GKO_ASSERT_MTX_NEAR(d_x, x, r::value * 1e3); + } } From e14a3c20b355a0de16329efadbba0fb71820432e Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Thu, 18 Jul 2024 19:09:30 +0000 Subject: [PATCH 2/6] Minor: formatting and comment clarity --- core/solver/gmres.cpp | 12 +++++++----- core/solver/gmres_kernels.hpp | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index f6fb254cf94..7a77988be98 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -154,8 +154,9 @@ void orthogonalize_mgs(matrix::Dense* hessenberg_iter, for (size_type i = 0; i <= restart_iter; i++) { // orthogonalize against krylov_bases(:, i): // hessenberg(i, restart_iter) = next_krylov' * krylov_bases(:, - // i) next_krylov -= hessenberg(i, restart_iter) * - // krylov_bases(:, i) + // i) + // next_krylov -= hessenberg(i, restart_iter) * krylov_bases(:, + // i) auto hessenberg_entry = hessenberg_iter->create_submatrix( span{0, 1}, span{i * num_rhs, (i + 1) * num_rhs}); auto krylov_basis = ::gko::detail::create_submatrix_helper( @@ -185,9 +186,10 @@ void finish_reduce(matrix::Dense* hessenberg_iter, auto exec = hessenberg_iter->get_executor(); const auto comm = next_krylov->get_communicator(); exec->synchronize(); - // hessenberg_iter is the size of all non-zeros for this iteration -- but we - // are not setting the last values for each rhs (values that would be below - // the diagonal in the "full" matrix. + // hessenberg_iter is the size of all non-zeros for this iteration, but we + // are not setting the last values for each rhs here. Values that would be + // below the diagonal in the "full" matrix are skipped, because they will + // be used to hold the norm of next_krylov for each rhs. auto hessenberg_reduce = hessenberg_iter->create_submatrix( span{0, 1}, span{0, num_rhs * (restart_iter + 1)}); if (experimental::mpi::requires_host_buffer(exec, comm)) { diff --git a/core/solver/gmres_kernels.hpp b/core/solver/gmres_kernels.hpp index f9fbe76279b..21bb5854816 100644 --- a/core/solver/gmres_kernels.hpp +++ b/core/solver/gmres_kernels.hpp @@ -44,6 +44,7 @@ namespace gmres { const matrix::Dense<_type>* next_krylov, \ matrix::Dense<_type>* hessenberg_col) + #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ GKO_DECLARE_GMRES_RESTART_KERNEL(ValueType); \ From 5c7b2488972bf41e7371540e334386f97650206b Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Fri, 19 Jul 2024 20:47:28 +0000 Subject: [PATCH 3/6] Reshape hessenberg_iter view to 'logical layout' (one column per rhs) for kernels that do not use the full Hessenberg matrix --- .../unified/solver/common_gmres_kernels.cpp | 29 +++++------ common/unified/solver/gmres_kernels.cpp | 6 ++- core/solver/gmres.cpp | 49 ++++++++++--------- reference/solver/common_gmres_kernels.cpp | 44 +++++++---------- reference/solver/gmres_kernels.cpp | 16 +++--- reference/test/solver/gmres_kernels.cpp | 38 ++++++++++---- test/solver/gmres_kernels.cpp | 10 ++-- 7 files changed, 102 insertions(+), 90 deletions(-) diff --git a/common/unified/solver/common_gmres_kernels.cpp b/common/unified/solver/common_gmres_kernels.cpp index 15637fe701e..679aebcfaa2 100644 --- a/common/unified/solver/common_gmres_kernels.cpp +++ b/common/unified/solver/common_gmres_kernels.cpp @@ -69,30 +69,28 @@ void hessenberg_qr(std::shared_ptr exec, exec, [] GKO_KERNEL(auto rhs, auto givens_sin, auto givens_cos, auto residual_norm, auto residual_norm_collection, - auto hessenberg_iter, auto iter, auto num_rhs, - auto final_iter_nums, auto stop_status) { + auto hessenberg_iter, auto iter, auto final_iter_nums, + auto stop_status) { using value_type = std::decay_t; if (stop_status[rhs].has_stopped()) { return; } // increment iteration count final_iter_nums[rhs]++; - auto hess_this = - hessenberg_iter(0, rhs); // hessenberg_iter(0, rhs); - auto hess_next = - hessenberg_iter(0, num_rhs + rhs); // hessenberg_iter(1, rhs); + auto hess_this = hessenberg_iter(0, rhs); + auto hess_next = hessenberg_iter(1, rhs); // apply previous Givens rotations to column for (decltype(iter) j = 0; j < iter; ++j) { // in here: hess_this = hessenberg_iter(j, rhs); // hess_next = hessenberg_iter(j+1, rhs); - hess_next = hessenberg_iter(0, (j + 1) * num_rhs + rhs); + hess_next = hessenberg_iter(j + 1, rhs); const auto gc = givens_cos(j, rhs); const auto gs = givens_sin(j, rhs); const auto out1 = gc * hess_this + gs * hess_next; const auto out2 = -conj(gs) * hess_this + conj(gc) * hess_next; - hessenberg_iter(0, j * num_rhs + rhs) = out1; - hessenberg_iter(0, (j + 1) * num_rhs + rhs) = hess_this = out2; - hess_next = hessenberg_iter(0, (j + 2) * num_rhs + rhs); + hessenberg_iter(j, rhs) = out1; + hessenberg_iter(j + 1, rhs) = hess_this = out2; + hess_next = hessenberg_iter(j + 2, rhs); } // hess_this is hessenberg_iter(iter, rhs) and // hess_next is hessenberg_iter(iter + 1, rhs) @@ -112,9 +110,8 @@ void hessenberg_qr(std::shared_ptr exec, givens_sin(iter, rhs) = gs = conj(hess_next) / hypotenuse; } // apply new Givens rotation to column - hessenberg_iter(0, iter * num_rhs + rhs) = - gc * hess_this + gs * hess_next; - hessenberg_iter(0, (iter + 1) * num_rhs + rhs) = zero(); + hessenberg_iter(iter, rhs) = gc * hess_this + gs * hess_next; + hessenberg_iter(iter + 1, rhs) = zero(); // apply new Givens rotation to RHS of least-squares problem const auto rnc_new = -conj(gs) * residual_norm_collection(iter, rhs); @@ -123,9 +120,9 @@ void hessenberg_qr(std::shared_ptr exec, gc * residual_norm_collection(iter, rhs); residual_norm(0, rhs) = abs(rnc_new); }, - residual_norm->get_size()[1], givens_sin, givens_cos, residual_norm, - residual_norm_collection, hessenberg_iter, iter, - residual_norm->get_size()[1], final_iter_nums, stop_status); + hessenberg_iter->get_size()[1], givens_sin, givens_cos, residual_norm, + residual_norm_collection, hessenberg_iter, iter, final_iter_nums, + stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( diff --git a/common/unified/solver/gmres_kernels.cpp b/common/unified/solver/gmres_kernels.cpp index c10dc2562e5..f24ae445edb 100644 --- a/common/unified/solver/gmres_kernels.cpp +++ b/common/unified/solver/gmres_kernels.cpp @@ -111,8 +111,10 @@ void multi_dot(std::shared_ptr exec, next_krylov(row, irhs); }, GKO_KERNEL_REDUCE_SUM(ValueType), hessenberg_col->get_values(), - gko::dim<2>{next_krylov->get_size()[0], - hessenberg_col->get_size()[1] - next_krylov->get_size()[1]}, + gko::dim<2>{ + next_krylov->get_size()[0], + hessenberg_col->get_size()[0] * hessenberg_col->get_size()[1] - + next_krylov->get_size()[1]}, krylov_bases, next_krylov, next_krylov->get_size()[1], next_krylov->get_size()[0]); } diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index 7a77988be98..d47eb4428ea 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -157,8 +157,8 @@ void orthogonalize_mgs(matrix::Dense* hessenberg_iter, // i) // next_krylov -= hessenberg(i, restart_iter) * krylov_bases(:, // i) - auto hessenberg_entry = hessenberg_iter->create_submatrix( - span{0, 1}, span{i * num_rhs, (i + 1) * num_rhs}); + auto hessenberg_entry = + hessenberg_iter->create_submatrix(span{i, i + 1}, span{0, num_rhs}); auto krylov_basis = ::gko::detail::create_submatrix_helper( krylov_bases, dim<2>{num_rows, num_rhs}, span{local_num_rows * i, local_num_rows * (i + 1)}, @@ -191,19 +191,18 @@ void finish_reduce(matrix::Dense* hessenberg_iter, // below the diagonal in the "full" matrix are skipped, because they will // be used to hold the norm of next_krylov for each rhs. auto hessenberg_reduce = hessenberg_iter->create_submatrix( - span{0, 1}, span{0, num_rhs * (restart_iter + 1)}); + span{0, restart_iter + 1}, span{0, num_rhs}); + int message_size = static_cast((restart_iter + 1) * num_rhs); if (experimental::mpi::requires_host_buffer(exec, comm)) { ::gko::detail::DenseCache host_reduction_buffer; host_reduction_buffer.init(exec->get_master(), hessenberg_reduce->get_size()); host_reduction_buffer->copy_from(hessenberg_reduce); comm.all_reduce(exec->get_master(), host_reduction_buffer->get_values(), - static_cast(hessenberg_reduce->get_size()[1]), - MPI_SUM); + message_size, MPI_SUM); hessenberg_reduce->copy_from(host_reduction_buffer.get()); } else { - comm.all_reduce(exec, hessenberg_reduce->get_values(), - static_cast(hessenberg_reduce->get_size()[1]), + comm.all_reduce(exec, hessenberg_reduce->get_values(), message_size, MPI_SUM); } } @@ -228,8 +227,8 @@ void orthogonalize_cgs(matrix::Dense* hessenberg_iter, for (size_type i = 0; i <= restart_iter; i++) { // next_krylov -= hessenberg(i, restart_iter) * krylov_bases(:, // i) - auto hessenberg_entry = hessenberg_iter->create_submatrix( - span{0, 1}, span{i * num_rhs, (i + 1) * num_rhs}); + auto hessenberg_entry = + hessenberg_iter->create_submatrix(span{i, i + 1}, span{0, num_rhs}); auto krylov_col = ::gko::detail::create_submatrix_helper( krylov_bases, dim<2>{num_rows, num_rhs}, span{local_num_rows * i, local_num_rows * (i + 1)}, @@ -260,8 +259,8 @@ void orthogonalize_cgs2(matrix::Dense* hessenberg_iter, for (size_type i = 0; i <= restart_iter; i++) { // next_krylov -= hessenberg(i, restart_iter) * krylov_bases(:, // i) - auto hessenberg_entry = hessenberg_iter->create_submatrix( - span{0, 1}, span{i * num_rhs, (i + 1) * num_rhs}); + auto hessenberg_entry = + hessenberg_iter->create_submatrix(span{i, i + 1}, span{0, num_rhs}); auto krylov_col = ::gko::detail::create_submatrix_helper( krylov_bases, dim<2>{num_rows, num_rhs}, span{local_num_rows * i, local_num_rows * (i + 1)}, @@ -270,7 +269,7 @@ void orthogonalize_cgs2(matrix::Dense* hessenberg_iter, } // Re-orthogonalize auto hessenberg_aux_iter = hessenberg_aux->create_submatrix( - span{0, 1}, span{0, (restart_iter + 2) * num_rhs}); + span{0, restart_iter + 2}, span{0, num_rhs}); exec->run(gmres::make_multi_dot( gko::detail::get_local(krylov_basis_small.get()), gko::detail::get_local(next_krylov), hessenberg_aux_iter.get())); @@ -280,8 +279,8 @@ void orthogonalize_cgs2(matrix::Dense* hessenberg_iter, for (size_type i = 0; i <= restart_iter; i++) { // next_krylov -= hessenberg(i, restart_iter) * krylov_bases(:, // i) - auto hessenberg_entry = hessenberg_aux->create_submatrix( - span{0, 1}, span{i * num_rhs, (i + 1) * num_rhs}); + auto hessenberg_entry = + hessenberg_aux->create_submatrix(span{i, i + 1}, span{0, num_rhs}); auto krylov_col = ::gko::detail::create_submatrix_helper( krylov_bases, dim<2>{num_rows, num_rhs}, span{local_num_rows * i, local_num_rows * (i + 1)}, @@ -353,10 +352,13 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, // Krylov basis vector, for the (j % num_rhs)th RHS vector. auto hessenberg = this->template create_workspace_op( ws::hessenberg, dim<2>{krylov_dim, (krylov_dim + 1) * num_rhs}); + // Because the auxiliary Hessenberg workspace only ever stores one + // iteration of data at a time, we store it in the "logical" layout + // from the start. LocalVector* hessenberg_aux = nullptr; if (this->parameters_.orthog_method == gmres::orthog_method::cgs2) { hessenberg_aux = this->template create_workspace_op( - ws::hessenberg_aux, dim<2>{1, (krylov_dim + 1) * num_rhs}); + ws::hessenberg_aux, dim<2>{(krylov_dim + 1), num_rhs}); } auto givens_sin = this->template create_workspace_op( ws::givens_sin, dim<2>{krylov_dim, num_rhs}); @@ -506,12 +508,16 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, this->get_preconditioner()->apply(this_krylov, preconditioned_krylov_vector); - // Create view of current "column" in the hessenberg matrix: + // Create view of current column in the hessenberg matrix: // hessenberg_iter = hessenberg(:, restart_iter), which - // is actually stored as a row, hessenberg(restart_iter, :) - auto hessenberg_iter = - hessenberg->create_submatrix(span{restart_iter, restart_iter + 1}, - span{0, num_rhs * (restart_iter + 2)}); + // is actually stored as a row, hessenberg(restart_iter, :), + // but we will reshape it for viewing in hessenberg_iter. + auto hessenberg_iter = LocalVector::create( + exec, dim<2>{restart_iter + 2, num_rhs}, + make_array_view(exec, (restart_iter + 2) * num_rhs, + hessenberg->get_values() + + restart_iter * hessenberg->get_size()[1]), + num_rhs); // Start of Arnoldi // next_krylov = A * preconditioned_krylov_vector @@ -537,8 +543,7 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, // (stored in hessenberg(restart_iter, (restart_iter + 1) * num_rhs)) // next_krylov /= hessenberg(restart_iter+1, restart_iter) auto hessenberg_norm_entry = hessenberg_iter->create_submatrix( - span{0, 1}, - span{(restart_iter + 1) * num_rhs, (restart_iter + 2) * num_rhs}); + span{restart_iter + 1, restart_iter + 2}, span{0, num_rhs}); help_compute_norm::compute_next_krylov_norm_into_hessenberg( next_krylov.get(), hessenberg_norm_entry.get(), next_krylov_norm_tmp, reduction_tmp); diff --git a/reference/solver/common_gmres_kernels.cpp b/reference/solver/common_gmres_kernels.cpp index 122c224d5c1..4ba091e03ae 100644 --- a/reference/solver/common_gmres_kernels.cpp +++ b/reference/solver/common_gmres_kernels.cpp @@ -30,15 +30,14 @@ template void calculate_sin_and_cos(matrix::Dense* givens_sin, matrix::Dense* givens_cos, matrix::Dense* hessenberg_iter, - size_type iter, const size_type num_rhs, - const size_type rhs) + size_type iter, const size_type rhs) { - if (is_zero(hessenberg_iter->at(0, iter * num_rhs + rhs))) { + if (is_zero(hessenberg_iter->at(iter, rhs))) { givens_cos->at(iter, rhs) = zero(); givens_sin->at(iter, rhs) = one(); } else { - auto this_hess = hessenberg_iter->at(0, iter * num_rhs + rhs); - auto next_hess = hessenberg_iter->at(0, (iter + 1) * num_rhs + rhs); + auto this_hess = hessenberg_iter->at(iter, rhs); + auto next_hess = hessenberg_iter->at(iter + 1, rhs); const auto scale = abs(this_hess) + abs(next_hess); const auto hypotenuse = scale * sqrt(abs(this_hess / scale) * abs(this_hess / scale) + @@ -53,24 +52,19 @@ template void givens_rotation(matrix::Dense* givens_sin, matrix::Dense* givens_cos, matrix::Dense* hessenberg_iter, size_type iter, - const size_type num_rhs, const stopping_status* stop_status) { - for (size_type i = 0; i < num_rhs; ++i) { + for (size_type i = 0; i < hessenberg_iter->get_size()[1]; ++i) { if (stop_status[i].has_stopped()) { continue; } for (size_type j = 0; j < iter; ++j) { - auto temp = - givens_cos->at(j, i) * hessenberg_iter->at(0, j * num_rhs + i) + - givens_sin->at(j, i) * - hessenberg_iter->at(0, (j + 1) * num_rhs + i); - hessenberg_iter->at(0, (j + 1) * num_rhs + i) = - -conj(givens_sin->at(j, i)) * - hessenberg_iter->at(0, j * num_rhs + i) + - conj(givens_cos->at(j, i)) * - hessenberg_iter->at(0, (j + 1) * num_rhs + i); - hessenberg_iter->at(0, j * num_rhs + i) = temp; + auto temp = givens_cos->at(j, i) * hessenberg_iter->at(j, i) + + givens_sin->at(j, i) * hessenberg_iter->at(j + 1, i); + hessenberg_iter->at(j + 1, i) = + -conj(givens_sin->at(j, i)) * hessenberg_iter->at(j, i) + + conj(givens_cos->at(j, i)) * hessenberg_iter->at(j + 1, i); + hessenberg_iter->at(j, i) = temp; // temp = cos(j)*hessenberg(j) + // sin(j)*hessenberg(j+1) // hessenberg(j+1) = -conj(sin(j))*hessenberg(j) + @@ -78,15 +72,12 @@ void givens_rotation(matrix::Dense* givens_sin, // hessenberg(j) = temp; } - calculate_sin_and_cos(givens_sin, givens_cos, hessenberg_iter, iter, - num_rhs, i); + calculate_sin_and_cos(givens_sin, givens_cos, hessenberg_iter, iter, i); - hessenberg_iter->at(0, iter * num_rhs + i) = - givens_cos->at(iter, i) * - hessenberg_iter->at(0, iter * num_rhs + i) + - givens_sin->at(iter, i) * - hessenberg_iter->at(0, (iter + 1) * num_rhs + i); - hessenberg_iter->at(0, (iter + 1) * num_rhs + i) = zero(); + hessenberg_iter->at(iter, i) = + givens_cos->at(iter, i) * hessenberg_iter->at(iter, i) + + givens_sin->at(iter, i) * hessenberg_iter->at(iter + 1, i); + hessenberg_iter->at(iter + 1, i) = zero(); // hessenberg(iter) = cos(iter)*hessenberg(iter) + // sin(iter)*hessenberg(iter + 1) // hessenberg(iter+1) = 0 @@ -160,8 +151,7 @@ void hessenberg_qr(std::shared_ptr exec, } } - givens_rotation(givens_sin, givens_cos, hessenberg_iter, iter, - residual_norm->get_size()[1], stop_status); + givens_rotation(givens_sin, givens_cos, hessenberg_iter, iter, stop_status); calculate_next_residual_norm(givens_sin, givens_cos, residual_norm, residual_norm_collection, iter, stop_status); } diff --git a/reference/solver/gmres_kernels.cpp b/reference/solver/gmres_kernels.cpp index 4c482632353..a7f5a751a3b 100644 --- a/reference/solver/gmres_kernels.cpp +++ b/reference/solver/gmres_kernels.cpp @@ -79,14 +79,14 @@ void multi_dot(std::shared_ptr exec, { auto num_rhs = next_krylov->get_size()[1]; auto krylov_bases_rowoffset = next_krylov->get_size()[0]; - for (size_type i = 0; i < hessenberg_col->get_size()[1]; ++i) { - auto ivec = i / num_rhs; - auto irhs = i % num_rhs; - hessenberg_col->at(0, i) = zero(); - for (size_type j = 0; j < krylov_bases_rowoffset; ++j) { - hessenberg_col->at(0, i) += - krylov_bases->at(ivec * krylov_bases_rowoffset + j, irhs) * - next_krylov->at(j, irhs); + for (size_type i = 0; i < hessenberg_col->get_size()[0] - 1; ++i) { + for (size_type k = 0; k < num_rhs; ++k) { + hessenberg_col->at(i, k) = zero(); + for (size_type j = 0; j < krylov_bases_rowoffset; ++j) { + hessenberg_col->at(i, k) += + conj(krylov_bases->at(i * krylov_bases_rowoffset + j, k)) * + next_krylov->at(j, k); + } } } } diff --git a/reference/test/solver/gmres_kernels.cpp b/reference/test/solver/gmres_kernels.cpp index bc877e0ed76..7bbb30fff11 100644 --- a/reference/test/solver/gmres_kernels.cpp +++ b/reference/test/solver/gmres_kernels.cpp @@ -227,12 +227,19 @@ TYPED_TEST(Gmres, KernelHessenbergQrIter0) this->small_final_iter_nums.get_data()[0] = 0; this->small_final_iter_nums.get_data()[1] = 0; + // Reshape into "hessenberg_iter" columns as done in Gmres + auto hessenberg_iter_rows = this->small_givens_sin->get_size()[0] + 1; + auto hessenberg_iter_cols = this->small_givens_sin->get_size()[1]; + auto hessenberg_reshape = Mtx::create( + this->exec, gko::dim<2>{hessenberg_iter_rows, hessenberg_iter_cols}, + make_array_view(this->exec, hessenberg_iter_rows * hessenberg_iter_cols, + this->small_hessenberg->get_values()), + hessenberg_iter_cols); gko::kernels::reference::common_gmres::hessenberg_qr( this->exec, this->small_givens_sin.get(), this->small_givens_cos.get(), this->small_residual_norm.get(), - this->small_residual_norm_collection.get(), - this->small_hessenberg.get(), iteration, - this->small_final_iter_nums.get_data(), + this->small_residual_norm_collection.get(), hessenberg_reshape.get(), + iteration, this->small_final_iter_nums.get_data(), this->small_stop.get_const_data()); ASSERT_EQ(this->small_final_iter_nums.get_data()[0], 1); @@ -272,12 +279,19 @@ TYPED_TEST(Gmres, KernelHessenbergQrIter1) this->small_final_iter_nums.get_data()[0] = 1; this->small_final_iter_nums.get_data()[1] = 1; + // Reshape into "hessenberg_iter" columns as done in Gmres + auto hessenberg_iter_rows = this->small_givens_sin->get_size()[0] + 1; + auto hessenberg_iter_cols = this->small_givens_sin->get_size()[1]; + auto hessenberg_reshape = Mtx::create( + this->exec, gko::dim<2>{hessenberg_iter_rows, hessenberg_iter_cols}, + make_array_view(this->exec, hessenberg_iter_rows * hessenberg_iter_cols, + this->small_hessenberg->get_values()), + hessenberg_iter_cols); gko::kernels::reference::common_gmres::hessenberg_qr( this->exec, this->small_givens_sin.get(), this->small_givens_cos.get(), this->small_residual_norm.get(), - this->small_residual_norm_collection.get(), - this->small_hessenberg.get(), iteration, - this->small_final_iter_nums.get_data(), + this->small_residual_norm_collection.get(), hessenberg_reshape.get(), + iteration, this->small_final_iter_nums.get_data(), this->small_stop.get_const_data()); ASSERT_EQ(this->small_final_iter_nums.get_data()[0], 2); @@ -372,9 +386,13 @@ TYPED_TEST(Gmres, KernelMultiDot) const T nan = std::numeric_limits>::quiet_NaN(); const auto restart = this->small_givens_sin->get_size()[0]; this->small_hessenberg->fill(gko::zero()); - auto hessenberg_iter = this->small_hessenberg->create_submatrix( - gko::span{0, 1}, - gko::span{0, (restart + 1) * this->small_x->get_size()[1]}); + // Reshape into "hessenberg_iter" columns as done in Gmres + auto hessenberg_iter = Mtx::create( + this->exec, gko::dim<2>{restart + 1, this->small_x->get_size()[1]}, + make_array_view(this->exec, + (restart + 1) * this->small_x->get_size()[1], + this->small_hessenberg->get_values()), + this->small_x->get_size()[1]); this->small_x = gko::initialize( // next_krylov {I{-1.0, 2.3}, I{-14.0, -22.0}, I{8.4, 14.2}}, this->exec); @@ -396,7 +414,7 @@ TYPED_TEST(Gmres, KernelMultiDot) hessenberg_iter.get()); GKO_ASSERT_MTX_NEAR(hessenberg_iter, - l({{-3.8, -48.6, -23.6, -65.1, -43.4, -81.6}}), + l({{-3.8, -48.6}, {-23.6, -65.1}, {0.0, 0.0}}), r::value); } diff --git a/test/solver/gmres_kernels.cpp b/test/solver/gmres_kernels.cpp index fb2eab5c040..d4dcbf19318 100644 --- a/test/solver/gmres_kernels.cpp +++ b/test/solver/gmres_kernels.cpp @@ -78,7 +78,7 @@ class Gmres : public CommonTestFixture { gen_mtx(gko::solver::gmres_default_krylov_dim, (gko::solver::gmres_default_krylov_dim + 1) * nrhs); hessenberg_iter = - gen_mtx(1, (gko::solver::gmres_default_krylov_dim + 1) * nrhs); + gen_mtx(gko::solver::gmres_default_krylov_dim + 1, nrhs); residual = gen_mtx(m, nrhs); residual_norm = gen_mtx(1, nrhs); residual_norm_collection = @@ -308,11 +308,11 @@ TEST_F(Gmres, GmresKernelMultiDotIsEquivalentToRef) // solver's restart would be triggered, so it is only the final row of // the Hessenberg column(s) that we ignore. auto hessenberg_iter_small = hessenberg_iter->create_submatrix( - gko::span{0, 1}, - gko::span{0, gko::solver::gmres_default_krylov_dim * x->get_size()[1]}); + gko::span{0, gko::solver::gmres_default_krylov_dim + 1}, + gko::span{0, x->get_size()[1]}); auto d_hessenberg_iter_small = d_hessenberg_iter->create_submatrix( - gko::span{0, 1}, - gko::span{0, gko::solver::gmres_default_krylov_dim * x->get_size()[1]}); + gko::span{0, gko::solver::gmres_default_krylov_dim + 1}, + gko::span{0, x->get_size()[1]}); GKO_ASSERT_MTX_NEAR(d_hessenberg_iter_small, hessenberg_iter_small, r::value); } From 0a7a0d9704d4f761044d01e6c7be23f6883f79d8 Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Mon, 22 Jul 2024 23:24:49 +0000 Subject: [PATCH 4/6] gmres: minor spacing and namespace fixes --- core/solver/gmres.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index d47eb4428ea..dbad7d8d1d8 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -52,6 +52,7 @@ std::ostream& operator<<(std::ostream& stream, orthog_method orthog) return stream; } + } // namespace gmres @@ -142,7 +143,7 @@ struct help_compute_norm { } }; -namespace { + // Orthogonalization helper functions template void orthogonalize_mgs(matrix::Dense* hessenberg_iter, @@ -169,6 +170,7 @@ void orthogonalize_mgs(matrix::Dense* hessenberg_iter, } } + template void finish_reduce(matrix::Dense* hessenberg_iter, matrix::Dense* next_krylov, @@ -177,6 +179,7 @@ void finish_reduce(matrix::Dense* hessenberg_iter, return; } + #if GINKGO_BUILD_MPI template void finish_reduce(matrix::Dense* hessenberg_iter, @@ -208,6 +211,7 @@ void finish_reduce(matrix::Dense* hessenberg_iter, } #endif + template void orthogonalize_cgs(matrix::Dense* hessenberg_iter, VectorType* krylov_bases, VectorType* next_krylov, @@ -290,7 +294,7 @@ void orthogonalize_cgs2(matrix::Dense* hessenberg_iter, // Add both Hessenberg columns hessenberg_iter->add_scaled(one_op, hessenberg_aux_iter); } -} // anonymous namespace + template struct help_compute_norm template void Gmres::apply_dense_impl(const VectorType* dense_b, From 31cabdfbefffcd87be11a160d7754dd72e6e7843 Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Mon, 22 Jul 2024 23:27:00 +0000 Subject: [PATCH 5/6] Simplify multi_dot kernel test --- test/solver/gmres_kernels.cpp | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/test/solver/gmres_kernels.cpp b/test/solver/gmres_kernels.cpp index d4dcbf19318..a084e17fbdc 100644 --- a/test/solver/gmres_kernels.cpp +++ b/test/solver/gmres_kernels.cpp @@ -301,19 +301,7 @@ TEST_F(Gmres, GmresKernelMultiDotIsEquivalentToRef) exec, d_krylov_basis.get(), d_next_krylov.get(), d_hessenberg_iter.get()); - // The multidot computation does not set the value below the diagonal - // in the Hessenberg matrix column(s), as that is done after the - // orthogonalization of the next basis vector. In this test, we - // are checking the column(s) created on the last iteration before the - // solver's restart would be triggered, so it is only the final row of - // the Hessenberg column(s) that we ignore. - auto hessenberg_iter_small = hessenberg_iter->create_submatrix( - gko::span{0, gko::solver::gmres_default_krylov_dim + 1}, - gko::span{0, x->get_size()[1]}); - auto d_hessenberg_iter_small = d_hessenberg_iter->create_submatrix( - gko::span{0, gko::solver::gmres_default_krylov_dim + 1}, - gko::span{0, x->get_size()[1]}); - GKO_ASSERT_MTX_NEAR(d_hessenberg_iter_small, hessenberg_iter_small, + GKO_ASSERT_MTX_NEAR(d_hessenberg_iter, hessenberg_iter, r::value); } From d553b3a131512f4613c9ca208ddeeb613bcd15f4 Mon Sep 17 00:00:00 2001 From: nbeams <246972+nbeams@users.noreply.github.com> Date: Mon, 5 Aug 2024 19:23:26 +0000 Subject: [PATCH 6/6] Rename orthog_method to ortho_method --- core/solver/gmres.cpp | 35 ++++++++++++------------- core/test/config/solver.cpp | 6 ++--- include/ginkgo/core/solver/gmres.hpp | 8 +++--- reference/test/solver/gmres_kernels.cpp | 10 +++---- test/mpi/solver/solver.cpp | 12 ++++----- test/solver/gmres_kernels.cpp | 12 ++++----- 6 files changed, 41 insertions(+), 42 deletions(-) diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index dbad7d8d1d8..e47714b2186 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -39,14 +39,14 @@ GKO_REGISTER_OPERATION(multi_dot, gmres::multi_dot); } // anonymous namespace -std::ostream& operator<<(std::ostream& stream, orthog_method orthog) +std::ostream& operator<<(std::ostream& stream, ortho_method ortho) { - switch (orthog) { - case orthog_method::mgs: + switch (ortho) { + case ortho_method::mgs: return stream << "mgs"; - case orthog_method::cgs: + case ortho_method::cgs: return stream << "cgs"; - case orthog_method::cgs2: + case ortho_method::cgs2: return stream << "cgs2"; } return stream; @@ -69,19 +69,19 @@ typename Gmres::parameters_type Gmres::parse( if (auto& obj = config.get("flexible")) { params.with_flexible(gko::config::get_value(obj)); } - if (auto& obj = config.get("orthog_method")) { + if (auto& obj = config.get("ortho_method")) { auto str = obj.get_string(); - gmres::orthog_method orthog; + gmres::ortho_method ortho; if (str == "mgs") { - orthog = gmres::orthog_method::mgs; + ortho = gmres::ortho_method::mgs; } else if (str == "cgs") { - orthog = gmres::orthog_method::cgs; + ortho = gmres::ortho_method::cgs; } else if (str == "cgs2") { - orthog = gmres::orthog_method::cgs2; + ortho = gmres::ortho_method::cgs2; } else { - GKO_INVALID_CONFIG_VALUE("orthog_method", str); + GKO_INVALID_CONFIG_VALUE("ortho_method", str); } - params.with_orthog_method(orthog); + params.with_ortho_method(ortho); } return params; } @@ -361,7 +361,7 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, // iteration of data at a time, we store it in the "logical" layout // from the start. LocalVector* hessenberg_aux = nullptr; - if (this->parameters_.orthog_method == gmres::orthog_method::cgs2) { + if (this->parameters_.ortho_method == gmres::ortho_method::cgs2) { hessenberg_aux = this->template create_workspace_op( ws::hessenberg_aux, dim<2>{(krylov_dim + 1), num_rhs}); } @@ -528,17 +528,16 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, // next_krylov = A * preconditioned_krylov_vector this->get_system_matrix()->apply(preconditioned_krylov_vector, next_krylov); - if (this->parameters_.orthog_method == gmres::orthog_method::mgs) { + if (this->parameters_.ortho_method == gmres::ortho_method::mgs) { orthogonalize_mgs(hessenberg_iter.get(), krylov_bases, next_krylov.get(), reduction_tmp, restart_iter, num_rows, num_rhs, local_num_rows); - } else if (this->parameters_.orthog_method == - gmres::orthog_method::cgs) { + } else if (this->parameters_.ortho_method == gmres::ortho_method::cgs) { orthogonalize_cgs(hessenberg_iter.get(), krylov_bases, next_krylov.get(), restart_iter, num_rows, num_rhs, local_num_rows); - } else if (this->parameters_.orthog_method == - gmres::orthog_method::cgs2) { + } else if (this->parameters_.ortho_method == + gmres::ortho_method::cgs2) { orthogonalize_cgs2(hessenberg_iter.get(), krylov_bases, next_krylov.get(), hessenberg_aux, one_op, restart_iter, num_rows, num_rhs, local_num_rows); diff --git a/core/test/config/solver.cpp b/core/test/config/solver.cpp index 78f1f7351f8..a170ebb1e04 100644 --- a/core/test/config/solver.cpp +++ b/core/test/config/solver.cpp @@ -289,8 +289,8 @@ struct Gmres param.with_krylov_dim(3u); config_map["flexible"] = pnode{true}; param.with_flexible(true); - config_map["orthog_method"] = pnode{"cgs"}; - param.with_orthog_method(gko::solver::gmres::orthog_method::cgs); + config_map["ortho_method"] = pnode{"cgs"}; + param.with_ortho_method(gko::solver::gmres::ortho_method::cgs); } template @@ -302,7 +302,7 @@ struct Gmres solver_config_test::template validate(result, answer); ASSERT_EQ(res_param.krylov_dim, ans_param.krylov_dim); ASSERT_EQ(res_param.flexible, ans_param.flexible); - ASSERT_EQ(res_param.orthog_method, ans_param.orthog_method); + ASSERT_EQ(res_param.ortho_method, ans_param.ortho_method); } }; diff --git a/include/ginkgo/core/solver/gmres.hpp b/include/ginkgo/core/solver/gmres.hpp index 308dadf5218..3ba3acf94bb 100644 --- a/include/ginkgo/core/solver/gmres.hpp +++ b/include/ginkgo/core/solver/gmres.hpp @@ -35,7 +35,7 @@ namespace gmres { /** * Set the orthogonalization method for the Krylov subspace. */ -enum class orthog_method { +enum class ortho_method { /** * Modified Gram-Schmidt (default) */ @@ -51,7 +51,7 @@ enum class orthog_method { }; /** Prints an orthogonalization method. */ -std::ostream& operator<<(std::ostream& stream, orthog_method orthog); +std::ostream& operator<<(std::ostream& stream, ortho_method ortho); } // namespace gmres @@ -118,8 +118,8 @@ class Gmres bool GKO_FACTORY_PARAMETER_SCALAR(flexible, false); /** Orthogonalization method */ - gmres::orthog_method GKO_FACTORY_PARAMETER_SCALAR( - orthog_method, gmres::orthog_method::mgs); + gmres::ortho_method GKO_FACTORY_PARAMETER_SCALAR( + ortho_method, gmres::ortho_method::mgs); }; GKO_ENABLE_LIN_OP_FACTORY(Gmres, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/reference/test/solver/gmres_kernels.cpp b/reference/test/solver/gmres_kernels.cpp index 7bbb30fff11..3f11b087bb7 100644 --- a/reference/test/solver/gmres_kernels.cpp +++ b/reference/test/solver/gmres_kernels.cpp @@ -754,17 +754,17 @@ TYPED_TEST(Gmres, SolvesBigDenseSystem1WithRestart) TYPED_TEST(Gmres, SolvesWithPreconditioner) { - using gko::solver::gmres::orthog_method; + using gko::solver::gmres::ortho_method; using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; using value_type = typename TestFixture::value_type; - for (auto orthog : - {orthog_method::mgs, orthog_method::cgs, orthog_method::cgs2}) { - SCOPED_TRACE(orthog); + for (auto ortho : + {ortho_method::mgs, ortho_method::cgs, ortho_method::cgs2}) { + SCOPED_TRACE(ortho); auto gmres_factory_preconditioner = Solver::build() - .with_orthog_method(orthog) + .with_ortho_method(ortho) .with_criteria( gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ResidualNorm::build() diff --git a/test/mpi/solver/solver.cpp b/test/mpi/solver/solver.cpp index aaf61cb47ea..be9f6865c86 100644 --- a/test/mpi/solver/solver.cpp +++ b/test/mpi/solver/solver.cpp @@ -195,14 +195,14 @@ struct Ir : SimpleSolverTest> { }; -template +template struct Gmres : SimpleSolverTest> { static typename solver_type::parameters_type build( std::shared_ptr exec) { return SimpleSolverTest>::build( std::move(exec)) - .with_orthog_method(orthog) + .with_ortho_method(ortho) .with_krylov_dim(dimension); } }; @@ -532,10 +532,10 @@ class Solver : public CommonMpiTestFixture { using SolverTypes = ::testing::Types, Gcr<100u>, - Gmres<10u, gko::solver::gmres::orthog_method::mgs>, - Gmres<10u, gko::solver::gmres::orthog_method::cgs>, - Gmres<10u, gko::solver::gmres::orthog_method::cgs2>, - Gmres<100u, gko::solver::gmres::orthog_method::mgs>>; + Gmres<10u, gko::solver::gmres::ortho_method::mgs>, + Gmres<10u, gko::solver::gmres::ortho_method::cgs>, + Gmres<10u, gko::solver::gmres::ortho_method::cgs2>, + Gmres<100u, gko::solver::gmres::ortho_method::mgs>>; TYPED_TEST_SUITE(Solver, SolverTypes, TypenameNameGenerator); diff --git a/test/solver/gmres_kernels.cpp b/test/solver/gmres_kernels.cpp index a084e17fbdc..72cbc83b002 100644 --- a/test/solver/gmres_kernels.cpp +++ b/test/solver/gmres_kernels.cpp @@ -327,18 +327,18 @@ TEST_F(Gmres, GmresApplyOneRHSIsEquivalentToRef) TEST_F(Gmres, GmresApplyMultipleRHSIsEquivalentToRef) { - using gko::solver::gmres::orthog_method; + using gko::solver::gmres::ortho_method; auto base_params = gko::clone(ref, ref_gmres_factory)->get_parameters(); - for (auto orthog : - {orthog_method::mgs, orthog_method::cgs, orthog_method::cgs2}) { - SCOPED_TRACE(orthog); + for (auto ortho : + {ortho_method::mgs, ortho_method::cgs, ortho_method::cgs2}) { + SCOPED_TRACE(ortho); int m = 123; int n = 5; auto ref_solver = - base_params.with_orthog_method(orthog).on(ref)->generate(mtx); + base_params.with_ortho_method(ortho).on(ref)->generate(mtx); auto exec_solver = - base_params.with_orthog_method(orthog).on(exec)->generate(d_mtx); + base_params.with_ortho_method(ortho).on(exec)->generate(d_mtx); auto b = gen_mtx(m, n); auto x = gen_mtx(m, n); auto d_b = gko::clone(exec, b);