Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add choice for triangular solver implementation for Ginkgo #585

Merged
merged 12 commits into from
Feb 20, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .nfs0000000216213277000026b7
Binary file not shown.
2 changes: 1 addition & 1 deletion BUILD.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ EOD
esac
done

set -xv
# set -xv
pelesh marked this conversation as resolved.
Show resolved Hide resolved

# If MY_CLUSTER is not set by user, try to discover it from environment
if [[ ! -v MY_CLUSTER ]]
Expand Down
4 changes: 2 additions & 2 deletions scripts/ascentVariables.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ module load exasgd-coinhsl/2015.06.23/gcc-9.1.0-qe3m7kw
module load exasgd-cub/1.16.0/gcc-9.1.0-o5zdbep
# cuda@11.4.2%gcc@9.1.0~allow-unsupported-compilers~dev arch=linux-rhel8-power9le
module load exasgd-cuda/11.4.2/gcc-9.1.0-4676kh5
# ginkgo@glu%gcc@9.1.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=70 dev_path=/gpfs/wolf/proj-shared/csc359/src/ginkgo arch=linux-rhel8-power9le
module load exasgd-ginkgo/glu/cuda-11.4.2/gcc-9.1.0-fpuykyc
# ginkgo@1.5.0.glu_experimental%gcc@10.2.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_system=cmake build_type=Debug cuda_arch=70 dev_path=/gpfs/wolf/proj-shared/csc359/src/ginkgo arch=linux-rhel8-power9le
module load exasgd-ginkgo/1.5.0.glu_experimental/cuda-11.4.2/gcc-10.2.0-ndoi6vk
# gmp@6.2.1%gcc@9.1.0 libs=shared,static arch=linux-rhel8-power9le
module load exasgd-gmp/6.2.1/gcc-9.1.0-umqilrg
# gnuconfig@2021-08-14%gcc@9.1.0 arch=linux-rhel8-power9le
Expand Down
4 changes: 2 additions & 2 deletions scripts/marianasVariables.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ module load camp-0.2.3-gcc-10.2.0-36lcy72
module load openblas-0.3.20-gcc-10.2.0-x6v3mwm
# coinhsl@2019.05.21%gcc@10.2.0+blas arch=linux-centos7-zen2
module load coinhsl-2019.05.21-gcc-10.2.0-gkzkws6
# ginkgo@1.5.0.glu_experimental%gcc@10.2.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=60,70,75,80 arch=linux-centos7-zen2
module load ginkgo-1.5.0.glu_experimental-gcc-10.2.0-x73b7k3
# ginkgo@1.5.0.glu_experimental%gcc@10.2.0+cuda~develtools~full_optimizations~hwloc~ipo~mpi~oneapi+openmp~rocm+shared build_system=cmake build_type=Debug cuda_arch=60,70,75,80 arch=linux-centos7-zen2
module load ginkgo-1.5.0.glu_experimental-gcc-10.2.0-3o5dw4r
# magma@2.6.2%gcc@10.2.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=60,70,75,80 arch=linux-centos7-zen2
module load magma-2.6.2-gcc-10.2.0-caockkq
# metis@5.1.0%gcc@10.2.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-centos7-zen2
Expand Down
68 changes: 34 additions & 34 deletions scripts/newellVariables.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,40 +9,40 @@ module use -a /share/apps/modules/tools
module use -a /share/apps/modules/compilers
module use -a /share/apps/modules/mpi
module use -a /etc/modulefiles
module use -a /qfs/projects/exasgd/src/jaelyn-spack/spack/share/spack/modules/linux-centos8-power9le
module use -a /qfs/projects/exasgd/src/ci-newll/ci-modules/linux-centos8-power9le

# Load spack-built modules

# autoconf@2.69%gcc@8.5.0 patches=35c4492,7793209,a49dd5b arch=linux-centos8-power9le
module load autoconf-2.69-gcc-8.5.0-2mzbyqj
# autoconf-archive@2022.02.11%gcc@8.5.0 patches=130cd48 arch=linux-centos8-power9le
module load autoconf-archive-2022.02.11-gcc-8.5.0-nolgalj
module load autoconf-2.69-gcc-8.5.0-khf4rhm
# autoconf-archive@2022.02.11%gcc@8.5.0 patches=139214f arch=linux-centos8-power9le
module load autoconf-archive-2022.02.11-gcc-8.5.0-hbtsmvt
# automake@1.16.5%gcc@8.5.0 arch=linux-centos8-power9le
module load automake-1.16.5-gcc-8.5.0-pnnvoal
module load automake-1.16.5-gcc-8.5.0-4vya5zv
# berkeley-db@18.1.40%gcc@8.5.0+cxx~docs+stl patches=b231fcc arch=linux-centos8-power9le
module load berkeley-db-18.1.40-gcc-8.5.0-cuzn6qn
# blt@0.4.1%gcc@8.5.0 arch=linux-centos8-power9le
module load blt-0.4.1-gcc-8.5.0-likpa4a
module load blt-0.4.1-gcc-8.5.0-dp7ssua
# bzip2@1.0.8%gcc@8.5.0~debug~pic+shared arch=linux-centos8-power9le
module load bzip2-1.0.8-gcc-8.5.0-tsweuon
# ca-certificates-mozilla@2022-03-29%gcc@8.5.0 arch=linux-centos8-power9le
module load ca-certificates-mozilla-2022-03-29-gcc-8.5.0-zyzfhdf
# camp@0.2.2%gcc@8.5.0+cuda~ipo~rocm~tests build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le
module load camp-0.2.2-gcc-8.5.0-5po5zoy
# cmake@3.23.2%gcc@8.5.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-centos8-power9le
module load cmake-3.23.2-gcc-8.5.0-pr3l2mn
# coinhsl@2015.06.23%gcc@8.5.0+blas arch=linux-centos8-power9le
module load coinhsl-2015.06.23-gcc-8.5.0-f6ka4rc
# ca-certificates-mozilla@2022-07-19%gcc@8.5.0 arch=linux-centos8-power9le
module load ca-certificates-mozilla-2022-07-19-gcc-8.5.0-db3wqwx
# camp@0.2.3%gcc@8.5.0+cuda~ipo+openmp~rocm~tests build_system=cmake build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le
module load camp-0.2.3-gcc-8.5.0-x4hzwm5
# cmake@3.23.3%gcc@8.5.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-centos8-power9le
module load cmake-3.23.3-gcc-8.5.0-h76vmev
# coinhsl@2019.05.21%gcc@8.5.0+blas arch=linux-centos8-power9le
module load coinhsl-2019.05.21-gcc-8.5.0-hoy7u3p
# cub@1.16.0%gcc@8.5.0 arch=linux-centos8-power9le
module load cub-1.16.0-gcc-8.5.0-p3cnthb
# diffutils@3.8%gcc@8.5.0 arch=linux-centos8-power9le
module load diffutils-3.8-gcc-8.5.0-ppyuisg
# gdbm@1.19%gcc@8.5.0 arch=linux-centos8-power9le
module load gdbm-1.19-gcc-8.5.0-unfo3x4
# ginkgo@glu_experimental%gcc@8.5.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=70 arch=linux-centos8-power9le
module load ginkgo-glu_experimental-gcc-8.5.0-tq3ravg
module load gdbm-1.19-gcc-8.5.0-uowynqh
# ginkgo@1.5.0.glu_experimental%gcc@8.5.0+cuda~develtools~full_optimizations~hwloc~ipo~mpi~oneapi+openmp~rocm+shared build_system=cmake build_type=Debug cuda_arch=70 arch=linux-centos8-power9le
module load ginkgo-1.5.0.glu_experimental-gcc-8.5.0-p3aodax
# gmp@6.2.1%gcc@8.5.0 libs=shared,static arch=linux-centos8-power9le
module load gmp-6.2.1-gcc-8.5.0-xlcuuht
module load gmp-6.2.1-gcc-8.5.0-bq7amxg
# gnuconfig@2021-08-14%gcc@8.5.0 arch=linux-centos8-power9le
module load gnuconfig-2021-08-14-gcc-8.5.0-qjyg7ls
# libiconv@1.16%gcc@8.5.0 libs=shared,static arch=linux-centos8-power9le
Expand All @@ -54,31 +54,31 @@ module load libtool-2.4.7-gcc-8.5.0-kxdso3c
# m4@1.4.19%gcc@8.5.0+sigsegv patches=9dc5fbd,bfdffa7 arch=linux-centos8-power9le
module load m4-1.4.19-gcc-8.5.0-untfsqf
# magma@2.6.2%gcc@8.5.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le
module load magma-2.6.2-gcc-8.5.0-ee3572c
module load magma-2.6.2-gcc-8.5.0-kfhqe36
# metis@5.1.0%gcc@8.5.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-centos8-power9le
module load metis-5.1.0-gcc-8.5.0-ldsei63
module load metis-5.1.0-gcc-8.5.0-ib64hvb
# mpfr@4.1.0%gcc@8.5.0 libs=shared,static arch=linux-centos8-power9le
module load mpfr-4.1.0-gcc-8.5.0-esdxmf2
# ncurses@6.2%gcc@8.5.0~symlinks+termlib abi=none arch=linux-centos8-power9le
module load ncurses-6.2-gcc-8.5.0-v24hmxo
# openblas@0.3.20%gcc@8.5.0~bignuma~consistent_fpcsr~ilp64+locking+pic+shared symbol_suffix=none threads=none arch=linux-centos8-power9le
module load openblas-0.3.20-gcc-8.5.0-rwstn2s
module load mpfr-4.1.0-gcc-8.5.0-ko56wbz
# ncurses@6.3%gcc@8.5.0~symlinks+termlib abi=none arch=linux-centos8-power9le
module load ncurses-6.3-gcc-8.5.0-glmmmuu
# openblas@0.3.20%gcc@8.5.0~bignuma~consistent_fpcsr~ilp64+locking+pic+shared patches=9f12903 symbol_suffix=none threads=none arch=linux-centos8-power9le
module load openblas-0.3.20-gcc-8.5.0-dmvuekp
# openssl@1.1.1q%gcc@8.5.0~docs~shared certs=mozilla patches=3fdcf2d arch=linux-centos8-power9le
module load openssl-1.1.1q-gcc-8.5.0-xlfn3bw
## module load openssl-1.1.1q-gcc-8.5.0-lv52izx
# perl@5.34.1%gcc@8.5.0+cpanm+shared+threads arch=linux-centos8-power9le
module load perl-5.34.1-gcc-8.5.0-fn534xj
module load perl-5.34.1-gcc-8.5.0-qt5uuuh
# pkgconf@1.8.0%gcc@8.5.0 arch=linux-centos8-power9le
module load pkgconf-1.8.0-gcc-8.5.0-imrnro2
# raja@0.14.0%gcc@8.5.0+odule load raja-0.14.0-gcc-8.5.0-qsgckji
module load raja-0.14.0-gcc-8.5.0-qsgckji
# raja@0.14.0%gcc@8.5.0+cuda~examples~exercises~ipo+openmp~rocm+shared~tests build_system=cmake build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le
module load raja-0.14.0-gcc-8.5.0-2pndg26
# readline@8.1.2%gcc@8.5.0 arch=linux-centos8-power9le
module load readline-8.1.2-gcc-8.5.0-l4hzlyf
module load readline-8.1.2-gcc-8.5.0-6rwgkxr
# suite-sparse@5.10.1%gcc@8.5.0~cuda~graphblas~openmp+pic~tbb arch=linux-centos8-power9le
module load suite-sparse-5.10.1-gcc-8.5.0-ykffgpl
module load suite-sparse-5.10.1-gcc-8.5.0-yc2nlwi
# texinfo@6.5%gcc@8.5.0 patches=12f6edb,1732115 arch=linux-centos8-power9le
module load texinfo-6.5-gcc-8.5.0-fvxyl2q
# umpire@6.0.0%gcc@8.5.0+c+cuda~device_alloc~deviceconst+examples~fortran~ipo~numa~openmp~rocm~shared build_type=RelWithDebInfo cuda_arch=70 tests=none arch=linux-centos8-power9le
module load umpire-6.0.0-gcc-8.5.0-xar7dg5
module load texinfo-6.5-gcc-8.5.0-v2eju2d
# umpire@6.0.0%gcc@8.5.0+c+cuda~device_alloc~deviceconst~examples~fortran~ipo~numa~openmp~rocm~shared build_system=cmake build_type=RelWithDebInfo cuda_arch=70 tests=none arch=linux-centos8-power9le
module load umpire-6.0.0-gcc-8.5.0-mftt44d
# zlib@1.2.12%gcc@8.5.0+optimize+pic+shared patches=0d38234 arch=linux-centos8-power9le
module load zlib-1.2.12-gcc-8.5.0-spb5k73

Expand Down
1 change: 1 addition & 0 deletions src/Drivers/Sparse/NlpSparseEx1Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ int main(int argc, char **argv)
nlp.options->SetStringValue("linsol_mode", "speculative");
nlp.options->SetStringValue("linear_solver_sparse", "ginkgo");
nlp.options->SetStringValue("fact_acceptor", "inertia_free");
nlp.options->SetIntegerValue("ir_outer_maxit", 0);
if (use_ginkgo_cuda) {
nlp.options->SetStringValue("ginkgo_exec", "cuda");
} else if (use_ginkgo_hip) {
Expand Down
44 changes: 34 additions & 10 deletions src/LinAlg/hiopLinSolverSparseGinkgo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,28 +232,46 @@ std::shared_ptr<gko::Executor> create_exec(std::string executor_string)
}


/**
 * @brief Translates a triangular-solver option string into the Ginkgo enum.
 *
 * @param algorithm_string Name of the triangular solver implementation;
 *        the recognized names are "syncfree" and "sparselib".
 * @return The `gko::solver::trisolve_algorithm` value registered under that name.
 * @throws std::out_of_range (from `std::map::at`) if the name is not one of
 *         the recognized names.
 */
gko::solver::trisolve_algorithm create_alg(std::string algorithm_string)
{
  using trisolve_alg = gko::solver::trisolve_algorithm;

  // Lookup table from option name to Ginkgo triangular-solver algorithm.
  const std::map<std::string, trisolve_alg> known_algorithms{
    {"syncfree", trisolve_alg::syncfree},
    {"sparselib", trisolve_alg::sparselib}};

  // at() is used deliberately: an unknown name raises instead of silently
  // inserting a default-constructed entry.
  const trisolve_alg selected = known_algorithms.at(algorithm_string);
  return selected;
}


std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gko::Executor> exec,
std::shared_ptr<gko::matrix::Csr<double, int>> mtx)
std::shared_ptr<gko::matrix::Csr<double, int>> mtx,
gko::solver::trisolve_algorithm alg,
const unsigned gmres_iter, const double gmres_tol, const unsigned gmres_restart)
{
auto preprocessing_fact = gko::share(gko::reorder::Mc64<double, int>::build().on(exec));
auto preprocessing = gko::share(preprocessing_fact->generate(mtx));
auto lu_fact = gko::share(gko::experimental::factorization::Glu<double, int>::build_reusable()
.on(exec, mtx.get(), preprocessing.get()));
auto inner_solver_fact = gko::share(gko::experimental::solver::Direct<double, int>::build()
.with_factorization(lu_fact)
.with_algorithm(alg)
.on(exec));
auto solver_fact = gko::share(gko::solver::Gmres<>::build()
.with_criteria(

std::shared_ptr<gko::LinOpFactory> solver_fact = inner_solver_fact;
if (gmres_iter > 0) {
solver_fact = gko::share(gko::solver::Gmres<double>::build()
cnpetra marked this conversation as resolved.
Show resolved Hide resolved
.with_criteria(
gko::stop::Iteration::build()
.with_max_iters(200u)
.with_max_iters(gmres_iter)
.on(exec),
gko::stop::ResidualNorm<>::build()
.with_baseline(gko::stop::mode::absolute)
.with_reduction_factor(1e-8)
.with_reduction_factor(gmres_tol)
.on(exec))
.with_krylov_dim(10u)
.with_krylov_dim(gmres_restart)
.with_preconditioner(inner_solver_fact)
.on(exec));
}

auto reusable_factory = gko::share(gko::solver::ScaledReordered<>::build()
.with_solver(solver_fact)
Expand Down Expand Up @@ -289,12 +307,17 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
assert(n_>0);

exec_ = create_exec(nlp_->options->GetString("ginkgo_exec"));
auto alg = create_alg(nlp_->options->GetString("ginkgo_trisolve"));
auto gmres_iter = nlp_->options->GetInteger("ir_inner_ginkgo_maxit");
auto gmres_tol = nlp_->options->GetNumeric("ir_inner_ginkgo_tol");
auto gmres_restart = nlp_->options->GetInteger("ir_inner_ginkgo_restart");
iterative_refinement_ = gmres_iter > 0;

host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
mtx_ = exec_ == (exec_->get_master()) ? host_mtx_ : gko::clone(exec_, host_mtx_);
nnz_ = mtx_->get_num_stored_elements();

reusable_factory_ = setup_solver_factory(exec_, mtx_);
reusable_factory_ = setup_solver_factory(exec_, mtx_, alg, gmres_iter, gmres_tol, gmres_restart);
}

int hiopLinSolverSymSparseGinkgo::matrixChanged()
Expand All @@ -313,9 +336,10 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
gko_solver_ = gko::share(reusable_factory_->generate(mtx_));

// Temporary solution for the ginkgo GLU integration.
auto sol = gko::as<gko::solver::Gmres<>>(gko::as<gko::solver::ScaledReordered<>>(gko_solver_)->get_solver());
auto precond = gko::as<gko::experimental::solver::Direct<double, int>>(sol->get_preconditioner());
auto status = precond->get_factorization_status();
auto direct = iterative_refinement_ ?
gko::as<gko::experimental::solver::Direct<double, int>>(gko::as<gko::solver::Gmres<>>(gko::as<gko::solver::ScaledReordered<>>(gko_solver_)->get_solver())->get_preconditioner()) :
cnpetra marked this conversation as resolved.
Show resolved Hide resolved
gko::as<gko::experimental::solver::Direct<double, int>>(gko::as<gko::solver::ScaledReordered<>>(gko_solver_)->get_solver());
auto status = direct->get_factorization_status();

return status == gko::experimental::factorization::status::success ? 0 : -1;
}
Expand Down
1 change: 1 addition & 0 deletions src/LinAlg/hiopLinSolverSparseGinkgo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class hiopLinSolverSymSparseGinkgo: public hiopLinSolverSymSparse
std::shared_ptr<gko::matrix::Csr<double, int>> host_mtx_;
std::shared_ptr<gko::LinOpFactory> reusable_factory_;
std::shared_ptr<gko::LinOp> gko_solver_;
bool iterative_refinement_;


public:
Expand Down
38 changes: 38 additions & 0 deletions src/Utils/hiopOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,44 @@ void hiopOptionsNLP::register_options()
"Selects the hardware architecture to run the Ginkgo linear solver on.");
}


// choose triangular solver implementation in Ginkgo.
// - 'sparselib' uses the vendor triangular solvers
// - 'syncfree' (the default registered below) uses the busy-waiting-loop-based Ginkgo implementation
{
vector<string> range {"syncfree", "sparselib"};

register_str_option("ginkgo_trisolve",
"syncfree",
range,
"Selects the triangular solver for Ginkgo.");
}


// Ginkgo iterative refinement options
{
register_int_option("ir_inner_ginkgo_restart",
cnpetra marked this conversation as resolved.
Show resolved Hide resolved
20,
1,
100,
"GMRES restart value (default is 20). ");

register_num_option("ir_inner_ginkgo_tol",
1e-12,
1e-16,
1e-1,
"GMRES tolerance (default is 1e-12). ");

// 0 iterations means no iterative refinement, making the restart and tolerance options irrelevant.
register_int_option("ir_inner_ginkgo_maxit",
0,
0,
1000,
"GMRES maximum number of iterations (default is 0). ");

}


// choose sparsity permutation (to reduce nz in the factors). This option is available only when using
// Cholesky linear solvers
// - metis: use CUDA function csrmetisnd, which is a wrapper of METIS_NodeND; requires linking with
Expand Down