diff --git a/.nfs0000000216213277000026b7 b/.nfs0000000216213277000026b7 new file mode 100644 index 000000000..5bbc78355 Binary files /dev/null and b/.nfs0000000216213277000026b7 differ diff --git a/BUILD.sh b/BUILD.sh index ccd034724..7dd603145 100755 --- a/BUILD.sh +++ b/BUILD.sh @@ -86,7 +86,7 @@ EOD esac done -set -xv +# set -xv # If MY_CLUSTER is not set by user, try to discover it from environment if [[ ! -v MY_CLUSTER ]] diff --git a/scripts/ascentVariables.sh b/scripts/ascentVariables.sh index 352d13a37..f8c30de23 100644 --- a/scripts/ascentVariables.sh +++ b/scripts/ascentVariables.sh @@ -21,8 +21,8 @@ module load exasgd-coinhsl/2015.06.23/gcc-9.1.0-qe3m7kw module load exasgd-cub/1.16.0/gcc-9.1.0-o5zdbep # cuda@11.4.2%gcc@9.1.0~allow-unsupported-compilers~dev arch=linux-rhel8-power9le module load exasgd-cuda/11.4.2/gcc-9.1.0-4676kh5 -# ginkgo@glu%gcc@9.1.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=70 dev_path=/gpfs/wolf/proj-shared/csc359/src/ginkgo arch=linux-rhel8-power9le -module load exasgd-ginkgo/glu/cuda-11.4.2/gcc-9.1.0-fpuykyc +# ginkgo@1.5.0.glu_experimental%gcc@9.1.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_system=cmake build_type=Release cuda_arch=70 dev_path=/gpfs/wolf/proj-shared/csc359/src/ginkgo arch=linux-rhel8-power9le +module load exasgd-ginkgo/1.5.0.glu_experimental/cuda-11.4.2/gcc-9.1.0-abpriwf # gmp@6.2.1%gcc@9.1.0 libs=shared,static arch=linux-rhel8-power9le module load exasgd-gmp/6.2.1/gcc-9.1.0-umqilrg # gnuconfig@2021-08-14%gcc@9.1.0 arch=linux-rhel8-power9le diff --git a/scripts/marianasVariables.sh b/scripts/marianasVariables.sh index d646e4129..2c206adc9 100644 --- a/scripts/marianasVariables.sh +++ b/scripts/marianasVariables.sh @@ -41,8 +41,8 @@ module load camp-0.2.3-gcc-10.2.0-36lcy72 module load openblas-0.3.20-gcc-10.2.0-x6v3mwm # coinhsl@2019.05.21%gcc@10.2.0+blas arch=linux-centos7-zen2 module load 
coinhsl-2019.05.21-gcc-10.2.0-gkzkws6 -# ginkgo@1.5.0.glu_experimental%gcc@10.2.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=60,70,75,80 arch=linux-centos7-zen2 -module load ginkgo-1.5.0.glu_experimental-gcc-10.2.0-x73b7k3 +# ginkgo@1.5.0.glu_experimental%gcc@10.2.0+cuda~develtools~full_optimizations~hwloc~ipo~mpi~oneapi+openmp~rocm+shared build_system=cmake build_type=Debug cuda_arch=60,70,75,80 arch=linux-centos7-zen2 +module load ginkgo-1.5.0.glu_experimental-gcc-10.2.0-3o5dw4r # magma@2.6.2%gcc@10.2.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=60,70,75,80 arch=linux-centos7-zen2 module load magma-2.6.2-gcc-10.2.0-caockkq # metis@5.1.0%gcc@10.2.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-centos7-zen2 diff --git a/scripts/newellVariables.sh b/scripts/newellVariables.sh index a22c6dbe7..56968ca43 100644 --- a/scripts/newellVariables.sh +++ b/scripts/newellVariables.sh @@ -9,40 +9,40 @@ module use -a /share/apps/modules/tools module use -a /share/apps/modules/compilers module use -a /share/apps/modules/mpi module use -a /etc/modulefiles -module use -a /qfs/projects/exasgd/src/jaelyn-spack/spack/share/spack/modules/linux-centos8-power9le +module use -a /qfs/projects/exasgd/src/ci-newll/ci-modules/linux-centos8-power9le # Load spack-built modules # autoconf@2.69%gcc@8.5.0 patches=35c4492,7793209,a49dd5b arch=linux-centos8-power9le -module load autoconf-2.69-gcc-8.5.0-2mzbyqj -# autoconf-archive@2022.02.11%gcc@8.5.0 patches=130cd48 arch=linux-centos8-power9le -module load autoconf-archive-2022.02.11-gcc-8.5.0-nolgalj +module load autoconf-2.69-gcc-8.5.0-khf4rhm +# autoconf-archive@2022.02.11%gcc@8.5.0 patches=139214f arch=linux-centos8-power9le +module load autoconf-archive-2022.02.11-gcc-8.5.0-hbtsmvt # automake@1.16.5%gcc@8.5.0 arch=linux-centos8-power9le -module load automake-1.16.5-gcc-8.5.0-pnnvoal +module load automake-1.16.5-gcc-8.5.0-4vya5zv # 
berkeley-db@18.1.40%gcc@8.5.0+cxx~docs+stl patches=b231fcc arch=linux-centos8-power9le module load berkeley-db-18.1.40-gcc-8.5.0-cuzn6qn # blt@0.4.1%gcc@8.5.0 arch=linux-centos8-power9le -module load blt-0.4.1-gcc-8.5.0-likpa4a +module load blt-0.4.1-gcc-8.5.0-dp7ssua # bzip2@1.0.8%gcc@8.5.0~debug~pic+shared arch=linux-centos8-power9le module load bzip2-1.0.8-gcc-8.5.0-tsweuon -# ca-certificates-mozilla@2022-03-29%gcc@8.5.0 arch=linux-centos8-power9le -module load ca-certificates-mozilla-2022-03-29-gcc-8.5.0-zyzfhdf -# camp@0.2.2%gcc@8.5.0+cuda~ipo~rocm~tests build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le -module load camp-0.2.2-gcc-8.5.0-5po5zoy -# cmake@3.23.2%gcc@8.5.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-centos8-power9le -module load cmake-3.23.2-gcc-8.5.0-pr3l2mn -# coinhsl@2015.06.23%gcc@8.5.0+blas arch=linux-centos8-power9le -module load coinhsl-2015.06.23-gcc-8.5.0-f6ka4rc +# ca-certificates-mozilla@2022-07-19%gcc@8.5.0 arch=linux-centos8-power9le +module load ca-certificates-mozilla-2022-07-19-gcc-8.5.0-db3wqwx +# camp@0.2.3%gcc@8.5.0+cuda~ipo+openmp~rocm~tests build_system=cmake build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le +module load camp-0.2.3-gcc-8.5.0-x4hzwm5 +# cmake@3.23.3%gcc@8.5.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-centos8-power9le +module load cmake-3.23.3-gcc-8.5.0-h76vmev +# coinhsl@2019.05.21%gcc@8.5.0+blas arch=linux-centos8-power9le +module load coinhsl-2019.05.21-gcc-8.5.0-hoy7u3p # cub@1.16.0%gcc@8.5.0 arch=linux-centos8-power9le module load cub-1.16.0-gcc-8.5.0-p3cnthb # diffutils@3.8%gcc@8.5.0 arch=linux-centos8-power9le module load diffutils-3.8-gcc-8.5.0-ppyuisg # gdbm@1.19%gcc@8.5.0 arch=linux-centos8-power9le -module load gdbm-1.19-gcc-8.5.0-unfo3x4 -# ginkgo@glu_experimental%gcc@8.5.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=70 arch=linux-centos8-power9le -module load 
ginkgo-glu_experimental-gcc-8.5.0-tq3ravg +module load gdbm-1.19-gcc-8.5.0-uowynqh +# ginkgo@1.5.0.glu_experimental%gcc@8.5.0+cuda~develtools~full_optimizations~hwloc~ipo~mpi~oneapi+openmp~rocm+shared build_system=cmake build_type=Debug cuda_arch=70 arch=linux-centos8-power9le +module load ginkgo-1.5.0.glu_experimental-gcc-8.5.0-p3aodax # gmp@6.2.1%gcc@8.5.0 libs=shared,static arch=linux-centos8-power9le -module load gmp-6.2.1-gcc-8.5.0-xlcuuht +module load gmp-6.2.1-gcc-8.5.0-bq7amxg # gnuconfig@2021-08-14%gcc@8.5.0 arch=linux-centos8-power9le module load gnuconfig-2021-08-14-gcc-8.5.0-qjyg7ls # libiconv@1.16%gcc@8.5.0 libs=shared,static arch=linux-centos8-power9le @@ -54,31 +54,31 @@ module load libtool-2.4.7-gcc-8.5.0-kxdso3c # m4@1.4.19%gcc@8.5.0+sigsegv patches=9dc5fbd,bfdffa7 arch=linux-centos8-power9le module load m4-1.4.19-gcc-8.5.0-untfsqf # magma@2.6.2%gcc@8.5.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le -module load magma-2.6.2-gcc-8.5.0-ee3572c +module load magma-2.6.2-gcc-8.5.0-kfhqe36 # metis@5.1.0%gcc@8.5.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-centos8-power9le -module load metis-5.1.0-gcc-8.5.0-ldsei63 +module load metis-5.1.0-gcc-8.5.0-ib64hvb # mpfr@4.1.0%gcc@8.5.0 libs=shared,static arch=linux-centos8-power9le -module load mpfr-4.1.0-gcc-8.5.0-esdxmf2 -# ncurses@6.2%gcc@8.5.0~symlinks+termlib abi=none arch=linux-centos8-power9le -module load ncurses-6.2-gcc-8.5.0-v24hmxo -# openblas@0.3.20%gcc@8.5.0~bignuma~consistent_fpcsr~ilp64+locking+pic+shared symbol_suffix=none threads=none arch=linux-centos8-power9le -module load openblas-0.3.20-gcc-8.5.0-rwstn2s +module load mpfr-4.1.0-gcc-8.5.0-ko56wbz +# ncurses@6.3%gcc@8.5.0~symlinks+termlib abi=none arch=linux-centos8-power9le +module load ncurses-6.3-gcc-8.5.0-glmmmuu +# openblas@0.3.20%gcc@8.5.0~bignuma~consistent_fpcsr~ilp64+locking+pic+shared patches=9f12903 symbol_suffix=none threads=none 
arch=linux-centos8-power9le +module load openblas-0.3.20-gcc-8.5.0-dmvuekp # openssl@1.1.1q%gcc@8.5.0~docs~shared certs=mozilla patches=3fdcf2d arch=linux-centos8-power9le -module load openssl-1.1.1q-gcc-8.5.0-xlfn3bw +## module load openssl-1.1.1q-gcc-8.5.0-lv52izx # perl@5.34.1%gcc@8.5.0+cpanm+shared+threads arch=linux-centos8-power9le -module load perl-5.34.1-gcc-8.5.0-fn534xj +module load perl-5.34.1-gcc-8.5.0-qt5uuuh # pkgconf@1.8.0%gcc@8.5.0 arch=linux-centos8-power9le module load pkgconf-1.8.0-gcc-8.5.0-imrnro2 -# raja@0.14.0%gcc@8.5.0+odule load raja-0.14.0-gcc-8.5.0-qsgckji -module load raja-0.14.0-gcc-8.5.0-qsgckji +# raja@0.14.0%gcc@8.5.0+cuda~examples~exercises~ipo+openmp~rocm+shared~tests build_system=cmake build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le +module load raja-0.14.0-gcc-8.5.0-2pndg26 # readline@8.1.2%gcc@8.5.0 arch=linux-centos8-power9le -module load readline-8.1.2-gcc-8.5.0-l4hzlyf +module load readline-8.1.2-gcc-8.5.0-6rwgkxr # suite-sparse@5.10.1%gcc@8.5.0~cuda~graphblas~openmp+pic~tbb arch=linux-centos8-power9le -module load suite-sparse-5.10.1-gcc-8.5.0-ykffgpl +module load suite-sparse-5.10.1-gcc-8.5.0-yc2nlwi # texinfo@6.5%gcc@8.5.0 patches=12f6edb,1732115 arch=linux-centos8-power9le -module load texinfo-6.5-gcc-8.5.0-fvxyl2q -# umpire@6.0.0%gcc@8.5.0+c+cuda~device_alloc~deviceconst+examples~fortran~ipo~numa~openmp~rocm~shared build_type=RelWithDebInfo cuda_arch=70 tests=none arch=linux-centos8-power9le -module load umpire-6.0.0-gcc-8.5.0-xar7dg5 +module load texinfo-6.5-gcc-8.5.0-v2eju2d +# umpire@6.0.0%gcc@8.5.0+c+cuda~device_alloc~deviceconst~examples~fortran~ipo~numa~openmp~rocm~shared build_system=cmake build_type=RelWithDebInfo cuda_arch=70 tests=none arch=linux-centos8-power9le +module load umpire-6.0.0-gcc-8.5.0-mftt44d # zlib@1.2.12%gcc@8.5.0+optimize+pic+shared patches=0d38234 arch=linux-centos8-power9le module load zlib-1.2.12-gcc-8.5.0-spb5k73 diff --git a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp 
b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp index 0590b7ce9..09d81cf40 100644 --- a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp +++ b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp @@ -222,9 +222,9 @@ int main(int argc, char **argv) nlp.options->SetStringValue("linsol_mode", "speculative"); nlp.options->SetStringValue("linear_solver_sparse", "cusolver-lu"); nlp.options->SetStringValue("cusolver_lu_refactorization", "rf"); - nlp.options->SetIntegerValue("ir_inner_cusolver_maxit", 100); - nlp.options->SetNumericValue("ir_inner_cusolver_tol", 1e-16); - nlp.options->SetIntegerValue("ir_inner_cusolver_restart", 20); + nlp.options->SetIntegerValue("ir_inner_maxit", 100); + nlp.options->SetNumericValue("ir_inner_tol", 1e-16); + nlp.options->SetIntegerValue("ir_inner_restart", 20); nlp.options->SetStringValue("ir_inner_cusolver_gs_scheme", "mgs_pm"); nlp.options->SetStringValue("compute_mode", "hybrid"); // LU solver needs to use inertia free approach @@ -236,6 +236,7 @@ int main(int argc, char **argv) nlp.options->SetStringValue("linsol_mode", "speculative"); nlp.options->SetStringValue("linear_solver_sparse", "ginkgo"); nlp.options->SetStringValue("fact_acceptor", "inertia_free"); + nlp.options->SetIntegerValue("ir_outer_maxit", 0); if (use_ginkgo_cuda) { nlp.options->SetStringValue("ginkgo_exec", "cuda"); } else if (use_ginkgo_hip) { diff --git a/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp b/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp index cfcd10e80..957b31f33 100644 --- a/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp +++ b/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp @@ -121,7 +121,7 @@ namespace hiop refact_ = "glu"; } // by default, dont use iterative refinement - int maxit_test = nlp_->options->GetInteger("ir_inner_cusolver_maxit"); + int maxit_test = nlp_->options->GetInteger("ir_inner_maxit"); if ((maxit_test < 0) || (maxit_test > 1000)){ nlp_->log->printf(hovWarning, @@ -138,7 +138,7 @@ namespace hiop if(use_ir_ == "yes") { if(refact_ == "rf") { - ir_->restart_ = 
nlp_->options->GetInteger("ir_inner_cusolver_restart"); + ir_->restart_ = nlp_->options->GetInteger("ir_inner_restart"); if ((ir_->restart_ <0) || (ir_->restart_ >100)){ nlp_->log->printf(hovWarning, @@ -148,7 +148,7 @@ namespace hiop } - ir_->tol_ = nlp_->options->GetNumeric("ir_inner_cusolver_tol"); + ir_->tol_ = nlp_->options->GetNumeric("ir_inner_tol"); if ((ir_->tol_ <0) || (ir_->tol_ >1)){ nlp_->log->printf(hovWarning, "Wrong tol value: %e. Use double tol value between 0 and 1. Setting default (1e-12) ...\n", diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp index 79e7aa654..4af6348f2 100644 --- a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp +++ b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp @@ -233,7 +233,9 @@ std::shared_ptr create_exec(std::string executor_string) std::shared_ptr setup_solver_factory(std::shared_ptr exec, - std::shared_ptr> mtx) + std::shared_ptr> mtx, + gko::solver::trisolve_algorithm alg, + const unsigned gmres_iter, const double gmres_tol, const unsigned gmres_restart) { auto preprocessing_fact = gko::share(gko::reorder::Mc64::build().on(exec)); auto preprocessing = gko::share(preprocessing_fact->generate(mtx)); @@ -241,19 +243,24 @@ std::shared_ptr setup_solver_factory(std::shared_ptr::build() .with_factorization(lu_fact) + .with_algorithm(alg) .on(exec)); - auto solver_fact = gko::share(gko::solver::Gmres<>::build() - .with_criteria( - gko::stop::Iteration::build() - .with_max_iters(200u) - .on(exec), - gko::stop::ResidualNorm<>::build() - .with_baseline(gko::stop::mode::absolute) - .with_reduction_factor(1e-8) - .on(exec)) - .with_krylov_dim(10u) - .with_preconditioner(inner_solver_fact) - .on(exec)); + + std::shared_ptr solver_fact = inner_solver_fact; + if (gmres_iter > 0) { + solver_fact = gko::share(gko::solver::Gmres::build() + .with_criteria( + gko::stop::Iteration::build() + .with_max_iters(gmres_iter) + .on(exec), + gko::stop::ResidualNorm<>::build() + 
.with_baseline(gko::stop::mode::absolute) + .with_reduction_factor(gmres_tol) + .on(exec)) + .with_krylov_dim(gmres_restart) + .with_preconditioner(inner_solver_fact) + .on(exec)); + } auto reusable_factory = gko::share(gko::solver::ScaledReordered<>::build() .with_solver(solver_fact) @@ -265,6 +272,9 @@ std::shared_ptr setup_solver_factory(std::shared_ptr + hiopLinSolverSymSparseGinkgo::alg_map_ = {{"syncfree", gko::solver::trisolve_algorithm::syncfree}, + {"sparselib", gko::solver::trisolve_algorithm::sparselib}}; hiopLinSolverSymSparseGinkgo::hiopLinSolverSymSparseGinkgo(const int& n, const int& nnz, @@ -289,12 +299,17 @@ std::shared_ptr setup_solver_factory(std::shared_ptr0); exec_ = create_exec(nlp_->options->GetString("ginkgo_exec")); + auto alg = alg_map_.at(nlp_->options->GetString("ginkgo_trisolve")); + auto gmres_iter = nlp_->options->GetInteger("ir_inner_maxit"); + auto gmres_tol = nlp_->options->GetNumeric("ir_inner_tol"); + auto gmres_restart = nlp_->options->GetInteger("ir_inner_restart"); + iterative_refinement_ = gmres_iter > 0; host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_); mtx_ = exec_ == (exec_->get_master()) ? host_mtx_ : gko::clone(exec_, host_mtx_); nnz_ = mtx_->get_num_stored_elements(); - reusable_factory_ = setup_solver_factory(exec_, mtx_); + reusable_factory_ = setup_solver_factory(exec_, mtx_, alg, gmres_iter, gmres_tol, gmres_restart); } int hiopLinSolverSymSparseGinkgo::matrixChanged() @@ -313,9 +328,14 @@ std::shared_ptr setup_solver_factory(std::shared_ptrgenerate(mtx_)); // Temporary solution for the ginkgo GLU integration. - auto sol = gko::as>(gko::as>(gko_solver_)->get_solver()); - auto precond = gko::as>(sol->get_preconditioner()); - auto status = precond->get_factorization_status(); + auto direct = iterative_refinement_ ? 
+ gko::as>( + gko::as>( + gko::as>( + gko_solver_)->get_solver())->get_preconditioner()) : + gko::as>( + gko::as>(gko_solver_)->get_solver()); + auto status = direct->get_factorization_status(); return status == gko::experimental::factorization::status::success ? 0 : -1; } diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp index e87bff3b9..058c606a5 100644 --- a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp +++ b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp @@ -92,7 +92,9 @@ class hiopLinSolverSymSparseGinkgo: public hiopLinSolverSymSparse std::shared_ptr> host_mtx_; std::shared_ptr reusable_factory_; std::shared_ptr gko_solver_; + bool iterative_refinement_; + static const std::map alg_map_; public: diff --git a/src/Utils/hiopOptions.cpp b/src/Utils/hiopOptions.cpp index fc513eb36..0237b85f6 100644 --- a/src/Utils/hiopOptions.cpp +++ b/src/Utils/hiopOptions.cpp @@ -890,6 +890,20 @@ void hiopOptionsNLP::register_options() "Selects the hardware architecture to run the Ginkgo linear solver on."); } + + // choose triangular solver implementation in Ginkgo. + // - 'sparselib' uses vendor triangular solvers + // - 'syncfree' (the default) uses the busy waiting loop based Ginkgo implementation + { + vector range {"syncfree", "sparselib"}; + + register_str_option("ginkgo_trisolve", + "syncfree", + range, + "Selects the triangular solver for Ginkgo."); + } + + // choose sparsity permutation (to reduce nz in the factors). This option is available only when using // Cholesky linear solvers // - metis: use CUDA function csrmetisnd, which is a wrapper of METIS_NodeND; requires linking with @@ -936,24 +950,24 @@ void hiopOptionsNLP::register_options() "'glu' is experimental and 'rf' is NVIDIA's stable refactorization. "); } - - register_int_option("ir_inner_cusolver_restart", + register_int_option("ir_inner_restart", 20, 1, 100, - "FGMRES restart value (default is 20). "); + "(F)GMRES restart value (default is 20). 
"); - register_num_option("ir_inner_cusolver_tol", + register_num_option("ir_inner_tol", 1e-12, 1e-16, 1e-1, - "FGMRES tolerance (default is 1e-12). "); + "(F)GMRES tolerance (default is 1e-12). "); - register_int_option("ir_inner_cusolver_maxit", + register_int_option("ir_inner_maxit", 50, 0, 1000, - "FGMRES maximum number of iterations (default is 50). "); + "(F)GMRES maximum number of iterations (default is 50). "); + { vector range = {"mgs", "cgs2", "mgs_two_synch", "mgs_pm"}; auto default_value = range[0];