diff --git a/examples/2_input_output/CMakeLists.txt b/examples/2_input_output/CMakeLists.txt index b479e11..2699b7e 100644 --- a/examples/2_input_output/CMakeLists.txt +++ b/examples/2_input_output/CMakeLists.txt @@ -11,18 +11,18 @@ if( ${Boost_FOUND} ) add_executable( "2_input_output_seq" "2_input_output.cpp" ) - target_link_libraries( "2_input_output_seq" PRIVATE chase_seq ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY}) + target_link_libraries( "2_input_output_seq" PRIVATE chase_seq ${Boost_LIBRARIES}) ############################################################################## # 2_input_output: no GPU, MPI ($A$ distributed among MPI-ranks) ############################################################################## add_executable( "2_input_output" "2_input_output.cpp" ) - target_link_libraries( "2_input_output" chase_mpi ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY}) + target_link_libraries( "2_input_output" chase_mpi ${Boost_LIBRARIES}) target_compile_definitions( "2_input_output" PRIVATE USE_MPI=1 PRINT_EIGENVALUES=1 CHASE_OUTPUT=1) add_executable( "2_input_output_block_cyclic" "2_input_output.cpp" ) - target_link_libraries( "2_input_output_block_cyclic" chase_mpi ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY}) + target_link_libraries( "2_input_output_block_cyclic" chase_mpi ${Boost_LIBRARIES} ) target_compile_definitions( "2_input_output_block_cyclic" PRIVATE USE_MPI=1 USE_BLOCK_CYCLIC=1) install (TARGETS 2_input_output_seq @@ -50,11 +50,11 @@ if( ${Boost_FOUND} ) endif() add_executable( 2_input_output_mgpu "2_input_output.cpp" ) - target_link_libraries( 2_input_output_mgpu chase_mpi chase_cuda ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY}) + target_link_libraries( 2_input_output_mgpu chase_mpi chase_cuda ${Boost_LIBRARIES}) target_compile_definitions( "2_input_output_mgpu" PRIVATE DRIVER_BUILD_MGPU=1 CHASE_OUTPUT=1 PRINT_EIGENVALUES=1 USE_MPI=1) add_executable( 2_input_output_mgpu_block_cyclic "2_input_output.cpp" ) - target_link_libraries( 2_input_output_mgpu_block_cyclic chase_mpi chase_cuda ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY}) + target_link_libraries( 2_input_output_mgpu_block_cyclic chase_mpi chase_cuda ${Boost_LIBRARIES} ) target_compile_definitions( "2_input_output_mgpu_block_cyclic" PRIVATE DRIVER_BUILD_MGPU=1 USE_MPI=1 USE_BLOCK_CYCLIC=1) install (TARGETS 2_input_output_mgpu diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp index 1b31d5f..4d18998 100644 --- a/interface/chase_c.cpp +++ b/interface/chase_c.cpp @@ -23,6 +23,7 @@ #ifdef HAS_GPU #include "ChASE-MPI/impl/chase_mpidla_mgpu.hpp" + #include "ChASE-MPI/impl/chase_mpidla_cuda_seq.hpp" #endif using namespace chase; @@ -288,9 +289,13 @@ ChaseMpiProperties>* ChASE_State::getProperties() { } template -void chase_seq(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, +void chase_seq(int *N, T* H, int* ldh, T* V, Base* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { +#ifdef HAS_GPU + typedef ChaseMpi SEQ_CHASE; +#else typedef ChaseMpi SEQ_CHASE; +#endif std::vector> timings(3); std::vector> start_times(3); @@ -300,7 +305,9 @@ void chase_seq(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, std::mt19937 gen(2342.0); std::normal_distribution<> d; - SEQ_CHASE single(*N, *nev, *nex, V, ritzv, H); + SEQ_CHASE single(*N, *nev, *nex, V, ritzv); + + T* H_ = single.GetMatrixPtr(); ChaseConfig& config = single.GetConfig(); config.SetTol(*tol); @@ -308,6 +315,8 @@ void chase_seq(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, config.SetOpt(*opt == 'S'); config.SetApprox(*mode == 'A'); + t_lacpy('A', *N, *N, H, *ldh, H_, *N); + if (!config.UseApprox()) for (std::size_t k = 0; k < *N * (*nev + *nex); ++k) V[k] = getRandomT([&]() { return d(gen); }); @@ -317,9 +326,11 @@ void chase_seq(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, chase::Solve(&performanceDecorator); timings[2] = std::chrono::high_resolution_clock::now() - start_times[2]; timings[1] = std::chrono::high_resolution_clock::now() - start_times[1]; - std::cout << "ChASE]> Seq-ChASE Solve done in: " << timings[2].count() << "\n"; - performanceDecorator.GetPerfData().print(); - std::cout << "ChASE]> total time in ChASE: " << timings[1].count() << "\n"; +#ifdef CHASE_OUTPUT + std::cout << " ChASE]> ChASE Solve done in: " << timings[2].count() << "\n"; + performanceDecorator.GetPerfData().print(); + std::cout << " ChASE]> total time in ChASE: " << timings[1].count() << "\n"; +#endif } template @@ -347,8 +358,11 @@ void chase_setup(MPI_Fint* fcomm, int* N, int *nev, int *nex ){ template void chase_solve(T* H, int *LDH, T* V, Base* ritzv, int* deg, double* tol, char* mode, char* opt) { +#ifdef HAS_GPU + typedef ChaseMpi CHASE; +#else typedef ChaseMpi CHASE; - +#endif std::vector> timings(3); std::vector> start_times(3); @@ -371,84 +385,17 @@ void chase_solve(T* H, int *LDH, T* V, Base* ritzv, int* deg, double* tol, ch auto N = config.GetN(); auto nev = config.GetNev(); auto nex = config.GetNex(); - - if (!config.UseApprox()) - for (std::size_t k = 0; k < N * (nev + nex); ++k) - V[k] = getRandomT([&]() { return d(gen); }); -/* - for(auto j = 0; j < n; j++ ){ - for(auto i = 0; i < m; i++){ - H_[m * j + i] = H[j * ldh + i]; - } - } -*/ - + t_lacpy('A', m, n, H, ldh, H_, m); - - //std::cout << myRank << ": m = " << m << ", n = " << n << ", ldh = " << ldh << std::endl; config.SetTol(*tol); config.SetDeg(*deg); config.SetOpt(*opt == 'S'); config.SetApprox(*mode == 'A'); - PerformanceDecoratorChase performanceDecorator(&single); - start_times[2] = std::chrono::high_resolution_clock::now(); - chase::Solve(&performanceDecorator); - - timings[2] = std::chrono::high_resolution_clock::now() - start_times[2]; - timings[1] = std::chrono::high_resolution_clock::now() - start_times[1]; - if(myRank == 0){ - std::cout << "ChASE-MPI]> ChASE Solve done in: " << timings[2].count() << "\n"; - performanceDecorator.GetPerfData().print(); - std::cout << "ChASE-MPI]> total time in ChASE: " << timings[1].count() << "\n"; - } -} - -#ifdef HAS_GPU -template -void chase_solve_mgpu(T* H, int *LDH, T* V, Base* ritzv, int* deg, double* tol, char* mode, - char* opt) { - - typedef ChaseMpi CHASE; - - int ldh = *LDH; - std::vector> timings(3); - std::vector> start_times(3); - - std::mt19937 gen(2342.0); - std::normal_distribution<> d; - ChaseMpiProperties* props = ChASE_State::getProperties(); - - int myRank = props->get_my_rank(); - - CHASE single(props, V, ritzv); - - T* H_ = single.GetMatrixPtr(); - std::size_t m, n; - m = props->get_m(); - n = props->get_n(); - - ChaseConfig& config = single.GetConfig(); - auto N = config.GetN(); - auto nev = config.GetNev(); - auto nex = config.GetNex(); - if (!config.UseApprox()) for (std::size_t k = 0; k < N * (nev + nex); ++k) V[k] = getRandomT([&]() { return d(gen); }); -/* - for(auto j = 0; j < n; j++ ){ - for(auto i = 0; i < m; i++){ - H_[m * j + i] = H[j * ldh + i]; - } - } -*/ - t_lacpy('A', m, n, H, ldh, H_, m); - config.SetTol(*tol); - config.SetDeg(*deg); - config.SetOpt(*opt == 'S'); - config.SetApprox(*mode == 'A'); PerformanceDecoratorChase performanceDecorator(&single); start_times[2] = std::chrono::high_resolution_clock::now(); @@ -456,16 +403,14 @@ void chase_solve_mgpu(T* H, int *LDH, T* V, Base* ritzv, int* deg, double* to timings[2] = std::chrono::high_resolution_clock::now() - start_times[2]; timings[1] = std::chrono::high_resolution_clock::now() - start_times[1]; -#ifdef INFO_PRINT +#ifdef CHASE_OUTPUT if(myRank == 0){ - std::cout << "ChASE-MGPU]> ChASE Solve done in: " << timings[2].count() << "\n"; + std::cout << "ChASE-MPI]> ChASE Solve done in: " << timings[2].count() << "\n"; performanceDecorator.GetPerfData().print(); - std::cout << "ChASE-MGPU]> total time in ChASE: " << timings[1].count() << "\n"; + std::cout << "ChASE-MPI]> total time in ChASE: " << timings[1].count() << "\n"; } -#endif - +#endif } -#endif extern "C" { /** @defgroup chasc-c ChASE C Interface @@ -486,10 +431,10 @@ extern "C" { * @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. * @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. */ -void zchase_(std::complex* H, int* N, std::complex* V, +void zchase_(int *N, std::complex* H, int* ldh, std::complex* V, double* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { - chase_seq>(H, N, V, ritzv, nev, nex, deg, tol, mode, + chase_seq>(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode, opt); } @@ -506,9 +451,9 @@ void zchase_(std::complex* H, int* N, std::complex* V, * @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. * @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. */ -void dchase_(double* H, int* N, double* V, double* ritzv, int* nev, int* nex, +void dchase_(int *N, double* H, int* ldh, double* V, double* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { - chase_seq(H, N, V, ritzv, nev, nex, deg, tol, mode, opt); + chase_seq(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode, opt); } //! shard-memory version of ChASE with complex scalar in single precison @@ -524,10 +469,10 @@ void dchase_(double* H, int* N, double* V, double* ritzv, int* nev, int* nex, * @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. * @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. */ -void cchase_(std::complex* H, int* N, std::complex* V, +void cchase_(int *N, std::complex* H, int *ldh, std::complex* V, float* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { - chase_seq>(H, N, V, ritzv, nev, nex, deg, tol, mode, + chase_seq>(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode, opt); } @@ -544,9 +489,9 @@ void cchase_(std::complex* H, int* N, std::complex* V, * @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. * @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. */ -void schase_(float* H, int* N, float* V, float* ritzv, int* nev, int* nex, +void schase_(int *N, float* H, int* ldh, float* V, float* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { - chase_seq(H, N, V, ritzv, nev, nex, deg, tol, mode, opt); + chase_seq(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode, opt); } //! an initialisation of environment for distributed ChASE for complex scalar in double precision @@ -683,27 +628,6 @@ void pschase_(float* H, int *ldh, float* V, float* ritzv, int* deg, double* tol, chase_solve(H, ldh, V, ritzv, deg, tol, mode, opt); } -#ifdef HAS_GPU -void pzchase_mgpu_(std::complex* H, int *ldh, std::complex* V, - double* ritzv, int* deg, double* tol, char* mode, char* opt) { - chase_solve_mgpu>(H, ldh, V, ritzv, deg, tol, mode, opt); -} - -void pdchase_mgpu_(double* H, int *ldh, double* V, double* ritzv, int* deg, double* tol, - char* mode, char* opt) { - chase_solve_mgpu(H, ldh, V, ritzv, deg, tol, mode, opt); -} - -void pcchase_mgpu_(std::complex* H, int *ldh, std::complex* V, - float* ritzv, int* deg, double* tol, char* mode, char* opt) { - chase_solve_mgpu>(H, ldh, V, ritzv, deg, tol, mode, opt); -} - -void pschase_mgpu_(float* H, int *ldh, float* V, float* ritzv, int* deg, double* tol, - char* mode, char* opt) { - chase_solve_mgpu(H, ldh, V, ritzv, deg, tol, mode, opt); -} -#endif /** @} */ // end of chasc-c } // extern C diff --git a/interface/chase_f.f90 b/interface/chase_f.f90 index 8f12dde..4e753c8 100644 --- a/interface/chase_f.f90 +++ b/interface/chase_f.f90 @@ -7,10 +7,11 @@ MODULE chase_diag INTERFACE !> shard-memory version of ChASE with real scalar in double precison !> - SUBROUTINE dchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'dchase_' ) + SUBROUTINE dchase( n, h, ldh, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'dchase_' ) !> + !> @param[in] n global matrix size of the matrix to be diagonalized !> @param[in] h pointer to the matrix to be diagonalized - !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] ldh leading dimension of h !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues !> @param[int] nev number of desired eigenpairs @@ -21,7 +22,7 @@ SUBROUTINE dchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. USE, INTRINSIC :: iso_c_binding REAL(c_double) :: h(n,*), v(n,*) - INTEGER(c_int) :: n, deg, nev, nex + INTEGER(c_int) :: n, deg, nev, nex, ldh REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE dchase @@ -31,9 +32,10 @@ END SUBROUTINE dchase !> shard-memory version of ChASE with real scalar in single precison !> !> - SUBROUTINE schase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'schase_' ) + SUBROUTINE schase( n, h, ldh, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'schase_' ) + !> @param[in] n global matrix size of the matrix to be diagonalized !> @param[in] h pointer to the matrix to be diagonalized - !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] ldh leading dimension of h !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues !> @param[int] nev number of desired eigenpairs @@ -44,7 +46,7 @@ SUBROUTINE schase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. USE, INTRINSIC :: iso_c_binding REAL(c_float) :: h(n,*), v(n,*) - INTEGER(c_int) :: n, deg, nev, nex + INTEGER(c_int) :: n, deg, nev, nex, ldh REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE schase @@ -53,9 +55,10 @@ END SUBROUTINE schase !> !> INTERFACE - SUBROUTINE cchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'cchase_' ) - !> @param[in] h pointer to the matrix to be diagonalized + SUBROUTINE cchase( n, h, ldh, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'cchase_' ) !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] h pointer to the matrix to be diagonalized + !> @param[in] ldh leading dimension of h !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues !> @param[int] nev number of desired eigenpairs @@ -66,7 +69,7 @@ SUBROUTINE cchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. USE, INTRINSIC :: iso_c_binding COMPLEX(c_float_complex) :: h(n,*), v(n,*) - INTEGER(c_int) :: n, deg, nev, nex + INTEGER(c_int) :: n, deg, nev, nex, ldh REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE cchase @@ -75,9 +78,10 @@ END SUBROUTINE cchase !> !> INTERFACE - SUBROUTINE zchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'zchase_' ) - !> @param[in] h pointer to the matrix to be diagonalized + SUBROUTINE zchase( n, h, ldh, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'zchase_' ) !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] h pointer to the matrix to be diagonalized + !> @param[in] ldh leading dimension of h !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues !> @param[int] nev number of desired eigenpairs @@ -88,7 +92,7 @@ SUBROUTINE zchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. USE, INTRINSIC :: iso_c_binding COMPLEX(c_double_complex) :: h(n,*), v(n,*) - INTEGER(c_int) :: n, deg, nev, nex + INTEGER(c_int) :: n, deg, nev, nex, ldh REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE zchase @@ -418,94 +422,5 @@ SUBROUTINE pcchase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcch END SUBROUTINE pcchase END INTERFACE -#if defined(HAS_GPU) - !> distributed multi-GPU version ChASE for real scalar in double precision - !> - INTERFACE - SUBROUTINE pdchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pdchase_mgpu_' ) - !> Compute the first nev eigenpairs by ChASE - !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction - !> @param[in] h pointer to the local portion of the matrix to be diagonalized - !> @param[in] ldh leading dimension of `h` - !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors - !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues - !> @param[int] deg initial degree of Cheyshev polynomial filter - !> @param[int] tol desired absolute tolerance of computed eigenpairs - !> @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. - !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. - USE, INTRINSIC :: iso_c_binding - REAL(c_double) :: h(*), v(*) - INTEGER(c_int) :: deg, ldh - REAL(c_double) :: ritzv(*), tol - CHARACTER(len=1,kind=c_char) :: mode, opt - END SUBROUTINE pdchase_mgpu - END INTERFACE - !> distributed multi-GPU version ChASE for complex scalar in double precision - !> - INTERFACE - SUBROUTINE pzchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pzchase_mgpu_' ) - !> Compute the first nev eigenpairs by ChASE - !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction - !> @param[in] h pointer to the local portion of the matrix to be diagonalized - !> @param[in] ldh leading dimension of `h` - !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors - !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues - !> @param[int] deg initial degree of Cheyshev polynomial filter - !> @param[int] tol desired absolute tolerance of computed eigenpairs - !> @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. - !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. - USE, INTRINSIC :: iso_c_binding - COMPLEX(c_double_complex) :: h(*), v(*) - INTEGER(c_int) :: deg, ldh - REAL(c_double) :: ritzv(*), tol - CHARACTER(len=1,kind=c_char) :: mode, opt - END SUBROUTINE pzchase_mgpu - END INTERFACE - !> distributed multi-GPU version ChASE for real scalar in single precision - !> - INTERFACE - SUBROUTINE pschase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pschase_mgpu_' ) - !> Compute the first nev eigenpairs by ChASE - !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction - !> @param[in] h pointer to the local portion of the matrix to be diagonalized - !> @param[in] ldh leading dimension of `h` - !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors - !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues - !> @param[int] deg initial degree of Cheyshev polynomial filter - !> @param[int] tol desired absolute tolerance of computed eigenpairs - !> @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. - !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. - USE, INTRINSIC :: iso_c_binding - REAL(c_float) :: h(*), v(*) - INTEGER(c_int) :: deg, ldh - REAL(c_float) :: ritzv(*) - REAL(c_double) :: tol - CHARACTER(len=1,kind=c_char) :: mode, opt - END SUBROUTINE pschase_mgpu - END INTERFACE - !> distributed multi-GPU version ChASE for complex scalar in single precision - !> - INTERFACE - SUBROUTINE pcchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcchase_mgpu_' ) - !> Compute the first nev eigenpairs by ChASE - !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction - !> @param[in] h pointer to the local portion of the matrix to be diagonalized - !> @param[in] ldh leading dimension of `h` - !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors - !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues - !> @param[int] deg initial degree of Cheyshev polynomial filter - !> @param[int] tol desired absolute tolerance of computed eigenpairs - !> @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. - !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. - USE, INTRINSIC :: iso_c_binding - COMPLEX(c_float_complex) :: h(*), v(*) - INTEGER(c_int) :: deg, ldh - REAL(c_float) :: ritzv(*) - REAL(c_double) :: tol - CHARACTER(len=1,kind=c_char) :: mode, opt - END SUBROUTINE pcchase_mgpu - END INTERFACE -#endif - END MODULE chase_diag -!> @} end of chasc-c \ No newline at end of file +!> @} end of chasc-c