diff --git a/examples/2_input_output/CMakeLists.txt b/examples/2_input_output/CMakeLists.txt
index b479e11..2699b7e 100644
--- a/examples/2_input_output/CMakeLists.txt
+++ b/examples/2_input_output/CMakeLists.txt
@@ -11,18 +11,18 @@ if( ${Boost_FOUND} )
   add_executable( "2_input_output_seq"
     "2_input_output.cpp"
     )
-  target_link_libraries( "2_input_output_seq" PRIVATE chase_seq ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY})
+  target_link_libraries( "2_input_output_seq" PRIVATE chase_seq ${Boost_LIBRARIES})
 
   ##############################################################################
   #           2_input_output: no GPU, MPI ($A$ distributed among MPI-ranks)
   ##############################################################################
 
   add_executable( "2_input_output" "2_input_output.cpp" )
-  target_link_libraries( "2_input_output" chase_mpi ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY})
+  target_link_libraries( "2_input_output" chase_mpi ${Boost_LIBRARIES})
   target_compile_definitions( "2_input_output" PRIVATE USE_MPI=1 PRINT_EIGENVALUES=1 CHASE_OUTPUT=1)
 
   add_executable( "2_input_output_block_cyclic" "2_input_output.cpp" )
-  target_link_libraries( "2_input_output_block_cyclic" chase_mpi ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY})
+  target_link_libraries( "2_input_output_block_cyclic" chase_mpi ${Boost_LIBRARIES} )
   target_compile_definitions( "2_input_output_block_cyclic" PRIVATE USE_MPI=1 USE_BLOCK_CYCLIC=1)  
 
   install (TARGETS 2_input_output_seq
@@ -50,11 +50,11 @@ if( ${Boost_FOUND} )
     endif()
 
     add_executable( 2_input_output_mgpu "2_input_output.cpp" )
-    target_link_libraries( 2_input_output_mgpu chase_mpi chase_cuda ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY})
+    target_link_libraries( 2_input_output_mgpu chase_mpi chase_cuda ${Boost_LIBRARIES})
     target_compile_definitions( "2_input_output_mgpu" PRIVATE DRIVER_BUILD_MGPU=1 CHASE_OUTPUT=1 PRINT_EIGENVALUES=1 USE_MPI=1)
 
     add_executable( 2_input_output_mgpu_block_cyclic "2_input_output.cpp" )
-    target_link_libraries( 2_input_output_mgpu_block_cyclic chase_mpi chase_cuda ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY})
+    target_link_libraries( 2_input_output_mgpu_block_cyclic chase_mpi chase_cuda ${Boost_LIBRARIES} )
     target_compile_definitions( "2_input_output_mgpu_block_cyclic" PRIVATE DRIVER_BUILD_MGPU=1 USE_MPI=1 USE_BLOCK_CYCLIC=1)
 
     install (TARGETS 2_input_output_mgpu
diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp
index 1b31d5f..4d18998 100644
--- a/interface/chase_c.cpp
+++ b/interface/chase_c.cpp
@@ -23,6 +23,7 @@
 
 #ifdef HAS_GPU
   #include "ChASE-MPI/impl/chase_mpidla_mgpu.hpp"
+  #include "ChASE-MPI/impl/chase_mpidla_cuda_seq.hpp"
 #endif
 
 using namespace chase;
@@ -288,9 +289,13 @@ ChaseMpiProperties<std::complex<float>>* ChASE_State::getProperties() {
 }
 
 template <typename T>
-void chase_seq(T* H, int* N, T* V, Base<T>* ritzv, int* nev, int* nex,
+void chase_seq(int *N, T* H, int* ldh, T* V, Base<T>* ritzv, int* nev, int* nex,
                 int* deg, double* tol, char* mode, char* opt) {
+#ifdef HAS_GPU
+  typedef ChaseMpi<ChaseMpiDLACudaSeq, T> SEQ_CHASE;
+#else	
   typedef ChaseMpi<ChaseMpiDLABlaslapackSeq, T> SEQ_CHASE;
+#endif
 
   std::vector<std::chrono::duration<double>> timings(3);
   std::vector<std::chrono::time_point<std::chrono::high_resolution_clock>> start_times(3);
@@ -300,7 +305,9 @@ void chase_seq(T* H, int* N, T* V, Base<T>* ritzv, int* nev, int* nex,
   std::mt19937 gen(2342.0);
   std::normal_distribution<> d;
 
-  SEQ_CHASE single(*N, *nev, *nex, V, ritzv, H);
+  SEQ_CHASE single(*N, *nev, *nex, V, ritzv);
+
+  T* H_ = single.GetMatrixPtr();
 
   ChaseConfig<T>& config = single.GetConfig();
   config.SetTol(*tol);
@@ -308,6 +315,8 @@ void chase_seq(T* H, int* N, T* V, Base<T>* ritzv, int* nev, int* nex,
   config.SetOpt(*opt == 'S');
   config.SetApprox(*mode == 'A');
 
+  t_lacpy('A', *N, *N, H, *ldh, H_, *N);
+
   if (!config.UseApprox())
-    for (std::size_t k = 0; k < *N * (*nev + *nex); ++k)
+    for (std::size_t k = 0; k < static_cast<std::size_t>(*N) * (*nev + *nex); ++k)
       V[k] = getRandomT<T>([&]() { return d(gen); });
@@ -317,9 +326,11 @@ void chase_seq(T* H, int* N, T* V, Base<T>* ritzv, int* nev, int* nex,
   chase::Solve(&performanceDecorator);
   timings[2] = std::chrono::high_resolution_clock::now() - start_times[2];
   timings[1] = std::chrono::high_resolution_clock::now() - start_times[1];
-  std::cout << "ChASE]> Seq-ChASE Solve done in: " << timings[2].count() << "\n";
-  performanceDecorator.GetPerfData().print();  
-  std::cout << "ChASE]> total time in ChASE: " << timings[1].count() << "\n";
+#ifdef CHASE_OUTPUT
+  std::cout << "    ChASE]> ChASE Solve done in: " << timings[2].count() << "\n";
+  performanceDecorator.GetPerfData().print();   
+  std::cout << "    ChASE]> total time in ChASE: " << timings[1].count() << "\n";
+#endif
 }
 
 template <typename T>
@@ -347,8 +358,11 @@ void chase_setup(MPI_Fint* fcomm, int* N, int *nev, int *nex ){
 template <typename T>
 void chase_solve(T* H, int *LDH, T* V, Base<T>* ritzv, int* deg, double* tol, char* mode,
                  char* opt) {
+#ifdef HAS_GPU  
+  typedef ChaseMpi<ChaseMpiDLAMultiGPU, T> CHASE;
+#else
   typedef ChaseMpi<ChaseMpiDLABlaslapack, T> CHASE;
-
+#endif
   std::vector<std::chrono::duration<double>> timings(3);
   std::vector<std::chrono::time_point<std::chrono::high_resolution_clock>> start_times(3);
 
@@ -371,84 +385,17 @@ void chase_solve(T* H, int *LDH, T* V, Base<T>* ritzv, int* deg, double* tol, ch
   auto N = config.GetN();
   auto nev = config.GetNev();
   auto nex = config.GetNex();
-
-  if (!config.UseApprox())
-    for (std::size_t k = 0; k < N * (nev + nex); ++k)
-      V[k] = getRandomT<T>([&]() { return d(gen); });
-/*
-  for(auto j = 0; j < n; j++ ){
-      for(auto i = 0; i < m; i++){
-          H_[m * j + i] = H[j * ldh + i];
-      }
-  }
-*/  
-
+ 
   t_lacpy('A', m, n, H, ldh, H_, m);
-
-  //std::cout << myRank << ": m = " << m << ", n = " << n << ", ldh = " << ldh << std::endl;  
   
   config.SetTol(*tol);
   config.SetDeg(*deg);
   config.SetOpt(*opt == 'S');
   config.SetApprox(*mode == 'A');
 
-  PerformanceDecoratorChase<T> performanceDecorator(&single);
-  start_times[2] = std::chrono::high_resolution_clock::now();
-  chase::Solve(&performanceDecorator);
-
-  timings[2] = std::chrono::high_resolution_clock::now() - start_times[2];
-  timings[1] = std::chrono::high_resolution_clock::now() - start_times[1];
-  if(myRank == 0){
-      std::cout << "ChASE-MPI]> ChASE Solve done in: " << timings[2].count() << "\n";
-      performanceDecorator.GetPerfData().print();
-      std::cout << "ChASE-MPI]> total time in ChASE: " << timings[1].count() << "\n";      
-  }
-}
-
-#ifdef HAS_GPU
-template <typename T>
-void chase_solve_mgpu(T* H, int *LDH, T* V, Base<T>* ritzv, int* deg, double* tol, char* mode,
-                 char* opt) {
-  
-  typedef ChaseMpi<ChaseMpiDLAMultiGPU, T> CHASE;	
-  
-  int ldh = *LDH;
-  std::vector<std::chrono::duration<double>> timings(3);
-  std::vector<std::chrono::time_point<std::chrono::high_resolution_clock>> start_times(3);
-
-  std::mt19937 gen(2342.0);
-  std::normal_distribution<> d;
-  ChaseMpiProperties<T>* props = ChASE_State::getProperties<T>();
-
-  int myRank = props->get_my_rank();
-
-  CHASE single(props, V, ritzv);
-
-  T* H_ = single.GetMatrixPtr();
-  std::size_t m, n;
-  m = props->get_m();
-  n = props->get_n();
-
-  ChaseConfig<T>& config = single.GetConfig();
-  auto N = config.GetN();
-  auto nev = config.GetNev();
-  auto nex = config.GetNex();
-
   if (!config.UseApprox())
     for (std::size_t k = 0; k < N * (nev + nex); ++k)
       V[k] = getRandomT<T>([&]() { return d(gen); });
-/*
-  for(auto j = 0; j < n; j++ ){
-      for(auto i = 0; i < m; i++){
-          H_[m * j + i] = H[j * ldh + i];
-      }
-  }
-*/
-  t_lacpy('A', m, n, H, ldh, H_, m);
-  config.SetTol(*tol);
-  config.SetDeg(*deg);
-  config.SetOpt(*opt == 'S');
-  config.SetApprox(*mode == 'A');
 
   PerformanceDecoratorChase<T> performanceDecorator(&single);
   start_times[2] = std::chrono::high_resolution_clock::now();
@@ -456,16 +403,14 @@ void chase_solve_mgpu(T* H, int *LDH, T* V, Base<T>* ritzv, int* deg, double* to
 
   timings[2] = std::chrono::high_resolution_clock::now() - start_times[2];
   timings[1] = std::chrono::high_resolution_clock::now() - start_times[1];
-#ifdef INFO_PRINT
+#ifdef CHASE_OUTPUT
   if(myRank == 0){
-      std::cout << "ChASE-MGPU]> ChASE Solve done in: " << timings[2].count() << "\n";
+      std::cout << "ChASE-MPI]> ChASE Solve done in: " << timings[2].count() << "\n";
       performanceDecorator.GetPerfData().print();
-      std::cout << "ChASE-MGPU]> total time in ChASE: " << timings[1].count() << "\n";
+      std::cout << "ChASE-MPI]> total time in ChASE: " << timings[1].count() << "\n";      
   }
-#endif
-
+#endif  
 }
-#endif
 
 extern "C" {
 /** @defgroup chasc-c ChASE C Interface
@@ -486,10 +431,10 @@ extern "C" {
  * @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not.
  * @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
  */  
-void zchase_(std::complex<double>* H, int* N, std::complex<double>* V,
+void zchase_(int *N, std::complex<double>* H, int* ldh, std::complex<double>* V,
              double* ritzv, int* nev, int* nex, int* deg, double* tol,
              char* mode, char* opt) {
-  chase_seq<std::complex<double>>(H, N, V, ritzv, nev, nex, deg, tol, mode,
+  chase_seq<std::complex<double>>(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode,
                                    opt);
 }
 
@@ -506,9 +451,9 @@ void zchase_(std::complex<double>* H, int* N, std::complex<double>* V,
  * @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not.
  * @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
  */  
-void dchase_(double* H, int* N, double* V, double* ritzv, int* nev, int* nex,
+void dchase_(int *N, double* H, int* ldh, double* V, double* ritzv, int* nev, int* nex,
              int* deg, double* tol, char* mode, char* opt) {
-  chase_seq<double>(H, N, V, ritzv, nev, nex, deg, tol, mode, opt);
+  chase_seq<double>(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode, opt);
 }
 
 //! shard-memory version of ChASE with complex scalar in single precison
@@ -524,10 +469,10 @@ void dchase_(double* H, int* N, double* V, double* ritzv, int* nev, int* nex,
  * @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not.
  * @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
  */  
-void cchase_(std::complex<float>* H, int* N, std::complex<float>* V,
+void cchase_(int *N, std::complex<float>* H, int *ldh, std::complex<float>* V,
              float* ritzv, int* nev, int* nex, int* deg, double* tol,
              char* mode, char* opt) {
-  chase_seq<std::complex<float>>(H, N, V, ritzv, nev, nex, deg, tol, mode,
+  chase_seq<std::complex<float>>(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode,
                                    opt);
 }
 
@@ -544,9 +489,9 @@ void cchase_(std::complex<float>* H, int* N, std::complex<float>* V,
  * @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not.
  * @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
  */  
-void schase_(float* H, int* N, float* V, float* ritzv, int* nev, int* nex,
+void schase_(int *N, float* H, int* ldh, float* V, float* ritzv, int* nev, int* nex,
              int* deg, double* tol, char* mode, char* opt) {
-  chase_seq<float>(H, N, V, ritzv, nev, nex, deg, tol, mode, opt);
+  chase_seq<float>(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode, opt);
 }
 
 //! an initialisation of environment for distributed ChASE for complex scalar in double precision
@@ -683,27 +628,6 @@ void pschase_(float* H, int *ldh, float* V, float* ritzv, int* deg, double* tol,
   chase_solve<float>(H, ldh, V, ritzv, deg, tol, mode, opt);
 }
 
-#ifdef HAS_GPU
-void pzchase_mgpu_(std::complex<double>* H, int *ldh, std::complex<double>* V,
-                  double* ritzv, int* deg, double* tol, char* mode, char* opt) {
-  chase_solve_mgpu<std::complex<double>>(H, ldh, V, ritzv, deg, tol, mode, opt);
-}
-
-void pdchase_mgpu_(double* H, int *ldh, double* V, double* ritzv, int* deg, double* tol,
-                  char* mode, char* opt) {
-  chase_solve_mgpu<double>(H, ldh, V, ritzv, deg, tol, mode, opt);
-}
-
-void pcchase_mgpu_(std::complex<float>* H, int *ldh, std::complex<float>* V,
-                  float* ritzv, int* deg, double* tol, char* mode, char* opt) {
-  chase_solve_mgpu<std::complex<float>>(H, ldh, V, ritzv, deg, tol, mode, opt);
-}
-
-void pschase_mgpu_(float* H, int *ldh, float* V, float* ritzv, int* deg, double* tol,
-                  char* mode, char* opt) {
-  chase_solve_mgpu<float>(H, ldh, V, ritzv, deg, tol, mode, opt);
-}
-#endif
 /** @} */ // end of chasc-c
 
 }  // extern C 
diff --git a/interface/chase_f.f90 b/interface/chase_f.f90
index 8f12dde..4e753c8 100644
--- a/interface/chase_f.f90
+++ b/interface/chase_f.f90
@@ -7,10 +7,11 @@ MODULE chase_diag
   INTERFACE
   !> shard-memory version of ChASE with real scalar in double precison
   !>  
-    SUBROUTINE dchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'dchase_' )
+    SUBROUTINE dchase( n, h, ldh, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'dchase_' )
   !>
+  !> @param[in] n global matrix size of the matrix to be diagonalized  
   !> @param[in] h pointer to the matrix to be diagonalized
-  !> @param[in] n global matrix size of the matrix to be diagonalized
+  !> @param[in] ldh leading dimension of h
   !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors
   !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues
   !> @param[int] nev number of desired eigenpairs
@@ -21,7 +22,7 @@ SUBROUTINE dchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name
   !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.    
       USE, INTRINSIC :: iso_c_binding
-      REAL(c_double)        :: h(n,*), v(n,*)
-      INTEGER(c_int)                :: n, deg, nev, nex
+      REAL(c_double)        :: h(ldh,*), v(n,*)
+      INTEGER(c_int)                :: n, deg, nev, nex, ldh
       REAL(c_double)                :: ritzv(*), tol
       CHARACTER(len=1,kind=c_char)  :: mode, opt
     END SUBROUTINE dchase
@@ -31,9 +32,10 @@ END SUBROUTINE dchase
   !> shard-memory version of ChASE with real scalar in single precison
   !>
 !>    
-    SUBROUTINE schase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'schase_' )
+    SUBROUTINE schase( n, h, ldh, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'schase_' )
+  !> @param[in] n global matrix size of the matrix to be diagonalized  
   !> @param[in] h pointer to the matrix to be diagonalized
-  !> @param[in] n global matrix size of the matrix to be diagonalized
+  !> @param[in] ldh leading dimension of h  
   !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors
   !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues
   !> @param[int] nev number of desired eigenpairs
@@ -44,7 +46,7 @@ SUBROUTINE schase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name
   !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.    
       USE, INTRINSIC :: iso_c_binding
-      REAL(c_float)        :: h(n,*), v(n,*)
-      INTEGER(c_int)                :: n, deg, nev, nex
-      REAL(c_double)                :: ritzv(*), tol
+      REAL(c_float)                 :: h(ldh,*), v(n,*), ritzv(*)
+      INTEGER(c_int)                :: n, deg, nev, nex, ldh
+      REAL(c_double)                :: tol
       CHARACTER(len=1,kind=c_char)  :: mode, opt
     END SUBROUTINE schase
@@ -53,9 +55,10 @@ END SUBROUTINE schase
   !>
 !>  
   INTERFACE
-    SUBROUTINE cchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'cchase_' )
-  !> @param[in] h pointer to the matrix to be diagonalized
+    SUBROUTINE cchase( n, h, ldh, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'cchase_' )
   !> @param[in] n global matrix size of the matrix to be diagonalized
+  !> @param[in] h pointer to the matrix to be diagonalized
+  !> @param[in] ldh leading dimension of h  
   !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors
   !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues
   !> @param[int] nev number of desired eigenpairs
@@ -66,7 +69,8 @@ SUBROUTINE cchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name
   !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.    
       USE, INTRINSIC :: iso_c_binding
-      COMPLEX(c_float_complex)     :: h(n,*), v(n,*)
-      INTEGER(c_int)                :: n, deg, nev, nex
-      REAL(c_double)                :: ritzv(*), tol
+      COMPLEX(c_float_complex)     :: h(ldh,*), v(n,*)
+      INTEGER(c_int)                :: n, deg, nev, nex, ldh
+      REAL(c_float)                 :: ritzv(*)
+      REAL(c_double)                :: tol
       CHARACTER(len=1,kind=c_char)  :: mode, opt
     END SUBROUTINE cchase
@@ -75,9 +79,10 @@ END SUBROUTINE cchase
   !>
 !>  
   INTERFACE
-    SUBROUTINE zchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'zchase_' )
-  !> @param[in] h pointer to the matrix to be diagonalized
+    SUBROUTINE zchase( n, h, ldh, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'zchase_' )
   !> @param[in] n global matrix size of the matrix to be diagonalized
+  !> @param[in] h pointer to the matrix to be diagonalized
+  !> @param[in] ldh leading dimension of h  
   !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors
   !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues
   !> @param[int] nev number of desired eigenpairs
@@ -88,7 +93,7 @@ SUBROUTINE zchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name
   !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.    
       USE, INTRINSIC :: iso_c_binding
-      COMPLEX(c_double_complex)     :: h(n,*), v(n,*)
-      INTEGER(c_int)                :: n, deg, nev, nex
+      COMPLEX(c_double_complex)     :: h(ldh,*), v(n,*)
+      INTEGER(c_int)                :: n, deg, nev, nex, ldh
       REAL(c_double)                :: ritzv(*), tol
       CHARACTER(len=1,kind=c_char)  :: mode, opt
     END SUBROUTINE zchase
@@ -418,94 +423,5 @@ SUBROUTINE pcchase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcch
      END SUBROUTINE pcchase
   END INTERFACE
 
-#if defined(HAS_GPU)
-  !> distributed multi-GPU version ChASE for real scalar in double precision
-  !> 
-  INTERFACE
-     SUBROUTINE pdchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pdchase_mgpu_' )
-  !> Compute the first nev eigenpairs by ChASE
-  !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction
-  !> @param[in] h pointer to the local portion of the matrix to be diagonalized
-  !> @param[in] ldh leading dimension of `h`
-  !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors
-  !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues
-  !> @param[int] deg initial degree of Cheyshev polynomial filter
-  !> @param[int] tol desired absolute tolerance of computed eigenpairs
-  !> @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not.
-  !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.         
-       USE, INTRINSIC :: iso_c_binding
-       REAL(c_double)        :: h(*), v(*)
-       INTEGER(c_int)                :: deg, ldh
-       REAL(c_double)                :: ritzv(*), tol
-       CHARACTER(len=1,kind=c_char)  :: mode, opt
-     END SUBROUTINE pdchase_mgpu
-  END INTERFACE
-  !> distributed multi-GPU version ChASE for complex scalar in double precision
-  !> 
-  INTERFACE
-     SUBROUTINE pzchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pzchase_mgpu_' )
-  !> Compute the first nev eigenpairs by ChASE
-  !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction
-  !> @param[in] h pointer to the local portion of the matrix to be diagonalized
-  !> @param[in] ldh leading dimension of `h`
-  !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors
-  !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues
-  !> @param[int] deg initial degree of Cheyshev polynomial filter
-  !> @param[int] tol desired absolute tolerance of computed eigenpairs
-  !> @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not.
-  !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.         
-       USE, INTRINSIC :: iso_c_binding
-       COMPLEX(c_double_complex)     :: h(*), v(*)
-       INTEGER(c_int)                :: deg, ldh
-       REAL(c_double)                :: ritzv(*), tol
-       CHARACTER(len=1,kind=c_char)  :: mode, opt
-     END SUBROUTINE pzchase_mgpu
-  END INTERFACE
-  !> distributed multi-GPU version ChASE for real scalar in single precision
-  !> 
-  INTERFACE
-     SUBROUTINE pschase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pschase_mgpu_' )
-  !> Compute the first nev eigenpairs by ChASE
-  !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction
-  !> @param[in] h pointer to the local portion of the matrix to be diagonalized
-  !> @param[in] ldh leading dimension of `h`
-  !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors
-  !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues
-  !> @param[int] deg initial degree of Cheyshev polynomial filter
-  !> @param[int] tol desired absolute tolerance of computed eigenpairs
-  !> @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not.
-  !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.         
-       USE, INTRINSIC :: iso_c_binding
-       REAL(c_float)                 :: h(*), v(*)
-       INTEGER(c_int)                :: deg, ldh
-       REAL(c_float)                 :: ritzv(*)
-       REAL(c_double)                :: tol
-       CHARACTER(len=1,kind=c_char)  :: mode, opt
-     END SUBROUTINE pschase_mgpu
-  END INTERFACE
-  !> distributed multi-GPU version ChASE for complex scalar in single precision
-  !> 
-  INTERFACE
-     SUBROUTINE pcchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcchase_mgpu_' )
-  !> Compute the first nev eigenpairs by ChASE
-  !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction
-  !> @param[in] h pointer to the local portion of the matrix to be diagonalized
-  !> @param[in] ldh leading dimension of `h`
-  !> @param[inout] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors
-  !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues
-  !> @param[int] deg initial degree of Cheyshev polynomial filter
-  !> @param[int] tol desired absolute tolerance of computed eigenpairs
-  !> @param[int] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not.
-  !> @param[int] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.         
-       USE, INTRINSIC :: iso_c_binding
-       COMPLEX(c_float_complex)      :: h(*), v(*)
-       INTEGER(c_int)                :: deg, ldh
-       REAL(c_float)                 :: ritzv(*)
-       REAL(c_double)                :: tol
-       CHARACTER(len=1,kind=c_char)  :: mode, opt
-     END SUBROUTINE pcchase_mgpu
-  END INTERFACE  
-#endif
-
 END MODULE chase_diag
-!> @} end of chasc-c
\ No newline at end of file
+!> @} end of chasc-c