From 91b9f9c08e8408e8a43d21f58547e0ffb3c793b5 Mon Sep 17 00:00:00 2001
From: Evan Weinberg <eweinberg@nvidia.com>
Date: Thu, 27 Jul 2023 13:44:58 -0700
Subject: [PATCH 1/8] Threaded in support for saving near-null and eigenvectors
 in PARTFILE format, plus exposed it on the command line

---
 include/multigrid.h                 |  5 +++++
 include/qio_field.h                 |  4 ++--
 include/quda.h                      |  6 ++++++
 include/vector_io.h                 |  4 +++-
 lib/check_params.h                  |  8 ++++++++
 lib/multigrid.cpp                   |  2 +-
 lib/qio_field.cpp                   |  4 ++--
 lib/vector_io.cpp                   | 14 ++++++++++----
 tests/utils/command_line_params.cpp |  7 +++++--
 tests/utils/command_line_params.h   |  2 ++
 tests/utils/set_params.cpp          |  4 ++++
 11 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/include/multigrid.h b/include/multigrid.h
index b610d5f5f9..2d50bcec97 100644
--- a/include/multigrid.h
+++ b/include/multigrid.h
@@ -157,6 +157,9 @@ namespace quda {
     /** Filename for where to load/store the null space */
     char filename[100];
 
+    /** Whether to save in partfile format (true) or singlefile (false) */
+    bool mg_vec_partfile;
+
     /** Whether or not this is a staggered solve or not */
     QudaTransferType transfer_type;
 
@@ -193,6 +196,7 @@ namespace quda {
       smoother_solve_type(param.smoother_solve_type[level]),
       location(param.location[level]),
       setup_location(param.setup_location[level]),
+      mg_vec_partfile(param.mg_vec_partfile[level] == QUDA_BOOLEAN_TRUE),
       transfer_type(param.transfer_type[level]),
       setup_use_mma(param.setup_use_mma[level] == QUDA_BOOLEAN_TRUE),
       dslash_use_mma(param.dslash_use_mma[level] == QUDA_BOOLEAN_TRUE)
@@ -230,6 +234,7 @@ namespace quda {
       smoother_solve_type(param.mg_global.smoother_solve_type[level]),
       location(param.mg_global.location[level]),
       setup_location(param.mg_global.setup_location[level]),
+      mg_vec_partfile(param.mg_global.mg_vec_partfile[level] == QUDA_BOOLEAN_TRUE),
       transfer_type(param.mg_global.transfer_type[level]),
       setup_use_mma(param.mg_global.setup_use_mma[level] == QUDA_BOOLEAN_TRUE),
       dslash_use_mma(param.mg_global.dslash_use_mma[level] == QUDA_BOOLEAN_TRUE)
diff --git a/include/qio_field.h b/include/qio_field.h
index 1662872f6d..a76cb44f2e 100644
--- a/include/qio_field.h
+++ b/include/qio_field.h
@@ -7,7 +7,7 @@ void write_gauge_field(const char *filename, void *gauge[], QudaPrecision prec,
 void read_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, QudaSiteSubset subset,
                        QudaParity parity, int nColor, int nSpin, int Nvec, int argc, char *argv[]);
 void write_spinor_field(const char *filename, const void *V[], QudaPrecision precision, const int *X, QudaSiteSubset subset,
-                        QudaParity parity, int nColor, int nSpin, int Nvec, int argc, char *argv[]);
+                        QudaParity parity, int nColor, int nSpin, int Nvec, int argc, char *argv[], bool partfile = false);
 #else
 inline void read_gauge_field(const char *, void *[], QudaPrecision, const int *, int, char *[])
 {
@@ -26,7 +26,7 @@ inline void read_spinor_field(const char *, void *[], QudaPrecision, const int *
   exit(-1);
 }
 inline void write_spinor_field(const char *, const void *[], QudaPrecision, const int *, QudaSiteSubset, QudaParity, int, int,
-                               int, int, char *[])
+                               int, int, char *[], bool)
 {
   printf("QIO support has not been enabled\n");
   exit(-1);
diff --git a/include/quda.h b/include/quda.h
index baf6c54b90..ebfe65ceb4 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -585,6 +585,9 @@ extern "C" {
         MILC I/O) */
     QudaBoolean io_parity_inflate;
 
+    /** Whether to save eigenvectors in QIO partfile format (true) or singlefile format (false) */
+    QudaBoolean partfile;
+
     /** The Gflops rate of the eigensolver setup */
     double gflops;
 
@@ -779,6 +782,9 @@ extern "C" {
     /** Filename prefix for where to save the null-space vectors */
     char vec_outfile[QUDA_MAX_MG_LEVEL][256];
 
+    /** Whether to store the null-space vectors in partfile format (true) or singlefile format (false) */
+    QudaBoolean mg_vec_partfile[QUDA_MAX_MG_LEVEL];
+
     /** Whether to use and initial guess during coarse grid deflation */
     QudaBoolean coarse_guess;
 
diff --git a/include/vector_io.h b/include/vector_io.h
index 5d8768e17f..8e3a1e6ee9 100644
--- a/include/vector_io.h
+++ b/include/vector_io.h
@@ -15,6 +15,7 @@ namespace quda
   {
     const std::string filename;
     bool parity_inflate;
+    bool partfile;
 
   public:
     /**
@@ -22,8 +23,9 @@ namespace quda
        @param[in] filename The filename associated with this IO object
        @param[in] parity_inflate Whether to inflate single_parity
        field to dual parity fields for I/O
+       @param[in] partfile Whether or not to save in partfiles (ignored on load)
     */
-    VectorIO(const std::string &filename, bool parity_inflate = false);
+    VectorIO(const std::string &filename, bool parity_inflate = false, bool partfile = false);
 
     /**
        @brief Load vectors from filename
diff --git a/lib/check_params.h b/lib/check_params.h
index c5c615ccce..00892741f2 100644
--- a/lib/check_params.h
+++ b/lib/check_params.h
@@ -197,6 +197,7 @@ void printQudaEigParam(QudaEigParam *param) {
   P(extlib_type, QUDA_EIGEN_EXTLIB);
   P(mem_type_ritz, QUDA_MEMORY_DEVICE);
   P(ortho_block_size, 0);
+  P(partfile, QUDA_BOOLEAN_FALSE);
 #else
   P(use_eigen_qr, QUDA_BOOLEAN_INVALID);
   P(use_poly_acc, QUDA_BOOLEAN_INVALID);
@@ -226,6 +227,7 @@ void printQudaEigParam(QudaEigParam *param) {
   P(extlib_type, QUDA_EXTLIB_INVALID);
   P(mem_type_ritz, QUDA_MEMORY_INVALID);
   P(ortho_block_size, INVALID_INT);
+  P(partfile, QUDA_BOOLEAN_INVALID);
 #endif
 
   // only need to enfore block size checking if doing a block eigen solve
@@ -931,6 +933,12 @@ void printQudaMultigridParam(QudaMultigridParam *param) {
 #else
     P(setup_location[i], QUDA_INVALID_FIELD_LOCATION);
 #endif
+
+#ifdef INIT_PARAM
+    P(mg_vec_partfile[i], QUDA_BOOLEAN_FALSE);
+#else
+    P(mg_vec_partfile[i], QUDA_BOOLEAN_INVALID);
+#endif
   }
 
 #ifdef INIT_PARAM
diff --git a/lib/multigrid.cpp b/lib/multigrid.cpp
index 35f6b98b16..450ca2ee75 100644
--- a/lib/multigrid.cpp
+++ b/lib/multigrid.cpp
@@ -1382,7 +1382,7 @@ namespace quda
       vec_outfile += std::to_string(param.level);
       vec_outfile += "_nvec_";
       vec_outfile += std::to_string(param.mg_global.n_vec[param.level]);
-      VectorIO io(vec_outfile);
+      VectorIO io(vec_outfile, false, param.mg_global.mg_vec_partfile[param.level] == QUDA_BOOLEAN_TRUE);
       vector_ref<const ColorSpinorField> B_ref;
       for (auto i = 0u; i < B.size(); i++) B_ref.push_back(*B[i]);
       io.save(std::move(B_ref));
diff --git a/lib/qio_field.cpp b/lib/qio_field.cpp
index e8962e6dcd..16a523a9a0 100644
--- a/lib/qio_field.cpp
+++ b/lib/qio_field.cpp
@@ -414,7 +414,7 @@ void write_gauge_field(const char *filename, void *gauge[], QudaPrecision precis
 }
 
 void write_spinor_field(const char *filename, const void *V[], QudaPrecision precision, const int *X, QudaSiteSubset subset,
-                        QudaParity parity, int nColor, int nSpin, int Nvec, int, char *[])
+                        QudaParity parity, int nColor, int nSpin, int Nvec, int, char *[], bool partfile)
 {
   quda_this_node = QMP_get_node_number();
 
@@ -426,7 +426,7 @@ void write_spinor_field(const char *filename, const void *V[], QudaPrecision pre
   sprintf(type, "QUDA_%sNs%dNc%d_ColorSpinorField", (file_prec == QUDA_DOUBLE_PRECISION) ? "D" : "F", nSpin, nColor);
 
   /* Open the test file for reading */
-  QIO_Writer *outfile = open_test_output(filename, QIO_SINGLEFILE, QIO_PARALLEL, QIO_ILDGNO);
+  QIO_Writer *outfile = open_test_output(filename, (partfile ? QIO_PARTFILE : QIO_SINGLEFILE), QIO_PARALLEL, QIO_ILDGNO);
   if (outfile == NULL) { errorQuda("Open file failed\n"); }
 
   /* Read the spinor field record */
diff --git a/lib/vector_io.cpp b/lib/vector_io.cpp
index 736cc4d84d..bddb11d4cd 100644
--- a/lib/vector_io.cpp
+++ b/lib/vector_io.cpp
@@ -6,9 +6,10 @@
 namespace quda
 {
 
-  VectorIO::VectorIO(const std::string &filename, bool parity_inflate) :
+  VectorIO::VectorIO(const std::string &filename, bool parity_inflate, bool partfile) :
     filename(filename),
-    parity_inflate(parity_inflate)
+    parity_inflate(parity_inflate),
+    partfile(partfile)
   {
     if (strcmp(filename.c_str(), "") == 0)
       errorQuda("No eigenspace input file defined (filename = %s, parity_inflate = %d", filename.c_str(), parity_inflate);
@@ -114,7 +115,12 @@ namespace quda
       }
     }
 
-    if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Start saving %d vectors to %s\n", Nvec, filename.c_str());
+    if (getVerbosity() >= QUDA_SUMMARIZE) {
+      if (partfile)
+        printfQuda("Start saving %d vectors to %s in PARTFILE format\n", Nvec, filename.c_str());
+      else
+        printfQuda("Start saving %d vectors to %s in SINGLEFILE format\n", Nvec, filename.c_str());
+    }
 
     if (v0.Ndim() == 4 || v0.Ndim() == 5) {
       // since QIO routines presently assume we have 4-d fields, we need to convert to array of 4-d fields
@@ -129,7 +135,7 @@ namespace quda
       }
 
       write_spinor_field(filename.c_str(), V.data(), save_prec, v0.X(), v0.SiteSubset(),
-                         spinor_parity, v0.Ncolor(), v0.Nspin(), Nvec * Ls, 0, nullptr);
+                         spinor_parity, v0.Ncolor(), v0.Nspin(), Nvec * Ls, 0, nullptr, partfile);
     } else {
       errorQuda("Unexpected field dimension %d", v0.Ndim());
     }
diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp
index cda4d80aff..38dea372d0 100644
--- a/tests/utils/command_line_params.cpp
+++ b/tests/utils/command_line_params.cpp
@@ -66,6 +66,7 @@ int test_type = 0;
 quda::mgarray<int> nvec = {};
 quda::mgarray<std::string> mg_vec_infile;
 quda::mgarray<std::string> mg_vec_outfile;
+quda::mgarray<bool> mg_vec_partfile = {};
 QudaInverterType inv_type;
 bool inv_deflate = false;
 bool inv_multigrid = false;
@@ -229,6 +230,7 @@ std::string eig_vec_infile;
 std::string eig_vec_outfile;
 bool eig_io_parity_inflate = false;
 QudaPrecision eig_save_prec = QUDA_DOUBLE_PRECISION;
+bool eig_partfile = false;
 
 // Parameters for the MG eigensolver.
 // The coarsest grid params are for deflation,
@@ -708,13 +710,13 @@ void add_eigen_option_group(std::shared_ptr<QUDAApp> quda_app)
     "--eig-require-convergence",
     eig_require_convergence, "If true, the solver will error out if convergence is not attained. If false, a warning will be given (default true)");
   opgroup->add_option("--eig-save-vec", eig_vec_outfile, "Save eigenvectors to <file> (requires QIO)");
-  opgroup->add_option("--eig-load-vec", eig_vec_infile, "Load eigenvectors to <file> (requires QIO)")
-    ->check(CLI::ExistingFile);
+  opgroup->add_option("--eig-load-vec", eig_vec_infile, "Load eigenvectors to <file> (requires QIO)");
   opgroup
     ->add_option("--eig-save-prec", eig_save_prec,
                  "If saving eigenvectors, use this precision to save. No-op if eig-save-prec is greater than or equal "
                  "to precision of eigensolver (default = double)")
     ->transform(prec_transform);
+  opgroup->add_option("--eig-save-partfile", eig_partfile, "If saving eigenvectors, save in partfile format instead of singlefile (default false)");
 
   opgroup->add_option(
     "--eig-io-parity-inflate", eig_io_parity_inflate,
@@ -884,6 +886,7 @@ void add_multigrid_option_group(std::shared_ptr<QUDAApp> quda_app)
                          "Load the vectors <file> for the multigrid_test (requires QIO)");
   quda_app->add_mgoption(opgroup, "--mg-save-vec", mg_vec_outfile, CLI::Validator(),
                          "Save the generated null-space vectors <file> from the multigrid_test (requires QIO)");
+  quda_app->add_mgoption(opgroup, "--mg-save-partfile", mg_vec_partfile, CLI::Validator(), "Whether to save near-null vectors as partfile instead of singlefile (default false; singlefile)");
 
   quda_app
     ->add_mgoption("--mg-eig-save-prec", mg_eig_save_prec, CLI::Validator(),
diff --git a/tests/utils/command_line_params.h b/tests/utils/command_line_params.h
index 92dddf5c57..352134630e 100644
--- a/tests/utils/command_line_params.h
+++ b/tests/utils/command_line_params.h
@@ -202,6 +202,7 @@ extern int test_type;
 extern quda::mgarray<int> nvec;
 extern quda::mgarray<std::string> mg_vec_infile;
 extern quda::mgarray<std::string> mg_vec_outfile;
+extern quda::mgarray<bool> mg_vec_partfile;
 extern QudaInverterType inv_type;
 extern bool inv_deflate;
 extern bool inv_multigrid;
@@ -355,6 +356,7 @@ extern std::string eig_vec_infile;
 extern std::string eig_vec_outfile;
 extern bool eig_io_parity_inflate;
 extern QudaPrecision eig_save_prec;
+extern bool eig_partfile;
 
 // Parameters for the MG eigensolver.
 // The coarsest grid params are for deflation,
diff --git a/tests/utils/set_params.cpp b/tests/utils/set_params.cpp
index d036f484b1..cfd489ffc7 100644
--- a/tests/utils/set_params.cpp
+++ b/tests/utils/set_params.cpp
@@ -343,6 +343,7 @@ void setEigParam(QudaEigParam &eig_param)
   safe_strcpy(eig_param.vec_outfile, eig_vec_outfile, 256, "eig_vec_outfile");
   eig_param.save_prec = eig_save_prec;
   eig_param.io_parity_inflate = eig_io_parity_inflate ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
+  eig_param.partfile = eig_partfile ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
 
   eig_param.struct_size = sizeof(eig_param);
 }
@@ -609,6 +610,7 @@ void setMultigridParam(QudaMultigridParam &mg_param)
     safe_strcpy(mg_param.vec_outfile[i], mg_vec_outfile[i], 256, "mg_vec_outfile[" + std::to_string(i) + "]");
     if (mg_vec_infile[i].size() > 0) mg_param.vec_load[i] = QUDA_BOOLEAN_TRUE;
     if (mg_vec_outfile[i].size() > 0) mg_param.vec_store[i] = QUDA_BOOLEAN_TRUE;
+    mg_param.mg_vec_partfile[i] = mg_vec_partfile[i] ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
   }
 
   mg_param.coarse_guess = mg_eig_coarse_guess ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
@@ -800,6 +802,7 @@ void setMultigridEigParam(QudaEigParam &mg_eig_param, int level)
   strcpy(mg_eig_param.vec_outfile, "");
   mg_eig_param.save_prec = mg_eig_save_prec[level];
   mg_eig_param.io_parity_inflate = QUDA_BOOLEAN_FALSE;
+  mg_eig_param.partfile = QUDA_BOOLEAN_FALSE; // ignored
 
   mg_eig_param.struct_size = sizeof(mg_eig_param);
 }
@@ -1205,6 +1208,7 @@ void setStaggeredMultigridParam(QudaMultigridParam &mg_param)
     safe_strcpy(mg_param.vec_outfile[i], mg_vec_outfile[i], 256, "mg_vec_outfile[" + std::to_string(i) + "]");
     if (mg_vec_infile[i].size() > 0) mg_param.vec_load[i] = QUDA_BOOLEAN_TRUE;
     if (mg_vec_outfile[i].size() > 0) mg_param.vec_store[i] = QUDA_BOOLEAN_TRUE;
+    mg_param.mg_vec_partfile[i] = mg_vec_partfile[i] ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
   }
 
   mg_param.coarse_guess = mg_eig_coarse_guess ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;

From 37d9a85601d888592582e09a497c01bf54e5151e Mon Sep 17 00:00:00 2001
From: Evan Weinberg <eweinberg@nvidia.com>
Date: Thu, 27 Jul 2023 17:13:05 -0700
Subject: [PATCH 2/8] Fixed a few bugs in partfile saving

---
 lib/eigensolve_quda.cpp | 2 +-
 lib/multigrid.cpp       | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/eigensolve_quda.cpp b/lib/eigensolve_quda.cpp
index afa869745b..00c888dd88 100644
--- a/lib/eigensolve_quda.cpp
+++ b/lib/eigensolve_quda.cpp
@@ -255,7 +255,7 @@ namespace quda
       for (auto &k : kSpace) k.setSuggestedParity(mat_parity);
 
       // save the vectors
-      VectorIO io(eig_param->vec_outfile, eig_param->io_parity_inflate == QUDA_BOOLEAN_TRUE);
+      VectorIO io(eig_param->vec_outfile, eig_param->io_parity_inflate == QUDA_BOOLEAN_TRUE, eig_param->partfile == QUDA_BOOLEAN_TRUE);
       io.save(kSpace, save_prec, n_eig);
     }
 
diff --git a/lib/multigrid.cpp b/lib/multigrid.cpp
index 450ca2ee75..06e48bf8e1 100644
--- a/lib/multigrid.cpp
+++ b/lib/multigrid.cpp
@@ -672,6 +672,7 @@ namespace quda
           vec_outfile += "_defl_";
           vec_outfile += std::to_string(param.mg_global.n_vec[param.level + 1]);
           strcpy(param_coarse_solver->eig_param.vec_outfile, vec_outfile.c_str());
+          param_coarse_solver->eig_param.partfile = param.mg_global.mg_vec_partfile[param.level + 1];
         }
       }
 

From 5c0867a8f88bd12709befefe35b97abf84f588ae Mon Sep 17 00:00:00 2001
From: Evan Weinberg <eweinberg@nvidia.com>
Date: Thu, 10 Aug 2023 13:26:02 -0700
Subject: [PATCH 3/8] Added verbose timers directly around read/write vector
 routines

---
 lib/vector_io.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/lib/vector_io.cpp b/lib/vector_io.cpp
index bddb11d4cd..97b95e852b 100644
--- a/lib/vector_io.cpp
+++ b/lib/vector_io.cpp
@@ -2,6 +2,7 @@
 #include <qio_field.h>
 #include <vector_io.h>
 #include <blas_quda.h>
+#include <timer.h>
 
 namespace quda
 {
@@ -56,8 +57,15 @@ namespace quda
         for (int j = 0; j < Ls; j++) { V[i * Ls + j] = static_cast<char *>(v.V()) + j * stride; }
       }
 
+      // time loading
+      quda::host_timer_t host_timer;
+      host_timer.start(); // start the timer
+
       read_spinor_field(filename.c_str(), V.data(), v0.Precision(), v0.X(), v0.SiteSubset(),
                         spinor_parity, v0.Ncolor(), v0.Nspin(), Nvec * Ls, 0, nullptr);
+
+      host_timer.stop(); // stop the timer
+      logQuda(QUDA_SUMMARIZE, "Time spent loading vectors from %s = %g secs\n", filename.c_str(), host_timer.last());
     } else {
       errorQuda("Unexpected field dimension %d", v0.Ndim());
     }
@@ -134,8 +142,15 @@ namespace quda
         for (int j = 0; j < Ls; j++) { V[i * Ls + j] = static_cast<const char *>(v.V()) + j * stride; }
       }
 
+      // time saving
+      quda::host_timer_t host_timer;
+      host_timer.start(); // start the timer
+
       write_spinor_field(filename.c_str(), V.data(), save_prec, v0.X(), v0.SiteSubset(),
                          spinor_parity, v0.Ncolor(), v0.Nspin(), Nvec * Ls, 0, nullptr, partfile);
+
+      host_timer.stop(); // stop the timer
+      logQuda(QUDA_SUMMARIZE, "Time spent saving vectors to %s = %g secs\n", filename.c_str(), host_timer.last());
     } else {
       errorQuda("Unexpected field dimension %d", v0.Ndim());
     }

From 478e7ab310200c10897a1fae4e823a6f8f682dd1 Mon Sep 17 00:00:00 2001
From: Evan Weinberg <eweinberg@nvidia.com>
Date: Thu, 10 Aug 2023 13:41:02 -0700
Subject: [PATCH 4/8] Threaded partfile support through the MILC MG interface

---
 lib/milc_interface.cpp | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/lib/milc_interface.cpp b/lib/milc_interface.cpp
index 5c8b2fe379..70428cd013 100644
--- a/lib/milc_interface.cpp
+++ b/lib/milc_interface.cpp
@@ -1464,6 +1464,7 @@ struct mgInputStruct {
   int setup_ca_basis_size[QUDA_MAX_MG_LEVEL];    // ignored on first and last level
   char mg_vec_infile[QUDA_MAX_MG_LEVEL][256];    // ignored on first and last level
   char mg_vec_outfile[QUDA_MAX_MG_LEVEL][256];   // ignored on first and last level
+  bool mg_vec_partfile[QUDA_MAX_MG_LEVEL];       // ignored on last level (deflate_vec_partfile is used there)
   int geo_block_size[QUDA_MAX_MG_LEVEL][4]; // ignored on first and last level (values on first level are prescribed)
 
   /**
@@ -1496,6 +1497,7 @@ struct mgInputStruct {
   bool deflate_use_poly_acc;
   double deflate_a_min; // ignored if no polynomial acceleration
   int deflate_poly_deg; // ignored if no polynomial acceleration
+  bool deflate_vec_partfile;
 
   void setArrayDefaults()
   {
@@ -1511,6 +1513,7 @@ struct mgInputStruct {
       setup_ca_basis_size[i] = 4;
       mg_vec_infile[i][0] = 0;
       mg_vec_outfile[i][0] = 0;
+      mg_vec_partfile[i] = false;
       for (int d = 0; d < 4; d++) { geo_block_size[i][d] = 2; }
 
       setup_use_mma[i] = true;
@@ -1543,7 +1546,8 @@ struct mgInputStruct {
     deflate_tol(1e-5),
     deflate_use_poly_acc(false),
     deflate_a_min(1e-2),
-    deflate_poly_deg(50)
+    deflate_poly_deg(50),
+    deflate_vec_partfile(false)
   {
     /* initialize internal arrays */
     setArrayDefaults();
@@ -1840,6 +1844,12 @@ struct mgInputStruct {
         strcpy(mg_vec_outfile[atoi(input_line[1].c_str())], input_line[2].c_str());
       }
 
+    } else if (strcmp(input_line[0].c_str(), "mg_vec_partfile") == 0) {
+      if (input_line.size() < 3) {
+        error_code = 1;
+      } else {
+        mg_vec_partfile[atoi(input_line[1].c_str())] = input_line[2][0] == 't' ? true : false;
+      }
     } else /* Begin Solvers */
       if (strcmp(input_line[0].c_str(), "coarse_solve_type") == 0) {
       if (input_line.size() < 3) {
@@ -1947,6 +1957,12 @@ struct mgInputStruct {
         deflate_poly_deg = atoi(input_line[1].c_str());
       }
 
+    } else if (strcmp(input_line[0].c_str(), "deflate_vec_partfile") == 0) {
+      if (input_line.size() < 2) {
+        error_code = 1;
+      } else {
+        deflate_vec_partfile = input_line[1][0] == 't' ? true : false;
+      }
     } else {
       printf("Invalid option %s\n", input_line[0].c_str());
       return false;
@@ -2011,6 +2027,7 @@ void milcSetMultigridEigParam(QudaEigParam &mg_eig_param, mgInputStruct &input_s
   strcpy(mg_eig_param.vec_outfile, "");
   mg_eig_param.io_parity_inflate = QUDA_BOOLEAN_FALSE; // do not inflate coarse vectors
   mg_eig_param.save_prec = QUDA_SINGLE_PRECISION;      // cannot save in fixed point
+  mg_eig_param.partfile = QUDA_BOOLEAN_FALSE;          // ignored, multigrid parameters take precedence
 
   strcpy(mg_eig_param.QUDA_logfile, "" /*eig_QUDA_logfile*/);
 }
@@ -2357,6 +2374,10 @@ void milcSetMultigridParam(milcMultigridPack *mg_pack, QudaPrecision host_precis
     strcpy(mg_param.vec_outfile[i], input_struct.mg_vec_outfile[i]);
     if (strcmp(mg_param.vec_infile[i], "") != 0) mg_param.vec_load[i] = QUDA_BOOLEAN_TRUE;
     if (strcmp(mg_param.vec_outfile[i], "") != 0) mg_param.vec_store[i] = QUDA_BOOLEAN_TRUE;
+    if (i != mg_param.n_level - 1)
+      mg_param.mg_vec_partfile[i] = input_struct.mg_vec_partfile[i] ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
+    else
+      mg_param.mg_vec_partfile[i] = input_struct.deflate_vec_partfile ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
   }
 
   mg_param.coarse_guess = QUDA_BOOLEAN_FALSE; // mg_eig_coarse_guess ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;

From 22a6101c590af00a578653348c1feab0b68dfec0 Mon Sep 17 00:00:00 2001
From: Evan Weinberg <eweinberg@nvidia.com>
Date: Thu, 10 Aug 2023 16:01:03 -0700
Subject: [PATCH 5/8] Addressed a missing eig partfile flag

---
 tests/utils/set_params.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/utils/set_params.cpp b/tests/utils/set_params.cpp
index cfd489ffc7..3801e1bd73 100644
--- a/tests/utils/set_params.cpp
+++ b/tests/utils/set_params.cpp
@@ -1375,6 +1375,7 @@ void setDeflationParam(QudaEigParam &df_param)
   safe_strcpy(df_param.vec_infile, eig_vec_infile, 256, "eig_vec_infile");
   safe_strcpy(df_param.vec_outfile, eig_vec_outfile, 256, "eig_vec_outfile");
   df_param.io_parity_inflate = eig_io_parity_inflate ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
+  df_param.partfile = eig_partfile ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
 }
 
 void setQudaStaggeredInvTestParams()

From fd467c00c1899fe3a4c5e810d11b008d3dae212b Mon Sep 17 00:00:00 2001
From: Evan Weinberg <eweinberg@nvidia.com>
Date: Fri, 11 Aug 2023 08:52:19 -0700
Subject: [PATCH 6/8] Added partfile loading/saving to io_test

---
 tests/io_test.cpp | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/tests/io_test.cpp b/tests/io_test.cpp
index b0a63ccd1a..fcbf16a8b9 100644
--- a/tests/io_test.cpp
+++ b/tests/io_test.cpp
@@ -71,7 +71,7 @@ TEST_P(GaugeIOTest, verify)
   for (int dir = 0; dir < 4; dir++) { host_free(gauge[dir]); }
 }
 
-using cs_test_t = ::testing::tuple<QudaSiteSubset, bool, QudaPrecision, QudaPrecision, int, QudaFieldLocation>;
+using cs_test_t = ::testing::tuple<QudaSiteSubset, bool, QudaPrecision, QudaPrecision, int, bool, QudaFieldLocation>;
 
 class ColorSpinorIOTest : public ::testing::TestWithParam<cs_test_t>
 {
@@ -81,6 +81,7 @@ class ColorSpinorIOTest : public ::testing::TestWithParam<cs_test_t>
   QudaPrecision prec;
   QudaPrecision prec_io;
   int nSpin;
+  bool partfile;
   QudaFieldLocation location;
 
 public:
@@ -90,7 +91,8 @@ class ColorSpinorIOTest : public ::testing::TestWithParam<cs_test_t>
     prec(::testing::get<2>(GetParam())),
     prec_io(::testing::get<3>(GetParam())),
     nSpin(::testing::get<4>(GetParam())),
-    location(::testing::get<5>(GetParam()))
+    partfile(::testing::get<5>(GetParam())),
+    location(::testing::get<6>(GetParam()))
   {
   }
 };
@@ -139,7 +141,7 @@ TEST_P(ColorSpinorIOTest, verify)
 
   auto file = "dummy.cs";
 
-  VectorIO io(file, inflate);
+  VectorIO io(file, inflate, partfile);
 
   io.save({v.begin(), v.end()}, prec_io, n_vector);
   io.load(u);
@@ -153,7 +155,15 @@ TEST_P(ColorSpinorIOTest, verify)
   }
 
   // cleanup after ourselves and delete the dummy lattice
-  if (::quda::comm_rank() == 0 && remove(file) != 0) errorQuda("Error deleting file");
+  if (partfile) {
+    // each rank created its own file, we need to generate the custom filename
+    char volstr[9];
+    sprintf(volstr, ".vol%04d", ::quda::comm_rank());
+    std::string part_filename = std::string(file) + volstr;
+    if (remove(part_filename.c_str()) != 0) errorQuda("Error deleting file");
+  } else {
+    if (::quda::comm_rank() == 0 && remove(file) != 0) errorQuda("Error deleting file");
+  }
 }
 
 int main(int argc, char **argv)
@@ -176,14 +186,15 @@ INSTANTIATE_TEST_SUITE_P(Gauge, GaugeIOTest, Combine(Values(QUDA_DOUBLE_PRECISIO
 INSTANTIATE_TEST_SUITE_P(Full, ColorSpinorIOTest,
                          Combine(Values(QUDA_FULL_SITE_SUBSET), Values(false),
                                  Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, QUDA_HALF_PRECISION),
-                                 Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), Values(1, 2, 4),
+                                 Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), Values(1, 2, 4), Values(false, true),
                                  Values(QUDA_CUDA_FIELD_LOCATION, QUDA_CPU_FIELD_LOCATION)),
                          [](testing::TestParamInfo<cs_test_t> param) {
                            std::string name;
                            name += get_prec_str(::testing::get<2>(param.param)) + std::string("_");
                            name += get_prec_str(::testing::get<3>(param.param)) + std::string("_");
                            name += std::string("spin") + std::to_string(::testing::get<4>(param.param));
-                           name += ::testing::get<5>(param.param) == QUDA_CUDA_FIELD_LOCATION ? "_device" : "_host";
+                           name += ::testing::get<5>(param.param) ? "_partfile" : "_singlefile"; // true => partfile
+                           name += ::testing::get<6>(param.param) == QUDA_CUDA_FIELD_LOCATION ? "_device" : "_host";
                            return name;
                          });
 
@@ -191,7 +202,7 @@ INSTANTIATE_TEST_SUITE_P(Full, ColorSpinorIOTest,
 INSTANTIATE_TEST_SUITE_P(Parity, ColorSpinorIOTest,
                          Combine(Values(QUDA_PARITY_SITE_SUBSET), Values(false, true),
                                  Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, QUDA_HALF_PRECISION),
-                                 Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), Values(1, 2, 4),
+                                 Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), Values(1, 2, 4), Values(false, true),
                                  Values(QUDA_CUDA_FIELD_LOCATION, QUDA_CPU_FIELD_LOCATION)),
                          [](testing::TestParamInfo<cs_test_t> param) {
                            std::string name;
@@ -199,6 +210,7 @@ INSTANTIATE_TEST_SUITE_P(Parity, ColorSpinorIOTest,
                            name += get_prec_str(::testing::get<2>(param.param)) + std::string("_");
                            name += get_prec_str(::testing::get<3>(param.param)) + std::string("_");
                            name += std::string("spin") + std::to_string(::testing::get<4>(param.param));
-                           name += ::testing::get<5>(param.param) == QUDA_CUDA_FIELD_LOCATION ? "_device" : "_host";
+                           name += ::testing::get<5>(param.param) ? "_partfile" : "_singlefile"; // true => partfile
+                           name += ::testing::get<6>(param.param) == QUDA_CUDA_FIELD_LOCATION ? "_device" : "_host";
                            return name;
                          });

From 461d6fe0d21b112e028c8444ff29c8784d5af74c Mon Sep 17 00:00:00 2001
From: Evan Weinberg <eweinberg@nvidia.com>
Date: Fri, 11 Aug 2023 08:59:34 -0700
Subject: [PATCH 7/8] Fixed a 1xGPU corner case in io_test

---
 tests/io_test.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/io_test.cpp b/tests/io_test.cpp
index fcbf16a8b9..34dca66b5e 100644
--- a/tests/io_test.cpp
+++ b/tests/io_test.cpp
@@ -155,8 +155,9 @@ TEST_P(ColorSpinorIOTest, verify)
   }
 
   // cleanup after ourselves and delete the dummy lattice
-  if (partfile) {
+  if (partfile && ::quda::comm_size() > 1) {
     // each rank created its own file, we need to generate the custom filename
+    // an exception is single-rank runs where QIO skips appending the volume string
     char volstr[9];
     sprintf(volstr, ".vol%04d", ::quda::comm_rank());
     std::string part_filename = std::string(file) + volstr;

From c162881e7547a5235609a741abf4bd5fd3afafcf Mon Sep 17 00:00:00 2001
From: Evan Weinberg <eweinberg@nvidia.com>
Date: Fri, 11 Aug 2023 09:03:15 -0700
Subject: [PATCH 8/8] clang-format

---
 include/qio_field.h                 | 9 +++++----
 lib/qio_field.cpp                   | 5 +++--
 lib/vector_io.cpp                   | 8 +++-----
 tests/io_test.cpp                   | 8 ++++----
 tests/utils/command_line_params.cpp | 7 +++++--
 5 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/include/qio_field.h b/include/qio_field.h
index a76cb44f2e..1fd48cd577 100644
--- a/include/qio_field.h
+++ b/include/qio_field.h
@@ -6,8 +6,9 @@ void read_gauge_field(const char *filename, void *gauge[], QudaPrecision prec, c
 void write_gauge_field(const char *filename, void *gauge[], QudaPrecision prec, const int *X, int argc, char *argv[]);
 void read_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, QudaSiteSubset subset,
                        QudaParity parity, int nColor, int nSpin, int Nvec, int argc, char *argv[]);
-void write_spinor_field(const char *filename, const void *V[], QudaPrecision precision, const int *X, QudaSiteSubset subset,
-                        QudaParity parity, int nColor, int nSpin, int Nvec, int argc, char *argv[], bool partfile = false);
+void write_spinor_field(const char *filename, const void *V[], QudaPrecision precision, const int *X,
+                        QudaSiteSubset subset, QudaParity parity, int nColor, int nSpin, int Nvec, int argc,
+                        char *argv[], bool partfile = false);
 #else
 inline void read_gauge_field(const char *, void *[], QudaPrecision, const int *, int, char *[])
 {
@@ -25,8 +26,8 @@ inline void read_spinor_field(const char *, void *[], QudaPrecision, const int *
   printf("QIO support has not been enabled\n");
   exit(-1);
 }
-inline void write_spinor_field(const char *, const void *[], QudaPrecision, const int *, QudaSiteSubset, QudaParity, int, int,
-                               int, int, char *[], bool)
+inline void write_spinor_field(const char *, const void *[], QudaPrecision, const int *, QudaSiteSubset, QudaParity,
+                               int, int, int, int, char *[], bool)
 {
   printf("QIO support has not been enabled\n");
   exit(-1);
diff --git a/lib/qio_field.cpp b/lib/qio_field.cpp
index 16a523a9a0..88be14528d 100644
--- a/lib/qio_field.cpp
+++ b/lib/qio_field.cpp
@@ -413,8 +413,9 @@ void write_gauge_field(const char *filename, void *gauge[], QudaPrecision precis
   printfQuda("%s: Closed file for writing\n", __func__);
 }
 
-void write_spinor_field(const char *filename, const void *V[], QudaPrecision precision, const int *X, QudaSiteSubset subset,
-                        QudaParity parity, int nColor, int nSpin, int Nvec, int, char *[], bool partfile)
+void write_spinor_field(const char *filename, const void *V[], QudaPrecision precision, const int *X,
+                        QudaSiteSubset subset, QudaParity parity, int nColor, int nSpin, int Nvec, int, char *[],
+                        bool partfile)
 {
   quda_this_node = QMP_get_node_number();
 
diff --git a/lib/vector_io.cpp b/lib/vector_io.cpp
index 97b95e852b..b247177876 100644
--- a/lib/vector_io.cpp
+++ b/lib/vector_io.cpp
@@ -8,9 +8,7 @@ namespace quda
 {
 
   VectorIO::VectorIO(const std::string &filename, bool parity_inflate, bool partfile) :
-    filename(filename),
-    parity_inflate(parity_inflate),
-    partfile(partfile)
+    filename(filename), parity_inflate(parity_inflate), partfile(partfile)
   {
     if (strcmp(filename.c_str(), "") == 0)
       errorQuda("No eigenspace input file defined (filename = %s, parity_inflate = %d", filename.c_str(), parity_inflate);
@@ -146,8 +144,8 @@ namespace quda
       quda::host_timer_t host_timer;
       host_timer.start(); // start the timer
 
-      write_spinor_field(filename.c_str(), V.data(), save_prec, v0.X(), v0.SiteSubset(),
-                         spinor_parity, v0.Ncolor(), v0.Nspin(), Nvec * Ls, 0, nullptr, partfile);
+      write_spinor_field(filename.c_str(), V.data(), save_prec, v0.X(), v0.SiteSubset(), spinor_parity, v0.Ncolor(),
+                         v0.Nspin(), Nvec * Ls, 0, nullptr, partfile);
 
       host_timer.stop(); // stop the timer
       logQuda(QUDA_SUMMARIZE, "Time spent saving vectors to %s = %g secs\n", filename.c_str(), host_timer.last());
diff --git a/tests/io_test.cpp b/tests/io_test.cpp
index 34dca66b5e..af708baf3b 100644
--- a/tests/io_test.cpp
+++ b/tests/io_test.cpp
@@ -187,8 +187,8 @@ INSTANTIATE_TEST_SUITE_P(Gauge, GaugeIOTest, Combine(Values(QUDA_DOUBLE_PRECISIO
 INSTANTIATE_TEST_SUITE_P(Full, ColorSpinorIOTest,
                          Combine(Values(QUDA_FULL_SITE_SUBSET), Values(false),
                                  Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, QUDA_HALF_PRECISION),
-                                 Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), Values(1, 2, 4), Values(false, true),
-                                 Values(QUDA_CUDA_FIELD_LOCATION, QUDA_CPU_FIELD_LOCATION)),
+                                 Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), Values(1, 2, 4),
+                                 Values(false, true), Values(QUDA_CUDA_FIELD_LOCATION, QUDA_CPU_FIELD_LOCATION)),
                          [](testing::TestParamInfo<cs_test_t> param) {
                            std::string name;
                            name += get_prec_str(::testing::get<2>(param.param)) + std::string("_");
@@ -203,8 +203,8 @@ INSTANTIATE_TEST_SUITE_P(Full, ColorSpinorIOTest,
 INSTANTIATE_TEST_SUITE_P(Parity, ColorSpinorIOTest,
                          Combine(Values(QUDA_PARITY_SITE_SUBSET), Values(false, true),
                                  Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, QUDA_HALF_PRECISION),
-                                 Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), Values(1, 2, 4), Values(false, true),
-                                 Values(QUDA_CUDA_FIELD_LOCATION, QUDA_CPU_FIELD_LOCATION)),
+                                 Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), Values(1, 2, 4),
+                                 Values(false, true), Values(QUDA_CUDA_FIELD_LOCATION, QUDA_CPU_FIELD_LOCATION)),
                          [](testing::TestParamInfo<cs_test_t> param) {
                            std::string name;
                            if (::testing::get<1>(param.param)) name += std::string("inflate_");
diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp
index 38dea372d0..19719f5f86 100644
--- a/tests/utils/command_line_params.cpp
+++ b/tests/utils/command_line_params.cpp
@@ -716,7 +716,8 @@ void add_eigen_option_group(std::shared_ptr<QUDAApp> quda_app)
                  "If saving eigenvectors, use this precision to save. No-op if eig-save-prec is greater than or equal "
                  "to precision of eigensolver (default = double)")
     ->transform(prec_transform);
-  opgroup->add_option("--eig-save-partfile", eig_partfile, "If saving eigenvectors, save in partfile format instead of singlefile (default false)");
+  opgroup->add_option("--eig-save-partfile", eig_partfile,
+                      "If saving eigenvectors, save in partfile format instead of singlefile (default false)");
 
   opgroup->add_option(
     "--eig-io-parity-inflate", eig_io_parity_inflate,
@@ -886,7 +887,9 @@ void add_multigrid_option_group(std::shared_ptr<QUDAApp> quda_app)
                          "Load the vectors <file> for the multigrid_test (requires QIO)");
   quda_app->add_mgoption(opgroup, "--mg-save-vec", mg_vec_outfile, CLI::Validator(),
                          "Save the generated null-space vectors <file> from the multigrid_test (requires QIO)");
-  quda_app->add_mgoption(opgroup, "--mg-save-partfile", mg_vec_partfile, CLI::Validator(), "Whether to save near-null vectors as partfile instead of singlefile (default false; singlefile)");
+  quda_app->add_mgoption(
+    opgroup, "--mg-save-partfile", mg_vec_partfile, CLI::Validator(),
+    "Whether to save near-null vectors as partfile instead of singlefile (default false; singlefile)");
 
   quda_app
     ->add_mgoption("--mg-eig-save-prec", mg_eig_save_prec, CLI::Validator(),