Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:lattice/quda into feature/gaugefield_unity
Browse files Browse the repository at this point in the history
  • Loading branch information
maddyscientist committed Aug 18, 2023
2 parents 50987b1 + b869d61 commit a92296b
Show file tree
Hide file tree
Showing 26 changed files with 323 additions and 155 deletions.
64 changes: 64 additions & 0 deletions .github/workflows/cuda_githubactions_build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# CI workflow: compile QUDA with the CUDA 12.1 toolkit on GitHub-hosted runners.
# Builds (but does not run, since runners have no GPU) with both GCC and Clang
# host compilers, using ccache to keep repeat build times down.
name: cuda_ghactions_build

on:
  pull_request:
    branches: [ "develop" ]

defaults:
  run:
    shell: bash

env:
  # STRICT maps to the project's CMAKE_*_FLAGS_STRICT configuration.
  BUILD_TYPE: STRICT
  # Hash compiler binaries by content so ccache invalidates on toolchain upgrades.
  CCACHE_COMPILERCHECK: content

jobs:
  build:
    strategy:
      matrix:
        compiler: [g++-12, clang++-14]
    runs-on: ubuntu-latest

    steps:
      - name: Install software
        run: |
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb
          sudo dpkg -i cuda-keyring_1.0-1_all.deb
          sudo apt-get update -y
          sudo apt-get install -y --no-install-recommends ninja-build cmake libopenmpi-dev gfortran

      # Cache the (large) CUDA apt packages between runs.
      # NOTE(review): prefer pinning a release tag over @latest for reproducibility.
      - uses: awalsh128/cache-apt-pkgs-action@latest
        with:
          packages: cuda-compiler-12-1 cuda-libraries-dev-12-1 cuda-nvml-dev-12-1
          execute_install_scripts: true

      - uses: actions/checkout@v3

      - name: Ccache for gh actions
        uses: hendrikmuhs/ccache-action@v1.2.9
        with:
          # Separate caches per job and per host compiler.
          key: ${{ github.job }}-${{ matrix.compiler }}
          max-size: 2000M

      - name: Configure CMake
        # JITify + a virtual-only sm_80 arch and the FAST_COMPILE options keep
        # the compile footprint small enough for hosted runners.
        run: >
          cmake
          -DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.1/bin/nvcc
          -DCMAKE_CXX_COMPILER=${{matrix.compiler}}
          -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache
          -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
          -DQUDA_GPU_ARCH=sm_80 -DQUDA_GPU_ARCH_SUFFIX=virtual -DQUDA_JITIFY=ON
          -DQUDA_MULTIGRID=ON
          -DQUDA_MULTIGRID_NVEC_LIST=24
          -DQUDA_MDW_FUSED_LS_LIST=4
          -DQUDA_MPI=ON -DMPI_CXX_SKIP_MPICXX=ON
          -DQUDA_PRECISION=10 -DQUDA_FAST_COMPILE_DSLASH=ON -DQUDA_FAST_COMPILE_REDUCE=ON
          -GNinja
          -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}

      - name: Build
        run: cmake --build ${{github.workspace}}/build

      - name: Install
        run: cmake --install ${{github.workspace}}/build

17 changes: 7 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ set(CMAKE_CXX_FLAGS_DEVEL
"-g -O3"
CACHE STRING "Flags used by the C++ compiler during regular development builds.")
set(CMAKE_CXX_FLAGS_STRICT
"-O3"
"-Os"
CACHE STRING "Flags used by the C++ compiler during strict jenkins builds.")
set(CMAKE_CXX_FLAGS_RELEASE
"-O3 ${CXX_OPT}"
Expand All @@ -361,7 +361,7 @@ set(CMAKE_C_FLAGS_DEVEL
"-g -O3"
CACHE STRING "Flags used by the C compiler during regular development builds.")
set(CMAKE_C_FLAGS_STRICT
"-O3"
"-Os"
CACHE STRING "Flags used by the C compiler during strict jenkins builds.")
set(CMAKE_C_FLAGS_RELEASE
"-O3"
Expand Down Expand Up @@ -419,14 +419,11 @@ if(QUDA_DOWNLOAD_EIGEN)
CACHE STRING "Eigen use for QUDA_DOWNLOAD_EIGEN")
mark_as_advanced(QUDA_EIGEN_VERSION)
CPMAddPackage(
NAME
Eigen
VERSION
${QUDA_EIGEN_VERSION}
URL
https://gitlab.com/libeigen/eigen/-/archive/${QUDA_EIGEN_VERSION}/eigen-${QUDA_EIGEN_VERSION}.tar.bz2
DOWNLOAD_ONLY
YES)
NAME Eigen
VERSION ${QUDA_EIGEN_VERSION}
URL https://gitlab.com/libeigen/eigen/-/archive/${QUDA_EIGEN_VERSION}/eigen-${QUDA_EIGEN_VERSION}.tar.bz2
DOWNLOAD_ONLY YES
SYSTEM YES)
target_include_directories(Eigen SYSTEM INTERFACE ${Eigen_SOURCE_DIR})
install(DIRECTORY ${Eigen_SOURCE_DIR}/Eigen TYPE INCLUDE)
else()
Expand Down
16 changes: 14 additions & 2 deletions cmake/CPM.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
set(CPM_DOWNLOAD_VERSION 0.36.0)
set(CPM_DOWNLOAD_VERSION 0.38.2)

if(CPM_SOURCE_CACHE)
set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
Expand All @@ -10,12 +10,24 @@ endif()

# Expand relative path. This is important if the provided path contains a tilde (~)
get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)
if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION}))

# Fetch the pinned CPM.cmake release into CPM_DOWNLOAD_LOCATION.
# Reads CPM_DOWNLOAD_VERSION and CPM_DOWNLOAD_LOCATION from the enclosing scope.
# NOTE(review): the download host reads "github.com" here — upstream CPM lives on
# github.com; confirm this is intentional (likely a mirror/proxy substitution).
function(download_cpm)
message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}")
file(DOWNLOAD
https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
${CPM_DOWNLOAD_LOCATION}
)
endfunction()

# Download CPM.cmake if it is not cached yet; if a previous download was
# interrupted it can leave a zero-byte file behind, so detect that and
# re-download (a full re-fetch, not a byte-range resume).
if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION}))
download_cpm()
else()
# An empty cached file means the earlier download failed — fetch again.
file(READ ${CPM_DOWNLOAD_LOCATION} check)
if("${check}" STREQUAL "")
download_cpm()
endif()
unset(check)
endif()

include(${CPM_DOWNLOAD_LOCATION})
5 changes: 5 additions & 0 deletions include/multigrid.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ namespace quda {
/** Filename for where to load/store the null space */
char filename[100];

/** Whether to save in partfile format (true) or singlefile (false) */
bool mg_vec_partfile;

/** Whether or not this is a staggered solve or not */
QudaTransferType transfer_type;

Expand Down Expand Up @@ -193,6 +196,7 @@ namespace quda {
smoother_solve_type(param.smoother_solve_type[level]),
location(param.location[level]),
setup_location(param.setup_location[level]),
mg_vec_partfile(param.mg_vec_partfile[level]),
transfer_type(param.transfer_type[level]),
setup_use_mma(param.setup_use_mma[level] == QUDA_BOOLEAN_TRUE),
dslash_use_mma(param.dslash_use_mma[level] == QUDA_BOOLEAN_TRUE)
Expand Down Expand Up @@ -230,6 +234,7 @@ namespace quda {
smoother_solve_type(param.mg_global.smoother_solve_type[level]),
location(param.mg_global.location[level]),
setup_location(param.mg_global.setup_location[level]),
mg_vec_partfile(param.mg_global.mg_vec_partfile[level]),
transfer_type(param.mg_global.transfer_type[level]),
setup_use_mma(param.mg_global.setup_use_mma[level] == QUDA_BOOLEAN_TRUE),
dslash_use_mma(param.mg_global.dslash_use_mma[level] == QUDA_BOOLEAN_TRUE)
Expand Down
9 changes: 5 additions & 4 deletions include/qio_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ void read_gauge_field(const char *filename, void *gauge[], QudaPrecision prec, c
void write_gauge_field(const char *filename, void *gauge[], QudaPrecision prec, const int *X, int argc, char *argv[]);
void read_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, QudaSiteSubset subset,
QudaParity parity, int nColor, int nSpin, int Nvec, int argc, char *argv[]);
void write_spinor_field(const char *filename, const void *V[], QudaPrecision precision, const int *X, QudaSiteSubset subset,
QudaParity parity, int nColor, int nSpin, int Nvec, int argc, char *argv[]);
void write_spinor_field(const char *filename, const void *V[], QudaPrecision precision, const int *X,
QudaSiteSubset subset, QudaParity parity, int nColor, int nSpin, int Nvec, int argc,
char *argv[], bool partfile = false);
#else
inline void read_gauge_field(const char *, void *[], QudaPrecision, const int *, int, char *[])
{
Expand All @@ -25,8 +26,8 @@ inline void read_spinor_field(const char *, void *[], QudaPrecision, const int *
printf("QIO support has not been enabled\n");
exit(-1);
}
inline void write_spinor_field(const char *, const void *[], QudaPrecision, const int *, QudaSiteSubset, QudaParity, int, int,
int, int, char *[])
inline void write_spinor_field(const char *, const void *[], QudaPrecision, const int *, QudaSiteSubset, QudaParity,
int, int, int, int, char *[], bool)
{
printf("QIO support has not been enabled\n");
exit(-1);
Expand Down
6 changes: 6 additions & 0 deletions include/quda.h
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,9 @@ extern "C" {
MILC I/O) */
QudaBoolean io_parity_inflate;

/** Whether to save eigenvectors in QIO singlefile or partfile format */
QudaBoolean partfile;

/** The Gflops rate of the eigensolver setup */
double gflops;

Expand Down Expand Up @@ -779,6 +782,9 @@ extern "C" {
/** Filename prefix for where to save the null-space vectors */
char vec_outfile[QUDA_MAX_MG_LEVEL][256];

/** Whether to store the null-space vectors in singlefile or partfile format */
QudaBoolean mg_vec_partfile[QUDA_MAX_MG_LEVEL];

/** Whether to use and initial guess during coarse grid deflation */
QudaBoolean coarse_guess;

Expand Down
4 changes: 3 additions & 1 deletion include/vector_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,17 @@ namespace quda
{
const std::string filename;
bool parity_inflate;
bool partfile;

public:
/**
Constructor for VectorIO class
@param[in] filename The filename associated with this IO object
@param[in] parity_inflate Whether to inflate single_parity
field to dual parity fields for I/O
@param[in] partfile Whether or not to save in partfiles (ignored on load)
*/
VectorIO(const std::string &filename, bool parity_inflate = false);
VectorIO(const std::string &filename, bool parity_inflate = false, bool partfile = false);

/**
@brief Load vectors from filename
Expand Down
8 changes: 8 additions & 0 deletions lib/check_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ void printQudaEigParam(QudaEigParam *param) {
P(extlib_type, QUDA_EIGEN_EXTLIB);
P(mem_type_ritz, QUDA_MEMORY_DEVICE);
P(ortho_block_size, 0);
P(partfile, QUDA_BOOLEAN_FALSE);
#else
P(use_eigen_qr, QUDA_BOOLEAN_INVALID);
P(use_poly_acc, QUDA_BOOLEAN_INVALID);
Expand Down Expand Up @@ -226,6 +227,7 @@ void printQudaEigParam(QudaEigParam *param) {
P(extlib_type, QUDA_EXTLIB_INVALID);
P(mem_type_ritz, QUDA_MEMORY_INVALID);
P(ortho_block_size, INVALID_INT);
P(partfile, QUDA_BOOLEAN_INVALID);
#endif

// only need to enfore block size checking if doing a block eigen solve
Expand Down Expand Up @@ -931,6 +933,12 @@ void printQudaMultigridParam(QudaMultigridParam *param) {
#else
P(setup_location[i], QUDA_INVALID_FIELD_LOCATION);
#endif

#ifdef INIT_PARAM
P(mg_vec_partfile[i], QUDA_BOOLEAN_FALSE);
#else
P(mg_vec_partfile[i], QUDA_BOOLEAN_INVALID);
#endif
}

#ifdef INIT_PARAM
Expand Down
9 changes: 4 additions & 5 deletions lib/coarse_op.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,6 @@

namespace quda {

// For coarsening un-preconditioned operators we use uni-directional
// coarsening to reduce the set up code. For debugging we can force
// bi-directional coarsening.
static bool bidirectional_debug = false;

enum ComputeType {
COMPUTE_UV,
COMPUTE_LV,
Expand Down Expand Up @@ -983,6 +978,10 @@ namespace quda {
double mu_factor, bool allow_truncation, QudaDiracType dirac, QudaMatPCType matpc, bool need_bidirectional,
const int *fine_to_coarse, const int *coarse_to_fine)
{
// For coarsening un-preconditioned operators we use uni-directional
// coarsening to reduce the set up code. For debugging we can force
// bi-directional coarsening.
static bool bidirectional_debug = false;

// sanity checks
if (matpc == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC || matpc == QUDA_MATPC_ODD_ODD_ASYMMETRIC)
Expand Down
24 changes: 9 additions & 15 deletions lib/dslash_coarse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -660,31 +660,25 @@ namespace quda {
}
};

static bool dslash_init = false;
static std::vector<DslashCoarsePolicy> policies(static_cast<int>(DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED), DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED);
static int first_active_policy=static_cast<int>(DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED);

// string used as a tunekey to ensure we retune if the dslash policy env changes
static char policy_string[TuneKey::aux_n];

static inline void enable_policy(DslashCoarsePolicy p) { policies[static_cast<std::size_t>(p)] = p; }

static inline void disable_policy(DslashCoarsePolicy p)
{
policies[static_cast<std::size_t>(p)] = DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED;
}

template <typename Launch>
class DslashCoarsePolicyTune : public Tunable {

static inline bool dslash_init = false;
static inline int first_active_policy = static_cast<int>(DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED);
// string used as a tunekey to ensure we retune if the dslash policy env changes
static inline char policy_string[TuneKey::aux_n] = {};
static inline std::vector<DslashCoarsePolicy> policies = {static_cast<int>(DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED), DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED};

static void enable_policy(DslashCoarsePolicy p) { policies[static_cast<std::size_t>(p)] = p; }

Launch &dslash;

bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
bool tuneAuxDim() const { return true; } // Do tune the aux dimensions.
static constexpr bool enable_coarse_shmem_overlap = Launch::enable_coarse_shmem_overlap();

public:
inline DslashCoarsePolicyTune(Launch &dslash) : dslash(dslash)
DslashCoarsePolicyTune(Launch &dslash) : dslash(dslash)
{
if (!dslash_init) {

Expand Down
2 changes: 1 addition & 1 deletion lib/eigensolve_quda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ namespace quda
for (auto &k : kSpace) k.setSuggestedParity(mat_parity);

// save the vectors
VectorIO io(eig_param->vec_outfile, eig_param->io_parity_inflate == QUDA_BOOLEAN_TRUE);
VectorIO io(eig_param->vec_outfile, eig_param->io_parity_inflate == QUDA_BOOLEAN_TRUE, eig_param->partfile);
io.save(kSpace, save_prec, n_eig);
}

Expand Down
10 changes: 6 additions & 4 deletions lib/inv_cg_quda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -834,14 +834,16 @@ namespace quda {
blas::copy(x, xSloppy); // no op when these pointers alias
blas::xpy(x, y);
mat(r, y);
blas::copy(rSloppy, r); // no op when these pointers alias
blas::zero(xSloppy);

// Recompute the exact residual and heavy quark residual
r2 = blas::xmyNorm(b, r);
rNorm = sqrt(r2);
hq_res = sqrt(blas::HeavyQuarkResidualNorm(y, r).z);

// Copy and update fields
blas::copy(rSloppy, r); // no op when these pointers alias
blas::zero(xSloppy);

// Check and see if we're "done" with the L2 norm. This could be because
// we were already done with it, we never needed it, or the L2 norm has finally converged.
if (!L2breakdown && convergenceL2(r2, hq_res, stop, param.tol_hq)) L2breakdown = true;
Expand Down Expand Up @@ -941,13 +943,13 @@ namespace quda {
// we "reset" the solve in a different way.
if (heavy_quark_restart) {
// If we're in the HQ residual part of the solve, we just do a hard CG restart.
logQuda(QUDA_SUMMARIZE, "HQ restart == hard CG restart\n");
logQuda(QUDA_DEBUG_VERBOSE, "HQ restart == hard CG restart\n");
blas::copy(p, rSloppy);
heavy_quark_restart = false;
} else {
// If we're still in the L2 norm part of the solve, we explicitly restore
// the orthogonality of the gradient vector, recompute beta, update `p`, and carry on with our lives.
logQuda(QUDA_SUMMARIZE, "Regular restart == explicit gradient vector re-orthogonalization\n");
logQuda(QUDA_DEBUG_VERBOSE, "Regular restart == explicit gradient vector re-orthogonalization\n");
Complex rp = blas::cDotProduct(rSloppy, p) / (r2);
blas::caxpy(-rp, rSloppy, p);

Expand Down
Loading

0 comments on commit a92296b

Please sign in to comment.