Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:lattice/quda into feature/quark-…
Browse files Browse the repository at this point in the history
…smearing.milc-interface
  • Loading branch information
maddyscientist committed Aug 29, 2023
2 parents 0c279a7 + 1d6af2d commit 073fd0f
Show file tree
Hide file tree
Showing 162 changed files with 9,250 additions and 4,143 deletions.
64 changes: 64 additions & 0 deletions .github/workflows/cuda_githubactions_build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# CI workflow: configures, builds and installs the project with CUDA 12.1 on a
# GitHub-hosted Ubuntu runner, once per host compiler in the matrix.
name: cuda_ghactions_build

# Triggered by pull requests; the branch filter lists "develop".
on:
pull_request:
branches: [ "develop" ]

# Every `run` step is executed with bash.
defaults:
run:
shell: bash

env:
# Forwarded to CMake as CMAKE_BUILD_TYPE in the configure step.
BUILD_TYPE: STRICT
# NOTE(review): presumably tells ccache to identify the compiler by its
# content rather than by timestamp - confirm against the ccache manual.
CCACHE_COMPILERCHECK: content

jobs:
build:
strategy:
matrix:
# Host C++ compilers; each value is passed as CMAKE_CXX_COMPILER below.
compiler: [g++-12, clang++-14]
runs-on: ubuntu-latest

steps:
# Installs NVIDIA's CUDA apt keyring package, refreshes the package index and
# installs Ninja, CMake, the OpenMPI development package and gfortran.
- name: Install software
run: |
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb
sudo dpkg -i cuda-keyring_1.0-1_all.deb
sudo apt-get update -y
sudo apt-get install -y --no-install-recommends ninja-build cmake libopenmpi-dev gfortran
- uses: awalsh128/cache-apt-pkgs-action@latest
# CUDA 12.1 compiler, development libraries and NVML development package,
# installed through the apt package caching action.
with:
packages: cuda-compiler-12-1 cuda-libraries-dev-12-1 cuda-nvml-dev-12-1
execute_install_scripts: true

- uses: actions/checkout@v3

# Compiler cache; the key combines the job id with the matrix compiler so
# each compiler keeps its own cache, capped at 2000M.
- name: Ccache for gh actions
uses: hendrikmuhs/ccache-action@v1.2.9
with:
key: ${{ github.job }}-${{ matrix.compiler }}
max-size: 2000M

# Configures into <workspace>/build with the Ninja generator, using nvcc from
# /usr/local/cuda-12.1 and ccache as the launcher for both C++ and CUDA.
- name: Configure CMake
run: >
cmake
-DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.1/bin/nvcc
-DCMAKE_CXX_COMPILER=${{matrix.compiler}}
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-DQUDA_GPU_ARCH=sm_80 -DQUDA_GPU_ARCH_SUFFIX=virtual -DQUDA_JITIFY=ON
-DQUDA_MULTIGRID=ON
-DQUDA_MULTIGRID_NVEC_LIST=24
-DQUDA_MDW_FUSED_LS_LIST=4
-DQUDA_MPI=ON -DMPI_CXX_SKIP_MPICXX=ON
-DQUDA_PRECISION=10 -DQUDA_FAST_COMPILE_DSLASH=ON -DQUDA_FAST_COMPILE_REDUCE=ON
-GNinja
-B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
- name: Build
run: cmake --build ${{github.workspace}}/build

- name: Install
run: cmake --install ${{github.workspace}}/build


55 changes: 55 additions & 0 deletions .github/workflows/rocm-build-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# CI workflow: configures, builds and installs QUDA for the HIP target with
# ROCm 5.5.0 on a self-hosted AMD runner, for every pull request.
name: rocm-build-ci
run-name: ${{ github.actor }} is kicking off a ROCm build
on: pull_request
jobs:
  rocm-build:
    runs-on: [self-hosted, amd]
    steps:
      - uses: actions/checkout@v3
      - run: |
          export ROCM_PATH=/opt/rocm-5.5.0
          export PATH=${ROCM_PATH}/bin:${ROCM_PATH}/llvm/bin:${PATH}
          SRCROOT=$(pwd)
          # Scratch build/install trees, created inside the current directory.
          BUILDROOT=$(mktemp -d build-XXXXXXXX)
          INSTALLROOT=$(mktemp -d install-XXXXXXXX)
          # The step runs with `bash -e`, so a failing cmake call aborts the
          # script immediately. Clean up from an EXIT trap so the scratch trees
          # are removed on failure as well as on success.
          trap 'rm -rf "${BUILDROOT}" "${INSTALLROOT}"' EXIT
          QUDA_GPU_ARCH=gfx90a
          cmake "${SRCROOT}" \
            -B "${BUILDROOT}" \
            -DQUDA_TARGET_TYPE="HIP" \
            -DQUDA_GPU_ARCH=${QUDA_GPU_ARCH} \
            -DROCM_PATH="${ROCM_PATH}" \
            -DQUDA_DIRAC_CLOVER=ON \
            -DQUDA_DIRAC_CLOVER_HASENBUSCH=OFF \
            -DQUDA_DIRAC_DOMAIN_WALL=OFF \
            -DQUDA_DIRAC_NDEG_TWISTED_MASS=OFF \
            -DQUDA_DIRAC_STAGGERED=ON \
            -DQUDA_DIRAC_TWISTED_MASS=OFF \
            -DQUDA_DIRAC_TWISTED_CLOVER=OFF \
            -DQUDA_DIRAC_WILSON=ON \
            -DQUDA_CLOVER_DYNAMIC=ON \
            -DQUDA_QDPJIT=OFF \
            -DQUDA_INTERFACE_QDPJIT=OFF \
            -DQUDA_INTERFACE_MILC=ON \
            -DQUDA_INTERFACE_CPS=OFF \
            -DQUDA_INTERFACE_QDP=ON \
            -DQUDA_INTERFACE_TIFR=OFF \
            -DQUDA_QMP=ON \
            -DQUDA_DOWNLOAD_USQCD=ON \
            -DQUDA_OPENMP=OFF \
            -DQUDA_MULTIGRID=ON \
            -DQUDA_DOWNLOAD_EIGEN=ON \
            -DQUDA_PRECISION=14 \
            -DCMAKE_INSTALL_PREFIX="${INSTALLROOT}" \
            -DCMAKE_BUILD_TYPE="DEVEL" \
            -DCMAKE_CXX_COMPILER="${ROCM_PATH}/llvm/bin/clang++" \
            -DCMAKE_C_COMPILER="${ROCM_PATH}/llvm/bin/clang" \
            -DCMAKE_HIP_COMPILER="${ROCM_PATH}/llvm/bin/clang++" \
            -DBUILD_SHARED_LIBS=ON \
            -DQUDA_BUILD_SHAREDLIB=ON \
            -DQUDA_BUILD_ALL_TESTS=ON \
            -DQUDA_CTEST_DISABLE_BENCHMARKS=ON \
            -DCMAKE_C_STANDARD=99
          cmake --build "${BUILDROOT}" -j 16
          cmake --install "${BUILDROOT}"
60 changes: 30 additions & 30 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ set_property(CACHE QUDA_ORDER_SP_MG PROPERTY STRINGS 2 4)
set_property(CACHE QUDA_ORDER_FP_MG PROPERTY STRINGS 2 4 8)

option(QUDA_BUILD_NATIVE_LAPACK "build the native blas/lapack library according to QUDA_TARGET" ON)
option(QUDA_BUILD_NATIVE_FFT "build the native FFT library according to QUDA_TARGET" ON)

# QUDA uses tiling routines to compute certain BLAS routines. The maximum allowable
# tile size is governed by this number. The larger the number, the faster the routines
Expand Down Expand Up @@ -338,7 +339,7 @@ set(CMAKE_CXX_FLAGS_DEVEL
"-g -O3"
CACHE STRING "Flags used by the C++ compiler during regular development builds.")
set(CMAKE_CXX_FLAGS_STRICT
"-O3"
"-Os"
CACHE STRING "Flags used by the C++ compiler during strict jenkins builds.")
set(CMAKE_CXX_FLAGS_RELEASE
"-O3 ${CXX_OPT}"
Expand All @@ -360,7 +361,7 @@ set(CMAKE_C_FLAGS_DEVEL
"-g -O3"
CACHE STRING "Flags used by the C compiler during regular development builds.")
set(CMAKE_C_FLAGS_STRICT
"-O3"
"-Os"
CACHE STRING "Flags used by the C compiler during strict jenkins builds.")
set(CMAKE_C_FLAGS_RELEASE
"-O3"
Expand Down Expand Up @@ -418,14 +419,11 @@ if(QUDA_DOWNLOAD_EIGEN)
CACHE STRING "Eigen use for QUDA_DOWNLOAD_EIGEN")
mark_as_advanced(QUDA_EIGEN_VERSION)
CPMAddPackage(
NAME
Eigen
VERSION
${QUDA_EIGEN_VERSION}
URL
https://gitlab.com/libeigen/eigen/-/archive/${QUDA_EIGEN_VERSION}/eigen-${QUDA_EIGEN_VERSION}.tar.bz2
DOWNLOAD_ONLY
YES)
NAME Eigen
VERSION ${QUDA_EIGEN_VERSION}
URL https://gitlab.com/libeigen/eigen/-/archive/${QUDA_EIGEN_VERSION}/eigen-${QUDA_EIGEN_VERSION}.tar.bz2
DOWNLOAD_ONLY YES
SYSTEM YES)
target_include_directories(Eigen SYSTEM INTERFACE ${Eigen_SOURCE_DIR})
install(DIRECTORY ${Eigen_SOURCE_DIR}/Eigen TYPE INCLUDE)
else()
Expand All @@ -451,26 +449,28 @@ endif()
if(QUDA_MPI OR QUDA_QMP)
# if we are using MPI and no MPI_<LANG>_COMPILER was specified on the command line
# check for MPICXX and MPICC environment variables
if((NOT MPI_CXX_COMPILER) AND DEFINED ENV{MPICXX})
set(MPI_CXX_COMPILER $ENV{MPICXX})
set(mpimessage True)
message(STATUS "Found environment variable MPICXX. Using it for MPI detection: $ENV{MPICXX}")
endif()
if((NOT MPI_C_COMPILER) AND DEFINED ENV{MPICC})
message(STATUS "Found environment variable MPICC. Using it for MPI detection: $ENV{MPICC}")
set(MPI_C_COMPILER $ENV{MPICC})
set(mpimessage True)
endif()
# QUDA probably does not use the MPI Fortran compiler at all, but honor MPIFORT anyway
if((NOT MPI_Fortran_COMPILER) AND DEFINED ENV{MPIFORT})
message(STATUS "Found environment variable MPIFORT. Using it for MPI detection: $ENV{MPIFORT}")
set(MPI_Fortran_COMPILER $ENV{MPIFORT})
set(mpimessage True)
endif()
if(mpimessage)
message(
"Found MPIFORT/MPICC/MPICXX environment variables. If this is not what you want please use -DMPI_<LANG>_COMPILER and consult the cmake FindMPI documentation."
)
if( NOT QUDA_SPACK_BUILD )
if((NOT MPI_CXX_COMPILER) AND DEFINED ENV{MPICXX})
set(MPI_CXX_COMPILER $ENV{MPICXX})
set(mpimessage True)
message(STATUS "Found environment variable MPICXX. Using it for MPI detection: $ENV{MPICXX}")
endif()
if((NOT MPI_C_COMPILER) AND DEFINED ENV{MPICC})
message(STATUS "Found environment variable MPICC. Using it for MPI detection: $ENV{MPICC}")
set(MPI_C_COMPILER $ENV{MPICC})
set(mpimessage True)
endif()
# QUDA probably does not use the MPI Fortran compiler at all, but honor MPIFORT anyway
if((NOT MPI_Fortran_COMPILER) AND DEFINED ENV{MPIFORT})
message(STATUS "Found environment variable MPIFORT. Using it for MPI detection: $ENV{MPIFORT}")
set(MPI_Fortran_COMPILER $ENV{MPIFORT})
set(mpimessage True)
endif()
if(mpimessage)
message(
"Found MPIFORT/MPICC/MPICXX environment variables. If this is not what you want please use -DMPI_<LANG>_COMPILER and consult the cmake FindMPI documentation."
)
endif()
endif()
find_package(MPI REQUIRED)
endif()
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,8 @@ Advanced Scientific Computing (PASC21) [arXiv:2104.05615[hep-lat]].
* Evan Weinberg (NVIDIA)
* Frank Winter (Jefferson Lab)
* Yi-Bo Yang (Chinese Academy of Sciences)
* Anthony Grebe (Fermilab)
* Michael Wagman (Fermilab)


Portions of this software were developed at the Innovative Systems Lab,
Expand Down
16 changes: 14 additions & 2 deletions cmake/CPM.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
set(CPM_DOWNLOAD_VERSION 0.36.0)
set(CPM_DOWNLOAD_VERSION 0.38.2)

if(CPM_SOURCE_CACHE)
set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
Expand All @@ -10,12 +10,24 @@ endif()

# Expand relative path. This is important if the provided path contains a tilde (~)
get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)
if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION}))

# Download the CPM.cmake release script matching CPM_DOWNLOAD_VERSION and store
# it at CPM_DOWNLOAD_LOCATION. Takes no arguments; both variables are read from
# the calling scope.
function(download_cpm)
  message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}")
  # Both arguments are quoted so that each expansion is always passed to
  # file(DOWNLOAD) as exactly one argument.
  file(DOWNLOAD
    "https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake"
    "${CPM_DOWNLOAD_LOCATION}"
  )
endfunction()

# Fetch CPM.cmake when the script is missing, or when it exists but is empty.
# Path expansions are quoted so the location is always a single argument.
if(NOT EXISTS "${CPM_DOWNLOAD_LOCATION}")
  download_cpm()
else()
  # resume download if it previously failed
  file(READ "${CPM_DOWNLOAD_LOCATION}" check)
  if("${check}" STREQUAL "")
    download_cpm()
  endif()
  # Do not leak the file contents into the including scope.
  unset(check)
endif()

include(${CPM_DOWNLOAD_LOCATION})
4 changes: 3 additions & 1 deletion include/accelerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ namespace quda
}
}

virtual bool hermitian() { return base_solver->hermitian(); }
virtual bool hermitian() const final { return base_solver->hermitian(); }

virtual QudaInverterType getInverterType() const final { return base_solver->getInverterType(); }

/**
* @brief Train the underlying accelerate parameter.
Expand Down
4 changes: 4 additions & 0 deletions include/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,8 @@ namespace quda
template <typename T, int m, int n> using array_2d = array<array<T, n>, m>;
template <typename T, int m, int n, int k> using array_3d = array<array<array<T, k>, n>, m>;

/**
   @brief Functor that stores a value through a pointer, i.e. performs *out = in.
 */
struct assign_t {
  /**
     @param[out] out Address that receives the value
     @param[in] in Value to store
   */
  template <typename T> __device__ __host__ inline void operator()(T *out, T in)
  {
    out[0] = in;
  }
};

} // namespace quda
16 changes: 16 additions & 0 deletions include/color_spinor_field_order.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,8 +475,10 @@ namespace quda
using store_type = storeFloat; /**< Storage type */
complex<storeFloat> *v; /**< Field memory address this wrapper encompasses */
const int idx; /**< Index into field */
private:
const Float scale; /**< Float to fixed-point scale factor */
const Float scale_inv; /**< Fixed-point to float scale factor */
public:
norm_t *norm; /**< Address of norm field (if it exists) */
const int norm_idx; /**< Index into norm field */
const bool norm_write; /**< Whether we need to write to the norm field */
Expand Down Expand Up @@ -574,6 +576,20 @@ namespace quda
*/
__device__ __host__ inline auto data() const { return &v[idx]; }

/**
 * @brief returns the scale of this wrapper object
 *
 * The static_assert makes compilation fail if this method is instantiated
 * with block_float == true. The return expression still spells out a
 * block_float branch (1 / norm[norm_idx]), but given the assert only the
 * `scale` branch can be taken in code that compiles.
 * @return The value of scale
 */
__device__ __host__ inline auto get_scale() const
{
static_assert(block_float == false, "Orders with block_float == true should not call the get_scale method.");
return block_float ? static_cast<Float>(1) / norm[norm_idx] : scale;
}

/**
 * @brief returns the scale_inv of this wrapper object
 * @return norm[norm_idx] when block_float is true, otherwise scale_inv
 */
__device__ __host__ inline auto get_scale_inv() const { return block_float ? norm[norm_idx] : scale_inv; }

/**
@brief Operator+= with complex number instance as input
@param a Complex number we want to add to this accessor
Expand Down
11 changes: 10 additions & 1 deletion include/comm_quda.h
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,16 @@ namespace quda

void comm_allreduce_int(int &data);
void comm_allreduce_xor(uint64_t &data);
void comm_broadcast(void *data, size_t nbytes);

/**
@brief Broadcast from the root rank
@param[in,out] data The data to be read from on the root rank, and
written to on all other ranks
@param[in] nbytes The size in bytes of data to be broadcast
@param[in] root The process that will be broadcasting
*/
void comm_broadcast(void *data, size_t nbytes, int root = 0);

void comm_barrier(void);
void comm_abort(int status);
void comm_abort_(int status);
Expand Down
Loading

0 comments on commit 073fd0f

Please sign in to comment.