Skip to content

Commit

Permalink
amrex::FFT (#4193)
Browse files Browse the repository at this point in the history
Add parallel FFT capability to AMReX. It relies on FFTW3, cuFFT, rocFFT
and oneMKL, for CPU, CUDA, HIP and SYCL builds, respectively.
  • Loading branch information
WeiqunZhang authored Oct 21, 2024
1 parent 62c2a81 commit b00c828
Show file tree
Hide file tree
Showing 43 changed files with 1,960 additions and 10 deletions.
1 change: 1 addition & 0 deletions .github/workflows/apps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ jobs:
-DWarpX_OPENPMD=OFF \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DAMReX_FFT=ON \
-DAMReX_LINEAR_SOLVER_INCFLO=OFF
cmake --build WarpX/build -j 4
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/clang.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ jobs:
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_INSTALL_PREFIX=/tmp/my-amrex \
-DAMReX_FFT=ON \
-DAMReX_EB=ON \
-DAMReX_FORTRAN=ON \
-DAMReX_MPI=OFF \
Expand Down Expand Up @@ -104,6 +105,7 @@ jobs:
cmake .. \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=ON \
Expand Down Expand Up @@ -158,6 +160,7 @@ jobs:
cmake .. \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=OFF \
Expand Down Expand Up @@ -200,7 +203,7 @@ jobs:
export CCACHE_LOGFILE=${{ github.workspace }}/ccache.log.txt
ccache -z
./configure --dim 2 --with-fortran no --comp llvm --with-mpi no
./configure --dim 2 --with-fortran no --comp llvm --with-mpi no --enable-fft yes
make -j4 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names" \
CCACHE=ccache
make install
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ jobs:
cmake -S . -B build \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=OFF \
Expand Down Expand Up @@ -97,6 +98,7 @@ jobs:
cmake -S . -B build \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_MPI=OFF \
-DAMReX_FFT=ON \
-DAMReX_EB=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=OFF \
Expand Down Expand Up @@ -153,6 +155,7 @@ jobs:
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_TEST_TYPE=Small \
-DAMReX_FFT=ON \
-DAMReX_FORTRAN=ON \
-DAMReX_FORTRAN_INTERFACES=ON \
-DAMReX_GPU_BACKEND=CUDA \
Expand Down Expand Up @@ -196,7 +199,7 @@ jobs:
ccache -z
export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
./configure --dim 3 --with-cuda yes --enable-eb yes --enable-xsdk-defaults yes --with-fortran no
./configure --dim 3 --with-cuda yes --enable-eb yes --enable-xsdk-defaults yes --with-fortran no --enable-fft yes
#
# /home/runner/work/amrex/amrex/Src/Base/AMReX_GpuLaunchGlobal.H:16:41: error: unused parameter ‘f0’ [-Werror=unused-parameter]
# 16 | AMREX_GPU_GLOBAL void launch_global (L f0) { f0(); }
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/dependencies/dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ sudo apt-get update

sudo apt-get install -y --no-install-recommends\
build-essential \
libfftw3-dev \
g++ gfortran \
libopenmpi-dev \
openmpi-bin
1 change: 1 addition & 0 deletions .github/workflows/dependencies/dependencies_clang.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@ sudo apt-get update

sudo apt-get install -y --no-install-recommends \
build-essential \
libfftw3-dev \
gfortran \
clang-$1
1 change: 1 addition & 0 deletions .github/workflows/dependencies/dependencies_gcc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ sudo apt-get update

sudo apt-get install -y --no-install-recommends \
build-essential \
libfftw3-dev \
g++-$1 gfortran-$1 \
libopenmpi-dev \
openmpi-bin
1 change: 1 addition & 0 deletions .github/workflows/dependencies/dependencies_hip.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ sudo apt-get install -y --no-install-recommends \
roctracer-dev \
rocprofiler-dev \
rocrand-dev \
rocfft-dev \
rocprim-dev

# hiprand-dev is a new package that does not exist in old versions
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/dependencies/dependencies_nvcc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,6 @@ sudo apt-get install -y \
cuda-minimal-build-$VERSION_DASHED \
cuda-nvml-dev-$VERSION_DASHED \
cuda-nvtx-$VERSION_DASHED \
libcufft-dev-$VERSION_DASHED \
libcurand-dev-$VERSION_DASHED
sudo ln -s cuda-$VERSION_DOTTED /usr/local/cuda
13 changes: 11 additions & 2 deletions .github/workflows/gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ jobs:
mkdir build
cd build
cmake .. \
-DAMReX_FFT=ON \
-DAMReX_FORTRAN=ON \
-DAMReX_PLOTFILE_TOOLS=ON \
-DCMAKE_VERBOSE_MAKEFILE=ON \
Expand Down Expand Up @@ -99,6 +100,7 @@ jobs:
cmake -S . -B build \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=ON \
Expand Down Expand Up @@ -147,6 +149,7 @@ jobs:
cmake -S . -B build \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=ON \
Expand Down Expand Up @@ -196,6 +199,7 @@ jobs:
cmake -S . -B build \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=OFF \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=ON \
Expand Down Expand Up @@ -248,6 +252,7 @@ jobs:
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_ASSERTIONS=ON \
-DAMReX_TESTING=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=OFF \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_BOUND_CHECK=ON \
Expand Down Expand Up @@ -310,6 +315,7 @@ jobs:
-DAMReX_TESTING=ON \
-DAMReX_BOUND_CHECK=ON \
-DAMReX_FPE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=ON \
Expand Down Expand Up @@ -371,6 +377,7 @@ jobs:
-DAMReX_TESTING=ON \
-DAMReX_BOUND_CHECK=ON \
-DAMReX_FPE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=OFF \
Expand Down Expand Up @@ -457,7 +464,7 @@ jobs:
export CCACHE_LOGFILE=${{ github.workspace }}/ccache.log.txt
ccache -z
./configure --dim 3 --enable-eb yes --enable-xsdk-defaults yes
./configure --dim 3 --enable-eb yes --enable-xsdk-defaults yes --enable-fft yes
make -j4 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS=-fno-operator-names \
CCACHE=ccache
make install
Expand Down Expand Up @@ -497,7 +504,8 @@ jobs:
export CCACHE_LOGFILE=${{ github.workspace }}/ccache.log.txt
ccache -z
./configure --dim 3 --enable-eb no --enable-xsdk-defaults no --single-precision yes --single-precision-particles yes --enable-tiny-profile yes
./configure --dim 3 --enable-eb no --enable-xsdk-defaults no --single-precision yes \
--single-precision-particles yes --enable-tiny-profile yes --enable-fft yes
make -j4 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS=-fno-operator-names \
CCACHE=ccache
make install
Expand Down Expand Up @@ -623,6 +631,7 @@ jobs:
-DAMReX_OMP=ON \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FFT=ON \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
make -j 4
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/hip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ jobs:
cmake -S . -B build \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=ON \
Expand Down Expand Up @@ -103,6 +104,7 @@ jobs:
cmake -S . -B build_full_legacywrapper \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=OFF \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=ON \
Expand Down Expand Up @@ -145,7 +147,9 @@ jobs:
export CCACHE_MAXSIZE=100M
ccache -z
./configure --dim 2 --with-hip yes --enable-eb yes --enable-xsdk-defaults yes --with-mpi no --with-omp no --single-precision yes --single-precision-particles yes
./configure --dim 2 --with-hip yes --enable-eb yes --enable-xsdk-defaults yes \
--with-mpi no --with-omp no --single-precision yes \
--single-precision-particles yes --enable-fft yes
make -j4 WARN_ALL=TRUE AMD_ARCH=gfx90a CCACHE=ccache
make install
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/intel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ jobs:
set -e
cmake -S . -B build \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=OFF \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=ON \
Expand Down Expand Up @@ -89,6 +90,7 @@ jobs:
set -e
cmake -S . -B build \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DAMReX_FFT=ON \
-DAMReX_EB=ON \
-DAMReX_ENABLE_TESTS=ON \
-DAMReX_FORTRAN=OFF \
Expand Down
4 changes: 4 additions & 0 deletions Docs/sphinx_documentation/source/BuildingAMReX.rst
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,8 @@ The list of available options is reported in the :ref:`table <tab:cmakevar>` bel
+------------------------------+-------------------------------------------------+-------------------------+-----------------------+
| AMReX_EB | Build Embedded Boundary support | NO | YES, NO |
+------------------------------+-------------------------------------------------+-------------------------+-----------------------+
| AMReX_FFT | Build FFT support | NO | YES, NO |
+------------------------------+-------------------------------------------------+-------------------------+-----------------------+
| AMReX_PARTICLES | Build particle classes | YES | YES, NO |
+------------------------------+-------------------------------------------------+-------------------------+-----------------------+
| AMReX_PARTICLES_PRECISION | Set reals precision in particle classes | Same as AMReX_PRECISION | DOUBLE, SINGLE |
Expand Down Expand Up @@ -697,6 +699,8 @@ A list of AMReX component names and related configure options are shown in the t
+------------------------------+-----------------+
| AMReX_EB | EB |
+------------------------------+-----------------+
| AMReX_FFT | FFT |
+------------------------------+-----------------+
| AMReX_PARTICLES | PARTICLES |
+------------------------------+-----------------+
| AMReX_PARTICLES_PRECISION | PDOUBLE, PSINGLE|
Expand Down
71 changes: 71 additions & 0 deletions Docs/sphinx_documentation/source/FFT.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
.. role:: cpp(code)
:language: c++

.. _sec:FFT:r2c:

FFT::R2C Class
==============

Class template `FFT::R2C` supports discrete Fourier transforms between real
and complex data. The name R2C indicates that the forward transform converts
real data to complex data, while the backward transform converts complex
data to real data. It should be noted that both directions of transformation
are supported, not just from real to complex.

The implementation utilizes cuFFT, rocFFT, oneMKL and FFTW, for CUDA, HIP,
SYCL and CPU builds, respectively. Because the parallel communication is
handled by AMReX, it does not need the parallel version of
FFTW. Furthermore, there is no constraint on the domain decomposition such
as one Box per process. This class performs parallel FFT on AMReX's parallel
data containers (e.g., :cpp:`MultiFab` and
:cpp:`FabArray<BaseFab<GpuComplex<Real>>>`). For local FFT, the users can
use FFTW, cuFFT, rocFFT, or oneMKL directly.

Other than using column-major order, AMReX follows the convention of
FFTW. Applying the forward transform followed by the backward transform
scales the original data by the size of the input array. The layout of the
complex data also follows the FFTW convention, where the complex Hermitian
output array has `(nx/2+1,ny,nz)` elements. Here `nx`, `ny` and `nz` are the
sizes of the real array and the division is rounded down.

Below are examples of using :cpp:`FFT::R2C`.

.. highlight:: c++

::

Geometry geom(...);
MultiFab mfin(...);
MultiFab mfout(...);

auto scaling = 1. / geom.Domain().d_numPts();

FFT::R2C r2c(geom.Domain());
r2c.forwardThenBackward(mfin, mfout,
[=] AMREX_GPU_DEVICE (int, int, int, auto& sp)
{
sp *= scaling;
});

cMultiFab cmf(...);
FFT::R2C<Real,FFT::Direction::forward> r2c_forward(geom.Domain());
r2c_forward(mfin, cmf);

FFT::R2C<Real,FFT::Direction::backward> r2c_backward(geom.Domain());
r2c_backward(cmf, mfout);

Note that using :cpp:`forwardThenBackward` is expected to be more efficient
than separate calls to :cpp:`forward` and :cpp:`backward` because some
parallel communication can be avoided. It should also be noted that a lot of
preparation work is done in the construction of an :cpp:`FFT::R2C`
object. Therefore, one should cache it for reuse if possible.


Poisson Solver
==============

AMReX provides FFT-based Poisson solvers. :cpp:`FFT::Poisson` supports domains
with periodic boundaries in all directions using FFT alone. :cpp:`FFT::PoissonHybrid` is a 3D-only
solver that supports periodic boundaries in the first two dimensions and
Neumann boundary in the last dimension. Similar to :cpp:`FFT::R2C`, the
Poisson solvers should be cached for reuse.
16 changes: 16 additions & 0 deletions Docs/sphinx_documentation/source/FFT_Chapter.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
.. _Chap:FFT:

.. _sec:FFT:FFTOverview:

Discrete Fourier Transform
==========================

AMReX provides support for parallel discrete Fourier transforms. The
implementation utilizes cuFFT, rocFFT, oneMKL and FFTW, for CUDA, HIP, SYCL
and CPU builds, respectively. It also provides FFT-based Poisson
solvers.

.. toctree::
:maxdepth: 1

FFT
1 change: 1 addition & 0 deletions Docs/sphinx_documentation/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ Documentation on migration from BoxLib is available in the AMReX repository at D
Fortran_Chapter
Python_Chapter
EB_Chapter
FFT_Chapter
TimeIntegration_Chapter
GPU_Chapter
Visualization_Chapter
Expand Down
3 changes: 3 additions & 0 deletions GNUmakefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ ifeq ($(USE_LINEAR_SOLVERS),TRUE)
Pdirs += F_Interfaces/LinearSolvers
endif
endif
ifeq ($(USE_FFT),TRUE)
Pdirs += FFT
endif
ifeq ($(USE_EB),TRUE)
Pdirs += EB
endif
Expand Down
3 changes: 3 additions & 0 deletions Src/Base/AMReX_FabArray.H
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <AMReX_FabFactory.H>
#include <AMReX_DistributionMapping.H>
#include <AMReX_Geometry.H>
#include <AMReX_GpuComplex.H>
#include <AMReX_ParallelDescriptor.H>
#include <AMReX_Utility.H>
#include <AMReX_ccse-mpi.H>
Expand Down Expand Up @@ -3679,6 +3680,8 @@ FabArray<FAB>::norminf (FabArray<IFAB> const& mask, int comp, int ncomp,
return nm0;
}

using cMultiFab = FabArray<BaseFab<GpuComplex<Real> > >;

}

#endif /*BL_FABARRAY_H*/
6 changes: 3 additions & 3 deletions Src/Base/AMReX_GpuComplex.H
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,16 @@ struct alignas(2*sizeof(T)) GpuComplex
/**
* \brief Return the real part.
*/
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
constexpr T real () const noexcept { return m_real; }

/**
* \brief Return the imaginary part.
*/
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
constexpr T imag () const noexcept { return m_imag; }

/**
/**
* \brief Add a real number to this complex number.
*/
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
Expand Down
Loading

0 comments on commit b00c828

Please sign in to comment.