amrex::FFT (#4193)

Add parallel FFT capability to AMReX. It relies on FFTW3, cuFFT, rocFFT and oneMKL, for CPU, CUDA, HIP and SYCL builds, respectively.
AMReX-Codes · Oct 21, 2024 · b00c828 · b00c828
1 parent 62c2a81
commit b00c828
Show file tree

Hide file tree

Showing 43 changed files with 1,960 additions and 10 deletions.
diff --git a/.github/workflows/apps.yml b/.github/workflows/apps.yml
@@ -95,6 +95,7 @@ jobs:
             -DWarpX_OPENPMD=OFF                   \
             -DCMAKE_VERBOSE_MAKEFILE=ON           \
             -DCMAKE_CXX_COMPILER_LAUNCHER=ccache  \
+            -DAMReX_FFT=ON                        \
             -DAMReX_LINEAR_SOLVER_INCFLO=OFF
         cmake --build WarpX/build -j 4
 

diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml
@@ -44,6 +44,7 @@ jobs:
             -DCMAKE_BUILD_TYPE=Debug    \
             -DCMAKE_VERBOSE_MAKEFILE=ON \
             -DCMAKE_INSTALL_PREFIX=/tmp/my-amrex      \
+            -DAMReX_FFT=ON                            \
             -DAMReX_EB=ON                             \
             -DAMReX_FORTRAN=ON                        \
             -DAMReX_MPI=OFF                           \
@@ -104,6 +105,7 @@ jobs:
         cmake ..                                      \
             -DCMAKE_BUILD_TYPE=Debug                  \
             -DCMAKE_VERBOSE_MAKEFILE=ON               \
+            -DAMReX_FFT=ON                            \
             -DAMReX_EB=ON                             \
             -DAMReX_ENABLE_TESTS=ON                   \
             -DAMReX_FORTRAN=ON                        \
@@ -158,6 +160,7 @@ jobs:
         cmake ..                                      \
             -DCMAKE_BUILD_TYPE=RelWithDebInfo         \
             -DCMAKE_VERBOSE_MAKEFILE=ON               \
+            -DAMReX_FFT=ON                            \
             -DAMReX_EB=ON                             \
             -DAMReX_ENABLE_TESTS=ON                   \
             -DAMReX_FORTRAN=OFF                       \
@@ -200,7 +203,7 @@ jobs:
         export CCACHE_LOGFILE=${{ github.workspace }}/ccache.log.txt
         ccache -z
 
-        ./configure --dim 2 --with-fortran no --comp llvm --with-mpi no
+        ./configure --dim 2 --with-fortran no --comp llvm --with-mpi no --enable-fft yes
         make -j4 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names" \
             CCACHE=ccache
         make install

diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
@@ -38,6 +38,7 @@ jobs:
 
         cmake -S . -B build                              \
             -DCMAKE_VERBOSE_MAKEFILE=ON                  \
+            -DAMReX_FFT=ON                               \
             -DAMReX_EB=ON                                \
             -DAMReX_ENABLE_TESTS=ON                      \
             -DAMReX_FORTRAN=OFF                          \
@@ -97,6 +98,7 @@ jobs:
         cmake -S . -B build                              \
             -DCMAKE_VERBOSE_MAKEFILE=ON                  \
             -DAMReX_MPI=OFF                              \
+            -DAMReX_FFT=ON                               \
             -DAMReX_EB=ON                                \
             -DAMReX_ENABLE_TESTS=ON                      \
             -DAMReX_FORTRAN=OFF                          \
@@ -153,6 +155,7 @@ jobs:
             -DCMAKE_VERBOSE_MAKEFILE=ON                  \
             -DAMReX_ENABLE_TESTS=ON                      \
             -DAMReX_TEST_TYPE=Small                      \
+            -DAMReX_FFT=ON                               \
             -DAMReX_FORTRAN=ON                           \
             -DAMReX_FORTRAN_INTERFACES=ON                \
             -DAMReX_GPU_BACKEND=CUDA                     \
@@ -196,7 +199,7 @@ jobs:
         ccache -z
 
         export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
-        ./configure --dim 3 --with-cuda yes --enable-eb yes --enable-xsdk-defaults yes --with-fortran no
+        ./configure --dim 3 --with-cuda yes --enable-eb yes --enable-xsdk-defaults yes --with-fortran no --enable-fft yes
         #
         # /home/runner/work/amrex/amrex/Src/Base/AMReX_GpuLaunchGlobal.H:16:41: error: unused parameter ‘f0’ [-Werror=unused-parameter]
         #    16 |     AMREX_GPU_GLOBAL void launch_global (L f0) { f0(); }

diff --git a/.github/workflows/dependencies/dependencies.sh b/.github/workflows/dependencies/dependencies.sh
@@ -16,6 +16,7 @@ sudo apt-get update
 
 sudo apt-get install -y --no-install-recommends\
     build-essential \
+    libfftw3-dev    \
     g++ gfortran    \
     libopenmpi-dev  \
     openmpi-bin
diff --git a/.github/workflows/dependencies/dependencies_clang.sh b/.github/workflows/dependencies/dependencies_clang.sh
@@ -16,5 +16,6 @@ sudo apt-get update
 
 sudo apt-get install -y --no-install-recommends \
     build-essential      \
+    libfftw3-dev         \
     gfortran             \
     clang-$1
diff --git a/.github/workflows/dependencies/dependencies_gcc.sh b/.github/workflows/dependencies/dependencies_gcc.sh
@@ -17,6 +17,7 @@ sudo apt-get update
 
 sudo apt-get install -y --no-install-recommends \
     build-essential    \
+    libfftw3-dev       \
     g++-$1 gfortran-$1 \
     libopenmpi-dev     \
     openmpi-bin
diff --git a/.github/workflows/dependencies/dependencies_hip.sh b/.github/workflows/dependencies/dependencies_hip.sh
@@ -56,6 +56,7 @@ sudo apt-get install -y --no-install-recommends \
     roctracer-dev   \
     rocprofiler-dev \
     rocrand-dev     \
+    rocfft-dev      \
     rocprim-dev
 
 # hiprand-dev is a new package that does not exist in old versions

diff --git a/.github/workflows/dependencies/dependencies_nvcc.sh b/.github/workflows/dependencies/dependencies_nvcc.sh
@@ -35,5 +35,6 @@ sudo apt-get install -y \
     cuda-minimal-build-$VERSION_DASHED      \
     cuda-nvml-dev-$VERSION_DASHED           \
     cuda-nvtx-$VERSION_DASHED               \
+    libcufft-dev-$VERSION_DASHED            \
     libcurand-dev-$VERSION_DASHED
 sudo ln -s cuda-$VERSION_DOTTED /usr/local/cuda
diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml
@@ -42,6 +42,7 @@ jobs:
         mkdir build
         cd build
         cmake ..                                  \
+            -DAMReX_FFT=ON                        \
             -DAMReX_FORTRAN=ON                    \
             -DAMReX_PLOTFILE_TOOLS=ON             \
             -DCMAKE_VERBOSE_MAKEFILE=ON           \
@@ -99,6 +100,7 @@ jobs:
         cmake -S . -B build             \
             -DCMAKE_BUILD_TYPE=Debug    \
             -DCMAKE_VERBOSE_MAKEFILE=ON \
+            -DAMReX_FFT=ON              \
             -DAMReX_EB=ON               \
             -DAMReX_ENABLE_TESTS=ON     \
             -DAMReX_FORTRAN=ON          \
@@ -147,6 +149,7 @@ jobs:
         cmake -S . -B build             \
             -DCMAKE_BUILD_TYPE=Debug    \
             -DCMAKE_VERBOSE_MAKEFILE=ON \
+            -DAMReX_FFT=ON              \
             -DAMReX_EB=ON               \
             -DAMReX_ENABLE_TESTS=ON     \
             -DAMReX_FORTRAN=ON          \
@@ -196,6 +199,7 @@ jobs:
         cmake -S . -B build             \
             -DCMAKE_BUILD_TYPE=Debug    \
             -DCMAKE_VERBOSE_MAKEFILE=ON \
+            -DAMReX_FFT=ON              \
             -DAMReX_EB=OFF              \
             -DAMReX_ENABLE_TESTS=ON     \
             -DAMReX_FORTRAN=ON          \
@@ -248,6 +252,7 @@ jobs:
             -DCMAKE_VERBOSE_MAKEFILE=ON \
             -DAMReX_ASSERTIONS=ON       \
             -DAMReX_TESTING=ON          \
+            -DAMReX_FFT=ON              \
             -DAMReX_EB=OFF              \
             -DAMReX_ENABLE_TESTS=ON     \
             -DAMReX_BOUND_CHECK=ON      \
@@ -310,6 +315,7 @@ jobs:
             -DAMReX_TESTING=ON          \
             -DAMReX_BOUND_CHECK=ON      \
             -DAMReX_FPE=ON              \
+            -DAMReX_FFT=ON              \
             -DAMReX_EB=ON               \
             -DAMReX_ENABLE_TESTS=ON     \
             -DAMReX_FORTRAN=ON          \
@@ -371,6 +377,7 @@ jobs:
             -DAMReX_TESTING=ON          \
             -DAMReX_BOUND_CHECK=ON      \
             -DAMReX_FPE=ON              \
+            -DAMReX_FFT=ON              \
             -DAMReX_EB=ON               \
             -DAMReX_ENABLE_TESTS=ON     \
             -DAMReX_FORTRAN=OFF         \
@@ -457,7 +464,7 @@ jobs:
         export CCACHE_LOGFILE=${{ github.workspace }}/ccache.log.txt
         ccache -z
 
-        ./configure --dim 3 --enable-eb yes --enable-xsdk-defaults yes
+        ./configure --dim 3 --enable-eb yes --enable-xsdk-defaults yes --enable-fft yes
         make -j4 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS=-fno-operator-names \
             CCACHE=ccache
         make install
@@ -497,7 +504,8 @@ jobs:
         export CCACHE_LOGFILE=${{ github.workspace }}/ccache.log.txt
         ccache -z
 
-        ./configure --dim 3 --enable-eb no --enable-xsdk-defaults no --single-precision yes --single-precision-particles yes --enable-tiny-profile yes
+        ./configure --dim 3 --enable-eb no --enable-xsdk-defaults no --single-precision yes \
+                    --single-precision-particles yes --enable-tiny-profile yes --enable-fft yes
         make -j4 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS=-fno-operator-names \
             CCACHE=ccache
         make install
@@ -623,6 +631,7 @@ jobs:
             -DAMReX_OMP=ON              \
             -DCMAKE_VERBOSE_MAKEFILE=ON \
             -DAMReX_ENABLE_TESTS=ON     \
+            -DAMReX_FFT=ON              \
             -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
         make -j 4
 

diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml
@@ -48,6 +48,7 @@ jobs:
 
         cmake -S . -B build                               \
             -DCMAKE_VERBOSE_MAKEFILE=ON                   \
+            -DAMReX_FFT=ON                                \
             -DAMReX_EB=ON                                 \
             -DAMReX_ENABLE_TESTS=ON                       \
             -DAMReX_FORTRAN=ON                            \
@@ -103,6 +104,7 @@ jobs:
 
         cmake -S . -B build_full_legacywrapper            \
             -DCMAKE_VERBOSE_MAKEFILE=ON                   \
+            -DAMReX_FFT=ON                                \
             -DAMReX_EB=OFF                                \
             -DAMReX_ENABLE_TESTS=ON                       \
             -DAMReX_FORTRAN=ON                            \
@@ -145,7 +147,9 @@ jobs:
         export CCACHE_MAXSIZE=100M
         ccache -z
 
-        ./configure --dim 2 --with-hip yes --enable-eb yes --enable-xsdk-defaults yes --with-mpi no --with-omp no --single-precision yes --single-precision-particles yes
+        ./configure --dim 2 --with-hip yes --enable-eb yes --enable-xsdk-defaults yes \
+                    --with-mpi no --with-omp no --single-precision yes \
+                    --single-precision-particles yes --enable-fft yes
         make -j4 WARN_ALL=TRUE AMD_ARCH=gfx90a CCACHE=ccache
         make install
 

diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
@@ -41,6 +41,7 @@ jobs:
         set -e
         cmake -S . -B build                                \
             -DCMAKE_VERBOSE_MAKEFILE=ON                    \
+            -DAMReX_FFT=ON                                 \
             -DAMReX_EB=OFF                                 \
             -DAMReX_ENABLE_TESTS=ON                        \
             -DAMReX_FORTRAN=ON                             \
@@ -89,6 +90,7 @@ jobs:
         set -e
         cmake -S . -B build                                \
             -DCMAKE_VERBOSE_MAKEFILE=ON                    \
+            -DAMReX_FFT=ON                                 \
             -DAMReX_EB=ON                                  \
             -DAMReX_ENABLE_TESTS=ON                        \
             -DAMReX_FORTRAN=OFF                            \

diff --git a/Docs/sphinx_documentation/source/BuildingAMReX.rst b/Docs/sphinx_documentation/source/BuildingAMReX.rst
@@ -475,6 +475,8 @@ The list of available options is reported in the :ref:`table <tab:cmakevar>` bel
    +------------------------------+-------------------------------------------------+-------------------------+-----------------------+
    | AMReX_EB                     |  Build Embedded Boundary support                | NO                      | YES, NO               |
    +------------------------------+-------------------------------------------------+-------------------------+-----------------------+
+   | AMReX_FFT                    |  Build FFT support                              | NO                      | YES, NO               |
+   +------------------------------+-------------------------------------------------+-------------------------+-----------------------+
    | AMReX_PARTICLES              |  Build particle classes                         | YES                     | YES, NO               |
    +------------------------------+-------------------------------------------------+-------------------------+-----------------------+
    | AMReX_PARTICLES_PRECISION    |  Set reals precision in particle classes        | Same as AMReX_PRECISION | DOUBLE, SINGLE        |
@@ -697,6 +699,8 @@ A list of AMReX component names and related configure options are shown in the t
    +------------------------------+-----------------+
    | AMReX_EB                     | EB              |
    +------------------------------+-----------------+
+   | AMReX_FFT                    | FFT             |
+   +------------------------------+-----------------+
    | AMReX_PARTICLES              | PARTICLES       |
    +------------------------------+-----------------+
    | AMReX_PARTICLES_PRECISION    | PDOUBLE, PSINGLE|

diff --git a/Docs/sphinx_documentation/source/FFT.rst b/Docs/sphinx_documentation/source/FFT.rst
@@ -0,0 +1,71 @@
+.. role:: cpp(code)
+   :language: c++
+
+.. _sec:FFT:r2c:
+
+FFT::R2C Class
+==============
+
+Class template `FFT::R2C` supports discrete Fourier transforms between real
+and complex data. The name R2C indicates that the forward transform converts
+real data to complex data, while the backward transform converts complex
+data to real data. It should be noted that both directions of transformation
+are supported, not just from real to complex.
+
+The implementation utilizes cuFFT, rocFFT, oneMKL and FFTW, for CUDA, HIP,
+SYCL and CPU builds, respectively. Because the parallel communication is
+handled by AMReX, it does not need the parallel version of
+FFTW. Furthermore, there is no constraint on the domain decomposition such
+as one Box per process. This class performs parallel FFT on AMReX's parallel
+data containers (e.g., :cpp:`MultiFab` and
+:cpp:`FabArray<BaseFab<GpuComplex<Real>>>`). For local FFT, the users can
+use FFTW, cuFFT, rocFFT, or oneMKL directly.
+
+Other than using column-major order, AMReX follows the convention of
+FFTW. Applying the forward transform followed by the backward transform
+scales the original data by the size of the input array. The layout of the
+complex data also follows the FFTW convention, where the complex Hermitian
+output array has `(nx/2+1,ny,nz)` elements. Here `nx`, `ny` and `nz` are the
+sizes of the real array and the division is rounded down.
+
+Below are examples of using :cpp:`FFT::R2C`.
+
+.. highlight:: c++
+
+::
+
+    Geometry geom(...);
+    MultiFab mfin(...);
+    MultiFab mfout(...);
+
+    auto scaling = 1. / geom.Domain().d_numPts();
+
+    FFT::R2C r2c(geom.Domain());
+    r2c.forwardThenBackward(mfin, mfout,
+        [=] AMREX_GPU_DEVICE (int, int, int, auto& sp)
+        {
+            sp *= scaling;
+        });
+
+    cMultiFab cmf(...);
+    FFT::R2C<Real,FFT::Direction::forward> r2c_forward(geom.Domain());
+    r2c_forward(mfin, cmf);
+
+    FFT::R2C<Real,FFT::Direction::backward> r2c_backward(geom.Domain());
+    r2c_backward(cmf, mfout);
+
+Note that using :cpp:`forwardThenBackward` is expected to be more efficient
+than separate calls to :cpp:`forward` and :cpp:`backward` because some
+parallel communication can be avoided. It should also be noted that a lot of
+preparation work is done in the construction of an :cpp:`FFT::R2C`
+object. Therefore, one should cache it for reuse if possible.
+
+
+Poisson Solver
+==============
+
+AMReX provides FFT-based Poisson solvers. :cpp:`FFT::Poisson` supports domains
+with all boundaries periodic, using FFT alone. :cpp:`FFT::PoissonHybrid` is a 3D only
+solver that supports periodic boundaries in the first two dimensions and
+Neumann boundary in the last dimension. Similar to :cpp:`FFT::R2C`, the
+Poisson solvers should be cached for reuse.
diff --git a/Docs/sphinx_documentation/source/FFT_Chapter.rst b/Docs/sphinx_documentation/source/FFT_Chapter.rst
@@ -0,0 +1,16 @@
+.. _Chap:FFT:
+
+.. _sec:FFT:FFTOverview:
+
+Discrete Fourier Transform
+==========================
+
+AMReX provides support for parallel discrete Fourier transforms. The
+implementation utilizes cuFFT, rocFFT, oneMKL and FFTW, for CUDA, HIP, SYCL
+and CPU builds, respectively. It also provides FFT based Poisson
+solvers.
+
+.. toctree::
+   :maxdepth: 1
+
+   FFT
diff --git a/Docs/sphinx_documentation/source/index.rst b/Docs/sphinx_documentation/source/index.rst
@@ -52,6 +52,7 @@ Documentation on migration from BoxLib is available in the AMReX repository at D
    Fortran_Chapter
    Python_Chapter
    EB_Chapter
+   FFT_Chapter
    TimeIntegration_Chapter
    GPU_Chapter
    Visualization_Chapter

diff --git a/GNUmakefile.in b/GNUmakefile.in
@@ -26,6 +26,9 @@ ifeq ($(USE_LINEAR_SOLVERS),TRUE)
      Pdirs += F_Interfaces/LinearSolvers
    endif
 endif
+ifeq ($(USE_FFT),TRUE)
+   Pdirs += FFT
+endif
 ifeq ($(USE_EB),TRUE)
    Pdirs += EB
 endif

diff --git a/Src/Base/AMReX_FabArray.H b/Src/Base/AMReX_FabArray.H
@@ -13,6 +13,7 @@
 #include <AMReX_FabFactory.H>
 #include <AMReX_DistributionMapping.H>
 #include <AMReX_Geometry.H>
+#include <AMReX_GpuComplex.H>
 #include <AMReX_ParallelDescriptor.H>
 #include <AMReX_Utility.H>
 #include <AMReX_ccse-mpi.H>
@@ -3679,6 +3680,8 @@ FabArray<FAB>::norminf (FabArray<IFAB> const& mask, int comp, int ncomp,
     return nm0;
 }
 
+using cMultiFab = FabArray<BaseFab<GpuComplex<Real> > >;
+
 }
 
 #endif /*BL_FABARRAY_H*/
diff --git a/Src/Base/AMReX_GpuComplex.H b/Src/Base/AMReX_GpuComplex.H
@@ -41,16 +41,16 @@ struct alignas(2*sizeof(T)) GpuComplex
     /**
      * \brief Return the real part.
      */
-    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
     constexpr T real () const noexcept { return m_real; }
 
     /**
      * \brief Return the imaginary part.
      */
-    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
     constexpr T imag () const noexcept { return m_imag; }
 
-   /**
+    /**
      * \brief Add a real number to this complex number.
      */
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE