diff --git a/.clang-tidy b/.clang-tidy
index 239e1e8e52a..c7d43e1f907 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -1,56 +1,8 @@
 Checks: '-*,
-    bugprone-branch-clone,
-    bugprone-exception-escape,
-    bugprone-fold-init-type,
-    bugprone-forward-declaration-namespace,
-    bugprone-forwarding-reference-overload,
+    bugprone-*,
+    -bugprone-easily-swappable-parameters,
     -bugprone-implicit-widening-of-multiplication-result,
-    bugprone-inaccurate-erase,
-    bugprone-incorrect-roundings,
-    bugprone-infinite-loop,
-    bugprone-integer-division,
-    bugprone-lambda-function-name,
-    bugprone-macro-parentheses,
-    bugprone-macro-repeated-side-effects,
-    bugprone-misplaced-operator-in-strlen-in-alloc,
-    bugprone-misplaced-pointer-arithmetic-in-alloc,
     -bugprone-misplaced-widening-cast,
-    bugprone-move-forwarding-reference,
-    bugprone-multiple-statement-macro,
-    bugprone-no-escape,
-    bugprone-not-null-terminated-result,
-    bugprone-parent-virtual-call,
-    bugprone-posix-return,
-    bugprone-redundant-branch-condition,
-    bugprone-reserved-identifier,
-    bugprone-signal-handler,
-    bugprone-signed-char-misuse,
-    bugprone-sizeof-container,
-    bugprone-sizeof-expression,
-    bugprone-spuriously-wake-up-functions,
-    bugprone-string-constructor,
-    bugprone-string-integer-assignment,
-    bugprone-string-literal-with-embedded-nul,
-    bugprone-stringview-nullptr,
-    bugprone-suspicious-enum-usage,
-    bugprone-suspicious-include,
-    bugprone-suspicious-memory-comparison,
-    bugprone-suspicious-memset-usage,
-    bugprone-suspicious-missing-comma,
-    bugprone-suspicious-semicolon,
-    bugprone-suspicious-string-compare,
-    bugprone-swapped-arguments,
-    bugprone-terminating-continue,
-    bugprone-throw-keyword-missing,
-    bugprone-too-small-loop-variable,
-    bugprone-undefined-memory-manipulation,
-    bugprone-undelegated-constructor,
-    bugprone-unhandled-exception-at-new,
-    bugprone-unhandled-self-assignment,
-    bugprone-unused-raii,
-    bugprone-unused-return-value,
-    bugprone-use-after-move,
-    bugprone-virtual-near-miss,
     cppcoreguidelines-avoid-goto,
     misc-const-correctness,
     modernize-avoid-bind,
diff --git a/.github/workflows/dependencies/dpcpp.sh b/.github/workflows/dependencies/dpcpp.sh
index 65e3f36477f..9ecc5e4ca19 100755
--- a/.github/workflows/dependencies/dpcpp.sh
+++ b/.github/workflows/dependencies/dpcpp.sh
@@ -14,10 +14,13 @@ echo 'Acquire::Retries "3";' | sudo tee /etc/apt/apt.conf.d/80-retries
 
 # Ref.: https://github.com/rscohn2/oneapi-ci
 # intel-basekit intel-hpckit are too large in size
-wget -q -O - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB \
-  | sudo apt-key add -
-echo "deb https://apt.repos.intel.com/oneapi all main" \
-  | sudo tee /etc/apt/sources.list.d/oneAPI.list
+
+# download the key to system keyring
+wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
+| gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
+
+# add signed entry to apt sources and configure the APT client to use Intel repository:
+echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
 
 sudo apt-get update
 
@@ -33,7 +36,7 @@ do
         build-essential \
         ccache          \
         cmake           \
-        intel-oneapi-dpcpp-cpp-compiler intel-oneapi-mkl-devel \
+        intel-oneapi-compiler-dpcpp-cpp intel-oneapi-mkl-devel \
         g++ gfortran    \
         libopenmpi-dev  \
         openmpi-bin     \
diff --git a/.github/workflows/insitu.yml b/.github/workflows/insitu.yml
index d6d9f338181..57a25ce7629 100644
--- a/.github/workflows/insitu.yml
+++ b/.github/workflows/insitu.yml
@@ -39,7 +39,7 @@ jobs:
       CC: gcc
       CMAKE_PREFIX_PATH: /ascent/install/lib/cmake/
     container:
-      image: alpinedav/ascent:0.9.1
+      image: alpinedav/ascent:0.9.2
     steps:
     - uses: actions/checkout@v3
     - name: Configure
diff --git a/Docs/source/install/hpc.rst b/Docs/source/install/hpc.rst
index 2829527675c..67468cc2f02 100644
--- a/Docs/source/install/hpc.rst
+++ b/Docs/source/install/hpc.rst
@@ -39,6 +39,7 @@ This section documents quick-start guides for a selection of supercomputers that
    hpc/fugaku
    hpc/hpc3
    hpc/juwels
+   hpc/karolina
    hpc/lassen
    hpc/lawrencium
    hpc/lumi
diff --git a/Docs/source/install/hpc/karolina.rst b/Docs/source/install/hpc/karolina.rst
new file mode 100644
index 00000000000..67627f04e91
--- /dev/null
+++ b/Docs/source/install/hpc/karolina.rst
@@ -0,0 +1,209 @@
+.. _building-karolina:
+
+Karolina (IT4I)
+===============
+
+The `Karolina cluster <https://docs.it4i.cz/karolina/introduction/>`_ is located at `IT4I, Technical University of Ostrava <https://www.it4i.cz/en>`__.
+
+
+Introduction
+------------
+
+If you are new to this system, **please see the following resources**:
+
+* `IT4I user guide <https://docs.it4i.cz>`__
+* Batch system: `PBS <https://docs.it4i.cz/general/job-submission-and-execution/>`__
+* Jupyter service: not provided/documented (yet)
+* `Filesystems <https://docs.it4i.cz/karolina/storage/>`__:
+
+  * ``$HOME``: per-user directory, use only for inputs, source and scripts; backed up (25GB default quota)
+  * ``/scratch/``: `production directory <https://docs.it4i.cz/karolina/storage/#scratch-file-system>`__; very fast for parallel jobs (20TB default)
+
+
+.. _building-karolina-preparation:
+
+Preparation
+-----------
+
+Use the following commands to download the WarpX source code:
+
+.. code-block:: bash
+
+   git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx
+
+On Karolina, you can run either on GPU nodes with fast A100 GPUs (recommended) or CPU nodes.
+
+.. tab-set::
+
+   .. tab-item:: A100 GPUs
+
+      We use system software modules, add environment hints and further dependencies via the file ``$HOME/karolina_gpu_warpx.profile``.
+      Create it now:
+
+      .. code-block:: bash
+
+         cp $HOME/src/warpx/Tools/machines/karolina-it4i/karolina_gpu_warpx.profile.example $HOME/karolina_gpu_warpx.profile
+
+      .. dropdown:: Script Details
+         :color: light
+         :icon: info
+         :animate: fade-in-slide-down
+
+         .. literalinclude:: ../../../../Tools/machines/karolina-it4i/karolina_gpu_warpx.profile.example
+            :language: bash
+
+      Edit the 2nd line of this script, which sets the ``export proj=""`` variable.
+      For example, if you are a member of the project ``DD-23-83``, then run ``vi $HOME/karolina_gpu_warpx.profile``.
+      Enter the edit mode by typing ``i`` and edit line 2 to read:
+
+      .. code-block:: bash
+
+         export proj="DD-23-83"
+
+      Exit the ``vi`` editor with ``Esc`` and then type ``:wq`` (write & quit).
+
+      .. important::
+
+         Now, and as the first step on future logins to Karolina, activate these environment settings:
+
+         .. code-block:: bash
+
+            source $HOME/karolina_gpu_warpx.profile
+
+      Finally, since Karolina does not yet provide software modules for some of our dependencies, install them once:
+
+      .. code-block:: bash
+
+         bash $HOME/src/warpx/Tools/machines/karolina-it4i/install_gpu_dependencies.sh
+         source $HOME/sw/karolina/gpu/venvs/warpx-gpu/bin/activate
+
+      .. dropdown:: Script Details
+         :color: light
+         :icon: info
+         :animate: fade-in-slide-down
+
+         .. literalinclude:: ../../../../Tools/machines/karolina-it4i/install_gpu_dependencies.sh
+            :language: bash
+
+
+   .. tab-item:: CPU Nodes
+
+      CPU usage documentation is TODO.
+
+
+.. _building-karolina-compilation:
+
+Compilation
+-----------
+
+Use the following :ref:`cmake commands <building-cmake>` to compile:
+
+.. tab-set::
+
+   .. tab-item:: A100 GPUs
+
+      .. code-block:: bash
+
+         cd $HOME/src/warpx
+         rm -rf build_gpu
+
+         cmake -S . -B build_gpu -DWarpX_COMPUTE=CUDA -DWarpX_PSATD=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_LIB=ON -DWarpX_DIMS="1;2;RZ;3"
+         cmake --build build_gpu -j 12
+         cmake --build build_gpu -j 12 --target pip_install
+
+      **That's it!**
+      The WarpX application executables are now in ``$HOME/src/warpx/build_gpu/bin/`` and we installed the ``pywarpx`` Python module.
+
+   .. tab-item:: CPU Nodes
+
+      .. code-block:: bash
+
+         cd $HOME/src/warpx
+         rm -rf build_cpu
+
+         cmake -S . -B build_cpu -DWarpX_COMPUTE=OMP -DWarpX_PSATD=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_LIB=ON -DWarpX_DIMS="1;2;RZ;3"
+         cmake --build build_cpu -j 12
+         cmake --build build_cpu -j 12 --target pip_install
+
+      **That's it!**
+      The WarpX application executables are now in ``$HOME/src/warpx/build_cpu/bin/`` and we installed the ``pywarpx`` Python module.
+
+Now, you can :ref:`submit Karolina compute jobs <running-cpp-karolina>` for WarpX :ref:`Python (PICMI) scripts <usage-picmi>` (:ref:`example scripts <usage-examples>`).
+Or, you can use the WarpX executables to submit Karolina jobs (:ref:`example inputs <usage-examples>`).
+For executables, you can reference their location in your :ref:`job script <running-cpp-karolina>` or copy them to a location in ``/scratch/``.
+
+
+.. _building-karolina-update:
+
+Update WarpX & Dependencies
+---------------------------
+
+If you already installed WarpX in the past and want to update it, start by getting the latest source code:
+
+.. code-block:: bash
+
+   cd $HOME/src/warpx
+
+   # read the output of this command - does it look ok?
+   git status
+
+   # get the latest WarpX source code
+   git fetch
+   git pull
+
+   # read the output of these commands - do they look ok?
+   git status
+   git log # press q to exit
+
+And, if needed,
+
+- :ref:`update the karolina_gpu_warpx.profile or karolina_cpu_warpx.profile files <building-karolina-preparation>`,
+- log out and into the system, activate the now updated environment profile as usual,
+- :ref:`execute the dependency install scripts <building-karolina-preparation>`.
+
+As a last step, clean the build directory ``rm -rf $HOME/src/warpx/build_*`` and rebuild WarpX.
+
+
+.. _running-cpp-karolina:
+
+Running
+-------
+
+.. tab-set::
+
+   .. tab-item:: A100 (40GB) GPUs
+
+      The batch script below can be used to run a WarpX simulation on multiple GPU nodes (change ``#PBS -l select=`` accordingly) on the supercomputer Karolina at IT4I.
+      This partition has up to `72 nodes <https://docs.it4i.cz/karolina/hardware-overview/>`__.
+      Every node has 8x A100 (40GB) GPUs and 2x AMD EPYC 7763, 64-core, 2.45 GHz processors.
+
+      Replace descriptions between chevrons ``<>`` by relevant values, for instance ``<proj>`` could be ``DD-23-83``.
+      Note that we run one MPI rank per GPU.
+
+      .. literalinclude:: ../../../../Tools/machines/karolina-it4i/karolina_gpu.qsub
+         :language: bash
+         :caption: You can copy this file from ``$HOME/src/warpx/Tools/machines/karolina-it4i/karolina_gpu.qsub``.
+
+      To run a simulation, copy the lines above to a file ``karolina_gpu.qsub`` and run
+
+      .. code-block:: bash
+
+         qsub karolina_gpu.qsub
+
+      to submit the job.
+
+
+   .. tab-item:: CPU Nodes
+
+      CPU usage documentation is TODO.
+
+
+.. _post-processing-karolina:
+
+Post-Processing
+---------------
+
+.. note::
+
+   This section was not yet written.
+   Usually, we document here how to use a Jupyter service.
diff --git a/Docs/source/usage/parameters.rst b/Docs/source/usage/parameters.rst
index 0a12dc82d6c..9c3041fce11 100644
--- a/Docs/source/usage/parameters.rst
+++ b/Docs/source/usage/parameters.rst
@@ -2659,7 +2659,8 @@ Reduced Diagnostics
         to file. The electromagnetic field components are interpolated to the measurement point
         by default, but can they be saved as non-averaged by setting
         ``<reduced_diags_name>.raw_fields = true``, in which case the raw fields for the cell
-        containing the measurement point are saved.
+        containing the measurement point are saved. In RZ geometry, this only saves the
+        0'th azimuthal mode component of the fields.
         The interpolation order can be set by specifying ``<reduced_diags_name>.interp_order``,
         otherwise it is set to ``1``.
         Integrated electric and magnetic field components can instead be obtained by specifying
@@ -2983,6 +2984,9 @@ Reduced Diagnostics
     The separator between row values in the output file.
     The default separator is a whitespace.
 
+* ``<reduced_diags_name>.precision`` (`integer`) optional (default `14`)
+    The precision used when writing out the data to the text files.
+
 Lookup tables and other settings for QED modules
 ------------------------------------------------
 
diff --git a/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp b/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp
index 3ea631292cf..28033b3ace0 100644
--- a/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp
+++ b/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp
@@ -302,7 +302,12 @@ FlushFormatPlotfile::WriteWarpXHeader(
 
         warpx.GetPartContainer().WriteHeader(HeaderFile);
 
-        HeaderFile << warpx.getcurrent_injection_position() << "\n";
+        MultiParticleContainer& mypc = warpx.GetPartContainer();
+        const int n_species = mypc.nSpecies();
+        for (int i=0; i<n_species; i++)
+        {
+             HeaderFile << mypc.GetParticleContainer(i).m_current_injection_position << "\n";
+        }
 
         HeaderFile << warpx.getdo_moving_window() << "\n";
 
diff --git a/Source/Diagnostics/ReducedDiags/FieldProbe.H b/Source/Diagnostics/ReducedDiags/FieldProbe.H
index c7b0a9869c5..1150137d1a7 100644
--- a/Source/Diagnostics/ReducedDiags/FieldProbe.H
+++ b/Source/Diagnostics/ReducedDiags/FieldProbe.H
@@ -66,7 +66,7 @@ public:
      */
 
     //! noutputs is 11 for particle id + (x, y, z, Ex, Ey, Ez, Bx, By, Bz, S)
-    static constexpr int noutputs = FieldProbePIdx::nattribs + 3 + 1;
+    static const int noutputs = 11;
 
 private:
     amrex::Real x_probe = 0._rt;
diff --git a/Source/Diagnostics/ReducedDiags/FieldProbe.cpp b/Source/Diagnostics/ReducedDiags/FieldProbe.cpp
index 669b18d296b..55e5054e0d6 100644
--- a/Source/Diagnostics/ReducedDiags/FieldProbe.cpp
+++ b/Source/Diagnostics/ReducedDiags/FieldProbe.cpp
@@ -51,12 +51,6 @@ FieldProbe::FieldProbe (std::string rd_name)
 : ReducedDiags{rd_name}, m_probe(&WarpX::GetInstance())
 {
 
-    // RZ coordinate is not working
-#if (defined WARPX_DIM_RZ)
-    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(false,
-        "FieldProbe reduced diagnostics does not work for RZ coordinate.");
-#endif
-
     // read number of levels
     int nLevel = 0;
     const amrex::ParmParse pp_amr("amr");
diff --git a/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.H b/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.H
index dbb55fc0aff..d658f209c8f 100644
--- a/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.H
+++ b/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.H
@@ -19,13 +19,20 @@
  * This enumerated struct is used to index the field probe particle
  * values that are being stored as SoA data. Nattribs
  * is enumerated to give the number of attributes stored.
+ * The strange insertion of `theta` below is due to the use of
+ * GetParticlePosition for the field probe locations, which reads the probe
+ * theta value from PIdx::theta = 4.
  */
 struct FieldProbePIdx
 {
     enum
     {
         Ex = 0, Ey, Ez,
-        Bx, By, Bz,
+        Bx,
+#ifdef WARPX_DIM_RZ
+        theta,      ///< RZ needs all three position components
+#endif
+        By, Bz,
         S, //!< the Poynting vector
         nattribs
     };
diff --git a/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.cpp b/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.cpp
index 501bfc81515..d928bd33fb9 100644
--- a/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.cpp
+++ b/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.cpp
@@ -114,6 +114,7 @@ FieldProbeParticleContainer::AddNParticles (int lev,
         amrex::ignore_unused(x, y);
         p.pos(0) = z[i];
 #endif
+
         // write position, cpu id, and particle id to particle
         pinned_tile.push_back(p);
     }
@@ -121,6 +122,11 @@ FieldProbeParticleContainer::AddNParticles (int lev,
     // write Real attributes (SoA) to particle initialized zero
     DefineAndReturnParticleTile(0, 0, 0);
 
+    // for RZ write theta value
+#ifdef WARPX_DIM_RZ
+    pinned_tile.push_back_real(FieldProbePIdx::theta, np, 0.0);
+#endif
+
     pinned_tile.push_back_real(FieldProbePIdx::Ex, np, 0.0);
     pinned_tile.push_back_real(FieldProbePIdx::Ey, np, 0.0);
     pinned_tile.push_back_real(FieldProbePIdx::Ez, np, 0.0);
@@ -129,16 +135,16 @@ FieldProbeParticleContainer::AddNParticles (int lev,
     pinned_tile.push_back_real(FieldProbePIdx::Bz, np, 0.0);
     pinned_tile.push_back_real(FieldProbePIdx::S, np, 0.0);
 
+    auto old_np = particle_tile.numParticles();
+    auto new_np = old_np + pinned_tile.numParticles();
+    particle_tile.resize(new_np);
+    amrex::copyParticles(
+        particle_tile, pinned_tile, 0, old_np, pinned_tile.numParticles());
+
     /*
      * Redistributes particles to their appropriate tiles if the box
      * structure of the simulation changes to accomodate data more
      * efficiently.
      */
-    auto old_np = particle_tile.numParticles();
-        auto new_np = old_np + pinned_tile.numParticles();
-        particle_tile.resize(new_np);
-        amrex::copyParticles(
-        particle_tile, pinned_tile, 0, old_np, pinned_tile.numParticles());
     Redistribute();
-
 }
diff --git a/Source/Diagnostics/ReducedDiags/ReducedDiags.H b/Source/Diagnostics/ReducedDiags/ReducedDiags.H
index b8102330d12..c4fc82fbddf 100644
--- a/Source/Diagnostics/ReducedDiags/ReducedDiags.H
+++ b/Source/Diagnostics/ReducedDiags/ReducedDiags.H
@@ -42,6 +42,9 @@ public:
     /// separator in the output file
     std::string m_sep = " ";
 
+    /// precision for data in the output file
+    int m_precision = 14;
+
     /// output data
     std::vector<amrex::Real> m_data;
 
diff --git a/Source/Diagnostics/ReducedDiags/ReducedDiags.cpp b/Source/Diagnostics/ReducedDiags/ReducedDiags.cpp
index 7a2cb6df2ca..38da1feff83 100644
--- a/Source/Diagnostics/ReducedDiags/ReducedDiags.cpp
+++ b/Source/Diagnostics/ReducedDiags/ReducedDiags.cpp
@@ -9,6 +9,7 @@
 
 #include "WarpX.H"
 #include "Utils/Parser/IntervalsParser.H"
+#include "Utils/Parser/ParserUtils.H"
 #include "Utils/TextMsg.H"
 
 #include <AMReX.H>
@@ -66,6 +67,9 @@ ReducedDiags::ReducedDiags (std::string rd_name)
 
     // read separator
     pp_rd_name.query("separator", m_sep);
+
+    // precision of data in the output file
+    utils::parser::queryWithParser(pp_rd_name, "precision", m_precision);
 }
 // end constructor
 
@@ -107,7 +111,7 @@ void ReducedDiags::WriteToFile (int step) const
     ofs << m_sep;
 
     // set precision
-    ofs << std::fixed << std::setprecision(14) << std::scientific;
+    ofs << std::fixed << std::setprecision(m_precision) << std::scientific;
 
     // write time
     ofs << WarpX::GetInstance().gett_new(0);
diff --git a/Source/Diagnostics/WarpXIO.cpp b/Source/Diagnostics/WarpXIO.cpp
index 36f8756598a..aeaf8530438 100644
--- a/Source/Diagnostics/WarpXIO.cpp
+++ b/Source/Diagnostics/WarpXIO.cpp
@@ -211,8 +211,12 @@ WarpX::InitFromCheckpoint ()
         }
 
         mypc->ReadHeader(is);
-        is >> current_injection_position;
-        GotoNextLine(is);
+        const int n_species = mypc->nSpecies();
+        for (int i=0; i<n_species; i++)
+        {
+             is >> mypc->GetParticleContainer(i).m_current_injection_position;
+             GotoNextLine(is);
+        }
 
         int do_moving_window_before_restart;
         is >> do_moving_window_before_restart;
diff --git a/Source/Initialization/PlasmaInjector.H b/Source/Initialization/PlasmaInjector.H
index 0f33aa06221..1c2fc453d73 100644
--- a/Source/Initialization/PlasmaInjector.H
+++ b/Source/Initialization/PlasmaInjector.H
@@ -133,8 +133,10 @@ public:
 
     InjectorPosition* getInjectorPosition ();
     InjectorDensity*  getInjectorDensity ();
+
     InjectorFlux*  getInjectorFlux ();
-    InjectorMomentum* getInjectorMomentum ();
+    InjectorMomentum* getInjectorMomentumDevice ();
+    InjectorMomentum* getInjectorMomentumHost ();
 
 protected:
 
diff --git a/Source/Initialization/PlasmaInjector.cpp b/Source/Initialization/PlasmaInjector.cpp
index 865e531e7b8..1ed86864710 100644
--- a/Source/Initialization/PlasmaInjector.cpp
+++ b/Source/Initialization/PlasmaInjector.cpp
@@ -775,7 +775,13 @@ PlasmaInjector::getInjectorFlux ()
 }
 
 InjectorMomentum*
-PlasmaInjector::getInjectorMomentum ()
+PlasmaInjector::getInjectorMomentumDevice ()
 {
     return d_inj_mom;
 }
+
+InjectorMomentum*
+PlasmaInjector::getInjectorMomentumHost ()
+{
+    return h_inj_mom.get();
+}
diff --git a/Source/Initialization/WarpXAMReXInit.H b/Source/Initialization/WarpXAMReXInit.H
index a34072f223b..06c31e35872 100644
--- a/Source/Initialization/WarpXAMReXInit.H
+++ b/Source/Initialization/WarpXAMReXInit.H
@@ -11,24 +11,28 @@
 
 #include <AMReX_BaseFwd.H>
 
-/** Call amrex::Initialize
- *
- * This function calls amrex::Initialize and overwrites AMReX' defaults.
- * Note: AMReX defines a placeholder/"mock-up" for MPI_Comm and
- * MPI_COMM_WORLD in serial builds
- *
- * @param[in] argc number of arguments from main()
- * @param[in] argv argument strings from main()
- * @param[in] build_parm_parse build the input file parser (AMReX' default: true)
- * @param[in] mpi_comm the MPI communicator to use (AMReX' default: MPI_COMM_WORLD)
- * @returns pointer to an AMReX* object, forwarded from amrex::Initialize
- */
-amrex::AMReX*
-warpx_amrex_init(
-    int& argc,
-    char**& argv,
-    bool const build_parm_parse = true,
-    MPI_Comm const mpi_comm = MPI_COMM_WORLD
-);
+namespace warpx::initialization
+{
+
+    /** Call amrex::Initialize
+     *
+     * This function calls amrex::Initialize and overwrites AMReX' defaults.
+     * Note: AMReX defines a placeholder/"mock-up" for MPI_Comm and
+     * MPI_COMM_WORLD in serial builds
+     *
+     * @param[in] argc number of arguments from main()
+     * @param[in] argv argument strings from main()
+     * @param[in] build_parm_parse build the input file parser (AMReX' default: true)
+     * @param[in] mpi_comm the MPI communicator to use (AMReX' default: MPI_COMM_WORLD)
+     * @returns pointer to an AMReX* object, forwarded from amrex::Initialize
+     */
+    amrex::AMReX*
+    amrex_init(
+        int& argc,
+        char**& argv,
+        bool const build_parm_parse = true,
+        MPI_Comm const mpi_comm = MPI_COMM_WORLD
+    );
 
+}
 #endif
diff --git a/Source/Initialization/WarpXAMReXInit.cpp b/Source/Initialization/WarpXAMReXInit.cpp
index 7f8af142f4a..33bc1a78998 100644
--- a/Source/Initialization/WarpXAMReXInit.cpp
+++ b/Source/Initialization/WarpXAMReXInit.cpp
@@ -56,14 +56,20 @@ namespace {
     }
 }
 
-amrex::AMReX*
-warpx_amrex_init (int& argc, char**& argv, bool const build_parm_parse, MPI_Comm const mpi_comm)
+
+namespace warpx::initialization
 {
-    return amrex::Initialize(
-        argc,
-        argv,
-        build_parm_parse,
-        mpi_comm,
-        overwrite_amrex_parser_defaults
-    );
+
+    amrex::AMReX*
+    amrex_init (int& argc, char**& argv, bool const build_parm_parse, MPI_Comm const mpi_comm)
+    {
+        return amrex::Initialize(
+            argc,
+            argv,
+            build_parm_parse,
+            mpi_comm,
+            ::overwrite_amrex_parser_defaults
+        );
+    }
+
 }
diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
index 4a9beeec24f..08682c5819d 100644
--- a/Source/Particles/PhysicalParticleContainer.H
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -66,6 +66,11 @@ public:
 
     virtual void InitIonizationModule () override;
 
+    /**
+     * \brief Returns a pointer to the plasma injector.
+     */
+    virtual PlasmaInjector* GetPlasmaInjector () override;
+
     /**
      * \brief Evolve is the central function PhysicalParticleContainer that
      * advances plasma particles for a time dt (typically one timestep).
@@ -194,7 +199,8 @@ public:
     void AddPlasmaFlux (amrex::Real dt);
 
     void MapParticletoBoostedFrame (amrex::ParticleReal& x, amrex::ParticleReal& y, amrex::ParticleReal& z,
-                                    amrex::ParticleReal& ux, amrex::ParticleReal& uy, amrex::ParticleReal& uz);
+                                    amrex::ParticleReal& ux, amrex::ParticleReal& uy, amrex::ParticleReal& uz,
+                                    amrex::ParticleReal t_lab = 0._prt);
 
     void AddGaussianBeam (
         const amrex::Real x_m, const amrex::Real y_m, const amrex::Real z_m,
@@ -219,7 +225,8 @@ public:
         amrex::Gpu::HostVector<amrex::ParticleReal>& particle_ux,
         amrex::Gpu::HostVector<amrex::ParticleReal>& particle_uy,
         amrex::Gpu::HostVector<amrex::ParticleReal>& particle_uz,
-        amrex::Gpu::HostVector<amrex::ParticleReal>& particle_w);
+        amrex::Gpu::HostVector<amrex::ParticleReal>& particle_w,
+        amrex::ParticleReal t_lab= 0._prt);
 
     /**
      * \brief Default initialize runtime attributes in a tile. This routine does not initialize the
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 0793094ff0c..56553575c49 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -393,7 +393,7 @@ void PhysicalParticleContainer::InitData ()
 }
 
 void PhysicalParticleContainer::MapParticletoBoostedFrame (
-    ParticleReal& x, ParticleReal& y, ParticleReal& z, ParticleReal& ux, ParticleReal& uy, ParticleReal& uz)
+    ParticleReal& x, ParticleReal& y, ParticleReal& z, ParticleReal& ux, ParticleReal& uy, ParticleReal& uz, ParticleReal t_lab)
 {
     // Map the particles from the lab frame to the boosted frame.
     // This boosts the particle to the lab frame and calculates
@@ -402,8 +402,6 @@ void PhysicalParticleContainer::MapParticletoBoostedFrame (
 
     // For now, start with the assumption that this will only happen
     // at the start of the simulation.
-    const ParticleReal t_lab = 0._prt;
-
     const ParticleReal uz_boost = WarpX::gamma_boost*WarpX::beta_boost*PhysConst::c;
 
     // tpr is the particle's time in the boosted frame
@@ -429,13 +427,14 @@ void PhysicalParticleContainer::MapParticletoBoostedFrame (
         uz = -uz;
     }
 
-    // Move the particles to where they will be at t = 0 in the boosted frame
+    //Move the particles to where they will be at t = t0, the current simulation time in the boosted frame
+    constexpr int lev = 0;
+    const amrex::Real t0 = WarpX::GetInstance().gett_new(lev);
     if (boost_adjust_transverse_positions) {
-        x = xpr - tpr*vxpr;
-        y = ypr - tpr*vypr;
+        x = xpr - (tpr-t0)*vxpr;
+        y = ypr - (tpr-t0)*vypr;
     }
-
-    z = zpr - tpr*vzpr;
+    z = zpr - (tpr-t0)*vzpr;
 
 }
 
@@ -582,6 +581,7 @@ PhysicalParticleContainer::AddPlasmaFromFile(ParticleReal q_tot,
 
         // assumption asserts: see PlasmaInjector
         openPMD::Iteration it = series->iterations.begin()->second;
+        double const t_lab = it.time<double>() * it.timeUnitSI();
         std::string const ps_name = it.particles.begin()->first;
         openPMD::ParticleSpecies ps = it.particles.begin()->second;
 
@@ -649,7 +649,7 @@ PhysicalParticleContainer::AddPlasmaFromFile(ParticleReal q_tot,
                 CheckAndAddParticle(x, y, z, ux, uy, uz, weight,
                                     particle_x,  particle_y,  particle_z,
                                     particle_ux, particle_uy, particle_uz,
-                                    particle_w);
+                                    particle_w, t_lab);
             }
         }
         auto const np = particle_z.size();
@@ -795,10 +795,11 @@ PhysicalParticleContainer::CheckAndAddParticle (
     Gpu::HostVector<ParticleReal>& particle_ux,
     Gpu::HostVector<ParticleReal>& particle_uy,
     Gpu::HostVector<ParticleReal>& particle_uz,
-    Gpu::HostVector<ParticleReal>& particle_w)
+    Gpu::HostVector<ParticleReal>& particle_w,
+    ParticleReal t_lab)
 {
     if (WarpX::gamma_boost > 1.) {
-        MapParticletoBoostedFrame(x, y, z, ux, uy, uz);
+        MapParticletoBoostedFrame(x, y, z, ux, uy, uz, t_lab);
     }
     particle_x.push_back(x);
     particle_y.push_back(y);
@@ -926,7 +927,7 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
 
     InjectorPosition* inj_pos = plasma_injector->getInjectorPosition();
     InjectorDensity*  inj_rho = plasma_injector->getInjectorDensity();
-    InjectorMomentum* inj_mom = plasma_injector->getInjectorMomentum();
+    InjectorMomentum* inj_mom = plasma_injector->getInjectorMomentumDevice();
     const Real gamma_boost = WarpX::gamma_boost;
     const Real beta_boost = WarpX::beta_boost;
     const Real t = WarpX::GetInstance().gett_new(lev);
@@ -1476,7 +1477,7 @@ PhysicalParticleContainer::AddPlasmaFlux (amrex::Real dt)
 
     InjectorPosition* inj_pos = plasma_injector->getInjectorPosition();
     InjectorFlux*  inj_flux = plasma_injector->getInjectorFlux();
-    InjectorMomentum* inj_mom = plasma_injector->getInjectorMomentum();
+    InjectorMomentum* inj_mom = plasma_injector->getInjectorMomentumDevice();
     constexpr int level_zero = 0;
     const amrex::Real t = WarpX::GetInstance().gett_new(level_zero);
 
@@ -2922,6 +2923,11 @@ PhysicalParticleContainer::getIonizationFunc (const WarpXParIter& pti,
                                 ion_atomic_number);
 }
 
+PlasmaInjector* PhysicalParticleContainer::GetPlasmaInjector ()
+{
+    return plasma_injector.get();
+}
+
 void PhysicalParticleContainer::resample (const int timestep)
 {
     // In heavily load imbalanced simulations, MPI processes with few particles will spend most of
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index ffeead6f1d6..6e0d3f4f927 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -13,6 +13,7 @@
 #include "WarpXParticleContainer_fwd.H"
 
 #include "Evolve/WarpXDtType.H"
+#include "Initialization/PlasmaInjector.H"
 #include "Particles/ParticleBoundaries.H"
 #include "SpeciesPhysicalProperties.H"
 
@@ -121,6 +122,12 @@ public:
 
     virtual void InitIonizationModule () {}
 
+    /**
+     * \brief Virtual function that returns a pointer to the plasma injector,
+     *        for derived classes that define one (PhysicalParticleContainer).
+     */
+    virtual PlasmaInjector* GetPlasmaInjector () { return nullptr; }
+
     /**
      * Evolve is the central WarpXParticleContainer function that advances
      * particles for a time dt (typically one timestep). It is a pure virtual
@@ -241,10 +248,13 @@ public:
     virtual void ContinuousInjection(const amrex::RealBox& /*injection_box*/) {}
     // Update optional sub-class-specific injection location.
     virtual void UpdateContinuousInjectionPosition(amrex::Real /*dt*/) {}
+    bool doContinuousInjection() const {return do_continuous_injection;}
 
     // Inject a continuous flux of particles from a defined plane
     virtual void ContinuousFluxInjection(amrex::Real /*t*/, amrex::Real /*dt*/) {}
 
+    int getSpeciesId() const {return species_id;}
+
     ///
     /// This returns the total charge for all the particles in this ParticleContainer.
     /// This is needed when solving Poisson's equation with periodic boundary conditions.
@@ -306,6 +316,9 @@ public:
     int self_fields_max_iters = 200;
     int self_fields_verbosity = 2;
 
+    //! Current injection position (moving-window direction); 0 until assigned
+    amrex::Real m_current_injection_position = 0;
+
     // split along diagonals (0) or axes (1)
     int split_type = 0;
 
diff --git a/Source/Python/WarpXWrappers.cpp b/Source/Python/WarpXWrappers.cpp
index afc4caa5bee..cbee65b6159 100644
--- a/Source/Python/WarpXWrappers.cpp
+++ b/Source/Python/WarpXWrappers.cpp
@@ -150,12 +150,12 @@ namespace
 
     void amrex_init (int argc, char* argv[])
     {
-        warpx_amrex_init(argc, argv);
+        warpx::initialization::amrex_init(argc, argv);  // two-argument form: no MPI flag or communicator is passed
     }
 
     void amrex_init_with_inited_mpi (int argc, char* argv[], MPI_Comm mpicomm)
     {
-        warpx_amrex_init(argc, argv, true, mpicomm);
+        warpx::initialization::amrex_init(argc, argv, true, mpicomm);  // forwards the caller's communicator; `true` presumably marks MPI as already initialized - confirm
     }
 
     void amrex_finalize (int /*finalize_mpi*/)
diff --git a/Source/Utils/WarpXMovingWindow.cpp b/Source/Utils/WarpXMovingWindow.cpp
index 7e7985b98a0..e43740d87e2 100644
--- a/Source/Utils/WarpXMovingWindow.cpp
+++ b/Source/Utils/WarpXMovingWindow.cpp
@@ -56,25 +56,71 @@ void
 WarpX::UpdatePlasmaInjectionPosition (amrex::Real a_dt)
 {
     const int dir = moving_window_dir;
-    // Continuously inject plasma in new cells (by default only on level 0)
-    if (WarpX::warpx_do_continuous_injection and (WarpX::gamma_boost > 1)){
-        // In boosted-frame simulations, the plasma has moved since the last
-        // call to this function, and injection position needs to be updated
-        current_injection_position -= WarpX::beta_boost *
+
+    // For each species, advance its injection position by its bulk velocity * a_dt
+    const int n_species = mypc->nSpecies();
+    for (int i=0; i<n_species; i++)
+    {
+        WarpXParticleContainer& pc = mypc->GetParticleContainer(i);
+
+        // Only species with continuous injection enabled are updated here
+        if (pc.doContinuousInjection())
+        {
+            PlasmaInjector* plasma_injector = pc.GetPlasmaInjector();
+            if (plasma_injector == nullptr) continue;
+
+            // Bulk momentum at the injection position -> velocity along dir
+            // (NOTE(review): v_bulk uses only u[dir] in sqrt(1+u^2) - confirm).
+            // dir maps to: 1D: 0=z | 2D: 0=x, 1=z | 3D: 0=x, 1=y, 2=z
+            // (the position passed to getBulkMomentum always has 3 components)
+            amrex::Vector<amrex::Real> current_injection_position = {0._rt, 0._rt, 0._rt};
+#if defined(WARPX_DIM_1D_Z)
+            current_injection_position[2] = pc.m_current_injection_position;
+#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
+            current_injection_position[dir*2] = pc.m_current_injection_position;
+#else // 3D
+            current_injection_position[dir] = pc.m_current_injection_position;
+#endif
+            amrex::XDim3 u_bulk = plasma_injector->getInjectorMomentumHost()->getBulkMomentum(current_injection_position[0],
+                                                                                              current_injection_position[1],
+                                                                                              current_injection_position[2]);
+#if defined(WARPX_DIM_1D_Z)
+            amrex::Vector<amrex::Real> u_bulk_vec = {u_bulk.z};
+#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
+            amrex::Vector<amrex::Real> u_bulk_vec = {u_bulk.x, u_bulk.z};
+#else // 3D
+            amrex::Vector<amrex::Real> u_bulk_vec = {u_bulk.x, u_bulk.y, u_bulk.z};
+#endif
+            amrex::Real v_bulk = PhysConst::c * u_bulk_vec[dir] / std::sqrt(1._rt + u_bulk_vec[dir]*u_bulk_vec[dir]);
+
+            // In boosted-frame simulations, the plasma has moved since the last
+            // call to this function, and injection position needs to be updated.
+            // Note that the bulk velocity v, obtained from getBulkMomentum, is
+            // transformed to the boosted frame velocity v' via the formula
+            // v' = (v-c*beta)/(1-v*beta/c)
+            if (WarpX::gamma_boost > 1._rt)
+            {
+                v_bulk = (v_bulk - PhysConst::c*WarpX::beta_boost)
+                         / (1._rt - v_bulk*WarpX::beta_boost/PhysConst::c);
 #if defined(WARPX_DIM_3D)
-            WarpX::boost_direction[dir] * PhysConst::c * a_dt;
+                v_bulk *= WarpX::boost_direction[dir];
 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-            // In 2D, dir=0 corresponds to x and dir=1 corresponds to z
-            // This needs to be converted in order to index `boost_direction`
-            // which has 3 components, for both 2D and 3D simulations.
-            WarpX::boost_direction[2*dir] * PhysConst::c * a_dt;
+                // In 2D, dir=0 corresponds to x and dir=1 corresponds to z.
+                // This needs to be converted to index boost_direction,
+                // which always has 3 components.
+                v_bulk *= WarpX::boost_direction[2*dir];
 #elif defined(WARPX_DIM_1D_Z)
-            // In 1D, dir=0 corresponds to z
-            // This needs to be converted in order to index `boost_direction`
-            // which has 3 components, for 1D, 2D, and 3D simulations.
-            WarpX::boost_direction[2] * PhysConst::c * a_dt;
-            amrex::ignore_unused(dir);
+                // In 1D, dir=0 corresponds to z.
+                // This needs to be converted to index boost_direction,
+                // which always has 3 components.
+                v_bulk *= WarpX::boost_direction[2];
+                amrex::ignore_unused(dir);
 #endif
+            }
+
+            // Advance this species' injection position by v_bulk * a_dt
+            pc.m_current_injection_position += v_bulk * a_dt;
+        }
     }
 }
 
@@ -96,15 +142,13 @@ WarpX::MoveWindow (const int step, bool move_j)
     moving_window_x += (moving_window_v - WarpX::beta_boost * PhysConst::c)/(1 - moving_window_v * WarpX::beta_boost / PhysConst::c) * dt[0];
     const int dir = moving_window_dir;
 
-    // Update warpx.current_injection_position
+    // Update each species' m_current_injection_position,
     // PhysicalParticleContainer uses this injection position
     UpdatePlasmaInjectionPosition( dt[0] );
-    if (WarpX::warpx_do_continuous_injection){
-        // Update injection position for WarpXParticleContainer in mypc.
-        // Nothing to do for PhysicalParticleContainers
-        // For LaserParticleContainer, need to update the antenna position.
-        mypc->UpdateContinuousInjectionPosition( dt[0] );
-    }
+    // Update injection position for WarpXParticleContainer in mypc,
+    // nothing to do for PhysicalParticleContainer,
+    // need to update the antenna position for LaserParticleContainer.
+    mypc->UpdateContinuousInjectionPosition( dt[0] );
 
     // compute the number of cells to shift on the base level
     amrex::Real new_lo[AMREX_SPACEDIM];
@@ -312,41 +356,54 @@ WarpX::MoveWindow (const int step, bool move_j)
         }
     }
 
-    // Continuously inject plasma in new cells (by default only on level 0)
-    if (WarpX::warpx_do_continuous_injection) {
-
-        const int lev = 0;
-
-        // particleBox encloses the cells where we generate particles
-        // (only injects particles in an integer number of cells,
-        // for correct particle spacing)
-        amrex::RealBox particleBox = geom[lev].ProbDomain();
-        amrex::Real new_injection_position;
-        if (moving_window_v >= 0){
-            // Forward-moving window
-            const amrex::Real dx = geom[lev].CellSize(dir);
-            new_injection_position = current_injection_position +
-                std::floor( (geom[lev].ProbHi(dir) - current_injection_position)/dx ) * dx;
-        } else {
-            // Backward-moving window
-            const amrex::Real dx = geom[lev].CellSize(dir);
-            new_injection_position = current_injection_position -
-                std::floor( (current_injection_position - geom[lev].ProbLo(dir))/dx) * dx;
-        }
-        // Modify the corresponding bounds of the particleBox
-        if (moving_window_v >= 0) {
-            particleBox.setLo( dir, current_injection_position );
-            particleBox.setHi( dir, new_injection_position );
-        } else {
-            particleBox.setLo( dir, new_injection_position );
-            particleBox.setHi( dir, current_injection_position );
-        }
+    // Loop over species
+    const int n_species = mypc->nSpecies();
+    for (int i=0; i<n_species; i++)
+    {
+        WarpXParticleContainer& pc = mypc->GetParticleContainer(i);
 
-        if (particleBox.ok() and (current_injection_position != new_injection_position)){
-            // Performs continuous injection of all WarpXParticleContainer
-            // in mypc.
-            mypc->ContinuousInjection(particleBox);
-            current_injection_position = new_injection_position;
+        // Continuously inject plasma in new cells (by default only on level 0)
+        if (pc.doContinuousInjection())
+        {
+            const int lev = 0;
+
+            // particleBox encloses the cells where we generate particles
+            // (only injects particles in an integer number of cells,
+            // for correct particle spacing)
+            amrex::RealBox particleBox = geom[lev].ProbDomain();
+            amrex::Real new_injection_position = pc.m_current_injection_position;
+            if (moving_window_v > 0._rt)
+            {
+                // Forward-moving window
+                const amrex::Real dx = geom[lev].CellSize(dir);
+                new_injection_position = pc.m_current_injection_position +
+                    std::floor( (geom[lev].ProbHi(dir) - pc.m_current_injection_position)/dx ) * dx;
+            }
+            else if (moving_window_v < 0._rt)
+            {
+                // Backward-moving window
+                const amrex::Real dx = geom[lev].CellSize(dir);
+                new_injection_position = pc.m_current_injection_position -
+                    std::floor( (pc.m_current_injection_position - geom[lev].ProbLo(dir))/dx) * dx;
+            }
+            // Modify the corresponding bounds of the particleBox
+            if (moving_window_v > 0._rt)
+            {
+                particleBox.setLo( dir, pc.m_current_injection_position );
+                particleBox.setHi( dir, new_injection_position );
+            }
+            else if (moving_window_v < 0._rt)
+            {
+                particleBox.setLo( dir, new_injection_position );
+                particleBox.setHi( dir, pc.m_current_injection_position );
+            }
+
+            if (particleBox.ok() and (pc.m_current_injection_position != new_injection_position)){
+                // Performs continuous injection for this species' container
+                // and records the new injection position.
+                pc.ContinuousInjection(particleBox);
+                pc.m_current_injection_position = new_injection_position;
+            }
         }
     }
 
diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp
index 7e486eb98df..c65603f4d02 100644
--- a/Source/Utils/WarpXUtil.cpp
+++ b/Source/Utils/WarpXUtil.cpp
@@ -322,14 +322,15 @@ void CheckGriddingForRZSpectral ()
     Vector<int> max_grid_size_x(max_level+1);
 
     // Set the radial block size to be the power of 2 greater than or equal to
-    // the number of grid cells. The blocking_factor must be a power of 2
-    // and the max_grid_size should be a multiple of the blocking_factor.
+    // the number of grid cells. The blocking factor must be a power of 2
+    // and the max_grid_size must be a multiple of the blocking_factor unless
+    // it is less than the blocking factor.
     int k = 1;
     while (k < n_cell[0]) {
         k *= 2;
     }
     blocking_factor_x[0] = k;
-    max_grid_size_x[0] = k;
+    max_grid_size_x[0] = n_cell[0];
 
     for (int lev=1 ; lev <= max_level ; lev++) {
         // For this to be correct, this needs to read in any user specified refinement ratios.
diff --git a/Source/WarpX.H b/Source/WarpX.H
index fb5e403f432..dbe922812a3 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -868,7 +868,6 @@ public:
     amrex::Real getdt (int lev) const {return dt[lev];}
     int getdo_moving_window() const {return do_moving_window;}
     amrex::Real getmoving_window_x() const {return moving_window_x;}
-    amrex::Real getcurrent_injection_position () const {return current_injection_position;}
     bool getis_synchronized() const {return is_synchronized;}
 
     int maxStep () const {return max_step;}
@@ -1470,7 +1469,6 @@ private:
     amrex::Real v_particle_pml;
 
     amrex::Real moving_window_x = std::numeric_limits<amrex::Real>::max();
-    amrex::Real current_injection_position = 0;
 
     // Plasma injection parameters
     int warpx_do_continuous_injection = 0;
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 17601175bc5..fc37ccc6523 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -279,16 +279,25 @@ WarpX::WarpX ()
     t_old.resize(nlevs_max, std::numeric_limits<Real>::lowest());
     dt.resize(nlevs_max, std::numeric_limits<Real>::max());
 
-    // Particle Container
+    // Loop over species and set current injection position per species
     mypc = std::make_unique<MultiParticleContainer>(this);
-    warpx_do_continuous_injection = mypc->doContinuousInjection();
-    if (warpx_do_continuous_injection){
-        if (moving_window_v >= 0){
+    const int n_species = mypc->nSpecies();
+    for (int i=0; i<n_species; i++)
+    {
+        WarpXParticleContainer& pc = mypc->GetParticleContainer(i);
+
+        // Set the injection position of every species, whether or not it is
+        // continuously injected, so that loops over species elsewhere can read
+        // pc.m_current_injection_position (not assigned here if moving_window_v == 0).
+        if (moving_window_v > 0._rt)
+        {
             // Inject particles continuously from the right end of the box
-            current_injection_position = geom[0].ProbHi(moving_window_dir);
-        } else {
+            pc.m_current_injection_position = geom[0].ProbHi(moving_window_dir);
+        }
+        else if (moving_window_v < 0._rt)
+        {
             // Inject particles continuously from the left end of the box
-            current_injection_position = geom[0].ProbLo(moving_window_dir);
+            pc.m_current_injection_position = geom[0].ProbLo(moving_window_dir);
         }
     }
 
diff --git a/Source/ablastr/coarsen/average.H b/Source/ablastr/coarsen/average.H
index 269403f7b2c..8f484a18709 100644
--- a/Source/ablastr/coarsen/average.H
+++ b/Source/ablastr/coarsen/average.H
@@ -7,16 +7,13 @@
 #ifndef ABLASTR_COARSEN_AVERAGE_H_
 #define ABLASTR_COARSEN_AVERAGE_H_
 
-
 #include <AMReX_Array.H>
 #include <AMReX_Array4.H>
 #include <AMReX_BLassert.H>
 #include <AMReX_Extension.H>
 #include <AMReX_GpuQualifiers.H>
-#include <AMReX_IntVect.H>
 #include <AMReX_Math.H>
 #include <AMReX_REAL.H>
-
 #include <AMReX_BaseFwd.H>
 
 #include <cstdlib>
diff --git a/Source/ablastr/coarsen/average.cpp b/Source/ablastr/coarsen/average.cpp
index 021df06b4cf..a5dde3d8aa4 100644
--- a/Source/ablastr/coarsen/average.cpp
+++ b/Source/ablastr/coarsen/average.cpp
@@ -9,15 +9,16 @@
 #include "ablastr/utils/TextMsg.H"
 
 #include <AMReX_BLProfiler.H>
-#include <AMReX_BLassert.H>
 #include <AMReX_BoxArray.H>
 #include <AMReX_Config.H>
 #include <AMReX_GpuControl.H>
 #include <AMReX_GpuLaunch.H>
+#include <AMReX_IndexType.H>
 #include <AMReX_IntVect.H>
 #include <AMReX_MFIter.H>
 #include <AMReX_MultiFab.H>
 
+#include <memory>
 
 namespace ablastr::coarsen::average
 {
diff --git a/Source/ablastr/coarsen/sample.H b/Source/ablastr/coarsen/sample.H
index 1390cbebb3c..0a338a1c464 100644
--- a/Source/ablastr/coarsen/sample.H
+++ b/Source/ablastr/coarsen/sample.H
@@ -10,7 +10,6 @@
 
 #include <AMReX_Array.H>
 #include <AMReX_Array4.H>
-#include <AMReX_BLassert.H>
 #include <AMReX_Extension.H>
 #include <AMReX_GpuQualifiers.H>
 #include <AMReX_IntVect.H>
diff --git a/Source/ablastr/coarsen/sample.cpp b/Source/ablastr/coarsen/sample.cpp
index 5d6973b21b5..65ada612905 100644
--- a/Source/ablastr/coarsen/sample.cpp
+++ b/Source/ablastr/coarsen/sample.cpp
@@ -9,15 +9,20 @@
 #include "ablastr/utils/TextMsg.H"
 
 #include <AMReX_BLProfiler.H>
-#include <AMReX_BLassert.H>
 #include <AMReX_BoxArray.H>
 #include <AMReX_Config.H>
+#include <AMReX_DistributionMapping.H>
+#include <AMReX_FArrayBox.H>
+#include <AMReX_FabArray.H>
 #include <AMReX_GpuControl.H>
 #include <AMReX_GpuLaunch.H>
+#include <AMReX_IndexType.H>
 #include <AMReX_IntVect.H>
 #include <AMReX_MFIter.H>
 #include <AMReX_MultiFab.H>
 
+#include <memory>
+
 
 namespace ablastr::coarsen::sample
 {
diff --git a/Source/ablastr/parallelization/MPIInitHelpers.cpp b/Source/ablastr/parallelization/MPIInitHelpers.cpp
index cdf45821356..65e7525c087 100644
--- a/Source/ablastr/parallelization/MPIInitHelpers.cpp
+++ b/Source/ablastr/parallelization/MPIInitHelpers.cpp
@@ -10,7 +10,6 @@
 
 #include <AMReX_Config.H>
 #include <AMReX_ParallelDescriptor.H>
-#include <AMReX_Print.H>
 
 #if defined(AMREX_USE_MPI)
 #   include <mpi.h>
diff --git a/Source/ablastr/utils/Communication.H b/Source/ablastr/utils/Communication.H
index 2c879460490..1653c0a1a0d 100644
--- a/Source/ablastr/utils/Communication.H
+++ b/Source/ablastr/utils/Communication.H
@@ -7,16 +7,13 @@
 #ifndef ABLASTR_UTILS_COMMUNICATION_H_
 #define ABLASTR_UTILS_COMMUNICATION_H_
 
-#include <AMReX_FabArray.H>
-#include <AMReX_Gpu.H>
+#include <AMReX_FabArrayBase.H>
 #include <AMReX_GpuDevice.H>
-#include <AMReX_iMultiFab.H>
-#include <AMReX_IntVect.H>
-#include <AMReX_MultiFab.H>
+#include <AMReX_GpuQualifiers.H>
 #include <AMReX_Periodicity.H>
-#include <AMReX_TypeTraits.H>
+#include <AMReX_Vector.H>
 
-#include "WarpX.H"
+#include <AMReX_BaseFwd.H>
 
 #include <optional>
 
diff --git a/Source/ablastr/utils/Communication.cpp b/Source/ablastr/utils/Communication.cpp
index ac1dd1331fe..89579ba1ee5 100644
--- a/Source/ablastr/utils/Communication.cpp
+++ b/Source/ablastr/utils/Communication.cpp
@@ -6,15 +6,21 @@
  */
 #include "Communication.H"
 
-#include <AMReX.H>
 #include <AMReX_BaseFab.H>
 #include <AMReX_BLProfiler.H>
 #include <AMReX_IntVect.H>
 #include <AMReX_FabArray.H>
+#include <AMReX_FabArrayUtility.H>
+#include <AMReX_FabFactory.H>
 #include <AMReX_MultiFab.H>
 #include <AMReX_iMultiFab.H>
+#include <AMReX_IndexType.H>
 #include <AMReX_ParmParse.H>
 
+#include <algorithm>
+#include <memory>
+#include <vector>
+
 
 namespace ablastr::utils::communication
 {
diff --git a/Source/ablastr/utils/Serialization.H b/Source/ablastr/utils/Serialization.H
index 7fb10c14fde..5db3e9968b4 100644
--- a/Source/ablastr/utils/Serialization.H
+++ b/Source/ablastr/utils/Serialization.H
@@ -11,6 +11,7 @@
 #include <algorithm>
 #include <array>
 #include <cstring>
+#include <iterator>
 #include <string>
 #include <type_traits>
 #include <vector>
diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp
index 36ab50f4c3a..c34d760be04 100644
--- a/Source/ablastr/utils/SignalHandling.cpp
+++ b/Source/ablastr/utils/SignalHandling.cpp
@@ -8,11 +8,11 @@
 #include "SignalHandling.H"
 #include "TextMsg.H"
 
-#include <AMReX.H>
 #include <AMReX_ParallelDescriptor.H>
 #include <AMReX_IParser.H>
 
 #include <cctype>
+#include <stdexcept>
 
 // For sigaction() et al.
 #if defined(__linux__) || defined(__APPLE__)
diff --git a/Source/ablastr/utils/TextMsg.H b/Source/ablastr/utils/TextMsg.H
index 6fa7b28a982..89bbf500395 100644
--- a/Source/ablastr/utils/TextMsg.H
+++ b/Source/ablastr/utils/TextMsg.H
@@ -9,8 +9,6 @@
 #define ABLASTR_TEXT_MSG_H_
 
 #include <string>
-#include <vector>
-
 
 namespace ablastr::utils::TextMsg
 {
diff --git a/Source/ablastr/utils/TextMsg.cpp b/Source/ablastr/utils/TextMsg.cpp
index 324b0752554..fb8e4894792 100644
--- a/Source/ablastr/utils/TextMsg.cpp
+++ b/Source/ablastr/utils/TextMsg.cpp
@@ -14,7 +14,7 @@
 #include <algorithm>
 #include <iterator>
 #include <sstream>
-#include <string>
+#include <vector>
 
 
 namespace
diff --git a/Source/ablastr/utils/UsedInputsFile.cpp b/Source/ablastr/utils/UsedInputsFile.cpp
index 175c67619e7..dfdc4bfa192 100644
--- a/Source/ablastr/utils/UsedInputsFile.cpp
+++ b/Source/ablastr/utils/UsedInputsFile.cpp
@@ -11,7 +11,6 @@
 #include <AMReX_ParmParse.H>
 #include <AMReX_Print.H>
 
-#include <fstream>
 #include <ios>
 #include <string>
 
diff --git a/Source/ablastr/utils/msg_logger/MsgLogger.H b/Source/ablastr/utils/msg_logger/MsgLogger.H
index 451f90d3dd7..d6683a1ffc1 100644
--- a/Source/ablastr/utils/msg_logger/MsgLogger.H
+++ b/Source/ablastr/utils/msg_logger/MsgLogger.H
@@ -8,7 +8,7 @@
 #ifndef ABLASTR_MSG_LOGGER_H_
 #define ABLASTR_MSG_LOGGER_H_
 
-#include <AMReX.H>
+#include <AMReX_Config.H>
 
 #include <cstdint>
 #include <map>
diff --git a/Source/ablastr/utils/msg_logger/MsgLogger.cpp b/Source/ablastr/utils/msg_logger/MsgLogger.cpp
index f82c86eddcc..80787adc548 100644
--- a/Source/ablastr/utils/msg_logger/MsgLogger.cpp
+++ b/Source/ablastr/utils/msg_logger/MsgLogger.cpp
@@ -10,13 +10,11 @@
 #include "ablastr/utils/TextMsg.H"
 #include "ablastr/utils/Serialization.H"
 
-#ifdef AMREX_USE_MPI
-#   include <AMReX_ParallelDescriptor.H>
-#endif
-#include <AMReX_Print.H>
+#include <AMReX_ParallelDescriptor.H>
 
-#include <iostream>
-#include <sstream>
+#include <algorithm>
+#include <array>
+#include <memory>
 #include <numeric>
 
 namespace abl_msg_logger = ablastr::utils::msg_logger;
diff --git a/Source/ablastr/utils/text/StringUtils.H b/Source/ablastr/utils/text/StringUtils.H
index 35c280e17d8..a6531d6517e 100644
--- a/Source/ablastr/utils/text/StringUtils.H
+++ b/Source/ablastr/utils/text/StringUtils.H
@@ -11,6 +11,7 @@
 
 #include <AMReX_Utility.H>
 
+#include <cstddef>
 #include <string>
 #include <vector>
 
diff --git a/Source/ablastr/warn_manager/WarnManager.H b/Source/ablastr/warn_manager/WarnManager.H
index 65c1eacc7ef..cc284c98693 100644
--- a/Source/ablastr/warn_manager/WarnManager.H
+++ b/Source/ablastr/warn_manager/WarnManager.H
@@ -10,7 +10,7 @@
 
 #include "ablastr/utils/msg_logger/MsgLogger_fwd.H"
 
-#include <AMReX_ParmParse.H>
+#include <AMReX_BaseFwd.H>
 
 #include <memory>
 #include <optional>
diff --git a/Source/ablastr/warn_manager/WarnManager.cpp b/Source/ablastr/warn_manager/WarnManager.cpp
index 853274ab985..889f2df848d 100644
--- a/Source/ablastr/warn_manager/WarnManager.cpp
+++ b/Source/ablastr/warn_manager/WarnManager.cpp
@@ -11,10 +11,14 @@
 #include "ablastr/utils/text/StringUtils.H"
 #include "ablastr/utils/TextMsg.H"
 
+#include <AMReX.H>
+#include <AMReX_Config.H>
 #include <AMReX_ParallelDescriptor.H>
+#include <AMReX_ParmParse.H>
 
 #include <algorithm>
 #include <sstream>
+#include <vector>
 
 namespace abl_msg_logger = ablastr::utils::msg_logger;
 using namespace ablastr::warn_manager;
diff --git a/Source/main.cpp b/Source/main.cpp
index 54638fc14fe..b889bb24a76 100644
--- a/Source/main.cpp
+++ b/Source/main.cpp
@@ -23,7 +23,7 @@ int main(int argc, char* argv[])
 {
     ablastr::parallelization::mpi_init(argc, argv);
 
-    warpx_amrex_init(argc, argv);
+    warpx::initialization::amrex_init(argc, argv);
 
     utils::rocfft::setup();
 
diff --git a/Tools/machines/karolina-it4i/install_cpu_dependencies.sh b/Tools/machines/karolina-it4i/install_cpu_dependencies.sh
new file mode 100755
index 00000000000..6aceee7e892
--- /dev/null
+++ b/Tools/machines/karolina-it4i/install_cpu_dependencies.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+#
+# Copyright 2023 The WarpX Community
+#
+# This file is part of WarpX.
+#
+# Author: Axel Huebl
+# License: BSD-3-Clause-LBNL
+
+# Exit on first error encountered #############################################
+#
+set -eu -o pipefail
+
+
+# Check: ######################################################################
+#
+#   Was karolina_cpu_warpx.profile sourced and configured correctly?
+if [ -z ${proj-} ]; then echo "WARNING: The 'proj' variable is not yet set in your karolina_cpu_warpx.profile file! Please edit its line 2 to continue!"; exit 1; fi
+
+
+# Remove old dependencies #####################################################
+#
+SW_DIR="${HOME}/sw/karolina/cpu"
+rm -rf ${SW_DIR}
+mkdir -p ${SW_DIR}
+
+# remove common user mistakes in python, located in .local instead of a venv
+python3 -m pip uninstall -qq -y pywarpx
+python3 -m pip uninstall -qq -y warpx
+python3 -m pip uninstall -qqq -y mpi4py 2>/dev/null || true
+
+
+# General extra dependencies ##################################################
+#
+
+# c-blosc (I/O compression): update or clone the sources, then build and install
+if [ -d "$HOME/src/c-blosc" ]
+then
+  cd "$HOME/src/c-blosc"
+  git fetch
+  git checkout v1.21.1
+  cd -
+else
+  git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git "$HOME/src/c-blosc"
+fi
+rm -rf "$HOME/src/c-blosc-cpu-build"
+cmake -S "$HOME/src/c-blosc" -B "$HOME/src/c-blosc-cpu-build" -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDEACTIVATE_AVX2=OFF -DCMAKE_INSTALL_PREFIX="${SW_DIR}/c-blosc-1.21.1"
+cmake --build "$HOME/src/c-blosc-cpu-build" --target install --parallel 16
+rm -rf "$HOME/src/c-blosc-cpu-build"
+
+# HDF5
+if [ -d $HOME/src/hdf5 ]
+then
+  cd $HOME/src/hdf5
+  git fetch
+  git checkout hdf5-1_14_1-2
+  cd -
+else
+  git clone -b hdf5-1_14_1-2 https://github.com/HDFGroup/hdf5.git src/hdf5
+fi
+rm -rf $HOME/src/hdf5-build
+cmake -S $HOME/src/hdf5 -B $HOME/src/hdf5-build -DBUILD_TESTING=OFF -DHDF5_ENABLE_PARALLEL=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/hdf5-1.14.1.2
+cmake --build $HOME/src/hdf5-build --target install --parallel 16
+rm -rf $HOME/src/hdf5-build
+
+# ADIOS2: update or clone the sources, then build against c-blosc and install
+if [ -d "$HOME/src/adios2" ]
+then
+  cd "$HOME/src/adios2"
+  git fetch
+  git checkout v2.8.3
+  cd -
+else
+  git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git "$HOME/src/adios2"
+fi
+rm -rf "$HOME/src/adios2-cpu-build"
+cmake -S "$HOME/src/adios2" -B "$HOME/src/adios2-cpu-build" -DADIOS2_USE_Blosc=ON -DADIOS2_USE_HDF5=OFF -DADIOS2_USE_Fortran=OFF -DADIOS2_USE_Python=OFF -DADIOS2_USE_ZeroMQ=OFF -DCMAKE_INSTALL_PREFIX="${SW_DIR}/adios2-2.8.3"
+cmake --build "$HOME/src/adios2-cpu-build" --target install --parallel 16
+rm -rf "$HOME/src/adios2-cpu-build"
+
+# BLAS++ (for PSATD+RZ), CPU build: OpenMP on, GPU backend explicitly disabled
+if [ -d "$HOME/src/blaspp" ]
+then
+  cd "$HOME/src/blaspp"
+  git fetch
+  git checkout master
+  git pull
+  cd -
+else
+  git clone https://github.com/icl-utk-edu/blaspp.git "$HOME/src/blaspp"
+fi
+rm -rf "$HOME/src/blaspp-cpu-build"
+cmake -S "$HOME/src/blaspp" -B "$HOME/src/blaspp-cpu-build" -Duse_openmp=ON -Dgpu_backend=OFF -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX="${SW_DIR}/blaspp-master"
+cmake --build "$HOME/src/blaspp-cpu-build" --target install --parallel 16
+rm -rf "$HOME/src/blaspp-cpu-build"
+
+# LAPACK++ (for PSATD+RZ): update or clone the sources, then build and install
+if [ -d "$HOME/src/lapackpp" ]
+then
+  cd "$HOME/src/lapackpp"
+  git fetch
+  git checkout master
+  git pull
+  cd -
+else
+  git clone https://github.com/icl-utk-edu/lapackpp.git "$HOME/src/lapackpp"
+fi
+rm -rf "$HOME/src/lapackpp-cpu-build"
+CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S "$HOME/src/lapackpp" -B "$HOME/src/lapackpp-cpu-build" -DCMAKE_CXX_STANDARD=17 -Dbuild_tests=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON -DCMAKE_INSTALL_PREFIX="${SW_DIR}/lapackpp-master"
+cmake --build "$HOME/src/lapackpp-cpu-build" --target install --parallel 16
+rm -rf "$HOME/src/lapackpp-cpu-build"
+
+
+# Python ######################################################################
+#
+python3 -m pip install --upgrade pip
+python3 -m pip install --upgrade virtualenv
+python3 -m pip cache purge
+rm -rf ${SW_DIR}/venvs/warpx-cpu
+python3 -m venv ${SW_DIR}/venvs/warpx-cpu
+source ${SW_DIR}/venvs/warpx-cpu/bin/activate
+python3 -m pip install --upgrade pip
+python3 -m pip install --upgrade wheel
+python3 -m pip install --upgrade cython
+python3 -m pip install --upgrade numpy
+python3 -m pip install --upgrade pandas
+python3 -m pip install --upgrade scipy
+python3 -m pip install --upgrade mpi4py --no-cache-dir --no-build-isolation --no-binary mpi4py
+python3 -m pip install --upgrade openpmd-api
+python3 -m pip install --upgrade matplotlib
+python3 -m pip install --upgrade yt
+# install or update WarpX dependencies such as picmistandard
+python3 -m pip install --upgrade -r $HOME/src/warpx/requirements.txt
+# optional: for libEnsemble
+python3 -m pip install -r $HOME/src/warpx/Tools/LibEnsemble/requirements.txt
+# optional: for optimas (based on libEnsemble & ax->botorch->gpytorch->pytorch)
+python3 -m pip install --upgrade torch --index-url https://download.pytorch.org/whl/cpu
+python3 -m pip install -r $HOME/src/warpx/Tools/optimas/requirements.txt
diff --git a/Tools/machines/karolina-it4i/install_gpu_dependencies.sh b/Tools/machines/karolina-it4i/install_gpu_dependencies.sh
new file mode 100755
index 00000000000..1db5c38c1a4
--- /dev/null
+++ b/Tools/machines/karolina-it4i/install_gpu_dependencies.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+#
+# Copyright 2023 The WarpX Community
+#
+# This file is part of WarpX.
+#
+# Author: Axel Huebl
+# License: BSD-3-Clause-LBNL
+
+# Exit on first error encountered #############################################
+#
+set -eu -o pipefail
+
+
+# Check: ######################################################################
+#
+#   Was karolina_gpu_warpx.profile sourced and configured correctly?
+if [ -z ${proj-} ]; then echo "WARNING: The 'proj' variable is not yet set in your karolina_gpu_warpx.profile file! Please edit its line 2 to continue!"; exit 1; fi
+
+
+# Remove old dependencies #####################################################
+#
+SW_DIR="${HOME}/sw/karolina/gpu"
+rm -rf ${SW_DIR}
+mkdir -p ${SW_DIR}
+
+# remove common user mistakes in python, located in .local instead of a venv
+python3 -m pip uninstall -qq -y pywarpx
+python3 -m pip uninstall -qq -y warpx
+python3 -m pip uninstall -qqq -y mpi4py 2>/dev/null || true
+
+
+# General extra dependencies ##################################################
+#
+
+# c-blosc (I/O compression): update or clone the sources, then build and install
+if [ -d "$HOME/src/c-blosc" ]
+then
+  cd "$HOME/src/c-blosc"
+  git fetch
+  git checkout v1.21.1
+  cd -
+else
+  git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git "$HOME/src/c-blosc"
+fi
+rm -rf "$HOME/src/c-blosc-gpu-build"
+cmake -S "$HOME/src/c-blosc" -B "$HOME/src/c-blosc-gpu-build" -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDEACTIVATE_AVX2=OFF -DCMAKE_INSTALL_PREFIX="${SW_DIR}/c-blosc-1.21.1"
+cmake --build "$HOME/src/c-blosc-gpu-build" --target install --parallel 16
+rm -rf "$HOME/src/c-blosc-gpu-build"
+
+# HDF5
+if [ -d $HOME/src/hdf5 ]
+then
+  cd $HOME/src/hdf5
+  git fetch
+  git checkout hdf5-1_14_1-2
+  cd -
+else
+  git clone -b hdf5-1_14_1-2 https://github.com/HDFGroup/hdf5.git src/hdf5
+fi
+rm -rf $HOME/src/hdf5-build
+cmake -S $HOME/src/hdf5 -B $HOME/src/hdf5-build -DBUILD_TESTING=OFF -DHDF5_ENABLE_PARALLEL=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/hdf5-1.14.1.2
+cmake --build $HOME/src/hdf5-build --target install --parallel 16
+rm -rf $HOME/src/hdf5-build
+
+# ADIOS2: update or clone the sources, then build against c-blosc and install
+if [ -d "$HOME/src/adios2" ]
+then
+  cd "$HOME/src/adios2"
+  git fetch
+  git checkout v2.8.3
+  cd -
+else
+  git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git "$HOME/src/adios2"
+fi
+rm -rf "$HOME/src/adios2-gpu-build"
+cmake -S "$HOME/src/adios2" -B "$HOME/src/adios2-gpu-build" -DADIOS2_USE_Blosc=ON -DADIOS2_USE_HDF5=OFF -DADIOS2_USE_Fortran=OFF -DADIOS2_USE_Python=OFF -DADIOS2_USE_ZeroMQ=OFF -DCMAKE_INSTALL_PREFIX="${SW_DIR}/adios2-2.8.3"
+cmake --build "$HOME/src/adios2-gpu-build" --target install --parallel 12
+rm -rf "$HOME/src/adios2-gpu-build"
+
+# BLAS++ (for PSATD+RZ), GPU build: OpenMP off, CUDA backend
+if [ -d "$HOME/src/blaspp" ]
+then
+  cd "$HOME/src/blaspp"
+  git fetch
+  git checkout master
+  git pull
+  cd -
+else
+  git clone https://github.com/icl-utk-edu/blaspp.git "$HOME/src/blaspp"
+fi
+rm -rf "$HOME/src/blaspp-gpu-build"
+cmake -S "$HOME/src/blaspp" -B "$HOME/src/blaspp-gpu-build" -Duse_openmp=OFF -Dgpu_backend=cuda -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX="${SW_DIR}/blaspp-master"
+cmake --build "$HOME/src/blaspp-gpu-build" --target install --parallel 12
+rm -rf "$HOME/src/blaspp-gpu-build"
+
+# LAPACK++ (for PSATD+RZ): update or clone the sources, then build and install
+if [ -d "$HOME/src/lapackpp" ]
+then
+  cd "$HOME/src/lapackpp"
+  git fetch
+  git checkout master
+  git pull
+  cd -
+else
+  git clone https://github.com/icl-utk-edu/lapackpp.git "$HOME/src/lapackpp"
+fi
+rm -rf "$HOME/src/lapackpp-gpu-build"
+CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S "$HOME/src/lapackpp" -B "$HOME/src/lapackpp-gpu-build" -DCMAKE_CXX_STANDARD=17 -Dbuild_tests=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON -DCMAKE_INSTALL_PREFIX="${SW_DIR}/lapackpp-master"
+cmake --build "$HOME/src/lapackpp-gpu-build" --target install --parallel 12
+rm -rf "$HOME/src/lapackpp-gpu-build"
+
+
+# Python ######################################################################
+#
+python3 -m pip install --upgrade pip
+python3 -m pip install --upgrade virtualenv
+python3 -m pip cache purge
+rm -rf ${SW_DIR}/venvs/warpx-gpu
+python3 -m venv ${SW_DIR}/venvs/warpx-gpu
+source ${SW_DIR}/venvs/warpx-gpu/bin/activate
+python3 -m pip install --upgrade pip
+python3 -m pip install --upgrade wheel
+python3 -m pip install --upgrade cython
+python3 -m pip install --upgrade numpy
+python3 -m pip install --upgrade pandas
+python3 -m pip install --upgrade scipy
+python3 -m pip install --upgrade mpi4py --no-cache-dir --no-build-isolation --no-binary mpi4py
+python3 -m pip install --upgrade openpmd-api
+python3 -m pip install --upgrade matplotlib
+python3 -m pip install --upgrade yt
+# install or update WarpX dependencies such as picmistandard
+python3 -m pip install --upgrade -r $HOME/src/warpx/requirements.txt
+# optional: for libEnsemble
+python3 -m pip install -r $HOME/src/warpx/Tools/LibEnsemble/requirements.txt
+# optional: for optimas (based on libEnsemble & ax->botorch->gpytorch->pytorch)
+python3 -m pip install --upgrade torch  # CUDA 11.7 compatible wheel
+python3 -m pip install -r $HOME/src/warpx/Tools/optimas/requirements.txt
diff --git a/Tools/machines/karolina-it4i/karolina_cpu_warpx.profile.example b/Tools/machines/karolina-it4i/karolina_cpu_warpx.profile.example
new file mode 100644
index 00000000000..66b2f67a8be
--- /dev/null
+++ b/Tools/machines/karolina-it4i/karolina_cpu_warpx.profile.example
@@ -0,0 +1,58 @@
+# please set your project account
+export proj=""  # change me!
+
+# remembers the location of this script (quoted, so a path containing spaces still resolves)
+export MY_PROFILE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
+if [ -z "${proj-}" ]; then echo "WARNING: The 'proj' variable is not yet set in your $MY_PROFILE file! Please edit its line 2 to continue!"; return; fi
+
+# required dependencies
+module load GCCcore/11.3.0
+module load CMake/3.23.1-GCCcore-11.3.0
+module load OpenMPI/4.1.4-GCC-11.3.0
+
+# optional: for QED support with detailed tables
+module load Boost/1.79.0-GCC-11.3.0
+
+# optional: for openPMD and PSATD+RZ support
+module load OpenBLAS/0.3.20-GCC-11.3.0
+export CMAKE_PREFIX_PATH=${HOME}/sw/karolina/cpu/hdf5-1.14.1.2:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${HOME}/sw/karolina/cpu/c-blosc-1.21.1:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${HOME}/sw/karolina/cpu/adios2-2.8.3:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${HOME}/sw/karolina/cpu/blaspp-master:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${HOME}/sw/karolina/cpu/lapackpp-master:$CMAKE_PREFIX_PATH
+
+export LD_LIBRARY_PATH=${HOME}/sw/karolina/cpu/hdf5-1.14.1.2/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${HOME}/sw/karolina/cpu/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${HOME}/sw/karolina/cpu/adios2-2.8.3/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${HOME}/sw/karolina/cpu/blaspp-master/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${HOME}/sw/karolina/cpu/lapackpp-master/lib64:$LD_LIBRARY_PATH
+
+# optional: CCache (not found)
+#module load ccache
+
+# optional: for Python bindings or libEnsemble
+module load Python/3.10.4-GCCcore-11.3.0-bare
+
+# activate the WarpX virtual environment only if it has already been created
+if [ -d "${HOME}/sw/karolina/cpu/venvs/warpx-cpu" ]
+then
+  source ${HOME}/sw/karolina/cpu/venvs/warpx-cpu/bin/activate
+fi
+
+# an alias to request an interactive batch node for one hour (TODO)
+#   for parallel execution, start on the batch node: srun <command>
+#alias getNode="salloc -N 1 --ntasks-per-node=4 -t 1:00:00 -q interactive -C gpu --gpu-bind=single:1 -c 32 -G 4 -A $proj"
+# an alias to run a command on a batch node for up to 30min
+#   usage: runNode <command>
+#alias runNode="srun -N 1 --ntasks-per-node=4 -t 0:30:00 -q interactive -C gpu --gpu-bind=single:1 -c 32 -G 4 -A $proj"
+
+# optimize CUDA compilation for A100 (NOTE(review): presumably unused by CPU-only builds - confirm)
+export AMREX_CUDA_ARCH=8.0
+
+# optimize CPU microarchitecture for ... (TODO)
+#export CXXFLAGS="-march=abc"
+#export CFLAGS="-march=def"
+
+# compiler environment hints
+export CC=$(which gcc)
+export CXX=$(which g++)
+export FC=$(which gfortran)
diff --git a/Tools/machines/karolina-it4i/karolina_gpu.qsub b/Tools/machines/karolina-it4i/karolina_gpu.qsub
new file mode 100644
index 00000000000..274184ed1ca
--- /dev/null
+++ b/Tools/machines/karolina-it4i/karolina_gpu.qsub
@@ -0,0 +1,29 @@
+#!/bin/bash -l
+
+# Copyright 2023 The WarpX Community
+#
+# This file is part of WarpX.
+#
+# Authors: Axel Huebl, Andrei Berceanu
+# License: BSD-3-Clause-LBNL
+
+#PBS -q qgpu
+#PBS -N WarpX
+# Use two full nodes, 8 GPUs per node, 16 GPUs total
+#PBS -l select=2:ncpus=128:ngpus=8:mpiprocs=8:ompthreads=16,walltime=00:10:00
+#PBS -A <proj>
+
+cd "${PBS_O_WORKDIR}" || exit 1  # run from the submission directory; abort instead of running elsewhere
+
+# executable & inputs file or python interpreter & PICMI script here
+EXE=./warpx.rz
+INPUTS=inputs_rz
+
+# OpenMP threads per MPI rank (keep in sync with ompthreads in the select line above)
+export OMP_NUM_THREADS=16
+
+# run: each MPI rank only sees the GPU whose index equals its node-local rank; stdout goes to output.txt
+mpirun -np ${PBS_NP} bash -c "
+    export CUDA_VISIBLE_DEVICES=\${OMPI_COMM_WORLD_LOCAL_RANK};
+    ${EXE} ${INPUTS}" \
+  > output.txt
diff --git a/Tools/machines/karolina-it4i/karolina_gpu_warpx.profile.example b/Tools/machines/karolina-it4i/karolina_gpu_warpx.profile.example
new file mode 100644
index 00000000000..f657916dfcd
--- /dev/null
+++ b/Tools/machines/karolina-it4i/karolina_gpu_warpx.profile.example
@@ -0,0 +1,62 @@
+# please set your project account
+export proj=""  # change me!
+
+# remembers the location of this script (quoted, so a path containing spaces still resolves)
+export MY_PROFILE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
+if [ -z "${proj-}" ]; then echo "WARNING: The 'proj' variable is not yet set in your $MY_PROFILE file! Please edit its line 2 to continue!"; return; fi
+
+# required dependencies
+module purge
+ml GCCcore/11.3.0
+ml CUDA/11.7.0
+ml OpenMPI/4.1.4-GCC-11.3.0-CUDA-11.7.0
+ml CMake/3.23.1-GCCcore-11.3.0
+
+# optional: for QED support with detailed tables
+ml Boost/1.79.0-GCC-11.3.0
+
+# optional: for openPMD and PSATD+RZ support
+ml OpenBLAS/0.3.20-GCC-11.3.0
+export CMAKE_PREFIX_PATH=${HOME}/sw/karolina/gpu/hdf5-1.14.1.2:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${HOME}/sw/karolina/gpu/c-blosc-1.21.1:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${HOME}/sw/karolina/gpu/adios2-2.8.3:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${HOME}/sw/karolina/gpu/blaspp-master:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${HOME}/sw/karolina/gpu/lapackpp-master:$CMAKE_PREFIX_PATH
+
+export LD_LIBRARY_PATH=${HOME}/sw/karolina/gpu/hdf5-1.14.1.2/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${HOME}/sw/karolina/gpu/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${HOME}/sw/karolina/gpu/adios2-2.8.3/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${HOME}/sw/karolina/gpu/blaspp-master/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${HOME}/sw/karolina/gpu/lapackpp-master/lib64:$LD_LIBRARY_PATH
+
+# optional: CCache (not found)
+#ml ccache
+
+# optional: for Python bindings or libEnsemble
+ml Python/3.10.4-GCCcore-11.3.0-bare
+
+# activate the WarpX virtual environment only if it has already been created
+if [ -d "${HOME}/sw/karolina/gpu/venvs/warpx-gpu" ]
+then
+  source ${HOME}/sw/karolina/gpu/venvs/warpx-gpu/bin/activate
+fi
+
+# an alias to request an interactive batch node for one hour
+#   for parallel execution, start on the batch node: mpirun -n 4 <command>
+alias getNode="qsub -q qgpu -A $proj -l select=1:ncpus=32:ngpus=4 -l walltime=1:00:00 -I"
+# a shell function (aliases cannot take arguments) that submits a command as a batch job of up to 1hr
+#   usage: runNode <command>
+runNode() { printf '#!/bin/bash\nmpirun -n 4 %s\n' "$*" | qsub -q qgpu -A "$proj" -l select=1:ncpus=32:ngpus=4 -l walltime=1:00:00; }
+
+# optimize CUDA compilation for A100
+export AMREX_CUDA_ARCH=8.0
+
+# optimize CPU microarchitecture for ... (TODO)
+#export CXXFLAGS="-march=abc"
+#export CFLAGS="-march=def"
+
+# compiler environment hints
+export CC=$(which gcc)
+export CXX=$(which g++)
+export FC=$(which gfortran)
+export CUDACXX=$(which nvcc)
+export CUDAHOSTCXX=${CXX}