From da85e1a7502fe2b576b91826b647d50a65c9ba3e Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Mon, 25 Sep 2023 19:31:03 -0400
Subject: [PATCH 1/8] some changes to the SDC Newton solver to make it more
 robust we now reject a solve if the mass fractions are too far out of [0, 1]
 also do the total energy update conservatively

---
 Source/sdc/Castro_sdc_util.H  | 11 ++-------
 Source/sdc/sdc_newton_solve.H | 42 +++++++++++++++++++++++++++--------
 2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/Source/sdc/Castro_sdc_util.H b/Source/sdc/Castro_sdc_util.H
index 164daff206..c765ad9a16 100644
--- a/Source/sdc/Castro_sdc_util.H
+++ b/Source/sdc/Castro_sdc_util.H
@@ -52,13 +52,6 @@ sdc_solve(const Real dt_m,
     int ierr;
     Real err_out;
 
-    // for debugging
-    GpuArray<Real, NUM_STATE> U_orig;
-
-    for (int n = 0; n < NUM_STATE; ++n) {
-        U_orig[n] = U_old[n];
-    }
-
     if (sdc_solver == NEWTON_SOLVE) {
         // We are going to assume we already have a good guess
         // for the solve in U_new and just pass the solve onto
@@ -66,7 +59,7 @@ sdc_solve(const Real dt_m,
         sdc_newton_subdivide(dt_m, U_old, U_new, C, sdc_iteration, err_out, ierr);
 
         // failing?
-        if (ierr != NEWTON_SUCCESS) {
+        if (ierr != newton::NEWTON_SUCCESS) {
             Abort("Newton subcycling failed in sdc_solve");
         }
     } else if (sdc_solver == VODE_SOLVE) {
@@ -85,7 +78,7 @@ sdc_solve(const Real dt_m,
         sdc_newton_subdivide(dt_m, U_old, U_new, C, sdc_iteration, err_out, ierr);
 
         // Failing?
-        if (ierr != NEWTON_SUCCESS) {
+        if (ierr != newton::NEWTON_SUCCESS) {
             Abort("Newton failure in sdc_solve");
         }
     }
diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H
index 312837cc68..06040b30ff 100644
--- a/Source/sdc/sdc_newton_solve.H
+++ b/Source/sdc/sdc_newton_solve.H
@@ -5,10 +5,14 @@
 #include <linpack.H>
 
 // error codes
-constexpr int NEWTON_SUCCESS = 0;
-constexpr int SINGULAR_MATRIX = -1;
-constexpr int CONVERGENCE_FAILURE = -2;
+namespace newton {
+    constexpr int NEWTON_SUCCESS = 0;
+    constexpr int SINGULAR_MATRIX = -1;
+    constexpr int CONVERGENCE_FAILURE = -2;
+    constexpr int BAD_MASS_FRACTIONS = -3;
 
+    constexpr Real species_failure_tolerance = 1.e-2_rt;
+};
 
 #ifdef REACTIONS
 
@@ -147,7 +151,7 @@ sdc_newton_solve(const Real dt_m,
 
     const int MAX_ITER = 100;
 
-    ierr = NEWTON_SUCCESS;
+    ierr = newton::NEWTON_SUCCESS;
 
     // update the density and momenta for this zone -- they don't react
 
@@ -219,7 +223,7 @@ sdc_newton_solve(const Real dt_m,
         dgefa<NumSpec+1>(Jac, ipvt, info);
 #endif
         if (info != 0) {
-            ierr = SINGULAR_MATRIX;
+            ierr = newton::SINGULAR_MATRIX;
             return;
         }
 
@@ -273,7 +277,7 @@ sdc_newton_solve(const Real dt_m,
     err_out = err;
 
     if (!converged) {
-        ierr = CONVERGENCE_FAILURE;
+        ierr = newton::CONVERGENCE_FAILURE;
         return;
     }
 
@@ -290,7 +294,13 @@ sdc_newton_solve(const Real dt_m,
     }
 
     U_new[UEINT] = U_react(NumSpec+1);
-    U_new[UEDEN] = U_new[UEINT] + 0.5_rt * v2 / U_new[URHO];
+
+    // we want to do a conservative update for (rho E), so first figure out the 
+    // energy generation rate
+
+    Real rho_Sdot = (U_new[UEINT] - U_old[UEINT]) / dt_m - C[UEINT];
+
+    U_new[UEDEN] = U_old[UEDEN] + dt_m * (C[UEDEN] + rho_Sdot);
 
 }
 
@@ -320,13 +330,13 @@ sdc_newton_subdivide(const Real dt_m,
     // use the old time solution.
 
     int nsub = 1;
-    ierr = CONVERGENCE_FAILURE;
+    ierr = newton::CONVERGENCE_FAILURE;
 
     for (int n = 0; n < NUM_STATE; ++n) {
         U_begin[n] = U_old[n];
     }
 
-    while (nsub < MAX_NSUB && ierr != NEWTON_SUCCESS) {
+    while (nsub < MAX_NSUB && ierr != newton::NEWTON_SUCCESS) {
         if (nsub > 1) {
             for (int n = 0; n < NUM_STATE; ++n) {
                 U_new[n] = U_old[n];
@@ -347,6 +357,20 @@ sdc_newton_subdivide(const Real dt_m,
 
             sdc_newton_solve(dt_sub, U_begin, U_new, C, sdc_iteration, err_out, ierr);
 
+            if (ierr != newton::NEWTON_SUCCESS) {
+                // no point in continuing this subdivision if one stage failed
+                break;
+            }
+
+            // our solve may have resulted in mass fractions outside
+            // of [0, 1] -- reject if this is the case
+            for (int n = 0; n <= NumSpec; ++n) {
+                if (U_new[UFS+n] < -newton::species_failure_tolerance * U_new[URHO] ||
+                    U_new[UFS+n] > (1.0_rt + newton::species_failure_tolerance) * U_new[URHO]) {
+                    ierr = newton::BAD_MASS_FRACTIONS;
+                }
+            }
+
             for (int n = 0; n < NUM_STATE; ++n) {
                 U_begin[n] = U_new[n];
             }

From bbfffcdb0624cbb65765f18d5a5a6de7bb4091bf Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Mon, 25 Sep 2023 19:34:25 -0400
Subject: [PATCH 2/8] fix check

---
 Source/sdc/sdc_newton_solve.H | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H
index 06040b30ff..c76637c521 100644
--- a/Source/sdc/sdc_newton_solve.H
+++ b/Source/sdc/sdc_newton_solve.H
@@ -364,7 +364,7 @@ sdc_newton_subdivide(const Real dt_m,
 
             // our solve may have resulted in mass fractions outside
             // of [0, 1] -- reject if this is the case
-            for (int n = 0; n <= NumSpec; ++n) {
+            for (int n = 0; n < NumSpec; ++n) {
                 if (U_new[UFS+n] < -newton::species_failure_tolerance * U_new[URHO] ||
                     U_new[UFS+n] > (1.0_rt + newton::species_failure_tolerance) * U_new[URHO]) {
                     ierr = newton::BAD_MASS_FRACTIONS;

From 58efc8ff9ded542706040d42c886ac44935bcd72 Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Thu, 28 Sep 2023 12:06:55 -0400
Subject: [PATCH 3/8] some more robustness

---
 Source/driver/Castro_advance_sdc.cpp |  1 -
 Source/sdc/Castro_sdc.cpp            | 27 +++++++++++++++------------
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/Source/driver/Castro_advance_sdc.cpp b/Source/driver/Castro_advance_sdc.cpp
index d0c1da2263..369071ebd6 100644
--- a/Source/driver/Castro_advance_sdc.cpp
+++ b/Source/driver/Castro_advance_sdc.cpp
@@ -321,7 +321,6 @@ Castro::do_advance_sdc (Real time,
 
       Array4<const Real> const Sburn_arr = Sburn.array(mfi);
 
-      // we don't worry about the difference between centers and averages
       ca_store_reaction_state(obx, Sburn_arr, U_center_arr, R_center_arr);
 
       // convert R_new from centers to averages in place
diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp
index 8da8e9925a..6003532203 100644
--- a/Source/sdc/Castro_sdc.cpp
+++ b/Source/sdc/Castro_sdc.cpp
@@ -209,8 +209,6 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
             {
                 normalize_species_sdc(i, j, k, U_center_arr);
             });
-            // ca_normalize_species(AMREX_INT_ANYD(bx1.loVect()), AMREX_INT_ANYD(bx1.hiVect()),
-            //                      BL_TO_FORTRAN_ANYD(U_center));
 
             // convert the C source to cell-centers
             C_center.resize(bx1, NUM_STATE);
@@ -235,15 +233,20 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
                 sdc_update_centers_o4(i, j, k, U_center_arr, U_new_center_arr, C_center_arr, dt_m, sdc_iteration);
             });
 
+            // enforce that the species sum to one after the reaction solve
+            amrex::ParallelFor(bx1,
+            [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+            {
+                normalize_species_sdc(i, j, k, U_new_center_arr);
+            });
+
+
             // compute R_i and in 1 ghost cell and then convert to <R> in
             // place (only for the interior)
             R_new.resize(bx1, NUM_STATE);
             Elixir elix_R_new = R_new.elixir();
             Array4<Real> const& R_new_arr = R_new.array();
 
-            // ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx1),
-            //                        BL_TO_FORTRAN_3D(U_new_center),
-            //                        BL_TO_FORTRAN_3D(R_new));
             amrex::ParallelFor(bx1,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
@@ -354,14 +357,18 @@ Castro::construct_old_react_source(MultiFab& U_state,
 
             make_cell_center(obx, U_state.array(mfi), U_center_arr, domain_lo, domain_hi);
 
+            // sometimes the Laplacian can make the species go negative near discontinuities
+            amrex::ParallelFor(bx1,
+            [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+            {
+                normalize_species_sdc(i, j, k, U_center_arr);
+            });
+
             // burn, including one ghost cell
             R_center.resize(obx, NUM_STATE);
             Elixir elix_r_center = R_center.elixir();
             auto const R_center_arr = R_center.array();
 
-            // ca_instantaneous_react(BL_TO_FORTRAN_BOX(obx),
-            //                        BL_TO_FORTRAN_3D(U_center),
-            //                        BL_TO_FORTRAN_3D(R_center));
             amrex::ParallelFor(obx,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
@@ -398,10 +405,6 @@ Castro::construct_old_react_source(MultiFab& U_state,
             auto const U_state_arr = U_state.array(mfi);
             auto const R_source_arr = R_source.array(mfi);
 
-            // construct the reactive source term
-            // ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx),
-            //                        BL_TO_FORTRAN_3D(U_state[mfi]),
-            //                        BL_TO_FORTRAN_3D(R_source[mfi]));
             amrex::ParallelFor(bx,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {

From 730621485f63959722ab6e93e3202d868200054a Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Thu, 28 Sep 2023 12:12:13 -0400
Subject: [PATCH 4/8] fix

---
 Source/sdc/Castro_sdc.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp
index 6003532203..d268041db1 100644
--- a/Source/sdc/Castro_sdc.cpp
+++ b/Source/sdc/Castro_sdc.cpp
@@ -358,7 +358,7 @@ Castro::construct_old_react_source(MultiFab& U_state,
             make_cell_center(obx, U_state.array(mfi), U_center_arr, domain_lo, domain_hi);
 
             // sometimes the Laplacian can make the species go negative near discontinuities
-            amrex::ParallelFor(bx1,
+            amrex::ParallelFor(obx,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
                 normalize_species_sdc(i, j, k, U_center_arr);

From 4561f853e047e1bccce9f855eabf75c712ee9f16 Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Fri, 29 Sep 2023 13:05:02 -0400
Subject: [PATCH 5/8] add another normalization

---
 Source/driver/Castro_advance_sdc.cpp | 1 +
 Source/sdc/Castro_sdc.cpp            | 7 ++++++-
 Source/sdc/sdc_util.cpp              | 8 ++++++--
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/Source/driver/Castro_advance_sdc.cpp b/Source/driver/Castro_advance_sdc.cpp
index 369071ebd6..a9746e7171 100644
--- a/Source/driver/Castro_advance_sdc.cpp
+++ b/Source/driver/Castro_advance_sdc.cpp
@@ -72,6 +72,7 @@ Castro::do_advance_sdc (Real time,
     // our staging area.  Note we need to pass new_time here to the
     // FillPatch so it only pulls from the new MF -- this will not
     // work for multilevel.
+
     MultiFab::Copy(S_new, *(k_new[m]), 0, 0, S_new.nComp(), 0);
     clean_state(S_new, cur_time, 0);
     expand_state(Sborder, cur_time, NUM_GROW);
diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp
index d268041db1..18da975358 100644
--- a/Source/sdc/Castro_sdc.cpp
+++ b/Source/sdc/Castro_sdc.cpp
@@ -240,7 +240,6 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
                 normalize_species_sdc(i, j, k, U_new_center_arr);
             });
 
-
             // compute R_i and in 1 ghost cell and then convert to <R> in
             // place (only for the interior)
             R_new.resize(bx1, NUM_STATE);
@@ -264,6 +263,12 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
             ca_sdc_conservative_update(bx, dt_m, k_new_m_start_arr, k_new_m_end_arr,
                                        C_source_arr, R_new_arr);
 
+            amrex::ParallelFor(bx,
+            [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+            {
+                normalize_species_sdc(i, j, k, k_new_m_end_arr);
+            });
+
         }
 #else
         Array4<const Real> const& k_new_m_start_arr=
diff --git a/Source/sdc/sdc_util.cpp b/Source/sdc/sdc_util.cpp
index 53f67f6b36..44099d940e 100644
--- a/Source/sdc/sdc_util.cpp
+++ b/Source/sdc/sdc_util.cpp
@@ -365,9 +365,13 @@ Castro::ca_sdc_conservative_update(const Box& bx, Real const dt_m,
     // given <U>_old, <R>_new, and <C>, compute <U>_new
 
     // now consider the reacting system
-    AMREX_PARALLEL_FOR_4D(bx, U_new.nComp(), i, j, k, n,
+    amrex::ParallelFor(bx, 
+    [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
     {
-        U_new(i,j,k,n) = U_old(i,j,k,n) + dt_m * R_new(i,j,k,n) + dt_m * C(i,j,k,n);
+        for (int n = 0; n < NUM_STATE; ++n) {
+            U_new(i,j,k,n) = U_old(i,j,k,n) + dt_m * R_new(i,j,k,n) + dt_m * C(i,j,k,n);
+        }
+
     });
 } // end subroutine ca_sdc_conservative_update
 #endif

From 2b41b89eb632e95c52a9b893d06069ee154fbc06 Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Mon, 9 Oct 2023 10:17:43 -0400
Subject: [PATCH 6/8] some reverts

---
 Source/driver/Castro_advance_sdc.cpp | 2 --
 Source/sdc/Castro_sdc.cpp            | 6 ------
 Source/sdc/sdc_util.cpp              | 9 ++-------
 3 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/Source/driver/Castro_advance_sdc.cpp b/Source/driver/Castro_advance_sdc.cpp
index d350caff83..e6bec0804c 100644
--- a/Source/driver/Castro_advance_sdc.cpp
+++ b/Source/driver/Castro_advance_sdc.cpp
@@ -1,6 +1,5 @@
 
 #include <Castro.H>
-#include <Castro_F.H>
 
 #ifdef RADIATION
 #include <Radiation.H>
@@ -72,7 +71,6 @@ Castro::do_advance_sdc (Real time,
     // our staging area.  Note we need to pass new_time here to the
     // FillPatch so it only pulls from the new MF -- this will not
     // work for multilevel.
-
     MultiFab::Copy(S_new, *(k_new[m]), 0, 0, S_new.nComp(), 0);
     clean_state(S_new, cur_time, 0);
     expand_state(Sborder, cur_time, NUM_GROW);
diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp
index 7297719366..0240b2939d 100644
--- a/Source/sdc/Castro_sdc.cpp
+++ b/Source/sdc/Castro_sdc.cpp
@@ -263,12 +263,6 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
             ca_sdc_conservative_update(bx, dt_m, k_new_m_start_arr, k_new_m_end_arr,
                                        C_source_arr, R_new_arr);
 
-            amrex::ParallelFor(bx,
-            [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-            {
-                normalize_species_sdc(i, j, k, k_new_m_end_arr);
-            });
-
         }
 #else
         Array4<const Real> const& k_new_m_start_arr=
diff --git a/Source/sdc/sdc_util.cpp b/Source/sdc/sdc_util.cpp
index 26fe2a5e78..6d471ab9a3 100644
--- a/Source/sdc/sdc_util.cpp
+++ b/Source/sdc/sdc_util.cpp
@@ -1,5 +1,4 @@
 #include <Castro.H>
-#include <Castro_F.H>
 
 using namespace amrex;
 
@@ -365,13 +364,9 @@ Castro::ca_sdc_conservative_update(const Box& bx, Real const dt_m,
     // given <U>_old, <R>_new, and <C>, compute <U>_new
 
     // now consider the reacting system
-    amrex::ParallelFor(bx, 
-    [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+    AMREX_PARALLEL_FOR_4D(bx, U_new.nComp(), i, j, k, n,
     {
-        for (int n = 0; n < NUM_STATE; ++n) {
-            U_new(i,j,k,n) = U_old(i,j,k,n) + dt_m * R_new(i,j,k,n) + dt_m * C(i,j,k,n);
-        }
-
+        U_new(i,j,k,n) = U_old(i,j,k,n) + dt_m * R_new(i,j,k,n) + dt_m * C(i,j,k,n);
     });
 } // end subroutine ca_sdc_conservative_update
 #endif

From fc43f16e806b16a97aea0271398ff3dda68faa82 Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Mon, 9 Oct 2023 10:33:58 -0400
Subject: [PATCH 7/8] update benchmark

---
 .../react_converge_128_true_sdc.out            | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out b/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out
index 2357cefc1f..35ea0d15ee 100644
--- a/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out
+++ b/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out
@@ -21,16 +21,16 @@
  rho_e_sdc_source            -6.1984065959e+22       6.3082607681e+21
  Temp_sdc_source                             0                      0
  rho_He4_sdc_source              -543231.29383           78510.973919
- rho_C12_sdc_source              -1694.4890575           10.073635142
+ rho_C12_sdc_source              -1694.4890575           10.073635144
  rho_O16_sdc_source           -0.0098093776861       7.1506350196e-06
  rho_Fe56_sdc_source         -5.4492579275e-05       7.8513247889e-06
  pressure                     1.4155320805e+22       4.2608130858e+22
- kineng                        5.228354476e-13       1.6647276226e+18
+ kineng                       3.6057617076e-14       1.6647276226e+18
  soundspeed                       212069503.63           257404342.21
  Gamma_1                          1.5601126452           1.5885713572
- MachNumber                   6.8192135042e-18        0.0086982771596
+ MachNumber                   1.7908132003e-18        0.0086982771596
  entropy                           348439780.9           349209883.57
- magvort                      6.3108872418e-30       0.00018541051835
+ magvort                                     0       0.00018541051835
  divu                            -0.1163387912          0.55816306007
  eint_E                       4.5262357143e+16       6.9937847678e+16
  eint_e                       4.5262357143e+16       6.9937843728e+16
@@ -49,11 +49,11 @@
  z_velocity                                  0                      0
  t_sound_t_enuc               0.00023710316663           0.0195732693
  enuc                         2.9131490847e+15       4.5102586513e+17
- magvel                        1.446147223e-09           2067446.1363
- radvel                      -0.00067837194793           2067446.1363
+ magvel                       3.7977686648e-10           2067446.1363
+ radvel                      -0.00067839201193           2067446.1363
  circvel                                     0           11.820144682
- magmom                       0.00072307361144       1.6422547233e+12
- angular_momentum_x                         -0                     -0
+ magmom                       0.00018988843323       1.6422547233e+12
+ angular_momentum_x                          0                      0
  angular_momentum_y                          0                      0
- angular_momentum_z          -1.2410862734e+14       1.2410862747e+14
+ angular_momentum_z           -1.241086281e+14       1.2410862748e+14
 

From d860c8f9ced85f840df5e986e031c52f94260d72 Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Tue, 10 Oct 2023 11:18:38 -0400
Subject: [PATCH 8/8] add renormalization of the species after the update in
 some sense, this mirrors what we would do with Strang or simplified-SDC,
 since we always renormalize after leaving the burner.

---
 Source/sdc/Castro_sdc.cpp     | 26 ++++++++++++++++----------
 Source/sdc/sdc_newton_solve.H |  8 ++++++--
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp
index d82d2764e9..06860f4fd1 100644
--- a/Source/sdc/Castro_sdc.cpp
+++ b/Source/sdc/Castro_sdc.cpp
@@ -133,6 +133,12 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
         const Box& bx = mfi.tilebox();
         const Box& bx1 = mfi.growntilebox(1);
 
+        // this is the starting data
+        Array4<const Real> const& k_new_m_start_arr = (k_new[m_start])->array(mfi);
+
+        // this is where the update will be stored
+        Array4<Real> const& k_new_m_end_arr = (k_new[m_end])->array(mfi);
+
 #ifdef REACTIONS
         // advection + reactions
         if (sdc_order == 2)
@@ -171,8 +177,6 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
 
             }
 
-            auto k_m = (*k_new[m_start]).array(mfi);
-            auto k_n = (*k_new[m_end]).array(mfi);
             auto A_m = (*A_new[m_start]).array(mfi);
             auto A_n = (*A_new[m_end]).array(mfi);
             auto C_arr = C2.array();
@@ -180,7 +184,9 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
             amrex::ParallelFor(bx,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
-                sdc_update_o2(i, j, k, k_m, k_n, A_m, A_n, C_arr, dt_m, sdc_iteration, m_start);
+                sdc_update_o2(i, j, k,
+                              k_new_m_start_arr, k_new_m_end_arr,
+                              A_m, A_n, C_arr, dt_m, sdc_iteration, m_start);
             });
         }
         else
@@ -189,10 +195,7 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
             // fourth order SDC reaction update -- we need to respect the
             // difference between cell-centers and averages
 
-            Array4<const Real> const& k_new_m_start_arr=
-                (k_new[m_start])->array(mfi);
-            Array4<Real> const& k_new_m_end_arr=(k_new[m_end])->array(mfi);
-            Array4<const Real> const& C_source_arr=C_source.array(mfi);
+            Array4<const Real> const& C_source_arr = C_source.array(mfi);
 
             // convert the starting U to cell-centered on a fab-by-fab basis
             // -- including one ghost cell
@@ -264,9 +267,6 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
 
         }
 #else
-        Array4<const Real> const& k_new_m_start_arr=
-            (k_new[m_start])->array(mfi);
-        Array4<Real> const& k_new_m_end_arr=(k_new[m_end])->array(mfi);
         Array4<const Real> const& A_new_arr=(A_new[m_start])->array(mfi);
         Array4<const Real> const& A_old_0_arr=(A_old[0])->array(mfi);
         Array4<const Real> const& A_old_1_arr=(A_old[1])->array(mfi);
@@ -317,6 +317,12 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
         }
 #endif
 
+        amrex::ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            normalize_species_sdc(i, j, k, k_new_m_end_arr);
+        });
+
     }
 }
 
diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H
index 9aa1fb00ca..41122b6644 100644
--- a/Source/sdc/sdc_newton_solve.H
+++ b/Source/sdc/sdc_newton_solve.H
@@ -109,7 +109,7 @@ sdc_newton_solve(const Real dt_m,
 
     Array1D<Real, 1, NumSpec+1> f;
 
-    const int MAX_ITER = 100;
+    const int MAX_ITER = 200;
 
     ierr = newton::NEWTON_SUCCESS;
 
@@ -251,7 +251,7 @@ sdc_newton_subdivide(const Real dt_m,
     // converges or reaches our limit on the number of
     // subintervals.
 
-    const int MAX_NSUB = 64;
+    const int MAX_NSUB = 128;
     GpuArray<Real, NUM_STATE> U_begin;
 
     // subdivide the timestep and do multiple Newtons. We come
@@ -308,6 +308,10 @@ sdc_newton_subdivide(const Real dt_m,
         }
         nsub *= 2;
     }
+
+    if (ierr != newton::NEWTON_SUCCESS) {
+        std::cout << "Falled. " << ierr << std::endl;
+    }
 }
 #endif