Cleanup Radau again (#28)

JonasBreuling · Aug 26, 2024 · 43d9a1f · 43d9a1f
1 parent 0716efc
commit 43d9a1f
Show file tree

Hide file tree

Showing 10 changed files with 109 additions and 109 deletions.
diff --git a/README.md b/README.md
@@ -9,7 +9,7 @@
 <a href="https://pypi.org/project/scipy_dae/"><img alt="PyPI" src="https://img.shields.io/pypi/v/scipy_dae"></a>
 </p>
 
-Python implementation of solvers for differential algebraic equations (DAE's) and implicit differential equations (IDE's) that should be added to scipy one day. See the [GitHub repository](https://github.com/JonasBreuling/scipy_dae).
+Python implementation of solvers for differential algebraic equations (DAE's) and implicit differential equations (IDE's) that should be added to scipy one day.
 
 Currently, two different methods are implemented.
 

diff --git a/data/img/Arevalo_work_precision.png b/data/img/Arevalo_work_precision.png
diff --git a/data/img/Brenan_work_precision.png b/data/img/Brenan_work_precision.png
diff --git a/data/img/Kvaerno_work_precision.png b/data/img/Kvaerno_work_precision.png
diff --git a/data/img/Robertson_work_precision.png b/data/img/Robertson_work_precision.png
diff --git a/data/img/Weissinger_work_precision.png b/data/img/Weissinger_work_precision.png
diff --git a/data/img/knife_edge_work_precision.png b/data/img/knife_edge_work_precision.png
diff --git a/scipy_dae/integrate/_dae/radau.py b/scipy_dae/integrate/_dae/radau.py
@@ -8,7 +8,7 @@
 
 DAMPING_RATIO_ERROR_ESTIMATE = 0.05 # Hairer (8.19) is obtained by the choice 1.0. 
                                     # de Swart proposes 0.067 for s=3.
-KAPPA = 1 # Factor of the smooth limiter
+KAPPA = 1 # factor of the smooth limiter
 
 # UNKNOWN_VELOCITIES = False
 UNKNOWN_VELOCITIES = True
@@ -48,8 +48,8 @@ def radau_constants(s):
     # inverse coefficient matrix
     A_inv = np.linalg.inv(A)
 
-    # eigenvalues and corresponding eigenvectors of inverse coefficient matrix
-    lambdas, V = eig(A_inv)
+    # eigenvalues and corresponding eigenvectors of coefficient matrix
+    lambdas, V = eig(A)
 
     # sort eigenvalues and permut eigenvectors accordingly
     idx = np.argsort(lambdas)[::-1]
@@ -67,10 +67,10 @@ def radau_constants(s):
     TI = np.linalg.inv(T)
 
     # check if everything worked
-    assert np.allclose(V @ np.diag(lambdas) @ np.linalg.inv(V), A_inv)
-    assert np.allclose(np.linalg.inv(V) @ A_inv @ V, np.diag(lambdas))
-    assert np.allclose(T @ Gammas @ TI, A_inv)
-    assert np.allclose(TI @ A_inv @ T, Gammas)
+    assert np.allclose(V @ np.diag(lambdas) @ np.linalg.inv(V), A)
+    assert np.allclose(np.linalg.inv(V) @ A @ V, np.diag(lambdas))
+    assert np.allclose(T @ Gammas @ TI, A)
+    assert np.allclose(TI @ A @ T, Gammas)
 
     # extract real and complex eigenvalues
     real_idx = lambdas.imag == 0
@@ -84,40 +84,38 @@ def radau_constants(s):
     c_hat = np.array([0, *c])
     vander = np.vander(c_hat, increasing=True).T
 
-    rhs = 1 / np.arange(1, s + 1)
-    b_hats2 = 1 / gammas[0] # real eigenvalue of A, i.e., 1 / gamma[0]
-    b_hat1 = DAMPING_RATIO_ERROR_ESTIMATE * b_hats2
-    rhs[0] -= b_hat1
-    rhs -= b_hats2
-
-    b_hat = np.linalg.solve(vander[:-1, 1:], rhs)
-    v = b - b_hat
-
-    rhs2 = 1 / np.arange(1, s + 1)
-    rhs2[0] -= 1 / gammas[0] # Hairer (8.16)
-
-    b_hat2 = np.linalg.solve(vander[:-1, 1:], rhs2)
-    v2 = b_hat2 - b
-
-    # Compute the inverse of the Vandermonde matrix to get the interpolation matrix P.
+    # explicit embedded method
+    rhs_explicit = 1 / np.arange(1, s + 1)
+    rhs_explicit[0] -= gammas[0]
+    b_hat_explicit = np.linalg.solve(vander[:-1, 1:], rhs_explicit)
+    v_explicit = b_hat_explicit - b
+
+    # implicit embedded method
+    rhs_implicit = 1 / np.arange(1, s + 1)
+    b_hats_implicit = gammas[0] # real eigenvalue of A
+    b_hat1_implicit = DAMPING_RATIO_ERROR_ESTIMATE * b_hats_implicit
+    rhs_implicit[0] -= b_hat1_implicit
+    rhs_implicit -= b_hats_implicit
+    b_hat_implicit = np.linalg.solve(vander[:-1, 1:], rhs_implicit)
+    v_implicit = b - b_hat_implicit
+
+    # compute the inverse of the Vandermonde matrix to get the interpolation matrix P
     P = np.linalg.inv(vander)[1:, 1:]
 
-    # Compute coefficients using Vandermonde matrix.
+    # compute coefficients using Vandermonde matrix
     vander2 = np.vander(c, increasing=True)
     P2 = np.linalg.inv(vander2)
 
-    # These linear combinations are used in the algorithm.
+    # these linear combinations are used in the algorithm
     MU_REAL = gammas[0]
     MU_COMPLEX = alphas -1j * betas
-    TI_REAL = TI[0]
-    TI_COMPLEX = TI[1::2] + 1j * TI[2::2]
 
-    return A, A_inv, c, T, TI, P, P2, b_hat1, v, v2, MU_REAL, MU_COMPLEX, TI_REAL, TI_COMPLEX, b_hat, b_hat2, p
+    return A, A_inv, c, T, TI, P, P2, b_hat1_implicit, v_implicit, v_explicit, MU_REAL, MU_COMPLEX, b_hat_implicit, b_hat_explicit, p
 
 
-def solve_collocation_system(fun, t, y, h, Z0, scale, tol,
-                             LU_real, LU_complex, solve_lu,
-                             C, T, TI, A, A_inv, newton_max_iter):
+def solve_collocation_system_Z(fun, t, y, h, Z0, scale, tol,
+                               LU_real, LU_complex, solve_lu,
+                               C, T, TI, A, A_inv, newton_max_iter):
     """Solve the collocation system.
 
     Parameters
@@ -220,21 +218,20 @@ def solve_collocation_system(fun, t, y, h, Z0, scale, tol,
     return converged, k + 1, Y, Yp, Z, rate
 
 
-def solve_collocation_system2(fun, t, y, h, Yp0, scale, tol,
-                              LU_real, LU_complex, solve_lu,
-                              C, T, TI, A, newton_max_iter):
+def solve_collocation_system_Yp(fun, t, y, h, Yp0, scale, tol,
+                                LU_real, LU_complex, solve_lu,
+                                C, T, TI, A, newton_max_iter):
     s, n = Yp0.shape
     ncs = s // 2
     tau = t + h * C
 
     Yp = Yp0
     Y = y + h * A.dot(Yp)
-    V_dot = TI.dot(Yp)
 
     F = np.empty((s, n))
 
     dY_norm_old = None
-    dV_dot = np.empty_like(V_dot)
+    dW_dot = np.empty_like(F)
     converged = False
     rate = None
     for k in range(newton_max_iter):
@@ -244,23 +241,23 @@ def solve_collocation_system2(fun, t, y, h, Yp0, scale, tol,
         if not np.all(np.isfinite(F)):
             break
 
-        U = TI @ F
-        f_real = -U[0]
-        f_complex = np.empty((ncs, n), dtype=complex)
+        G = TI @ F
+        G_real = -G[0]
+        G_complex = np.empty((ncs, n), dtype=complex)
         for i in range(ncs):
-            f_complex[i] = -(U[2 * i + 1] + 1j * U[2 * i + 2])
+            G_complex[i] = -(G[2 * i + 1] + 1j * G[2 * i + 2])
 
-        dV_dot_real = solve_lu(LU_real, f_real)
-        dV_dot_complex = np.empty_like(f_complex)
+        dW_dot_real = solve_lu(LU_real, G_real)
+        dW_dot_complex = np.empty_like(G_complex)
         for i in range(ncs):
-            dV_dot_complex[i] = solve_lu(LU_complex[i], f_complex[i])
+            dW_dot_complex[i] = solve_lu(LU_complex[i], G_complex[i])
 
-        dV_dot[0] = dV_dot_real
+        dW_dot[0] = dW_dot_real
         for i in range(ncs):
-            dV_dot[2 * i + 1] = dV_dot_complex[i].real
-            dV_dot[2 * i + 2] = dV_dot_complex[i].imag
+            dW_dot[2 * i + 1] = dW_dot_complex[i].real
+            dW_dot[2 * i + 2] = dW_dot_complex[i].imag
 
-        dYp = T.dot(dV_dot)
+        dYp = T.dot(dW_dot)
         dY = h * A.dot(dYp)
 
         Yp += dYp
@@ -493,9 +490,8 @@ def __init__(self, fun, t0, y0, yp0, t_bound, stages=3,
         self.stages = stages
         (
             self.A, self.A_inv, self.C, self.T, self.TI, self.P, self.P2, 
-            self.b0, self.v, self.v2, self.MU_REAL, self.MU_COMPLEX, 
-            self.TI_REAL, self.TI_COMPLEX, self.b_hat, self.b_hat2, 
-            self.order,
+            self.b_hat1_implicit, self.v_implicit, self.v_explicit, self.MU_REAL, self.MU_COMPLEX, 
+            self.b_hat_implicit, self.b_hat_explicit, self.order,
         ) = radau_constants(stages)
 
         self.h_abs_old = None
@@ -561,9 +557,9 @@ def _step_impl(self):
         TI = self.TI
         A = self.A
         A_inv = self.A_inv
-        v = self.v
-        v2 = self.v2
-        b0 = self.b0
+        v_implicit = self.v_implicit
+        v_explicit = self.v_explicit
+        b_hat1_implicit = self.b_hat1_implicit
         P2 = self.P2
 
         max_step = self.max_step
@@ -612,18 +608,18 @@ def _step_impl(self):
 
             if self.sol is None:
                 if UNKNOWN_VELOCITIES:
-                    Y0 = np.tile(y, s).reshape(s, -1)
                     Yp0 = np.tile(yp, s).reshape(s, -1)
                 else:
                     Z0 = np.zeros((s, y.shape[0]))
             else:
                 if UNKNOWN_VELOCITIES:
-                    # note: this only works when we exrapolate the derivative 
+                    # note: this only works when we extrapolate the derivative 
                     # of the collocation polynomial but do not use the sth order 
                     # collocation polynomial for the derivatives as well.
                     Yp0 = self.sol(t + h * C)[1].T
-                    # # Z0 = self.sol(t + h * C)[0].T - y
-                    # # Yp0 = (1 / h) * A_inv @ Z0
+                    # # Zp0 = self.sol(t + h * C)[1].T - yp
+                    # Z0 = self.sol(t + h * C)[0].T - y
+                    # Yp0 = (1 / h) * A_inv @ Z0
                 else:
                     Z0 = self.sol(t + h * C)[0].T - y
             scale = atol + np.abs(y) * rtol
@@ -632,19 +628,19 @@ def _step_impl(self):
             while not converged:
                 if LU_real is None or LU_complex is None:
                     if UNKNOWN_VELOCITIES:
-                        LU_real = self.lu(Jyp + h / MU_REAL * Jy)
-                        LU_complex = [self.lu(Jyp + h / MU * Jy) for MU in MU_COMPLEX]
+                        LU_real = self.lu(Jyp + h * MU_REAL * Jy)
+                        LU_complex = [self.lu(Jyp + h * MU * Jy) for MU in MU_COMPLEX]
                     else:
-                        LU_real = self.lu(MU_REAL / h * Jyp + Jy)
-                        LU_complex = [self.lu(MU / h * Jyp + Jy) for MU in MU_COMPLEX]
+                        LU_real = self.lu(1 / (MU_REAL * h) * Jyp + Jy)
+                        LU_complex = [self.lu(1 / (MU * h) * Jyp + Jy) for MU in MU_COMPLEX]
 
                 if UNKNOWN_VELOCITIES:
-                    converged, n_iter, Y, Yp, Z, rate = solve_collocation_system2(
+                    converged, n_iter, Y, Yp, Z, rate = solve_collocation_system_Yp(
                         self.fun, t, y, h, Yp0, scale, newton_tol,
                         LU_real, LU_complex, self.solve_lu,
                         C, T, TI, A, newton_max_iter)
                 else:
-                    converged, n_iter, Y, Yp, Z, rate = solve_collocation_system(
+                    converged, n_iter, Y, Yp, Z, rate = solve_collocation_system_Z(
                         self.fun, t, y, h, Z0, scale, newton_tol,
                         LU_real, LU_complex, self.solve_lu,
                         C, T, TI, A, A_inv, newton_max_iter)
@@ -675,36 +671,29 @@ def _step_impl(self):
             # embedded error measures
             if self.newton_iter_embedded == 0:
                 # explicit embedded method with R(z) = +oo for z -> oo
-                if UNKNOWN_VELOCITIES:
-                    raise NotImplementedError
-                else:
-                    error_embedded = h * (yp / MU_REAL + v2 @ Yp)
+                error_embedded = h * (MU_REAL * yp + v_explicit @ Yp)
             elif self.newton_iter_embedded == 1:
-
                 # compute implicit embedded method with a single Newton iteration;
                 # R(z) = b_hat1 / b_hats2 = DAMPING_RATIO_ERROR_ESTIMATE for z -> oo
-                yp_hat_new = MU_REAL * (v @ Yp - b0 * yp)
+                yp_hat_new = (v_implicit @ Yp - b_hat1_implicit * yp) / MU_REAL
                 F = self.fun(t_new, y_new, yp_hat_new)
                 if UNKNOWN_VELOCITIES:
-                    error_embedded = -h / MU_REAL * self.solve_lu(LU_real, F)
+                    error_embedded = -h * MU_REAL * self.solve_lu(LU_real, F)
                 else:
                     error_embedded = -self.solve_lu(LU_real, F)
             else:
                 # compute implicit embedded method with `newton_iter_embedded`` iterations
-                if UNKNOWN_VELOCITIES:
-                    raise NotImplementedError
-                else:
-                    y_hat_new = y_new.copy() # initial guess
-                    for _ in range(self.newton_iter_embedded):
-                        yp_hat_new = MU_REAL * (
-                            (y_hat_new - y) / h
-                            - b0 * yp
-                            - self.b_hat @ Yp
-                        )
-                        F = self.fun(t_new, y_hat_new, yp_hat_new)
+                yp_hat_new0 = -(y / h + + b_hat1_implicit * yp + self.b_hat_implicit @ Yp) / MU_REAL 
+                y_hat_new = y_new.copy() # initial guess
+                for _ in range(self.newton_iter_embedded):
+                    yp_hat_new = yp_hat_new0 + y_hat_new / (h * MU_REAL)
+                    F = self.fun(t_new, y_hat_new, yp_hat_new)
+                    if UNKNOWN_VELOCITIES:
+                        y_hat_new -= h * MU_REAL * self.solve_lu(LU_real, F)
+                    else:
                         y_hat_new -= self.solve_lu(LU_real, F)
 
-                    error_embedded = y_hat_new - y_new 
+                error_embedded = y_hat_new - y_new 
 
             # mix embedded error with collocation error as proposed in Guglielmi2001/ Guglielmi2003
             error = (
@@ -779,15 +768,15 @@ def _step_impl(self):
         return step_accepted, message
 
     def _compute_dense_output(self):
-        Q = np.dot(self.Z.T, self.P)
-        h = self.t - self.t_old
-        Yp = (self.A_inv / h) @ self.Z
-        Zp = Yp - self.yp_old
-        Qp = np.dot(Zp.T, self.P)
-        # Z = self.Y - self.y_old
-        # Q = np.dot(Z.T, self.P)
-        # Zp = self.Yp - self.yp_old
+        # Q = np.dot(self.Z.T, self.P)
+        # h = self.t - self.t_old
+        # Yp = (self.A_inv / h) @ self.Z
+        # Zp = Yp - self.yp_old
         # Qp = np.dot(Zp.T, self.P)
+        Z = self.Y - self.y_old
+        Q = np.dot(Z.T, self.P)
+        Zp = self.Yp - self.yp_old
+        Qp = np.dot(Zp.T, self.P)
         return RadauDenseOutput(self.t_old, self.t, self.y_old, Q, self.yp_old, Qp)
 
     def _dense_output_impl(self):

diff --git a/scipy_dae/integrate/_dae/tests/test_integration_rational.py b/scipy_dae/integrate/_dae/tests/test_integration_rational.py
@@ -66,10 +66,12 @@ def test_integration_rational(first_step, vectorized, method, t_span, jac, jac_s
     assert_equal(res.status, 0)
 
     if method == "BDF":
+        assert_(0 < res.nfev < 31)
         assert_(0 < res.njev < 3)
-        assert_(0 < res.nlu < 10)
+        assert_(0 < res.nlu < 7)
     else: # Radau
-        assert_(0 < res.njev < 4)
+        assert_(0 < res.nfev < 46)
+        assert_(0 < res.njev < 3)
         assert_(0 < res.nlu < 13)
 
     y_true = sol_rational(res.t)
@@ -92,6 +94,6 @@ def test_integration_rational(first_step, vectorized, method, t_span, jac, jac_s
 
     assert_allclose(res.sol(res.t)[0], res.y, rtol=1e-15, atol=1e-15)
 
-# if __name__ == "__main__":
-#     for param in parameters_rational:
-#         test_integration_rational(*param)
+if __name__ == "__main__":
+    for param in parameters_rational:
+        test_integration_rational(*param)