From 5c5f61a09ee4e49ed81716fa9485527092fcc6b0 Mon Sep 17 00:00:00 2001
From: Aaron Keesing <agkphysics@gmail.com>
Date: Thu, 9 Mar 2023 21:46:07 +1300
Subject: [PATCH] Add check for noncentrality parameters. (#347)

Fixes #343, caused by scipy/scipy#17916.

Co-authored-by: Raphael Vallat <raphaelvallat9@gmail.com>
---
 pingouin/power.py            | 34 ++++++++++++++++++++++++++++++++++
 pingouin/tests/test_power.py | 23 +++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/pingouin/power.py b/pingouin/power.py
index 45e5589d..c77cc81c 100644
--- a/pingouin/power.py
+++ b/pingouin/power.py
@@ -15,6 +15,22 @@
 ]
 
 
+def _check_nc(dist, nc):
+    """Check if non-centrality parameter is too large for the given distribution.
+    This is a workaround for scipy/scipy#17916 which can hopefully be removed at some point.
+    """
+    if dist is stats.ncx2 and nc >= float(2**32 - 1):
+        warnings.warn("Non-centrality parameter is too large for the ncx2 distribution.")
+        return False
+    if dist is stats.ncf and nc >= float(2**32):
+        warnings.warn("Non-centrality parameter is too large for the ncf distribution.")
+        return False
+    if dist is stats.nct and (nc <= float(-(2**16)) or nc >= float(2**16)):
+        warnings.warn("Non-centrality parameter is too large for the nct distribution.")
+        return False
+    return True
+
+
 def power_ttest(
     d=None, n=None, power=None, alpha=0.05, contrast="two-samples", alternative="two-sided"
 ):
@@ -149,6 +165,8 @@ def func(d, n, power, alpha):
             dof = (n - 1) * tsample
             nc = d * np.sqrt(n / tsample)
             tcrit = stats.t.ppf(alpha / tside, dof)
+            if not _check_nc(stats.nct, nc):
+                return np.nan
             return stats.nct.cdf(tcrit, dof, nc)
 
     elif alternative == "two-sided":
@@ -157,6 +175,8 @@ def func(d, n, power, alpha):
             dof = (n - 1) * tsample
             nc = d * np.sqrt(n / tsample)
             tcrit = stats.t.ppf(1 - alpha / tside, dof)
+            if not _check_nc(stats.nct, nc):
+                return np.nan
             return stats.nct.sf(tcrit, dof, nc) + stats.nct.cdf(-tcrit, dof, nc)
 
     else:  # Alternative = 'greater'
@@ -165,6 +185,8 @@ def func(d, n, power, alpha):
             dof = (n - 1) * tsample
             nc = d * np.sqrt(n / tsample)
             tcrit = stats.t.ppf(1 - alpha / tside, dof)
+            if not _check_nc(stats.nct, nc):
+                return np.nan
             return stats.nct.sf(tcrit, dof, nc)
 
     # Evaluate missing variable
@@ -316,6 +338,8 @@ def func(d, nx, ny, power, alpha):
             dof = nx + ny - 2
             nc = d * (1 / np.sqrt(1 / nx + 1 / ny))
             tcrit = stats.t.ppf(alpha / tside, dof)
+            if not _check_nc(stats.nct, nc):
+                return np.nan
             return stats.nct.cdf(tcrit, dof, nc)
 
     elif alternative == "two-sided":
@@ -324,6 +348,8 @@ def func(d, nx, ny, power, alpha):
             dof = nx + ny - 2
             nc = d * (1 / np.sqrt(1 / nx + 1 / ny))
             tcrit = stats.t.ppf(1 - alpha / tside, dof)
+            if not _check_nc(stats.nct, nc):
+                return np.nan
             return stats.nct.sf(tcrit, dof, nc) + stats.nct.cdf(-tcrit, dof, nc)
 
     else:  # Alternative = 'greater'
@@ -332,6 +358,8 @@ def func(d, nx, ny, power, alpha):
             dof = nx + ny - 2
             nc = d * (1 / np.sqrt(1 / nx + 1 / ny))
             tcrit = stats.t.ppf(1 - alpha / tside, dof)
+            if not _check_nc(stats.nct, nc):
+                return np.nan
             return stats.nct.sf(tcrit, dof, nc)
 
     # Evaluate missing variable
@@ -487,6 +515,8 @@ def func(f_sq, k, n, power, alpha):
         dof1 = k - 1
         dof2 = (n * k) - k
         fcrit = stats.f.ppf(1 - alpha, dof1, dof2)
+        if not _check_nc(stats.ncf, nc):
+            return np.nan
         return stats.ncf.sf(fcrit, dof1, dof2, nc)
 
     # Evaluate missing variable
@@ -715,6 +745,8 @@ def func(f_sq, m, n, power, alpha, corr):
         dof2 = (n - 1) * dof1
         nc = (f_sq * n * m * epsilon) / (1 - corr)
         fcrit = stats.f.ppf(1 - alpha, dof1, dof2)
+        if not _check_nc(stats.ncf, nc):
+            return np.nan
         return stats.ncf.sf(fcrit, dof1, dof2, nc)
 
     # Evaluate missing variable
@@ -1033,6 +1065,8 @@ def power_chi2(dof, w=None, n=None, power=None, alpha=0.05):
     def func(w, n, power, alpha):
         k = stats.chi2.ppf(1 - alpha, dof)
         nc = n * w**2
+        if not _check_nc(stats.ncx2, nc):
+            return np.nan
         return stats.ncx2.sf(k, dof, nc)
 
     # Evaluate missing variable
diff --git a/pingouin/tests/test_power.py b/pingouin/tests/test_power.py
index f16fbe31..0d06ab82 100644
--- a/pingouin/tests/test_power.py
+++ b/pingouin/tests/test_power.py
@@ -99,6 +99,13 @@ def test_power_ttest(self):
         # Error
         with pytest.raises(ValueError):
             power_ttest(d=0.5)
+        # Too high values of nc
+        with pytest.warns(UserWarning, match="Non-centrality parameter"):
+            assert np.isnan(power_ttest(d=2**16, n=20))
+            assert np.isnan(power_ttest(d=-(2**16), n=20))
+            assert np.isnan(power_ttest(d=5000.0, n=1500))
+            assert np.isnan(power_ttest(d=5000.0, n=1500, alternative="less"))
+            assert np.isnan(power_ttest(d=5000.0, n=1500, alternative="greater"))
 
     def test_power_ttest2n(self):
         """Test function power_ttest2n.
@@ -131,6 +138,12 @@ def test_power_ttest2n(self):
         # Error
         with pytest.raises(ValueError):
             power_ttest2n(nx=20, ny=20)
+        # Too high values of nc
+        with pytest.warns(UserWarning, match="Non-centrality parameter"):
+            assert np.isnan(power_ttest2n(nx=1500, ny=450, d=5000.0))
+            assert np.isnan(power_ttest2n(nx=1500, ny=450, d=5000.0, alternative="less"))
+            assert np.isnan(power_ttest2n(nx=1500, ny=450, d=5000.0, alternative="greater"))
+            assert np.isnan(power_ttest2n(nx=5, ny=7, d=float(-(2**16))))
 
     def test_power_anova(self):
         """Test function power_anova.
@@ -147,6 +160,10 @@ def test_power_anova(self):
         # Error
         with pytest.raises(ValueError):
             power_anova(eta_squared=eta, k=2)
+        # Too high values of nc
+        with pytest.warns(UserWarning, match="Non-centrality parameter"):
+            assert np.isnan(power_anova(eta_squared=0.9999, k=40, n=100000))
+            assert np.isnan(power_anova(eta_squared=0.9999, k=10, n=50000))
 
     def test_power_rm_anova(self):
         """Test function power_rm_anova.
@@ -200,6 +217,9 @@ def test_power_rm_anova(self):
         # Error
         with pytest.raises(ValueError):
             power_rm_anova(eta_squared=eta, m=2)
+        # Too high values of nc
+        with pytest.warns(UserWarning, match="Non-centrality parameter"):
+            assert np.isnan(power_rm_anova(eta_squared=0.999, m=50, n=100000))
 
     def test_power_corr(self):
         """Test function power_corr.
@@ -255,3 +275,6 @@ def test_power_chi2(self):
         # Error
         with pytest.raises(ValueError):
             power_chi2(1, w=0.3)
+        # Too high values of nc
+        with pytest.warns(UserWarning, match="Non-centrality parameter"):
+            assert np.isnan(power_chi2(dof=10, w=0.999, n=10000000000))