From 5c5f61a09ee4e49ed81716fa9485527092fcc6b0 Mon Sep 17 00:00:00 2001 From: Aaron Keesing Date: Thu, 9 Mar 2023 21:46:07 +1300 Subject: [PATCH] Add check for noncentrality parameters. (#347) Fixes #343, caused by scipy/scipy#17916. Co-authored-by: Raphael Vallat --- pingouin/power.py | 34 ++++++++++++++++++++++++++++++++++ pingouin/tests/test_power.py | 23 +++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/pingouin/power.py b/pingouin/power.py index 45e5589d..c77cc81c 100644 --- a/pingouin/power.py +++ b/pingouin/power.py @@ -15,6 +15,22 @@ ] +def _check_nc(dist, nc): + """Check if non-centrality parameter is too large for the given distribution. + This is a workaround for scipy/scipy#17916 which can hopefully be removed at some point. + """ + if dist is stats.ncx2 and nc >= float(2**32 - 1): + warnings.warn("Non-centrality parameter is too large for the ncx2 distribution.") + return False + if dist is stats.ncf and nc >= float(2**32): + warnings.warn("Non-centrality parameter is too large for the ncf distribution.") + return False + if dist is stats.nct and (nc <= float(-(2**16)) or nc >= float(2**16)): + warnings.warn("Non-centrality parameter is too large for the nct distribution.") + return False + return True + + def power_ttest( d=None, n=None, power=None, alpha=0.05, contrast="two-samples", alternative="two-sided" ): @@ -149,6 +165,8 @@ def func(d, n, power, alpha): dof = (n - 1) * tsample nc = d * np.sqrt(n / tsample) tcrit = stats.t.ppf(alpha / tside, dof) + if not _check_nc(stats.nct, nc): + return np.nan return stats.nct.cdf(tcrit, dof, nc) elif alternative == "two-sided": @@ -157,6 +175,8 @@ def func(d, n, power, alpha): dof = (n - 1) * tsample nc = d * np.sqrt(n / tsample) tcrit = stats.t.ppf(1 - alpha / tside, dof) + if not _check_nc(stats.nct, nc): + return np.nan return stats.nct.sf(tcrit, dof, nc) + stats.nct.cdf(-tcrit, dof, nc) else: # Alternative = 'greater' @@ -165,6 +185,8 @@ def func(d, n, power, alpha): dof = (n - 1) * tsample nc = d * np.sqrt(n / tsample) tcrit = stats.t.ppf(1 - alpha / tside, dof) + if not _check_nc(stats.nct, nc): + return np.nan return stats.nct.sf(tcrit, dof, nc) # Evaluate missing variable @@ -316,6 +338,8 @@ def func(d, nx, ny, power, alpha): dof = nx + ny - 2 nc = d * (1 / np.sqrt(1 / nx + 1 / ny)) tcrit = stats.t.ppf(alpha / tside, dof) + if not _check_nc(stats.nct, nc): + return np.nan return stats.nct.cdf(tcrit, dof, nc) elif alternative == "two-sided": @@ -324,6 +348,8 @@ def func(d, nx, ny, power, alpha): dof = nx + ny - 2 nc = d * (1 / np.sqrt(1 / nx + 1 / ny)) tcrit = stats.t.ppf(1 - alpha / tside, dof) + if not _check_nc(stats.nct, nc): + return np.nan return stats.nct.sf(tcrit, dof, nc) + stats.nct.cdf(-tcrit, dof, nc) else: # Alternative = 'greater' @@ -332,6 +358,8 @@ def func(d, nx, ny, power, alpha): dof = nx + ny - 2 nc = d * (1 / np.sqrt(1 / nx + 1 / ny)) tcrit = stats.t.ppf(1 - alpha / tside, dof) + if not _check_nc(stats.nct, nc): + return np.nan return stats.nct.sf(tcrit, dof, nc) # Evaluate missing variable @@ -487,6 +515,8 @@ def func(f_sq, k, n, power, alpha): dof1 = k - 1 dof2 = (n * k) - k fcrit = stats.f.ppf(1 - alpha, dof1, dof2) + if not _check_nc(stats.ncf, nc): + return np.nan return stats.ncf.sf(fcrit, dof1, dof2, nc) # Evaluate missing variable @@ -715,6 +745,8 @@ def func(f_sq, m, n, power, alpha, corr): dof2 = (n - 1) * dof1 nc = (f_sq * n * m * epsilon) / (1 - corr) fcrit = stats.f.ppf(1 - alpha, dof1, dof2) + if not _check_nc(stats.ncf, nc): + return np.nan return stats.ncf.sf(fcrit, dof1, dof2, nc) # Evaluate missing variable @@ -1033,6 +1065,8 @@ def power_chi2(dof, w=None, n=None, power=None, alpha=0.05): def func(w, n, power, alpha): k = stats.chi2.ppf(1 - alpha, dof) nc = n * w**2 + if not _check_nc(stats.ncx2, nc): + return np.nan return stats.ncx2.sf(k, dof, nc) # Evaluate missing variable diff --git a/pingouin/tests/test_power.py b/pingouin/tests/test_power.py index f16fbe31..0d06ab82 100644 --- a/pingouin/tests/test_power.py +++ b/pingouin/tests/test_power.py @@ -99,6 +99,13 @@ def test_power_ttest(self): # Error with pytest.raises(ValueError): power_ttest(d=0.5) + # Too high values of nc + with pytest.warns(UserWarning, match="Non-centrality parameter"): + assert np.isnan(power_ttest(d=2**16, n=20)) + assert np.isnan(power_ttest(d=-(2**16), n=20)) + assert np.isnan(power_ttest(d=5000.0, n=1500)) + assert np.isnan(power_ttest(d=5000.0, n=1500, alternative="less")) + assert np.isnan(power_ttest(d=5000.0, n=1500, alternative="greater")) def test_power_ttest2n(self): """Test function power_ttest2n. @@ -131,6 +138,12 @@ def test_power_ttest2n(self): # Error with pytest.raises(ValueError): power_ttest2n(nx=20, ny=20) + # Too high values of nc + with pytest.warns(UserWarning, match="Non-centrality parameter"): + assert np.isnan(power_ttest2n(nx=1500, ny=450, d=5000.0)) + assert np.isnan(power_ttest2n(nx=1500, ny=450, d=5000.0, alternative="less")) + assert np.isnan(power_ttest2n(nx=1500, ny=450, d=5000.0, alternative="greater")) + assert np.isnan(power_ttest2n(nx=5, ny=7, d=float(-(2**16)))) def test_power_anova(self): """Test function power_anova. @@ -147,6 +160,10 @@ def test_power_anova(self): # Error with pytest.raises(ValueError): power_anova(eta_squared=eta, k=2) + # Too high values of nc + with pytest.warns(UserWarning, match="Non-centrality parameter"): + assert np.isnan(power_anova(eta_squared=0.9999, k=40, n=100000)) + assert np.isnan(power_anova(eta_squared=0.9999, k=10, n=50000)) def test_power_rm_anova(self): """Test function power_rm_anova. @@ -200,6 +217,9 @@ def test_power_rm_anova(self): # Error with pytest.raises(ValueError): power_rm_anova(eta_squared=eta, m=2) + # Too high values of nc + with pytest.warns(UserWarning, match="Non-centrality parameter"): + assert np.isnan(power_rm_anova(eta_squared=0.999, m=50, n=100000)) def test_power_corr(self): """Test function power_corr. @@ -255,3 +275,6 @@ def test_power_chi2(self): # Error with pytest.raises(ValueError): power_chi2(1, w=0.3) + # Too high values of nc + with pytest.warns(UserWarning, match="Non-centrality parameter"): + assert np.isnan(power_chi2(dof=10, w=0.999, n=10000000000))