diff --git a/pingouin/pairwise.py b/pingouin/pairwise.py index 69e61f4e..8549939d 100644 --- a/pingouin/pairwise.py +++ b/pingouin/pairwise.py @@ -475,7 +475,9 @@ def pairwise_tests( # designs. Indeed, a similar groupby is applied by default on # each within-subject factor of a two-way repeated measures design. if all([agg[i], marginal]): - tmp = data.groupby([subject, f], as_index=False, observed=True, sort=True).mean() + tmp = data.groupby([subject, f], as_index=False, observed=True, sort=True).mean( + numeric_only=True + ) else: tmp = data pt = pairwise_tests( @@ -880,7 +882,7 @@ def pairwise_tukey(data=None, dv=None, between=None, effsize="hedges"): # See https://github.com/raphaelvallat/pingouin/issues/111 labels = np.array(list(grp.groups.keys())) n = grp.count().to_numpy() - gmeans = grp.mean().to_numpy() + gmeans = grp.mean(numeric_only=True).to_numpy() gvar = aov.at[1, "MS"] / n # Pairwise combinations @@ -1046,8 +1048,8 @@ def pairwise_gameshowell(data=None, dv=None, between=None, effsize="hedges"): # See https://github.com/raphaelvallat/pingouin/issues/111 labels = np.array(list(grp.groups.keys())) n = grp.count().to_numpy() - gmeans = grp.mean().to_numpy() - gvars = grp.var().to_numpy() + gmeans = grp.mean(numeric_only=True).to_numpy() + gvars = grp.var().to_numpy() # numeric_only=True added in pandas 1.5, set to False in 2.0 # Pairwise combinations g1, g2 = np.array(list(combinations(np.arange(ng), 2))).T @@ -1425,7 +1427,7 @@ def traverse(o, tree_types=(list, tuple)): [c in keys for c in covar] ), "Covariate(s) are either not in data or not numeric." # And we make sure that X or Y does not contain covar - stats = stats[~stats[["X", "Y"]].isin(covar).any(1)] + stats = stats[~stats[["X", "Y"]].isin(covar).any(axis=1)] stats = stats.reset_index(drop=True) if stats.shape[0] == 0: raise ValueError( diff --git a/pingouin/parametric.py b/pingouin/parametric.py index 3a11c7a9..0296dcb1 100644 --- a/pingouin/parametric.py +++ b/pingouin/parametric.py @@ -552,13 +552,13 @@ def rm_anova( grandmean = data[dv].mean() # Calculate sums of squares - ss_with = ((grp_with.mean() - grandmean) ** 2 * grp_with.count()).sum() + ss_with = ((grp_with.mean(numeric_only=True) - grandmean) ** 2 * grp_with.count()).sum() ss_resall = grp_with.apply(lambda x: (x - x.mean()) ** 2).sum() # sstotal = sstime + ss_resall = sstime + (sssubj + sserror) # ss_total = ((data[dv] - grandmean)**2).sum() # We can further divide the residuals into a within and between component: grp_subj = data.groupby(subject, observed=True)[dv] - ss_resbetw = n_rm * np.sum((grp_subj.mean() - grandmean) ** 2) + ss_resbetw = n_rm * np.sum((grp_subj.mean(numeric_only=True) - grandmean) ** 2) ss_reswith = ss_resall - ss_resbetw # Calculate degrees of freedom @@ -702,12 +702,12 @@ def rm_anova2(data=None, dv=None, within=None, subject=None, effsize="ng2"): # Groupby means # I think that observed=True is actually not needed here since we have already used # `observed=True` in pivot_table. - grp_s = data.groupby(subject, observed=True)[dv].mean() - grp_a = data.groupby([a], observed=True)[dv].mean() - grp_b = data.groupby([b], observed=True)[dv].mean() - grp_ab = data.groupby([a, b], observed=True)[dv].mean() - grp_as = data.groupby([a, subject], observed=True)[dv].mean() - grp_bs = data.groupby([b, subject], observed=True)[dv].mean() + grp_s = data.groupby(subject, observed=True)[dv].mean(numeric_only=True) + grp_a = data.groupby([a], observed=True)[dv].mean(numeric_only=True) + grp_b = data.groupby([b], observed=True)[dv].mean(numeric_only=True) + grp_ab = data.groupby([a, b], observed=True)[dv].mean(numeric_only=True) + grp_as = data.groupby([a, subject], observed=True)[dv].mean(numeric_only=True) + grp_bs = data.groupby([b, subject], observed=True)[dv].mean(numeric_only=True) # Sums of squares ss_tot = np.sum((data[dv] - mu) ** 2) @@ -991,7 +991,7 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False, effsize=" # Calculate sums of squares grp = data.groupby(between, observed=True, group_keys=False)[dv] # Between effect - ssbetween = ((grp.mean() - data[dv].mean()) ** 2 * grp.count()).sum() + ssbetween = ((grp.mean(numeric_only=True) - data[dv].mean()) ** 2 * grp.count()).sum() # Within effect (= error between) # = (grp.var(ddof=0) * grp.count()).sum() sserror = grp.transform(lambda x: (x - x.mean()) ** 2).sum() @@ -1346,8 +1346,8 @@ def welch_anova(data=None, dv=None, between=None): # Sums of squares (regular and adjusted) ss_res = grp.apply(lambda x: (x - x.mean()) ** 2).sum() - ss_bet = ((grp.mean() - data[dv].mean()) ** 2 * grp.count()).sum() - ss_betadj = np.sum(weights * np.square(grp.mean() - adj_grandmean)) + ss_bet = ((grp.mean(numeric_only=True) - data[dv].mean()) ** 2 * grp.count()).sum() + ss_betadj = np.sum(weights * np.square(grp.mean(numeric_only=True) - adj_grandmean)) ms_betadj = ss_betadj / ddof1 # Calculate lambda, F-value, p-value and np2 diff --git a/pingouin/regression.py b/pingouin/regression.py index d2e38f8a..5a518270 100644 --- a/pingouin/regression.py +++ b/pingouin/regression.py @@ -727,7 +727,7 @@ def logistic_regression( first level of our categorical variable (species = Adelie) which will be used as the reference level: - >>> df = pd.get_dummies(df, columns=['species'], drop_first=True) + >>> df = pd.get_dummies(df, columns=['species'], dtype=float, drop_first=True) >>> X = df[['body_mass_kg', 'species_Chinstrap', 'species_Gentoo']] >>> y = df['male'] >>> lom = pg.logistic_regression(X, y, remove_na=True) diff --git a/pingouin/tests/test_regression.py b/pingouin/tests/test_regression.py index 55661608..984f45e6 100644 --- a/pingouin/tests/test_regression.py +++ b/pingouin/tests/test_regression.py @@ -349,7 +349,7 @@ def test_logistic_regression(self): # R: >>> glm("male ~ body_mass_kg + species", family=binomial, ...) # >>> confint.default(model) # Wald CI # See https://stats.stackexchange.com/a/275421/253579 - data_dum = pd.get_dummies(data, columns=["species"], drop_first=True) + data_dum = pd.get_dummies(data, columns=["species"], drop_first=True, dtype=float) X = data_dum[["body_mass_kg", "species_Chinstrap", "species_Gentoo"]] y = data_dum["male"] lom = logistic_regression(X, y, as_dataframe=False) diff --git a/requirements.txt b/requirements.txt index 5a32a1a9..17f1c4fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ numpy>=1.19 scipy>=1.7 -pandas>=1.0 +pandas>=1.1 matplotlib>=3.0.2 seaborn>=0.11 statsmodels>=0.13 diff --git a/setup.py b/setup.py index 4da11d4c..4f8c8956 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def read(fname): INSTALL_REQUIRES = [ "numpy>=1.19", "scipy>=1.7", - "pandas>=1.0", + "pandas>=1.1", "matplotlib>=3.0.2", "seaborn>=0.11", "statsmodels>=0.13", @@ -41,10 +41,10 @@ def read(fname): CLASSIFIERS = [ "Intended Audience :: Science/Research", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Mathematics", "Operating System :: POSIX", "Operating System :: Unix",