From a619857582204d949bfb0851f29c2116e43c81ef Mon Sep 17 00:00:00 2001 From: Morgan Williams Date: Fri, 17 May 2024 10:53:51 +1000 Subject: [PATCH] Update for pandas deprecations - util.time, util.pd --- pyrolite/util/pd.py | 14 ++++++-------- pyrolite/util/time.py | 9 ++++++--- test/util/util_pd.py | 9 +++------ 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/pyrolite/util/pd.py b/pyrolite/util/pd.py index afafb7a4..fb960955 100644 --- a/pyrolite/util/pd.py +++ b/pyrolite/util/pd.py @@ -225,21 +225,19 @@ def outliers( """ """ if not cols: cols = df.columns - colfltr = (df.dtypes == float) & ([i in cols for i in df.columns]) + _df = df.select_dtypes(include=[np.number]) + _df = _df.loc[:, [i in cols for i in _df.columns]] low, high = np.min(quantile_select), np.max(quantile_select) if not logquantile: - quantile = df.loc[:, colfltr].quantile([low, high]) + quantile = _df.quantile([low, high]) else: - quantile = df.loc[:, colfltr].apply(np.log).quantile([low, high]) + quantile = _df.apply(np.log).quantile([low, high]) whereout = ( - df.loc[:, colfltr] - .apply(detect, args=(quantile, quantile_select), axis=0) - .sum(axis=1) - > 0 + _df.apply(detect, args=(quantile, quantile_select), axis=0).sum(axis=1) > 0 ) if not exclude: whereout = np.logical_not(whereout) - return df.loc[whereout, colfltr] + return _df.loc[whereout, :] def concat_columns(df, columns=None, astype=str, **kwargs): diff --git a/pyrolite/util/time.py b/pyrolite/util/time.py index 95ac7375..c07b390a 100644 --- a/pyrolite/util/time.py +++ b/pyrolite/util/time.py @@ -251,19 +251,22 @@ def named_age(self, age, level="Specific", **kwargs): """ level = titlecase(level) - wthn_rng = lambda x: (age <= x.Start) & (age >= x.End) + + def wthn_rng(x): + return (age <= x.Start) & (age >= x.End) + relevant = self.data.loc[self.data.apply(wthn_rng, axis=1).values, :] if level == "Specific": # take the rightmost grouping relevant = relevant.loc[:, self.levels] counts = (~pd.isnull(relevant)).count(axis=1) if sum(counts == counts.max()) > 1: idx_rel_row = counts.index[ - max([ix for (ix, r) in enumerate(counts) if r == counts[0]]) + max([ix for (ix, r) in enumerate(counts) if r == counts.iloc[0]]) ] else: idx_rel_row = counts.idxmax() rel_row = relevant.loc[idx_rel_row, :] - return age_name(rel_row[~pd.isnull(rel_row)], **kwargs) + return age_name(rel_row[~pd.isnull(rel_row)].to_list(), **kwargs) else: unique_values = relevant.loc[:, level].unique() return unique_values[~pd.isnull(unique_values)][0] diff --git a/test/util/util_pd.py b/test/util/util_pd.py index 901aa977..f43792c1 100644 --- a/test/util/util_pd.py +++ b/test/util/util_pd.py @@ -133,15 +133,12 @@ def test_numeric(self): def test_error_methods(self): df = self.df df.loc[0, "SiO2"] = "Low" - for method in ["ignore", "raise", "coerce"]: + for method in ["raise", "coerce"]: with self.subTest(method=method): try: result = to_numeric(df, errors=method) - self.assertTrue(method in ["ignore", "coerce"]) - if method == "ignore": - self.assertTrue(result.loc[0, "SiO2"] == "Low") - else: - self.assertTrue(pd.isnull(result.loc[0, "SiO2"])) + self.assertTrue(method in [ "coerce"]) + self.assertTrue(pd.isnull(result.loc[0, "SiO2"])) except ValueError: # should raise with can't parse 'low' self.assertTrue(method == "raise")