Skip to content

Commit

Permalink
ENH: Support NamedAggs in kwargs in Rolling/Expanding/EWM agg method (#…
Browse files Browse the repository at this point in the history
…60549)

* ENH: Support NamedAggs in kwargs in Rolling/Expanding/EWM agg method

* Pre-commit fix

* Fix typing

* Fix typing retry

* Fix typing retry 2

* Update pandas/core/window/rolling.py

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* Add type ignore

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
  • Loading branch information
snitish and mroeschke authored Dec 13, 2024
1 parent 069253d commit 9501650
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ Other enhancements
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
- :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def online(
klass="Series/Dataframe",
axis="",
)
def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
return super().aggregate(func, *args, **kwargs)

agg = aggregate
Expand Down Expand Up @@ -981,7 +981,7 @@ def reset(self) -> None:
"""
self._mean.reset()

def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
    # Aggregation is unavailable on this object: every call raises,
    # regardless of the arguments supplied.
    message = "aggregate is not implemented."
    raise NotImplementedError(message)

def std(self, bias: bool = False, *args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/window/expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def _get_window_indexer(self) -> BaseIndexer:
klass="Series/Dataframe",
axis="",
)
def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
return super().aggregate(func, *args, **kwargs)

agg = aggregate
Expand Down
15 changes: 11 additions & 4 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@

from pandas.core._numba import executor
from pandas.core.algorithms import factorize
from pandas.core.apply import ResamplerWindowApply
from pandas.core.apply import (
ResamplerWindowApply,
reconstruct_func,
)
from pandas.core.arrays import ExtensionArray
from pandas.core.base import SelectionMixin
import pandas.core.common as com
Expand Down Expand Up @@ -646,8 +649,12 @@ def _numba_apply(
out = obj._constructor(result, index=index, columns=columns)
return self._resolve_output(out, obj)

def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
    # ``reconstruct_func`` reports whether the call is a keyword-style
    # relabeling request and hands back the (possibly rewritten) ``func``
    # together with the output column labels and their positional order.
    relabeling, func, columns, order = reconstruct_func(func, **kwargs)

    applier = ResamplerWindowApply(self, func, args=args, kwargs=kwargs)
    result = applier.agg()

    # ``agg`` returning None means the function has to be applied directly.
    if result is None:
        return self.apply(func, raw=False, args=args, kwargs=kwargs)

    # For a relabeling call, put the columns in the requested order and give
    # them the requested names.
    if relabeling and isinstance(result, ABCDataFrame):
        result = result.iloc[:, order]
        result.columns = columns  # type: ignore[union-attr]
    return result
Expand Down Expand Up @@ -1239,7 +1246,7 @@ def calc(x):
klass="Series/DataFrame",
axis="",
)
def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
if result is None:
# these must apply directly
Expand Down Expand Up @@ -1951,7 +1958,7 @@ def _raise_monotonic_error(self, msg: str):
klass="Series/Dataframe",
axis="",
)
def aggregate(self, func, *args, **kwargs):
def aggregate(self, func=None, *args, **kwargs):
return super().aggregate(func, *args, **kwargs)

agg = aggregate
Expand Down
96 changes: 96 additions & 0 deletions pandas/tests/window/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
DatetimeIndex,
Index,
MultiIndex,
NamedAgg,
Series,
Timestamp,
date_range,
Expand Down Expand Up @@ -489,6 +490,36 @@ def test_groupby_rolling_subset_with_closed(self):
)
tm.assert_series_equal(result, expected)

def test_groupby_rolling_agg_namedagg(self):
# GH#28333
df = DataFrame(
{
"kind": ["cat", "dog", "cat", "dog", "cat", "dog"],
"height": [9.1, 6.0, 9.5, 34.0, 12.0, 8.0],
"weight": [7.9, 7.5, 9.9, 198.0, 10.0, 42.0],
}
)
result = (
df.groupby("kind")
.rolling(2)
.agg(
total_weight=NamedAgg(column="weight", aggfunc=sum),
min_height=NamedAgg(column="height", aggfunc=min),
)
)
expected = DataFrame(
{
"total_weight": [np.nan, 17.8, 19.9, np.nan, 205.5, 240.0],
"min_height": [np.nan, 9.1, 9.5, np.nan, 6.0, 8.0],
},
index=MultiIndex(
[["cat", "dog"], [0, 1, 2, 3, 4, 5]],
[[0, 0, 0, 1, 1, 1], [0, 2, 4, 1, 3, 5]],
names=["kind", None],
),
)
tm.assert_frame_equal(result, expected)

def test_groupby_subset_rolling_subset_with_closed(self):
# GH 35549
df = DataFrame(
Expand Down Expand Up @@ -1134,6 +1165,36 @@ def test_expanding_apply(self, raw, frame):
expected.index = expected_index
tm.assert_frame_equal(result, expected)

def test_groupby_expanding_agg_namedagg(self):
# GH#28333
df = DataFrame(
{
"kind": ["cat", "dog", "cat", "dog", "cat", "dog"],
"height": [9.1, 6.0, 9.5, 34.0, 12.0, 8.0],
"weight": [7.9, 7.5, 9.9, 198.0, 10.0, 42.0],
}
)
result = (
df.groupby("kind")
.expanding(1)
.agg(
total_weight=NamedAgg(column="weight", aggfunc=sum),
min_height=NamedAgg(column="height", aggfunc=min),
)
)
expected = DataFrame(
{
"total_weight": [7.9, 17.8, 27.8, 7.5, 205.5, 247.5],
"min_height": [9.1, 9.1, 9.1, 6.0, 6.0, 6.0],
},
index=MultiIndex(
[["cat", "dog"], [0, 1, 2, 3, 4, 5]],
[[0, 0, 0, 1, 1, 1], [0, 2, 4, 1, 3, 5]],
names=["kind", None],
),
)
tm.assert_frame_equal(result, expected)


class TestEWM:
@pytest.mark.parametrize(
Expand Down Expand Up @@ -1162,6 +1223,41 @@ def test_methods(self, method, expected_data):
)
tm.assert_frame_equal(result, expected)

def test_groupby_ewm_agg_namedagg(self):
# GH#28333
df = DataFrame({"A": ["a"] * 4, "B": range(4)})
result = (
df.groupby("A")
.ewm(com=1.0)
.agg(
B_mean=NamedAgg(column="B", aggfunc="mean"),
B_std=NamedAgg(column="B", aggfunc="std"),
B_var=NamedAgg(column="B", aggfunc="var"),
)
)
expected = DataFrame(
{
"B_mean": [
0.0,
0.6666666666666666,
1.4285714285714286,
2.2666666666666666,
],
"B_std": [np.nan, 0.707107, 0.963624, 1.177164],
"B_var": [np.nan, 0.5, 0.9285714285714286, 1.3857142857142857],
},
index=MultiIndex.from_tuples(
[
("a", 0),
("a", 1),
("a", 2),
("a", 3),
],
names=["A", None],
),
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"method, expected_data",
[["corr", [np.nan, 1.0, 1.0, 1]], ["cov", [np.nan, 0.5, 0.928571, 1.385714]]],
Expand Down

0 comments on commit 9501650

Please sign in to comment.