Skip to content

Commit

Permalink
Fix meta calculation for to_datetime
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed Oct 18, 2024
1 parent 2898409 commit 59725ca
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 0 deletions.
2 changes: 2 additions & 0 deletions dask_expr/_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -6192,6 +6192,8 @@ def to_datetime(arg, meta=None, **kwargs):
meta = meta_series_constructor(arg)([pd.Timestamp("2000", **tz_kwarg)])
meta.index = meta.index.astype(arg.index.dtype)
meta.index.name = arg.index.name
else:
meta = make_meta(meta)

kwargs.pop("infer_datetime_format", None)

Expand Down
4 changes: 4 additions & 0 deletions dask_expr/_expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1461,6 +1461,10 @@ class ToDatetime(Elemwise):
_defaults = {"kwargs": None}
_keyword_only = ["kwargs", "meta"]

@functools.cached_property
def _meta(self):
    """Return the ``meta`` operand of this expression unchanged.

    The value is read straight from the ``"meta"`` operand (declared
    keyword-only on the class) and, because of ``cached_property``, is
    looked up only once per instance.
    """
    # NOTE(review): presumably this overrides a default meta computation
    # inherited from the base class -- confirm against Elemwise.
    stored_meta = self.operand("meta")
    return stored_meta

@staticmethod
def operation(*args, **kwargs):
    """Call ``to_datetime`` from the library matching the first argument.

    ``get_meta_library`` is asked for the library that corresponds to
    ``args[0]``; every positional and keyword argument is then forwarded
    unchanged to that library's ``to_datetime``.
    """
    # NOTE(review): presumably resolves to pandas or a pandas-compatible
    # backend -- confirm against get_meta_library.
    backend = get_meta_library(args[0])
    return backend.to_datetime(*args, **kwargs)
Expand Down
8 changes: 8 additions & 0 deletions dask_expr/tests/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,14 @@ def test_abs_errors():
pytest.raises((TypeError, NotImplementedError), lambda: ddf.abs())


def test_to_datetime_timezone():
    """A user-supplied timezone-aware ``meta`` is accepted by ``to_datetime``.

    Parses an ISO8601 string carrying a ``+00:00`` offset through the
    collection, passing an explicit ``datetime64[ns, UTC]`` meta, and
    compares the result against the same conversion on the pandas frame.
    """
    expected = pd.DataFrame({"a": ["2023-04-01 22:12:11.932417+00:00"]})
    # Build the collection before the pandas column is converted, so it
    # still wraps the raw string data.
    actual = from_pandas(expected)

    utc_meta = ("a", "datetime64[ns, UTC]")
    actual["a"] = to_datetime(actual.a, format="ISO8601", meta=utc_meta)
    expected["a"] = to_datetime(expected.a, format="ISO8601")

    assert_eq(actual, expected)


def test_to_datetime():
pdf = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
df = from_pandas(pdf, npartitions=2)
Expand Down

0 comments on commit 59725ca

Please sign in to comment.