Skip to content

Commit

Permalink
BUG: Fixed bug when creating new column with missing values when sett…
Browse files Browse the repository at this point in the history
…ing a single string value (#56321)
  • Loading branch information
MarcoGorelli authored Dec 20, 2023
1 parent 98e1d2f commit 6a65c64
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,7 @@ Indexing
- Bug in :meth:`DataFrame.loc` when setting :class:`Series` with extension dtype into NumPy dtype (:issue:`55604`)
- Bug in :meth:`Index.difference` not returning a unique set of values when ``other`` is empty or ``other`` is considered non-comparable (:issue:`55113`)
- Bug in setting :class:`Categorical` values into a :class:`DataFrame` with numpy dtypes raising ``RecursionError`` (:issue:`52927`)
- Bug in :meth:`DataFrame.loc` when creating a new column with missing values by setting a single string value (:issue:`56204`)

Missing
^^^^^^^
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
from pandas.core.construction import (
array as pd_array,
extract_array,
sanitize_array,
)
from pandas.core.indexers import (
check_array_indexer,
Expand Down Expand Up @@ -1876,7 +1877,13 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
return

self.obj[key] = empty_value

elif not is_list_like(value):
# Find our empty_value dtype by constructing an array
# from our value and doing a .take on it
arr = sanitize_array(value, Index(range(1)), copy=False)
taker = -1 * np.ones(len(self.obj), dtype=np.intp)
empty_value = algos.take_nd(arr, taker)
self.obj[key] = empty_value
else:
# FIXME: GH#42099#issuecomment-864326014
self.obj[key] = infer_fill_value(value)
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1935,6 +1935,26 @@ def test_adding_new_conditional_column() -> None:
tm.assert_frame_equal(df, expected)


@pytest.mark.parametrize(
    ("dtype", "infer_string"),
    [
        (object, False),
        ("string[pyarrow_numpy]", True),
    ],
)
def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None:
    """Check the dtype of a column created by a masked single-string ``.loc`` set.

    A new column ``"c"`` is created by assigning the scalar ``"1"`` through a
    boolean mask that selects only the first row.  The unselected row must hold
    a missing value and the column must have ``dtype``: ``object`` when
    ``future.infer_string`` is off, ``"string[pyarrow_numpy]"`` when it is on.
    """
    # https://github.com/pandas-dev/pandas/issues/56204
    if infer_string:
        # Only the pyarrow-backed string case needs pyarrow; the object-dtype
        # case must still run on installations without it.
        pytest.importorskip("pyarrow")

    df = DataFrame({"a": [1, 2], "b": [3, 4]})
    with pd.option_context("future.infer_string", infer_string):
        df.loc[df["a"] == 1, "c"] = "1"

    expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", float("nan")]}).astype(
        {"a": "int64", "b": "int64", "c": dtype}
    )
    tm.assert_frame_equal(df, expected)


def test_add_new_column_infer_string():
# GH#55366
pytest.importorskip("pyarrow")
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/frame/indexing/test_set_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,7 @@ def test_set_value_resize(self, float_frame, using_infer_string):
else:
assert res["baz"].dtype == np.object_
res = float_frame.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
res._set_value("foobar", "baz", True)
res._set_value("foobar", "baz", True)
assert res["baz"].dtype == np.object_

res = float_frame.copy()
Expand Down

0 comments on commit 6a65c64

Please sign in to comment.