Skip to content

Commit

Permalink
CoW: Remove cow branches from more tests (#57270)
Browse files: browse the repository at this point in the history
  • Loading branch information
phofl authored Feb 6, 2024
1 parent 8baee5d commit 17aa2ba
Show file tree
Hide file tree
Showing 16 changed files with 102 additions and 286 deletions.
8 changes: 2 additions & 6 deletions pandas/tests/indexing/multiindex/test_chaining_and_caching.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_detect_chained_assignment():
zed["eyes"]["right"].fillna(value=555, inplace=True)


def test_cache_updating(using_copy_on_write):
def test_cache_updating():
# 5216
# make sure that we don't try to set a dead cache
a = np.random.default_rng(2).random((10, 3))
Expand All @@ -47,11 +47,7 @@ def test_cache_updating(using_copy_on_write):
with tm.raises_chained_assignment_error():
df.loc[0]["z"].iloc[0] = 1.0

if using_copy_on_write:
assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]
else:
result = df.loc[(0, 0), "z"]
assert result == 1
assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]

# correct setting
df.loc[(0, 0), "z"] = 2
Expand Down
16 changes: 4 additions & 12 deletions pandas/tests/indexing/multiindex/test_partial.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -119,7 +119,6 @@ def test_getitem_partial_column_select(self):
def test_partial_set(
self,
multiindex_year_month_day_dataframe_random_data,
using_copy_on_write,
):
# GH #397
ymd = multiindex_year_month_day_dataframe_random_data
Expand All @@ -129,13 +128,9 @@ def test_partial_set(
exp.iloc[65:85] = 0
tm.assert_frame_equal(df, exp)

if using_copy_on_write:
with tm.raises_chained_assignment_error():
df["A"].loc[2000, 4] = 1
df.loc[(2000, 4), "A"] = 1
else:
with tm.raises_chained_assignment_error():
df["A"].loc[2000, 4] = 1
with tm.raises_chained_assignment_error():
df["A"].loc[2000, 4] = 1
df.loc[(2000, 4), "A"] = 1
exp.iloc[65:85, 0] = 1
tm.assert_frame_equal(df, exp)

Expand All @@ -146,10 +141,7 @@ def test_partial_set(
# this works...for now
with tm.raises_chained_assignment_error():
df["A"].iloc[14] = 5
if using_copy_on_write:
assert df["A"].iloc[14] == exp["A"].iloc[14]
else:
assert df["A"].iloc[14] == 5
assert df["A"].iloc[14] == exp["A"].iloc[14]

@pytest.mark.parametrize("dtype", [int, float])
def test_getitem_intkey_leading_level(
Expand Down
30 changes: 7 additions & 23 deletions pandas/tests/indexing/multiindex/test_setitem.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -194,7 +194,7 @@ def test_multiindex_assignment(self):
df.loc[4, "d"] = arr
tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d"))

def test_multiindex_assignment_single_dtype(self, using_copy_on_write):
def test_multiindex_assignment_single_dtype(self):
# GH3777 part 2b
# single dtype
arr = np.array([0.0, 1.0])
Expand All @@ -205,7 +205,6 @@ def test_multiindex_assignment_single_dtype(self, using_copy_on_write):
index=[[4, 4, 8], [8, 10, 12]],
dtype=np.int64,
)
view = df["c"].iloc[:2].values

# arr can be losslessly cast to int, so this setitem is inplace
# INFO(CoW-warn) this does not warn because we directly took .values
Expand All @@ -215,10 +214,6 @@ def test_multiindex_assignment_single_dtype(self, using_copy_on_write):
result = df.loc[4, "c"]
tm.assert_series_equal(result, exp)

# extra check for inplace-ness
if not using_copy_on_write:
tm.assert_numpy_array_equal(view, exp.values)

# arr + 0.5 cannot be cast losslessly to int, so we upcast
with tm.assert_produces_warning(
FutureWarning, match="item of incompatible dtype"
Expand Down Expand Up @@ -412,9 +407,7 @@ def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
reindexed = dft.reindex(columns=[("foo", "two")])
tm.assert_series_equal(reindexed["foo", "two"], s > s.median())

def test_set_column_scalar_with_loc(
self, multiindex_dataframe_random_data, using_copy_on_write
):
def test_set_column_scalar_with_loc(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
subset = frame.index[[1, 4, 5]]

Expand All @@ -424,11 +417,8 @@ def test_set_column_scalar_with_loc(
frame_original = frame.copy()
col = frame["B"]
col[subset] = 97
if using_copy_on_write:
# chained setitem doesn't work with CoW
tm.assert_frame_equal(frame, frame_original)
else:
assert (frame.loc[subset, "B"] == 97).all()
# chained setitem doesn't work with CoW
tm.assert_frame_equal(frame, frame_original)

def test_nonunique_assignment_1750(self):
df = DataFrame(
Expand Down Expand Up @@ -505,19 +495,13 @@ def test_setitem_enlargement_keep_index_names(self):
tm.assert_frame_equal(df, expected)


def test_frame_setitem_view_direct(
multiindex_dataframe_random_data, using_copy_on_write
):
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
# this works because we are modifying the underlying array
# really a no-no
df = multiindex_dataframe_random_data.T
if using_copy_on_write:
with pytest.raises(ValueError, match="read-only"):
df["foo"].values[:] = 0
assert (df["foo"].values != 0).all()
else:
with pytest.raises(ValueError, match="read-only"):
df["foo"].values[:] = 0
assert (df["foo"].values == 0).all()
assert (df["foo"].values != 0).all()


def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
Expand Down
81 changes: 20 additions & 61 deletions pandas/tests/indexing/test_chaining_and_caching.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,7 +27,7 @@ def random_text(nobs=100):


class TestCaching:
def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write):
def test_slice_consolidate_invalidate_item_cache(self):
# this is chained assignment, but will 'work'
with option_context("chained_assignment", None):
# #3970
Expand Down Expand Up @@ -61,7 +61,7 @@ def test_setitem_cache_updating(self, do_ref):
assert df.loc[0, "c"] == 0.0
assert df.loc[7, "c"] == 1.0

def test_setitem_cache_updating_slices(self, using_copy_on_write):
def test_setitem_cache_updating_slices(self):
# GH 7084
# not updating cache on series setting with slices
expected = DataFrame(
Expand All @@ -85,15 +85,11 @@ def test_setitem_cache_updating_slices(self, using_copy_on_write):
out_original = out.copy()
for ix, row in df.iterrows():
v = out[row["C"]][six:eix] + row["D"]
with tm.raises_chained_assignment_error((ix == 0) or using_copy_on_write):
with tm.raises_chained_assignment_error():
out[row["C"]][six:eix] = v

if not using_copy_on_write:
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out["A"], expected["A"])
else:
tm.assert_frame_equal(out, out_original)
tm.assert_series_equal(out["A"], out_original["A"])
tm.assert_frame_equal(out, out_original)
tm.assert_series_equal(out["A"], out_original["A"])

out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
for ix, row in df.iterrows():
Expand All @@ -102,7 +98,7 @@ def test_setitem_cache_updating_slices(self, using_copy_on_write):
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out["A"], expected["A"])

def test_altering_series_clears_parent_cache(self, using_copy_on_write):
def test_altering_series_clears_parent_cache(self):
# GH #33675
df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"])
ser = df["A"]
Expand All @@ -116,49 +112,36 @@ def test_altering_series_clears_parent_cache(self, using_copy_on_write):


class TestChaining:
def test_setitem_chained_setfault(self, using_copy_on_write):
def test_setitem_chained_setfault(self):
# GH6026
data = ["right", "left", "left", "left", "right", "left", "timeout"]
mdata = ["right", "left", "left", "left", "right", "left", "none"]

df = DataFrame({"response": np.array(data)})
mask = df.response == "timeout"
with tm.raises_chained_assignment_error():
df.response[mask] = "none"
if using_copy_on_write:
tm.assert_frame_equal(df, DataFrame({"response": data}))
else:
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
tm.assert_frame_equal(df, DataFrame({"response": data}))

recarray = np.rec.fromarrays([data], names=["response"])
df = DataFrame(recarray)
mask = df.response == "timeout"
with tm.raises_chained_assignment_error():
df.response[mask] = "none"
if using_copy_on_write:
tm.assert_frame_equal(df, DataFrame({"response": data}))
else:
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
tm.assert_frame_equal(df, DataFrame({"response": data}))

df = DataFrame({"response": data, "response1": data})
df_original = df.copy()
mask = df.response == "timeout"
with tm.raises_chained_assignment_error():
df.response[mask] = "none"
if using_copy_on_write:
tm.assert_frame_equal(df, df_original)
else:
tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data}))
tm.assert_frame_equal(df, df_original)

# GH 6056
expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]})
df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
with tm.raises_chained_assignment_error():
df["A"].iloc[0] = np.nan
if using_copy_on_write:
expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]})
else:
expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]})
expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]})
result = df.head()
tm.assert_frame_equal(result, expected)

Expand All @@ -169,10 +152,9 @@ def test_setitem_chained_setfault(self, using_copy_on_write):
tm.assert_frame_equal(result, expected)

@pytest.mark.arm_slow
def test_detect_chained_assignment(self, using_copy_on_write):
def test_detect_chained_assignment(self):
with option_context("chained_assignment", "raise"):
# work with the chain
expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB"))
df = DataFrame(
np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64"
)
Expand All @@ -182,10 +164,7 @@ def test_detect_chained_assignment(self, using_copy_on_write):
df["A"][0] = -5
with tm.raises_chained_assignment_error():
df["A"][1] = -6
if using_copy_on_write:
tm.assert_frame_equal(df, df_original)
else:
tm.assert_frame_equal(df, expected)
tm.assert_frame_equal(df, df_original)

@pytest.mark.arm_slow
def test_detect_chained_assignment_raises(self):
Expand Down Expand Up @@ -340,9 +319,7 @@ def test_detect_chained_assignment_warnings_errors(self):
df.loc[0]["A"] = 111

@pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})])
def test_detect_chained_assignment_warning_stacklevel(
self, rhs, using_copy_on_write
):
def test_detect_chained_assignment_warning_stacklevel(self, rhs):
# GH#42570
df = DataFrame(np.arange(25).reshape(5, 5))
df_original = df.copy()
Expand Down Expand Up @@ -379,7 +356,7 @@ def test_cache_updating(self):
assert "Hello Friend" in df["A"].index
assert "Hello Friend" in df["B"].index

def test_cache_updating2(self, using_copy_on_write):
def test_cache_updating2(self):
# 10264
df = DataFrame(
np.zeros((5, 5), dtype="int64"),
Expand All @@ -388,26 +365,11 @@ def test_cache_updating2(self, using_copy_on_write):
)
df["f"] = 0
df_orig = df.copy()
if using_copy_on_write:
with pytest.raises(ValueError, match="read-only"):
df.f.values[3] = 1
tm.assert_frame_equal(df, df_orig)
return

df.f.values[3] = 1

df.f.values[3] = 2
expected = DataFrame(
np.zeros((5, 6), dtype="int64"),
columns=["a", "b", "c", "d", "e", "f"],
index=range(5),
)
expected.at[3, "f"] = 2
tm.assert_frame_equal(df, expected)
expected = Series([0, 0, 0, 2, 0], name="f")
tm.assert_series_equal(df.f, expected)
with pytest.raises(ValueError, match="read-only"):
df.f.values[3] = 1
tm.assert_frame_equal(df, df_orig)

def test_iloc_setitem_chained_assignment(self, using_copy_on_write):
def test_iloc_setitem_chained_assignment(self):
# GH#3970
with option_context("chained_assignment", None):
df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
Expand All @@ -424,10 +386,7 @@ def test_iloc_setitem_chained_assignment(self, using_copy_on_write):
with tm.raises_chained_assignment_error():
df["bb"].iloc[0] = 0.15

if not using_copy_on_write:
assert df["bb"].iloc[0] == 0.15
else:
assert df["bb"].iloc[0] == 2.2
assert df["bb"].iloc[0] == 2.2

def test_getitem_loc_assignment_slice_state(self):
# GH 13569
Expand Down
18 changes: 0 additions & 18 deletions pandas/tests/indexing/test_iat.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,21 +28,3 @@ def test_iat_getitem_series_with_period_index():
expected = ser[index[0]]
result = ser.iat[0]
assert expected == result


def test_iat_setitem_item_cache_cleared(indexer_ial, using_copy_on_write):
# GH#45684
data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)}
df = DataFrame(data).copy()
ser = df["y"]

# previously this iat setting would split the block and fail to clear
# the item_cache.
indexer_ial(df)[7, 0] = 9999

indexer_ial(df)[7, 1] = 1234

assert df.iat[7, 1] == 1234
if not using_copy_on_write:
assert ser.iloc[-1] == 1234
assert df.iloc[-1, -1] == 1234
Loading

0 comments on commit 17aa2ba

Please sign in to comment.