Skip to content

Commit 46bcf37

Browse files
authored
DEPR: df.iloc[:, foo] = bar attempt to set inplace (#45333)
1 parent d2a7eff commit 46bcf37

27 files changed

+334
-73
lines changed

doc/source/user_guide/10min.rst

+1
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ Setting values by position:
328328
Setting by assigning with a NumPy array:
329329

330330
.. ipython:: python
331+
:okwarning:
331332
332333
df.loc[:, "D"] = np.array([5] * len(df))
333334

doc/source/whatsnew/v1.5.0.rst

+67
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,73 @@ As ``group_keys=True`` is the default value of :meth:`DataFrame.groupby` and
533533
raise a ``FutureWarning``. This can be silenced and the previous behavior
534534
retained by specifying ``group_keys=False``.
535535

536+
.. _whatsnew_150.notable_bug_fixes.setitem_column_try_inplace:
537+
.. see also _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace
538+
539+
Try operating inplace when setting values with ``loc`` and ``iloc``
540+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
541+
Most of the time setting values with ``frame.iloc`` attempts to set values
542+
in-place, only falling back to inserting a new array if necessary. In the past,
543+
setting entire columns has been an exception to this rule:
544+
545+
.. ipython:: python
546+
547+
values = np.arange(4).reshape(2, 2)
548+
df = pd.DataFrame(values)
549+
ser = df[0]
550+
551+
*Old behavior*:
552+
553+
.. code-block:: ipython
554+
555+
In [3]: df.iloc[:, 0] = np.array([10, 11])
556+
In [4]: ser
557+
Out[4]:
558+
0 0
559+
1 2
560+
Name: 0, dtype: int64
561+
562+
This behavior is deprecated. In a future version, setting an entire column with
563+
iloc will attempt to operate inplace.
564+
565+
*Future behavior*:
566+
567+
.. code-block:: ipython
568+
569+
In [3]: df.iloc[:, 0] = np.array([10, 11])
570+
In [4]: ser
571+
Out[4]:
572+
0 10
573+
1 11
574+
Name: 0, dtype: int64
575+
576+
To get the old behavior, use :meth:`DataFrame.__setitem__` directly:
577+
578+
*Example* (``ser`` is not modified):
579+
580+
.. code-block:: ipython
581+
582+
In [5]: df[0] = np.array([21, 31])
583+
In [6]: ser
584+
Out[6]:
585+
0 10
586+
1 11
587+
Name: 0, dtype: int64
588+
589+
In the case where ``df.columns`` is not unique, use :meth:`DataFrame.isetitem`:
590+
591+
*Example* (``ser`` is not modified):
592+
593+
.. code-block:: ipython
594+
595+
In [7]: df.columns = ["A", "A"]
596+
In [8]: df.isetitem(0, np.array([21, 31]))
597+
In [9]: ser
598+
Out[9]:
599+
0 10
600+
1 11
601+
Name: 0, dtype: int64
602+
536603
.. _whatsnew_150.deprecations.numeric_only_default:
537604

538605
``numeric_only`` default value

pandas/core/frame.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -3687,6 +3687,29 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar:
36873687
loc = engine.get_loc(index)
36883688
return series._values[loc]
36893689

3690+
def isetitem(self, loc, value) -> None:
3691+
"""
3692+
Set the given value in the column with position 'loc'.
3693+
3694+
This is a positional analogue to __setitem__.
3695+
3696+
Parameters
3697+
----------
3698+
loc : int or sequence of ints
3699+
value : scalar or arraylike
3700+
3701+
Notes
3702+
-----
3703+
Unlike `frame.iloc[:, loc] = value`, `frame.isetitem(loc, value)` will
3704+
_never_ try to set the values in place, but will always insert a new
3705+
array.
3706+
3707+
In cases where `frame.columns` is unique, this is equivalent to
3708+
`frame[frame.columns[loc]] = value`.
3709+
"""
3710+
arraylike = self._sanitize_column(value)
3711+
self._iset_item_mgr(loc, arraylike, inplace=False)
3712+
36903713
def __setitem__(self, key, value):
36913714
key = com.apply_if_callable(key, self)
36923715

@@ -5467,7 +5490,7 @@ def _replace_columnwise(
54675490
target, value = mapping[ax[i]]
54685491
newobj = ser.replace(target, value, regex=regex)
54695492

5470-
res.iloc[:, i] = newobj
5493+
res._iset_item(i, newobj)
54715494

54725495
if inplace:
54735496
return

pandas/core/groupby/groupby.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -2257,7 +2257,13 @@ def sem(self, ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default
22572257
counts = self.count()
22582258
result_ilocs = result.columns.get_indexer_for(cols)
22592259
count_ilocs = counts.columns.get_indexer_for(cols)
2260-
result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs])
2260+
with warnings.catch_warnings():
2261+
# TODO(2.0): once iloc[:, foo] = bar deprecation is enforced,
2262+
# this catching will be unnecessary
2263+
warnings.filterwarnings(
2264+
"ignore", ".*will attempt to set the values inplace.*"
2265+
)
2266+
result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs])
22612267
return result
22622268

22632269
@final

pandas/core/indexing.py

+41-7
Original file line numberDiff line numberDiff line change
@@ -1937,24 +1937,25 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
19371937
pi = plane_indexer
19381938

19391939
ser = self.obj._ixs(loc, axis=1)
1940+
orig_values = ser._values
19401941

19411942
# perform the equivalent of a setitem on the info axis
19421943
# as we have a null slice or a slice with full bounds
19431944
# which means essentially reassign to the columns of a
19441945
# multi-dim object
19451946
# GH#6149 (null slice), GH#10408 (full bounds)
19461947
if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)):
1947-
ser = value
1948+
pass
19481949
elif (
19491950
is_array_like(value)
19501951
and is_exact_shape_match(ser, value)
19511952
and not is_empty_indexer(pi)
19521953
):
19531954
if is_list_like(pi):
1954-
ser = value[np.argsort(pi)]
1955+
value = value[np.argsort(pi)]
19551956
else:
19561957
# in case of slice
1957-
ser = value[pi]
1958+
value = value[pi]
19581959
else:
19591960
# set the item, first attempting to operate inplace, then
19601961
# falling back to casting if necessary; see
@@ -1970,8 +1971,40 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
19701971
self.obj._iset_item(loc, ser)
19711972
return
19721973

1973-
# reset the sliced object if unique
1974-
self.obj._iset_item(loc, ser)
1974+
# We will not operate in-place, but will attempt to in the future.
1975+
# To determine whether we need to issue a FutureWarning, see if the
1976+
# setting in-place would work, i.e. behavior will change.
1977+
warn = can_hold_element(ser._values, value)
1978+
# Don't issue the warning yet, as we can still trim a few cases where
1979+
# behavior will not change.
1980+
1981+
self.obj._iset_item(loc, value)
1982+
1983+
if warn:
1984+
new_values = self.obj._ixs(loc, axis=1)._values
1985+
1986+
if (
1987+
isinstance(new_values, np.ndarray)
1988+
and isinstance(orig_values, np.ndarray)
1989+
and np.shares_memory(new_values, orig_values)
1990+
):
1991+
# TODO: get something like tm.shares_memory working?
1992+
# The values were set inplace after all, no need to warn,
1993+
# e.g. test_rename_nocopy
1994+
pass
1995+
else:
1996+
warnings.warn(
1997+
"In a future version, `df.iloc[:, i] = newvals` will attempt "
1998+
"to set the values inplace instead of always setting a new "
1999+
"array. To retain the old behavior, use either "
2000+
"`df[df.columns[i]] = newvals` or, if columns are non-unique, "
2001+
"`df.isetitem(i, newvals)`",
2002+
FutureWarning,
2003+
stacklevel=find_stack_level(),
2004+
)
2005+
# TODO: how to get future behavior?
2006+
# TODO: what if we got here indirectly via loc?
2007+
return
19752008

19762009
def _setitem_single_block(self, indexer, value, name: str):
19772010
"""
@@ -1981,7 +2014,6 @@ def _setitem_single_block(self, indexer, value, name: str):
19812014

19822015
info_axis = self.obj._info_axis_number
19832016
item_labels = self.obj._get_axis(info_axis)
1984-
19852017
if isinstance(indexer, tuple):
19862018

19872019
# if we are setting on the info axis ONLY
@@ -1996,7 +2028,9 @@ def _setitem_single_block(self, indexer, value, name: str):
19962028
if len(item_labels.get_indexer_for([col])) == 1:
19972029
# e.g. test_loc_setitem_empty_append_expands_rows
19982030
loc = item_labels.get_loc(col)
1999-
self.obj._iset_item(loc, value)
2031+
# Go through _setitem_single_column to get
2032+
# FutureWarning if relevant.
2033+
self._setitem_single_column(loc, value, indexer[0])
20002034
return
20012035

20022036
indexer = maybe_convert_ix(*indexer) # e.g. test_setitem_frame_align

pandas/tests/extension/base/setitem.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.core.dtypes.dtypes import (
5+
DatetimeTZDtype,
6+
IntervalDtype,
7+
PandasDtype,
8+
PeriodDtype,
9+
)
10+
411
import pandas as pd
512
import pandas._testing as tm
613
from pandas.tests.extension.base.base import BaseExtensionTests
@@ -361,6 +368,11 @@ def test_setitem_frame_2d_values(self, data):
361368
# GH#44514
362369
df = pd.DataFrame({"A": data})
363370

371+
# These dtypes have non-broken implementations of _can_hold_element
372+
has_can_hold_element = isinstance(
373+
data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype)
374+
)
375+
364376
# Avoiding using_array_manager fixture
365377
# https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
366378
using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
@@ -369,13 +381,24 @@ def test_setitem_frame_2d_values(self, data):
369381

370382
orig = df.copy()
371383

372-
df.iloc[:] = df
384+
msg = "will attempt to set the values inplace instead"
385+
warn = None
386+
if has_can_hold_element and not isinstance(data.dtype, PandasDtype):
387+
# PandasDtype excluded because it isn't *really* supported.
388+
warn = FutureWarning
389+
390+
with tm.assert_produces_warning(warn, match=msg):
391+
df.iloc[:] = df
373392
self.assert_frame_equal(df, orig)
374393

375394
df.iloc[:-1] = df.iloc[:-1]
376395
self.assert_frame_equal(df, orig)
377396

378-
df.iloc[:] = df.values
397+
if isinstance(data.dtype, DatetimeTZDtype):
398+
# no warning because df.values casts to object dtype
399+
warn = None
400+
with tm.assert_produces_warning(warn, match=msg):
401+
df.iloc[:] = df.values
379402
self.assert_frame_equal(df, orig)
380403
if not using_array_manager:
381404
# GH#33457 Check that this setting occurred in-place

pandas/tests/frame/indexing/test_coercion.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ def test_loc_setitem_multiindex_columns(self, consolidate):
3636
A.loc[2:3, (1, slice(2, 3))] = np.ones((2, 2), dtype=np.float32)
3737
assert (A.dtypes == np.float32).all()
3838

39-
A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
39+
msg = "will attempt to set the values inplace instead"
40+
with tm.assert_produces_warning(FutureWarning, match=msg):
41+
A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
42+
4043
assert (A.dtypes == np.float32).all()
4144

4245
A.loc[:, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
@@ -129,7 +132,10 @@ def test_iloc_setitem_unnecesssary_float_upcasting():
129132
orig = df.copy()
130133

131134
values = df[0].values.reshape(2, 1)
132-
df.iloc[:, 0:1] = values
135+
136+
msg = "will attempt to set the values inplace instead"
137+
with tm.assert_produces_warning(FutureWarning, match=msg):
138+
df.iloc[:, 0:1] = values
133139

134140
tm.assert_frame_equal(df, orig)
135141

pandas/tests/frame/indexing/test_indexing.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -688,7 +688,7 @@ def test_getitem_setitem_boolean_multi(self):
688688
expected.loc[[0, 2], [1]] = 5
689689
tm.assert_frame_equal(df, expected)
690690

691-
def test_getitem_setitem_float_labels(self):
691+
def test_getitem_setitem_float_labels(self, using_array_manager):
692692
index = Index([1.5, 2, 3, 4, 5])
693693
df = DataFrame(np.random.randn(5, 5), index=index)
694694

@@ -771,7 +771,10 @@ def test_getitem_setitem_float_labels(self):
771771
assert len(result) == 5
772772

773773
cp = df.copy()
774-
cp.loc[1.0:5.0] = 0
774+
warn = FutureWarning if using_array_manager else None
775+
msg = "will attempt to set the values inplace"
776+
with tm.assert_produces_warning(warn, match=msg):
777+
cp.loc[1.0:5.0] = 0
775778
result = cp.loc[1.0:5.0]
776779
assert (result == 0).values.all()
777780

pandas/tests/frame/indexing/test_setitem.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -399,10 +399,14 @@ def test_setitem_frame_length_0_str_key(self, indexer):
399399

400400
def test_setitem_frame_duplicate_columns(self, using_array_manager):
401401
# GH#15695
402+
warn = FutureWarning if using_array_manager else None
403+
msg = "will attempt to set the values inplace"
404+
402405
cols = ["A", "B", "C"] * 2
403406
df = DataFrame(index=range(3), columns=cols)
404407
df.loc[0, "A"] = (0, 3)
405-
df.loc[:, "B"] = (1, 4)
408+
with tm.assert_produces_warning(warn, match=msg):
409+
df.loc[:, "B"] = (1, 4)
406410
df["C"] = (2, 5)
407411
expected = DataFrame(
408412
[
@@ -769,7 +773,9 @@ def test_setitem_string_column_numpy_dtype_raising(self):
769773
def test_setitem_empty_df_duplicate_columns(self):
770774
# GH#38521
771775
df = DataFrame(columns=["a", "b", "b"], dtype="float64")
772-
df.loc[:, "a"] = list(range(2))
776+
msg = "will attempt to set the values inplace instead"
777+
with tm.assert_produces_warning(FutureWarning, match=msg):
778+
df.loc[:, "a"] = list(range(2))
773779
expected = DataFrame(
774780
[[0, np.nan, np.nan], [1, np.nan, np.nan]], columns=["a", "b", "b"]
775781
)

pandas/tests/frame/indexing/test_where.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ def test_where_bug_transposition(self):
365365
result = a.where(do_not_replace, b)
366366
tm.assert_frame_equal(result, expected)
367367

368-
def test_where_datetime(self):
368+
def test_where_datetime(self, using_array_manager):
369369

370370
# GH 3311
371371
df = DataFrame(
@@ -385,7 +385,11 @@ def test_where_datetime(self):
385385

386386
expected = df.copy()
387387
expected.loc[[0, 1], "A"] = np.nan
388-
expected.loc[:, "C"] = np.nan
388+
389+
warn = FutureWarning if using_array_manager else None
390+
msg = "will attempt to set the values inplace"
391+
with tm.assert_produces_warning(warn, match=msg):
392+
expected.loc[:, "C"] = np.nan
389393
tm.assert_frame_equal(result, expected)
390394

391395
def test_where_none(self):
@@ -513,7 +517,7 @@ def test_where_axis_with_upcast(self):
513517
assert return_value is None
514518
tm.assert_frame_equal(result, expected)
515519

516-
def test_where_axis_multiple_dtypes(self):
520+
def test_where_axis_multiple_dtypes(self, using_array_manager):
517521
# Multiple dtypes (=> multiple Blocks)
518522
df = pd.concat(
519523
[
@@ -569,7 +573,10 @@ def test_where_axis_multiple_dtypes(self):
569573

570574
d2 = df.copy().drop(1, axis=1)
571575
expected = df.copy()
572-
expected.loc[:, 1] = np.nan
576+
warn = FutureWarning if using_array_manager else None
577+
msg = "will attempt to set the values inplace"
578+
with tm.assert_produces_warning(warn, match=msg):
579+
expected.loc[:, 1] = np.nan
573580

574581
result = df.where(mask, d2)
575582
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)