From f5e9871eb058f274c2b0c800ce778c6d693bafd7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 17 Jan 2021 21:10:30 +0100 Subject: [PATCH 01/14] DEPR: raise deprecation warning in numpy ufuncs on DataFrames if not aligned + fallback to <1.2.0 behaviour --- pandas/core/arraylike.py | 51 ++++++++++++++++++ pandas/tests/frame/test_ufunc.py | 91 ++++++++++++++++++++++++++------ 2 files changed, 127 insertions(+), 15 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 6b28f8f135769..183b39447faca 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -157,11 +157,62 @@ def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any) -------- numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__ """ + from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.internals import BlockManager + from pandas.core.series import Series cls = type(self) + is_ndframe = [isinstance(x, NDFrame) for x in inputs] + is_frame = [isinstance(x, DataFrame) for x in inputs] + + if (len(inputs) == 2) and (sum(is_ndframe) == 2) and (sum(is_frame) >= 1): + # if there are 2 alignable inputs, of which at least 1 is a + # DataFrame -> we would have had no alignment before -> warn that this + # will align in the future + + # check if the two objects are aligned or not + aligned = False + if sum(is_frame) == 2: + if inputs[0]._indexed_same(inputs[1]): + aligned = True + else: + # DataFrame / Series + if isinstance(inputs[0], DataFrame): + if inputs[0].columns.equals(inputs[1].index): + aligned = True + else: + if inputs[1].columns.equals(inputs[0].index): + aligned = True + + # TODO need to check if Series index matches DataFrame columns + pass + + # only warn and fallback to array behaviour if not aligned + if not aligned: + # TODO expand warning + warnings.warn("Will align in the future", FutureWarning, stacklevel=3) + + # keep the first dataframe of the 
inputs, other DataFrame/Series is + # converted to array for fallback behaviour + new_inputs = [] + frame_count = 0 + for x in inputs: + if isinstance(x, DataFrame): + if frame_count == 0: + new_inputs.append(x) + else: + new_inputs.append(np.asarray(x)) + frame_count += 1 + elif isinstance(x, Series): + new_inputs.append(np.asarray(x)) + else: + new_inputs.append(x) + + # call the ufunc on those transformed inputs + return getattr(ufunc, method)(*new_inputs, **kwargs) + # for binary ops, use our custom dunder methods result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs) if result is not NotImplemented: diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py index 8a29c2f2f89a1..c3e894cdf0676 100644 --- a/pandas/tests/frame/test_ufunc.py +++ b/pandas/tests/frame/test_ufunc.py @@ -78,12 +78,19 @@ def test_binary_input_aligns_columns(request, dtype_a, dtype_b): dtype_b["C"] = dtype_b.pop("B") df2 = pd.DataFrame({"A": [1, 2], "C": [3, 4]}).astype(dtype_b) - result = np.heaviside(df1, df2) - expected = np.heaviside( - np.array([[1, 3, np.nan], [2, 4, np.nan]]), - np.array([[1, np.nan, 3], [2, np.nan, 4]]), - ) - expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"]) + with tm.assert_produces_warning(FutureWarning): + result = np.heaviside(df1, df2) + # Expected future behaviour: + # expected = np.heaviside( + # np.array([[1, 3, np.nan], [2, 4, np.nan]]), + # np.array([[1, np.nan, 3], [2, np.nan, 4]]), + # ) + # expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"]) + expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + # ensure the expected is the same when applying with numpy array + result = np.heaviside(df1, df2.values) tm.assert_frame_equal(result, expected) @@ -97,27 +104,81 @@ def test_binary_input_aligns_index(request, dtype): ) df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).astype(dtype) df2 = 
pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "c"]).astype(dtype) - result = np.heaviside(df1, df2) - expected = np.heaviside( - np.array([[1, 3], [3, 4], [np.nan, np.nan]]), - np.array([[1, 3], [np.nan, np.nan], [3, 4]]), + with tm.assert_produces_warning(FutureWarning): + result = np.heaviside(df1, df2) + # Expected future behaviour: + # expected = np.heaviside( + # np.array([[1, 3], [3, 4], [np.nan, np.nan]]), + # np.array([[1, 3], [np.nan, np.nan], [3, 4]]), + # ) + # # TODO(FloatArray): this will be Float64Dtype. + # expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"]) + expected = pd.DataFrame( + [[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"] ) - # TODO(FloatArray): this will be Float64Dtype. - expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + # ensure the expected is the same when applying with numpy array + result = np.heaviside(df1, df2.values) tm.assert_frame_equal(result, expected) def test_binary_frame_series_raises(): # We don't currently implement df = pd.DataFrame({"A": [1, 2]}) - with pytest.raises(NotImplementedError, match="logaddexp"): + # with pytest.raises(NotImplementedError, match="logaddexp"): + with pytest.raises(ValueError, match=""): np.logaddexp(df, df["A"]) - with pytest.raises(NotImplementedError, match="logaddexp"): + # with pytest.raises(NotImplementedError, match="logaddexp"): + with pytest.raises(ValueError, match=""): np.logaddexp(df["A"], df) def test_frame_outer_deprecated(): df = pd.DataFrame({"A": [1, 2]}) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): np.subtract.outer(df, df) + + +def test_alignment_deprecation(): + + df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]}) + s1 = pd.Series([1, 2], index=["a", "b"]) + s2 = pd.Series([1, 2], index=["b", "c"]) + + # binary + with 
tm.assert_produces_warning(None): + # aligned -> no warning! + result = np.add(df1, df1) + expected = pd.DataFrame({"a": [2, 4, 6], "b": [8, 10, 12]}) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = np.add(df1, df2) + tm.assert_frame_equal(result, expected) + + result = np.add(df1, df2.values) + tm.assert_frame_equal(result, expected) + + result = np.add(df1.values, df2) + expected = pd.DataFrame({"b": [2, 4, 6], "c": [8, 10, 12]}) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = np.add(df1, s2) + expected = pd.DataFrame({"a": [2, 3, 4], "b": [6, 7, 8]}) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(None): + # aligned -> no warning! + result = np.add(df1, s1) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = np.add(s2, df1) + tm.assert_frame_equal(result, expected) + + result = np.add(df1, s2.values) + tm.assert_frame_equal(result, expected) From e02392a7cad3cc5985e59e8827ce5c9c2d30077a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 18 Jan 2021 08:59:08 +0100 Subject: [PATCH 02/14] simplify / clean-up --- pandas/core/arraylike.py | 61 +++++++++++++++++--------------- pandas/tests/frame/test_ufunc.py | 15 +++++--- 2 files changed, 43 insertions(+), 33 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 183b39447faca..ce869e7fec5e2 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -160,52 +160,57 @@ def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any) from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.internals import BlockManager - from pandas.core.series import Series cls = type(self) is_ndframe = [isinstance(x, NDFrame) for x in inputs] is_frame = [isinstance(x, DataFrame) for x in inputs] - if (len(inputs) == 2) and 
(sum(is_ndframe) == 2) and (sum(is_frame) >= 1): + if (len(inputs) == 2) and (sum(is_ndframe) >= 2) and (sum(is_frame) >= 1): # if there are 2 alignable inputs, of which at least 1 is a # DataFrame -> we would have had no alignment before -> warn that this # will align in the future - # check if the two objects are aligned or not - aligned = False - if sum(is_frame) == 2: - if inputs[0]._indexed_same(inputs[1]): - aligned = True - else: - # DataFrame / Series - if isinstance(inputs[0], DataFrame): - if inputs[0].columns.equals(inputs[1].index): - aligned = True + # the first frame is what determines the output index/columns in pandas < 1.2 + for x in inputs: + if isinstance(x, DataFrame): + first_frame = x + break + + # check if the objects are aligned or not + def is_aligned(frame, other): + if isinstance(other, DataFrame): + return frame._indexed_same(other) else: - if inputs[1].columns.equals(inputs[0].index): - aligned = True + # Series -> match index + return frame.columns.equals(other.index) - # TODO need to check if Series index matches DataFrame columns - pass + non_aligned = sum( + not is_aligned(first_frame, x) for x in inputs if isinstance(x, NDFrame) + ) - # only warn and fallback to array behaviour if not aligned - if not aligned: - # TODO expand warning - warnings.warn("Will align in the future", FutureWarning, stacklevel=3) + # if at least one is not aligned -> warn and fallback to array behaviour + if non_aligned: + warnings.warn( + "Calling a ufunc on non-aligned DataFrames/Series. Currently, the " + "indices are ignored and the result takes the index/rows of the first " + "DataFrame. 
In the future (pandas 2.0), the DataFrames/Series will be " + "aligned before applying the ufunc.\nConvert one of the arguments to " + "a numpy array (eg 'ufunc(df1, np.asarray(df2)') to keep the current " + "behaviour, or align manually (eg 'df1, df2 = df1.align(df2)') before " + "passing to the ufunc to obtain the future behaviour and silence this " + "warning.", + FutureWarning, + stacklevel=3, + ) # keep the first dataframe of the inputs, other DataFrame/Series is # converted to array for fallback behaviour new_inputs = [] - frame_count = 0 for x in inputs: - if isinstance(x, DataFrame): - if frame_count == 0: - new_inputs.append(x) - else: - new_inputs.append(np.asarray(x)) - frame_count += 1 - elif isinstance(x, Series): + if x is first_frame: + new_inputs.append(x) + elif isinstance(x, NDFrame): new_inputs.append(np.asarray(x)) else: new_inputs.append(x) diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py index c3e894cdf0676..76bb655218be4 100644 --- a/pandas/tests/frame/test_ufunc.py +++ b/pandas/tests/frame/test_ufunc.py @@ -123,6 +123,7 @@ def test_binary_input_aligns_index(request, dtype): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Calling a ufunc on non-aligned:FutureWarning") def test_binary_frame_series_raises(): # We don't currently implement df = pd.DataFrame({"A": [1, 2]}) @@ -148,14 +149,16 @@ def test_alignment_deprecation(): s1 = pd.Series([1, 2], index=["a", "b"]) s2 = pd.Series([1, 2], index=["b", "c"]) - # binary + # binary dataframe / dataframe + expected = pd.DataFrame({"a": [2, 4, 6], "b": [8, 10, 12]}) + with tm.assert_produces_warning(None): # aligned -> no warning! 
result = np.add(df1, df1) - expected = pd.DataFrame({"a": [2, 4, 6], "b": [8, 10, 12]}) tm.assert_frame_equal(result, expected) with tm.assert_produces_warning(FutureWarning): + # non-aligned -> warns result = np.add(df1, df2) tm.assert_frame_equal(result, expected) @@ -166,16 +169,18 @@ def test_alignment_deprecation(): expected = pd.DataFrame({"b": [2, 4, 6], "c": [8, 10, 12]}) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): - result = np.add(df1, s2) + # binary dataframe / series expected = pd.DataFrame({"a": [2, 3, 4], "b": [6, 7, 8]}) - tm.assert_frame_equal(result, expected) with tm.assert_produces_warning(None): # aligned -> no warning! result = np.add(df1, s1) tm.assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning): + result = np.add(df1, s2) + tm.assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning): result = np.add(s2, df1) tm.assert_frame_equal(result, expected) From c6f689840ade5d7f10574e9520e526277dba3976 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 18 Jan 2021 09:18:37 +0100 Subject: [PATCH 03/14] allow >2 inputs --- pandas/core/arraylike.py | 2 +- pandas/tests/frame/test_ufunc.py | 48 ++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index ce869e7fec5e2..44b925f850e14 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -166,7 +166,7 @@ def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any) is_ndframe = [isinstance(x, NDFrame) for x in inputs] is_frame = [isinstance(x, DataFrame) for x in inputs] - if (len(inputs) == 2) and (sum(is_ndframe) >= 2) and (sum(is_frame) >= 1): + if (sum(is_ndframe) >= 2) and (sum(is_frame) >= 1): # if there are 2 alignable inputs, of which at least 1 is a # DataFrame -> we would have had no alignment before -> warn that this # will align in the future diff 
--git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py index 76bb655218be4..7d4a5382a71c2 100644 --- a/pandas/tests/frame/test_ufunc.py +++ b/pandas/tests/frame/test_ufunc.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd import pandas._testing as tm @@ -138,12 +140,12 @@ def test_binary_frame_series_raises(): def test_frame_outer_deprecated(): df = pd.DataFrame({"A": [1, 2]}) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): np.subtract.outer(df, df) def test_alignment_deprecation(): - + # https://github.com/pandas-dev/pandas/issues/39184 df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]}) s1 = pd.Series([1, 2], index=["a", "b"]) @@ -187,3 +189,45 @@ def test_alignment_deprecation(): result = np.add(df1, s2.values) tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba", "0.46.0") +def test_alignment_deprecation_many_inputs(): + # https://github.com/pandas-dev/pandas/issues/39184 + # test that the deprecation also works with > 2 inputs -> using a numba + # written ufunc for this because numpy itself doesn't have such ufuncs + from numba import float64, vectorize + + @vectorize([float64(float64, float64, float64)]) + def my_ufunc(x, y, z): + return x + y + z + + df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]}) + df3 = pd.DataFrame({"a": [1, 2, 3], "c": [4, 5, 6]}) + + with tm.assert_produces_warning(FutureWarning): + result = my_ufunc(df1, df2, df3) + expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + # all aligned -> no warning + with tm.assert_produces_warning(None): + result = my_ufunc(df1, df1, df1) + tm.assert_frame_equal(result, expected) + + # mixed frame / arrays + with 
tm.assert_produces_warning(FutureWarning): + result = my_ufunc(df1, df2, df3.values) + tm.assert_frame_equal(result, expected) + + # single frame -> no warning + with tm.assert_produces_warning(None): + result = my_ufunc(df1, df2.values, df3.values) + tm.assert_frame_equal(result, expected) + + # takes indices of first frame + with tm.assert_produces_warning(FutureWarning): + result = my_ufunc(df1.values, df2, df3) + expected = expected.set_axis(["b", "c"], axis=1) + tm.assert_frame_equal(result, expected) From 87003214c6cb81b166e342901cad2d675076d0b3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 18 Jan 2021 09:44:33 +0100 Subject: [PATCH 04/14] add whatsnew --- doc/source/whatsnew/v1.2.1.rst | 70 ++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index fa5347aa7a507..adc3fcbe21a3f 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -36,6 +36,76 @@ Fixed regressions .. --------------------------------------------------------------------------- +.. _whatsnew_121.ufunc_deprecation: + +Calling numpy ufuncs on non-aligned DataFrames +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Before pandas 1.2.0, calling a numpy ufunc on non-aligned DataFrames (or +DataFrame / Series combination) would ignore the indices, only match +the inputs by shape, and use the index/rows of the first DataFrame for +the result: + +.. code-block:: python + + >>> df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[0, 1]) + ... df2 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[1, 2]) + >>> df1 + a b + 0 1 3 + 1 2 4 + >>> df2 + a b + 1 1 3 + 2 2 4 + + >>> np.add(df1, df2) + a b + 0 2 6 + 1 4 8 + +This contrasts with how other pandas operations work, which first align +the inputs: + +.. 
code-block:: python + + >>> df1 + df2 + a b + 0 NaN NaN + 1 3.0 7.0 + 2 NaN NaN + +In pandas 1.2.0, we refactored how numpy ufuncs are called on DataFrames, +and this started to align the inputs first, as happens in other pandas +operations. For pandas 1.2.1, we restored the previous behaviour to avoid +a breaking change, but the above example of ``np.add(df1, df2)`` with +non-aligned inputs will now to raise a warning, and a future pandas 2.0 +release will start aligning the inputs first. + +To avoid the warning and keep the current behaviour of ignoring the indices, +convert one of the arguments to a numpy array: + +.. code-block:: python + + >>> np.add(df1, np.asarray(df2)) + a b + 0 2 6 + 1 4 8 + +To obtain the future behaviour and silence the warning, you can align manually +before passing the arguments to the ufunc: + +.. code-block:: python + + >>> df1, df2 = df1.align(df2) + >>> np.add(df1, df2) + a b + 0 NaN NaN + 1 3.0 7.0 + 2 NaN NaN + +.. --------------------------------------------------------------------------- + .. _whatsnew_121.bug_fixes: Bug fixes From 1a6f257538a2c9ed2a801d848ea333edc2d7a455 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 18 Jan 2021 15:06:59 +0100 Subject: [PATCH 05/14] update for feedback --- doc/source/whatsnew/v1.2.1.rst | 23 +++++++++++++---------- pandas/core/arraylike.py | 14 +++++++------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index adc3fcbe21a3f..8c39b43595b5e 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -38,12 +38,12 @@ Fixed regressions .. 
_whatsnew_121.ufunc_deprecation: -Calling numpy ufuncs on non-aligned DataFrames +Calling NumPy ufuncs on non-aligned DataFrames ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Before pandas 1.2.0, calling a numpy ufunc on non-aligned DataFrames (or +Before pandas 1.2.0, calling a NumPy ufunc on non-aligned DataFrames (or DataFrame / Series combination) would ignore the indices, only match -the inputs by shape, and use the index/rows of the first DataFrame for +the inputs by shape, and use the index/columns of the first DataFrame for the result: .. code-block:: python @@ -75,15 +75,18 @@ the inputs: 1 3.0 7.0 2 NaN NaN -In pandas 1.2.0, we refactored how numpy ufuncs are called on DataFrames, -and this started to align the inputs first, as happens in other pandas -operations. For pandas 1.2.1, we restored the previous behaviour to avoid -a breaking change, but the above example of ``np.add(df1, df2)`` with -non-aligned inputs will now to raise a warning, and a future pandas 2.0 -release will start aligning the inputs first. +In pandas 1.2.0, we refactored how NumPy ufuncs are called on DataFrames, and +this started to align the inputs first, as happens in other pandas operations +and as it happens for ufuncs called on Series objects. + +For pandas 1.2.1, we restored the previous behaviour to avoid a breaking +change, but the above example of ``np.add(df1, df2)`` with non-aligned inputs +will now raise a warning, and a future pandas 2.0 release will start +aligning the inputs first (:issue:`39184`). Calling a NumPy ufunc on Series +objects (eg ``np.add(s1, s2)``) already aligns and continues to do so. To avoid the warning and keep the current behaviour of ignoring the indices, -convert one of the arguments to a numpy array: +convert one of the arguments to a NumPy array: .. 
code-block:: python diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 44b925f850e14..1361aaedc1ecc 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -193,13 +193,13 @@ def is_aligned(frame, other): if non_aligned: warnings.warn( "Calling a ufunc on non-aligned DataFrames/Series. Currently, the " - "indices are ignored and the result takes the index/rows of the first " - "DataFrame. In the future (pandas 2.0), the DataFrames/Series will be " - "aligned before applying the ufunc.\nConvert one of the arguments to " - "a numpy array (eg 'ufunc(df1, np.asarray(df2)') to keep the current " - "behaviour, or align manually (eg 'df1, df2 = df1.align(df2)') before " - "passing to the ufunc to obtain the future behaviour and silence this " - "warning.", + "indices are ignored and the result takes the index/columns of the " + "first DataFrame. In the future (pandas 2.0), the DataFrames/Series " + "will be aligned before applying the ufunc.\nConvert one of the " + "arguments to a NumPy array (eg 'ufunc(df1, np.asarray(df2)') to keep " + "the current behaviour, or align manually (eg " + "'df1, df2 = df1.align(df2)') before passing to the ufunc to obtain " + "the future behaviour and silence this warning.", FutureWarning, stacklevel=3, ) From 64b9430a2af43d9dbf20c0655fc7f8f9bd496c06 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 18 Jan 2021 20:47:41 +0100 Subject: [PATCH 06/14] clarify wording in warning --- pandas/core/arraylike.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 1361aaedc1ecc..8722683308789 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -192,14 +192,14 @@ def is_aligned(frame, other): # if at least one is not aligned -> warn and fallback to array behaviour if non_aligned: warnings.warn( - "Calling a ufunc on non-aligned DataFrames/Series. 
Currently, the " - "indices are ignored and the result takes the index/columns of the " - "first DataFrame. In the future (pandas 2.0), the DataFrames/Series " - "will be aligned before applying the ufunc.\nConvert one of the " - "arguments to a NumPy array (eg 'ufunc(df1, np.asarray(df2)') to keep " - "the current behaviour, or align manually (eg " - "'df1, df2 = df1.align(df2)') before passing to the ufunc to obtain " - "the future behaviour and silence this warning.", + "Calling a ufunc on non-aligned DataFrames (or DataFrame/Series " + "combination). Currently, the indices are ignored and the result " + "takes the index/columns of the first DataFrame. In the future " + "(pandas 2.0), the DataFrames/Series will be aligned before " + "applying the ufunc.\nConvert one of the arguments to a NumPy array " + "(eg 'ufunc(df1, np.asarray(df2)') to keep the current behaviour, " + "or align manually (eg 'df1, df2 = df1.align(df2)') before passing to " + "the ufunc to obtain the future behaviour and silence this warning.", FutureWarning, stacklevel=3, ) From 3b66b14a7657ec5abaee854700b9fe2844b9f938 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 19 Jan 2021 15:35:42 +0100 Subject: [PATCH 07/14] refactor into separate helper function --- pandas/core/arraylike.py | 70 ++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 8722683308789..954126bd8ea24 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -149,44 +149,50 @@ def __rpow__(self, other): return self._arith_method(other, roperator.rpow) -def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any): +# ----------------------------------------------------------------------------- +# Helpers to implement __array_ufunc__ + + +def _is_aligned(frame, other): """ - Compatibility with numpy ufuncs. 
+ Helper to check if a DataFrame is aligned with another DataFrame or Series. + """ + from pandas.core.frame import DataFrame - See also - -------- - numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__ + if isinstance(other, DataFrame): + return frame._indexed_same(other) + else: + # Series -> match index + return frame.columns.equals(other.index) + + +def _maybe_fallback(ufunc: Callable, method: str, *inputs: Any, **kwargs: Any): + """ + In the future DataFrame, inputs to ufuncs will be aligned before applying + the ufunc, but for now we ignore the index but raise a warning if behaviour + would change in the future. + This helper detects the case where a warning is needed and then fallbacks + to applying the ufunc on arrays to avoid alignment. + + See https://github.com/pandas-dev/pandas/pull/39239 """ from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame - from pandas.core.internals import BlockManager - - cls = type(self) is_ndframe = [isinstance(x, NDFrame) for x in inputs] is_frame = [isinstance(x, DataFrame) for x in inputs] if (sum(is_ndframe) >= 2) and (sum(is_frame) >= 1): - # if there are 2 alignable inputs, of which at least 1 is a + # if there are 2 alignable inputs (NDFrames), of which at least 1 is a # DataFrame -> we would have had no alignment before -> warn that this # will align in the future # the first frame is what determines the output index/columns in pandas < 1.2 - for x in inputs: - if isinstance(x, DataFrame): - first_frame = x - break + first_frame = next(x for x in inputs if isinstance(x, DataFrame)) # check if the objects are aligned or not - def is_aligned(frame, other): - if isinstance(other, DataFrame): - return frame._indexed_same(other) - else: - # Series -> match index - return frame.columns.equals(other.index) - non_aligned = sum( - not is_aligned(first_frame, x) for x in inputs if isinstance(x, NDFrame) + not _is_aligned(first_frame, x) for x in inputs if isinstance(x, 
NDFrame) ) # if at least one is not aligned -> warn and fallback to array behaviour @@ -217,6 +223,28 @@ def is_aligned(frame, other): # call the ufunc on those transformed inputs return getattr(ufunc, method)(*new_inputs, **kwargs) + else: + # signal that we didn't fallback / execute the ufunc yet + return NotImplemented + + +def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any): + """ + Compatibility with numpy ufuncs. + + See also + -------- + numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__ + """ + from pandas.core.generic import NDFrame + from pandas.core.internals import BlockManager + + cls = type(self) + + # for backwards compatibility check and potentially fallback for non-aligned frames + result = _maybe_fallback(ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result # for binary ops, use our custom dunder methods result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs) From 4dcde0e65d3285ab7a8f7e2f83f68b0f4a606691 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 19 Jan 2021 15:38:44 +0100 Subject: [PATCH 08/14] fixup --- pandas/core/arraylike.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 954126bd8ea24..d84ee01923de2 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -207,7 +207,7 @@ def _maybe_fallback(ufunc: Callable, method: str, *inputs: Any, **kwargs: Any): "or align manually (eg 'df1, df2 = df1.align(df2)') before passing to " "the ufunc to obtain the future behaviour and silence this warning.", FutureWarning, - stacklevel=3, + stacklevel=4, ) # keep the first dataframe of the inputs, other DataFrame/Series is @@ -223,9 +223,9 @@ def _maybe_fallback(ufunc: Callable, method: str, *inputs: Any, **kwargs: Any): # call the ufunc on those transformed inputs return getattr(ufunc, method)(*new_inputs, **kwargs) - else: - # 
signal that we didn't fallback / execute the ufunc yet - return NotImplemented + + # signal that we didn't fallback / execute the ufunc yet + return NotImplemented def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any): From 20be3c75b2cd2528d3a5d44e95d6ab16c0333c2b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 19 Jan 2021 15:42:09 +0100 Subject: [PATCH 09/14] add link to original PR --- doc/source/whatsnew/v1.2.1.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 8c39b43595b5e..5cf9fdf6e8b7a 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -76,8 +76,8 @@ the inputs: 2 NaN NaN In pandas 1.2.0, we refactored how NumPy ufuncs are called on DataFrames, and -this started to align the inputs first, as happens in other pandas operations -and as it happens for ufuncs called on Series objects. +this started to align the inputs first (:issue:`39184`), as happens in other +pandas operations and as it happens for ufuncs called on Series objects. 
For pandas 1.2.1, we restored the previous behaviour to avoid a breaking change, but the above example of ``np.add(df1, df2)`` with non-aligned inputs From f80b780c5f577a78de7d03286af31e964bb8fcc4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 19 Jan 2021 16:41:46 +0100 Subject: [PATCH 10/14] add note to v1.2.0 as well --- doc/source/whatsnew/v1.2.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 95757448a7978..688bcea1e698a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -286,6 +286,8 @@ Other enhancements - Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`) - Calling a NumPy ufunc on a ``DataFrame`` with extension types now preserves the extension types when possible (:issue:`23743`) - Calling a binary-input NumPy ufunc on multiple ``DataFrame`` objects now aligns, matching the behavior of binary operations and ufuncs on ``Series`` (:issue:`23743`). + This change has been reverted in pandas 1.2.1, and the behaviour to not align DataFrames + is deprecated instead, see :ref:`the 1.2.1 release notes <whatsnew_121.ufunc_deprecation>`. 
- Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`) - :meth:`DataFrame.to_parquet` now supports :class:`MultiIndex` for columns in parquet format (:issue:`34777`) - :func:`read_parquet` gained a ``use_nullable_dtypes=True`` option to use nullable dtypes that use ``pd.NA`` as missing value indicator where possible for the resulting DataFrame (default is ``False``, and only applicable for ``engine="pyarrow"``) (:issue:`31242`) From eaa83ed9d6a063072867c7531b785ce95eb9dce1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 19 Jan 2021 19:36:00 +0100 Subject: [PATCH 11/14] clean-up based on review --- pandas/core/arraylike.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 7dea2fec4bee7..158255d8c938b 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -157,7 +157,7 @@ def _is_aligned(frame, other): """ Helper to check if a DataFrame is aligned with another DataFrame or Series. 
""" - from pandas.core.frame import DataFrame + from pandas import DataFrame if isinstance(other, DataFrame): return frame._indexed_same(other) @@ -176,15 +176,15 @@ def _maybe_fallback(ufunc: Callable, method: str, *inputs: Any, **kwargs: Any): See https://github.com/pandas-dev/pandas/pull/39239 """ - from pandas.core.frame import DataFrame + from pandas import DataFrame from pandas.core.generic import NDFrame - is_ndframe = [isinstance(x, NDFrame) for x in inputs] - is_frame = [isinstance(x, DataFrame) for x in inputs] + n_alignable = sum(isinstance(x, NDFrame) for x in inputs) + n_frames = sum(isinstance(x, DataFrame) for x in inputs) - if (sum(is_ndframe) >= 2) and (sum(is_frame) >= 1): - # if there are 2 alignable inputs (NDFrames), of which at least 1 is a - # DataFrame -> we would have had no alignment before -> warn that this + if n_alignable >= 2 and n_frames >= 1: + # if there are 2 alignable inputs (Series or DataFrame), of which at least 1 + # is a DataFrame -> we would have had no alignment before -> warn that this # will align in the future # the first frame is what determines the output index/columns in pandas < 1.2 From 81e7c846b980686a620f4d99c3fbc2206a4ca41b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 19 Jan 2021 21:53:16 +0100 Subject: [PATCH 12/14] add longer note in deprecation section of v1.2.0 docs --- doc/source/whatsnew/v1.2.0.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 688bcea1e698a..dfd23309faaef 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -538,6 +538,14 @@ Deprecations - The ``inplace`` parameter of :meth:`Categorical.remove_unused_categories` is deprecated and will be removed in a future version (:issue:`37643`) - The ``null_counts`` parameter of :meth:`DataFrame.info` is deprecated and replaced by ``show_counts``. 
It will be removed in a future version (:issue:`37999`) +**Calling NumPy ufuncs on non-aligned DataFrames** + +Calling NumPy ufuncs on non-aligned DataFrames changed behaviour in pandas +1.2.0 (to align the inputs before calling the ufunc), but this change is +reverted in pandas 1.2.1. The behaviour to not align is now deprecated instead, +see :ref:`the 1.2.1 release notes <whatsnew_121.ufunc_deprecation>` for +more details. + .. --------------------------------------------------------------------------- From 4703410a00c94d2cad2d123f1113a6626b447f99 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 20 Jan 2021 08:18:54 +0100 Subject: [PATCH 13/14] remove pandas 2.0 mention --- pandas/core/arraylike.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 158255d8c938b..cb185dcf78f63 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -200,9 +200,9 @@ def _maybe_fallback(ufunc: Callable, method: str, *inputs: Any, **kwargs: Any): warnings.warn( "Calling a ufunc on non-aligned DataFrames (or DataFrame/Series " "combination). Currently, the indices are ignored and the result " - "takes the index/columns of the first DataFrame. In the future " - "(pandas 2.0), the DataFrames/Series will be aligned before " - "applying the ufunc.\nConvert one of the arguments to a NumPy array " + "takes the index/columns of the first DataFrame. 
In the future , " + "the DataFrames/Series will be aligned before applying the ufunc.\n" + "Convert one of the arguments to a NumPy array " "(eg 'ufunc(df1, np.asarray(df2)') to keep the current behaviour, " "or align manually (eg 'df1, df2 = df1.align(df2)') before passing to " "the ufunc to obtain the future behaviour and silence this warning.", From 5ed00bb75ebfc4dbc7c294a3a8b6772cba4bb775 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 20 Jan 2021 08:19:17 +0100 Subject: [PATCH 14/14] update date release notes --- doc/source/whatsnew/v1.2.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 4e09e9e733a97..474970601022c 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -1,6 +1,6 @@ .. _whatsnew_121: -What's new in 1.2.1 (January 18, 2021) +What's new in 1.2.1 (January 20, 2021) -------------------------------------- These are the changes in pandas 1.2.1. See :ref:`release` for a full changelog