From d7446f4de63cf98dc055e3e6caa85736cc4bfbd0 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Wed, 17 Jul 2024 12:38:19 +0300 Subject: [PATCH 01/78] add simple when --- narwhals/_pandas_like/expr.py | 62 ++++++++++++++++++++++++++++++++ narwhals/expression.py | 29 +++++++++++++++ narwhals/expressions/whenthen.py | 0 tests/test_common.py | 10 ++++++ 4 files changed, 101 insertions(+) create mode 100644 narwhals/expressions/whenthen.py diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index afe57e780..bc65d2d73 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -4,8 +4,10 @@ from typing import Any from typing import Callable from typing import Literal +from typing import Iterable from narwhals._pandas_like.series import PandasSeries +from narwhals._pandas_like.typing import IntoPandasExpr from narwhals._pandas_like.utils import reuse_series_implementation from narwhals._pandas_like.utils import reuse_series_namespace_implementation @@ -296,6 +298,14 @@ def str(self) -> PandasExprStringNamespace: def dt(self) -> PandasExprDateTimeNamespace: return PandasExprDateTimeNamespace(self) + def when(self, *predicates: PandasExpr | Iterable[PandasExpr], **conditions: Any) -> PandasWhen: + # TODO: Support conditions + from narwhals._pandas_like.namespace import PandasNamespace + + plx = PandasNamespace(self._implementation) + condition = plx.all_horizontal(*predicates) + return PandasWhen(self, condition) + class PandasExprStringNamespace: def __init__(self, expr: PandasExpr) -> None: @@ -380,3 +390,55 @@ def total_nanoseconds(self) -> PandasExpr: return reuse_series_namespace_implementation( self._expr, "dt", "total_nanoseconds" ) + +class PandasWhen: + def __init__(self, condition: PandasExpr) -> None: + self._condition = condition + + def then(self, value: Any) -> PandasThen: + return PandasThen(self, value=value, implementation=self._condition._implementation) + +class PandasThen(PandasExpr): + def __init__(self, when: PandasWhen, *, value: Any, implementation: str) -> None: + self._when = when + self._then_value = value + self._implementation = implementation + + def func(df: PandasDataFrame) -> list[PandasSeries]: + from narwhals._pandas_like.namespace import PandasNamespace + + plx = PandasNamespace(implementation=self._implementation) + + condition = self._when._condition._call(df)[0] + + value_series = plx._create_series_from_scalar(self._then_value, condition) + none_series = plx._create_series_from_scalar(None, condition) + return [ + value_series.zip_with(condition, none_series) + ] + + self._call = func + self._depth = 0 + self._function_name = "whenthen" + self._root_names = None + self._output_names = None + + def otherwise(self, value: Any) -> PandasExpr: + def func(df: PandasDataFrame) -> list[PandasSeries]: + from narwhals._pandas_like.namespace import PandasNamespace + plx = PandasNamespace(implementation=self._implementation) + condition = self._when._condition._call(df)[0] + value_series = plx._create_series_from_scalar(self._then_value, condition) + otherwise_series = plx._create_series_from_scalar(value, condition) + return [ + value_series.zip_with(condition, otherwise_series) + ] + + return PandasExpr( + func, + depth=0, + function_name="whenthenotherwise", + root_names=None, + output_names=None, + implementation=self._implementation, + ) diff --git a/narwhals/expression.py b/narwhals/expression.py index b4c1ed242..93ddac3b0 100644 --- a/narwhals/expression.py +++ b/narwhals/expression.py @@ -11,6 +11,9 @@ from narwhals.utils import flatten from narwhals.utils import parse_version + +from functools import reduce + if TYPE_CHECKING: from narwhals.typing import IntoExpr @@ -2633,6 +2636,32 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: lambda plx: plx.sum_horizontal([extract_native(plx, v) for v in flatten(exprs)]) ) +class When: + def __init__(self, condition: Expr) -> None: + self._condition = condition + + def then(self, value: Any) -> Then: + return Then(self, value=value) + +class Then(Expr): + def __init__(self, when: When, *, value: Any) -> None: + self._when = when + self._then_value = value + + def func(plx): + return plx.when(self._when._condition._call(plx)).then(self._then_value) + + self._call = func + + def otherwise(self, value: Any) -> Expr: + def func(plx): + return plx.when(self._when._condition._call(plx)).then(self._then_value).otherwise(value) + + return Expr(func) + +def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: + return When(reduce(lambda a, b: a & b, flatten([predicates]))) + __all__ = [ "Expr", diff --git a/narwhals/expressions/whenthen.py b/narwhals/expressions/whenthen.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_common.py b/tests/test_common.py index 90b5c21af..4162d7b24 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -706,3 +706,13 @@ def test_quantile( df.select(nw.all().quantile(quantile=q, interpolation=interpolation)) ) compare_dicts(result, expected) + +@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) +def test_when(df_raw: Any) -> None: + df = nw.DataFrame(df_raw) + result = df.with_columns( + a=nw.when(nw.col("a") > 2, 1).otherwise(0), + b=nw.when(nw.col("a") > 2, 1).when(nw.col("a") < 1, -1).otherwise(0), + ) + expected = {"a": [0, 1, 0], "b": [0, 1, 0], "z": [7.0, 8.0, 9.0]} + compare_dicts(result, expected) From 6ebc78bcbe1b3edcf3ff3d814ea8719968f34e46 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Wed, 17 Jul 2024 12:43:10 +0300 Subject: [PATCH 02/78] delete unnecessary file --- narwhals/expressions/whenthen.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 narwhals/expressions/whenthen.py diff --git a/narwhals/expressions/whenthen.py b/narwhals/expressions/whenthen.py deleted file mode 100644 index e69de29bb..000000000 From a3fdcc5e23f9f45873df4a4f6c5564ee858b59c7 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Wed, 17 Jul 2024 12:44:10 +0300 Subject: [PATCH 03/78] lint with ruff --- narwhals/_pandas_like/expr.py | 3 +-- narwhals/expression.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index bc65d2d73..b2c6ae1e3 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -3,11 +3,10 @@ from typing import TYPE_CHECKING from typing import Any from typing import Callable -from typing import Literal from typing import Iterable +from typing import Literal from narwhals._pandas_like.series import PandasSeries -from narwhals._pandas_like.typing import IntoPandasExpr from narwhals._pandas_like.utils import reuse_series_implementation from narwhals._pandas_like.utils import reuse_series_namespace_implementation diff --git a/narwhals/expression.py b/narwhals/expression.py index 93ddac3b0..eeeeae961 100644 --- a/narwhals/expression.py +++ b/narwhals/expression.py @@ -1,5 +1,6 @@ from __future__ import annotations +from functools import reduce from typing import TYPE_CHECKING from typing import Any from typing import Callable @@ -11,9 +12,6 @@ from narwhals.utils import flatten from narwhals.utils import parse_version - -from functools import reduce - if TYPE_CHECKING: from narwhals.typing import IntoExpr From 1ad1c94d91852cc0a069bdaadf74c5fa3dfae6cd Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Wed, 17 Jul 2024 17:06:18 +0300 Subject: [PATCH 04/78] use lambda expression --- narwhals/expression.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/narwhals/expression.py b/narwhals/expression.py index eeeeae961..090aa0cec 100644 --- a/narwhals/expression.py +++ b/narwhals/expression.py @@ -2646,18 +2646,12 @@ def __init__(self, when: When, *, value: Any) -> None: self._when = when self._then_value = value - def func(plx): - return plx.when(self._when._condition._call(plx)).then(self._then_value) - - self._call = func + self._call = lambda plx: plx.when(self._when._condition._call(plx)).then(self._then_value) def otherwise(self, value: Any) -> Expr: - def func(plx): - return plx.when(self._when._condition._call(plx)).then(self._then_value).otherwise(value) - - return Expr(func) + return Expr(lambda plx: plx.when(self._when._condition._call(plx)).then(self._then_value).otherwise(value)) -def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: +def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: # noqa: ARG001 return When(reduce(lambda a, b: a & b, flatten([predicates]))) From 93e712193deb3018b0157cc5af1a8dcebca54d9d Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Thu, 18 Jul 2024 12:46:47 +0300 Subject: [PATCH 05/78] remove deleted file --- tests/test_common.py | 718 ------------------------------------------- 1 file changed, 718 deletions(-) delete mode 100644 tests/test_common.py diff --git a/tests/test_common.py b/tests/test_common.py deleted file mode 100644 index 4162d7b24..000000000 --- a/tests/test_common.py +++ /dev/null @@ -1,718 +0,0 @@ -from __future__ import annotations - -import os -import warnings -from typing import Any -from typing import Literal - -import numpy as np -import pandas as pd -import polars as pl -import pytest -from pandas.testing import assert_series_equal as pd_assert_series_equal -from polars.testing import assert_series_equal as pl_assert_series_equal - -import narwhals as nw -from narwhals.utils import parse_version -from tests.utils import compare_dicts - -df_pandas = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) -if parse_version(pd.__version__) >= parse_version("1.5.0"): - df_pandas_pyarrow = pd.DataFrame( - {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - ).astype( - { - "a": "Int64[pyarrow]", - "b": "Int64[pyarrow]", - "z": "Float64[pyarrow]", - } - ) - df_pandas_nullable = pd.DataFrame( - {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - ).astype( - { - "a": "Int64", - "b": "Int64", - "z": "Float64", - } - ) -else: # pragma: no cover - df_pandas_pyarrow = df_pandas - df_pandas_nullable = df_pandas -df_polars = pl.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) -df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) -df_pandas_na = pd.DataFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]}) -df_lazy_na = pl.LazyFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]}) -df_right_pandas = pd.DataFrame({"c": [6, 12, -1], "d": [0, -4, 2]}) -df_right_lazy = pl.LazyFrame({"c": [6, 12, -1], "d": [0, -4, 2]}) - -if os.environ.get("CI", None): - try: - import modin.pandas as mpd - except ImportError: # pragma: no cover - df_mpd = df_pandas.copy() - else: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=UserWarning) - df_mpd = mpd.DataFrame( - pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) - ) -else: # pragma: no cover - df_mpd = df_pandas.copy() - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_polars, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_sort(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.sort("a", "b") - result_native = nw.to_native(result) - expected = { - "a": [1, 2, 3], - "b": [4, 6, 4], - "z": [7.0, 9.0, 8.0], - } - compare_dicts(result_native, expected) - result = df.sort("a", "b", descending=[True, False]) - result_native = nw.to_native(result) - expected = { - "a": [3, 2, 1], - "b": [4, 6, 4], - "z": [8.0, 9.0, 7.0], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_filter(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.filter(nw.col("a") > 1) - result_native = nw.to_native(result) - expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_polars], -) -def test_filter_series(df_raw: Any) -> None: - df = nw.DataFrame(df_raw).with_columns(mask=nw.col("a") > 1) - result = df.filter(df["mask"]).drop("mask") - result_native = nw.to_native(result) - expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_add(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.with_columns( - c=nw.col("a") + nw.col("b"), - d=nw.col("a") - nw.col("a").mean(), - e=nw.col("a") - nw.col("a").std(), - ) - result_native = nw.to_native(result) - expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "z": [7.0, 8.0, 9.0], - "c": [5, 7, 8], - "d": [-1.0, 1.0, 0.0], - "e": [0.0, 2.0, 1.0], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_std(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.select( - nw.col("a").std().alias("a_ddof_default"), - nw.col("a").std(ddof=1).alias("a_ddof_1"), - nw.col("a").std(ddof=0).alias("a_ddof_0"), - nw.col("b").std(ddof=2).alias("b_ddof_2"), - nw.col("z").std(ddof=0).alias("z_ddof_0"), - ) - result_native = nw.to_native(result) - expected = { - "a_ddof_default": [1.0], - "a_ddof_1": [1.0], - "a_ddof_0": [0.816497], - "b_ddof_2": [1.632993], - "z_ddof_0": [0.816497], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_double(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.with_columns(nw.all() * 2) - result_native = nw.to_native(result) - expected = {"a": [2, 6, 4], "b": [8, 8, 12], "z": [14.0, 16.0, 18.0]} - compare_dicts(result_native, expected) - result = df.with_columns(nw.col("a").alias("o"), nw.all() * 2) - result_native = nw.to_native(result) - expected = {"o": [1, 3, 2], "a": [2, 6, 4], "b": [8, 8, 12], "z": [14.0, 16.0, 18.0]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_select(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.select("a") - result_native = nw.to_native(result) - expected = {"a": [1, 3, 2]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_lazy, df_pandas_nullable]) -def test_sumh(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.with_columns(horizonal_sum=nw.sum_horizontal(nw.col("a"), nw.col("b"))) - result_native = nw.to_native(result) - expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "z": [7.0, 8.0, 9.0], - "horizonal_sum": [5, 7, 8], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_sumh_literal(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.with_columns(horizonal_sum=nw.sum_horizontal("a", nw.col("b"))) - result_native = nw.to_native(result) - expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "z": [7.0, 8.0, 9.0], - "horizonal_sum": [5, 7, 8], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_sum_all(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.select(nw.all().sum()) - result_native = nw.to_native(result) - expected = {"a": [6], "b": [14], "z": [24.0]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_double_selected(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.select(nw.col("a", "b") * 2) - result_native = nw.to_native(result) - expected = {"a": [2, 6, 4], "b": [8, 8, 12]} - compare_dicts(result_native, expected) - result = df.select("z", nw.col("a", "b") * 2) - result_native = nw.to_native(result) - expected = {"z": [7, 8, 9], "a": [2, 6, 4], "b": [8, 8, 12]} - compare_dicts(result_native, expected) - result = df.select("a").select(nw.col("a") + nw.all()) - result_native = nw.to_native(result) - expected = {"a": [2, 6, 4]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_rename(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.rename({"a": "x", "b": "y"}) - result_native = nw.to_native(result) - expected = {"x": [1, 3, 2], "y": [4, 4, 6], "z": [7.0, 8, 9]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_join(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - df_right = df - result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner") - result_native = nw.to_native(result) - expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "z_right": [7.0, 8, 9]} - compare_dicts(result_native, expected) - - with pytest.raises(NotImplementedError): - result = df.join(df_right, left_on="a", right_on="a", how="left") # type: ignore[arg-type] - - result = df.collect().join(df_right.collect(), left_on="a", right_on="a", how="inner") # type: ignore[assignment] - result_native = nw.to_native(result) - expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "b_right": [4, 4, 6], - "z": [7.0, 8, 9], - "z_right": [7.0, 8, 9], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_schema(df_raw: Any) -> None: - result = nw.LazyFrame(df_raw).schema - expected = {"a": nw.Int64, "b": nw.Int64, "z": nw.Float64} - assert result == expected - result = nw.LazyFrame(df_raw).collect().schema - expected = {"a": nw.Int64, "b": nw.Int64, "z": nw.Float64} - assert result == expected - result = nw.LazyFrame(df_raw).columns # type: ignore[assignment] - expected = ["a", "b", "z"] # type: ignore[assignment] - assert result == expected - result = nw.LazyFrame(df_raw).collect().columns # type: ignore[assignment] - expected = ["a", "b", "z"] # type: ignore[assignment] - assert result == expected - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_columns(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.columns - expected = ["a", "b", "z"] - assert result == expected - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) -def test_lazy_instantiation(df_raw: Any) -> None: - result = nw.LazyFrame(df_raw) - result_native = nw.to_native(result) - expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize("df_raw", [df_lazy]) -def test_lazy_instantiation_error(df_raw: Any) -> None: - with pytest.raises( - TypeError, match="Can't instantiate DataFrame from Polars LazyFrame." - ): - _ = nw.DataFrame(df_raw).shape - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) -def test_eager_instantiation(df_raw: Any) -> None: - result = nw.DataFrame(df_raw) - result_native = nw.to_native(result) - expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - compare_dicts(result_native, expected) - - -def test_accepted_dataframes() -> None: - array = np.array([[0, 4.0], [2, 5]]) - with pytest.raises( - TypeError, - match="Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: ", - ): - nw.DataFrame(array) - with pytest.raises( - TypeError, - match="Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: ", - ): - nw.LazyFrame(array) - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) -@pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning") -def test_convert_pandas(df_raw: Any) -> None: - result = nw.from_native(df_raw).to_pandas() # type: ignore[union-attr] - expected = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) - pd.testing.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_polars, df_pandas, df_mpd, df_pandas_nullable, df_pandas_pyarrow] -) -@pytest.mark.filterwarnings( - r"ignore:np\.find_common_type is deprecated\.:DeprecationWarning" -) -def test_convert_numpy(df_raw: Any) -> None: - result = nw.DataFrame(df_raw).to_numpy() - expected = np.array([[1, 3, 2], [4, 4, 6], [7.0, 8, 9]]).T - np.testing.assert_array_equal(result, expected) - assert result.dtype == "float64" - result = nw.DataFrame(df_raw).__array__() - np.testing.assert_array_equal(result, expected) - assert result.dtype == "float64" - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) -def test_shape(df_raw: Any) -> None: - result = nw.DataFrame(df_raw).shape - expected = (3, 3) - assert result == expected - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) -def test_expr_binary(df_raw: Any) -> None: - result = nw.LazyFrame(df_raw).with_columns( - a=(1 + 3 * nw.col("a")) * (1 / nw.col("a")), - b=nw.col("z") / (2 - nw.col("b")), - c=nw.col("a") + nw.col("b") / 2, - d=nw.col("a") - nw.col("b"), - e=((nw.col("a") > nw.col("b")) & (nw.col("a") >= nw.col("z"))).cast(nw.Int64), - f=( - (nw.col("a") < nw.col("b")) - | (nw.col("a") <= nw.col("z")) - | (nw.col("a") == 1) - ).cast(nw.Int64), - g=nw.col("a") != 1, - h=(False & (nw.col("a") != 1)), - i=(False | (nw.col("a") != 1)), - j=2 ** nw.col("a"), - k=2 // nw.col("a"), - l=nw.col("a") // 2, - m=nw.col("a") ** 2, - n=nw.col("a") % 2, - o=2 % nw.col("a"), - ) - result_native = nw.to_native(result) - expected = { - "a": [4, 3.333333, 3.5], - "b": [-3.5, -4.0, -2.25], - "z": [7.0, 8.0, 9.0], - "c": [3, 5, 5], - "d": [-3, -1, -4], - "e": [0, 0, 0], - "f": [1, 1, 1], - "g": [False, True, True], - "h": [False, False, False], - "i": [False, True, True], - "j": [2, 8, 4], - "k": [2, 0, 1], - "l": [0, 1, 1], - "m": [1, 9, 4], - "n": [1, 1, 0], - "o": [0, 2, 0], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_lazy]) -def test_expr_unary(df_raw: Any) -> None: - result = ( - nw.from_native(df_raw) - .with_columns( - a_mean=nw.col("a").mean(), - a_sum=nw.col("a").sum(), - b_nunique=nw.col("b").n_unique(), - z_min=nw.col("z").min(), - z_max=nw.col("z").max(), - ) - .select(nw.col("a_mean", "a_sum", "b_nunique", "z_min", "z_max").unique()) - ) - result_native = nw.to_native(result) - expected = {"a_mean": [2], "a_sum": [6], "b_nunique": [2], "z_min": [7], "z_max": [9]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) -def test_expr_transform(df_raw: Any) -> None: - result = nw.LazyFrame(df_raw).with_columns( - a=nw.col("a").is_between(-1, 1), b=nw.col("b").is_in([4, 5]) - ) - result_native = nw.to_native(result) - expected = {"a": [True, False, False], "b": [True, True, False], "z": [7, 8, 9]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_lazy]) -def test_expr_min_max(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result_min = nw.to_native(df.select(nw.min("a", "b", "z"))) - result_max = nw.to_native(df.select(nw.max("a", "b", "z"))) - expected_min = {"a": [1], "b": [4], "z": [7]} - expected_max = {"a": [3], "b": [6], "z": [9]} - compare_dicts(result_min, expected_min) - compare_dicts(result_max, expected_max) - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) -def test_expr_sample(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result_shape = nw.to_native(df.select(nw.col("a").sample(n=2)).collect()).shape - expected = (2, 1) - assert result_shape == expected - result_shape = nw.to_native(df.collect()["a"].sample(n=2)).shape - expected = (2,) # type: ignore[assignment] - assert result_shape == expected - - -@pytest.mark.parametrize("df_raw", [df_pandas_na, df_lazy_na]) -def test_expr_na(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result_nna = nw.to_native( - df.filter((~nw.col("a").is_null()) & (~df.collect()["z"].is_null())) - ) - expected = {"a": [2], "b": [6], "z": [9]} - compare_dicts(result_nna, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_head(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = nw.to_native(df.head(2)) - expected = {"a": [1, 3], "b": [4, 4], "z": [7.0, 8.0]} - compare_dicts(result, expected) - result = nw.to_native(df.collect().head(2)) - expected = {"a": [1, 3], "b": [4, 4], "z": [7.0, 8.0]} - compare_dicts(result, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_unique(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = nw.to_native(df.unique("b").sort("b")) - expected = {"a": [1, 2], "b": [4, 6], "z": [7.0, 9.0]} - compare_dicts(result, expected) - result = nw.to_native(df.collect().unique("b").sort("b")) - expected = {"a": [1, 2], "b": [4, 6], "z": [7.0, 9.0]} - compare_dicts(result, expected) - - -@pytest.mark.parametrize("df_raw", [df_pandas_na, df_lazy_na]) -def test_drop_nulls(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = nw.to_native(df.select(nw.col("a").drop_nulls())) - expected = {"a": [3, 2]} - compare_dicts(result, expected) - result = nw.to_native(df.select(df.collect()["a"].drop_nulls())) - expected = {"a": [3, 2]} - compare_dicts(result, expected) - - -@pytest.mark.parametrize( - ("df_raw", "df_raw_right"), [(df_pandas, df_right_pandas), (df_lazy, df_right_lazy)] -) -def test_concat_horizontal(df_raw: Any, df_raw_right: Any) -> None: - df_left = nw.LazyFrame(df_raw) - df_right = nw.LazyFrame(df_raw_right) - result = nw.concat([df_left, df_right], how="horizontal") - result_native = nw.to_native(result) - expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "z": [7.0, 8, 9], - "c": [6, 12, -1], - "d": [0, -4, 2], - } - compare_dicts(result_native, expected) - - with pytest.raises(ValueError, match="No items"): - nw.concat([]) - - -@pytest.mark.parametrize( - ("df_raw", "df_raw_right"), [(df_pandas, df_right_pandas), (df_lazy, df_right_lazy)] -) -def test_concat_vertical(df_raw: Any, df_raw_right: Any) -> None: - df_left = nw.LazyFrame(df_raw).collect().rename({"a": "c", "b": "d"}).lazy().drop("z") - df_right = nw.LazyFrame(df_raw_right) - result = nw.concat([df_left, df_right], how="vertical") - result_native = nw.to_native(result) - expected = {"c": [1, 3, 2, 6, 12, -1], "d": [4, 4, 6, 0, -4, 2]} - compare_dicts(result_native, expected) - with pytest.raises(ValueError, match="No items"): - nw.concat([], how="vertical") - with pytest.raises(Exception, match="unable to vstack"): - nw.concat([df_left, df_right.rename({"d": "i"})], how="vertical").collect() # type: ignore[union-attr] - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -def test_lazy(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = df.lazy() - assert isinstance(result, nw.LazyFrame) - - -def test_to_dict() -> None: - df = nw.DataFrame(df_pandas) - result = df.to_dict(as_series=True) - expected = { - "a": pd.Series([1, 3, 2], name="a"), - "b": pd.Series([4, 4, 6], name="b"), - "z": pd.Series([7.0, 8, 9], name="z"), - } - for key in expected: - pd_assert_series_equal(nw.to_native(result[key]), expected[key]) - - df = nw.DataFrame(df_polars) - result = df.to_dict(as_series=True) - expected = { - "a": pl.Series("a", [1, 3, 2]), - "b": pl.Series("b", [4, 4, 6]), - "z": pl.Series("z", [7.0, 8, 9]), - } - for key in expected: - pl_assert_series_equal(nw.to_native(result[key]), expected[key]) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_any_all(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = nw.to_native(df.select((nw.all() > 1).all())) - expected = {"a": [False], "b": [True], "z": [True]} - compare_dicts(result, expected) - result = nw.to_native(df.select((nw.all() > 1).any())) - expected = {"a": [True], "b": [True], "z": [True]} - compare_dicts(result, expected) - - -def test_invalid() -> None: - df = nw.LazyFrame(df_pandas) - with pytest.raises(ValueError, match="Multi-output"): - df.select(nw.all() + nw.all()) - with pytest.raises(TypeError, match="Perhaps you:"): - df.select([pl.col("a")]) # type: ignore[list-item] - with pytest.raises(TypeError, match="Perhaps you:"): - df.select([nw.col("a").cast(pl.Int64)]) - - -@pytest.mark.parametrize("df_raw", [df_pandas]) -def test_reindex(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = df.select("b", df["a"].sort(descending=True)) - expected = {"b": [4, 4, 6], "a": [3, 2, 1]} - compare_dicts(result, expected) - result = df.select("b", nw.col("a").sort(descending=True)) - compare_dicts(result, expected) - - s = df["a"] - result_s = s > s.sort() - assert not result_s[0] - assert result_s[1] - assert not result_s[2] - result = df.with_columns(s.sort()) - expected = {"a": [1, 2, 3], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} # type: ignore[list-item] - compare_dicts(result, expected) - with pytest.raises(ValueError, match="Multi-output expressions are not supported"): - nw.to_native(df.with_columns(nw.all() + nw.all())) - - -@pytest.mark.parametrize( - ("df_raw", "df_raw_right"), - [(df_pandas, df_polars), (df_polars, df_pandas)], -) -def test_library(df_raw: Any, df_raw_right: Any) -> None: - df_left = nw.LazyFrame(df_raw) - df_right = nw.LazyFrame(df_raw_right) - with pytest.raises( - NotImplementedError, match="Cross-library comparisons aren't supported" - ): - nw.concat([df_left, df_right], how="horizontal") - with pytest.raises( - NotImplementedError, match="Cross-library comparisons aren't supported" - ): - nw.concat([df_left, df_right], how="vertical") - with pytest.raises( - NotImplementedError, match="Cross-library comparisons aren't supported" - ): - df_left.join(df_right, left_on=["a"], right_on=["a"], how="inner") - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -def test_is_duplicated(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = nw.concat([df, df.head(1)]).is_duplicated() # type: ignore [union-attr] - expected = np.array([True, False, False, True]) - assert (result.to_numpy() == expected).all() - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -@pytest.mark.parametrize(("threshold", "expected"), [(0, False), (10, True)]) -def test_is_empty(df_raw: Any, threshold: Any, expected: Any) -> None: - df = nw.DataFrame(df_raw) - result = df.filter(nw.col("a") > threshold).is_empty() - assert result == expected - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -def test_is_unique(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = nw.concat([df, df.head(1)]).is_unique() # type: ignore [union-attr] - expected = np.array([False, True, True, False]) - assert (result.to_numpy() == expected).all() - - -@pytest.mark.parametrize("df_raw", [df_pandas_na, df_lazy_na.collect()]) -def test_null_count(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = nw.to_native(df.null_count()) - expected = {"a": [1], "b": [0], "z": [1]} - compare_dicts(result, expected) - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -@pytest.mark.parametrize( - ("interpolation", "expected"), - [ - ("lower", {"a": [1.0], "b": [4.0], "z": [7.0]}), - ("higher", {"a": [2.0], "b": [4.0], "z": [8.0]}), - ("midpoint", {"a": [1.5], "b": [4.0], "z": [7.5]}), - ("linear", {"a": [1.6], "b": [4.0], "z": [7.6]}), - ("nearest", {"a": [2.0], "b": [4.0], "z": [8.0]}), - ], -) -def test_quantile( - df_raw: Any, - interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"], - expected: dict[str, list[float]], -) -> None: - q = 0.3 - - df = nw.from_native(df_raw) - result = nw.to_native( - df.select(nw.all().quantile(quantile=q, interpolation=interpolation)) - ) - compare_dicts(result, expected) - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -def test_when(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = df.with_columns( - a=nw.when(nw.col("a") > 2, 1).otherwise(0), - b=nw.when(nw.col("a") > 2, 1).when(nw.col("a") < 1, -1).otherwise(0), - ) - expected = {"a": [0, 1, 0], "b": [0, 1, 0], "z": [7.0, 8.0, 9.0]} - compare_dicts(result, expected) From f3770b75d31836532b6708ceddb5724d569d766e Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 22 Jul 2024 16:38:38 +0300 Subject: [PATCH 06/78] Fix errors from the migration --- narwhals/_pandas_like/expr.py | 63 ----------------------- narwhals/_pandas_like/namespace.py | 80 ++++++++++++++++++++++++++++++ narwhals/expression.py | 15 +++--- tests/test_where.py | 47 ++++++++++++++++++ 4 files changed, 134 insertions(+), 71 deletions(-) create mode 100644 tests/test_where.py diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 50e67a70b..aaaa550a1 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING from typing import Any from typing import Callable -from typing import Iterable from typing import Literal from narwhals._expression_parsing import reuse_series_implementation @@ -337,14 +336,6 @@ def dt(self) -> PandasLikeExprDateTimeNamespace: def cat(self) -> PandasLikeExprCatNamespace: return PandasLikeExprCatNamespace(self) - def when(self, *predicates: PandasLikeExpr | Iterable[PandasExpr], **conditions: Any) -> PandasWhen: - # TODO: Support conditions - from narwhals._pandas_like.namespace import PandasLikeNamespace - - plx = PandasLikeNamespace(self._implementation) - condition = plx.all_horizontal(*predicates) - return PandasWhen(self, condition) - class PandasLikeExprCatNamespace: def __init__(self, expr: PandasLikeExpr) -> None: @@ -474,57 +465,3 @@ def to_string(self, format: str) -> PandasLikeExpr: # noqa: A002 return reuse_series_namespace_implementation( self._expr, "dt", "to_string", format ) - -class PandasWhen: - def __init__(self, condition: PandasLikeExpr) -> None: - self._condition = condition - - def then(self, value: Any) -> PandasThen: - return PandasThen(self, value=value, implementation=self._condition._implementation) - -class PandasThen(PandasLikeExpr): - def __init__(self, when: PandasWhen, *, value: Any, implementation: Implementation, backend_version: tuple[int, ...]) -> None: - self._when = when - self._then_value = value - self._implementation = implementation - self.backend_version = backend_version - - def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: - from narwhals._pandas_like.namespace import PandasLikeNamespace - - plx = PandasLikeNamespace(implementation=self._implementation, backend_version=self.backend_version) - - condition = self._when._condition._call(df)[0] - - value_series = plx._create_series_from_scalar(self._then_value, condition) - none_series = plx._create_series_from_scalar(None, condition) - return [ - value_series.zip_with(condition, none_series) - ] - - self._call = func - self._depth = 0 - self._function_name = "whenthen" - self._root_names = None - self._output_names = None - - def otherwise(self, value: Any) -> PandasLikeExpr: - def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: - from narwhals._pandas_like.namespace import PandasLikeNamespace - plx = PandasLikeNamespace(implementation=self._implementation, backend_version=self.backend_version) - condition = self._when._condition._call(df)[0] - value_series = plx._create_series_from_scalar(self._then_value, condition) - otherwise_series = plx._create_series_from_scalar(value, condition) - return [ - value_series.zip_with(condition, otherwise_series) - ] - - return PandasLikeExpr( - func, - depth=0, - function_name="whenthenotherwise", - root_names=None, - output_names=None, - implementation=self._implementation, - backend_version=self.backend_version, - ) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index dbae3bbb7..b62327e86 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -86,6 +86,17 @@ def _create_series_from_scalar( backend_version=self._backend_version, ) + def _create_broadcast_series_from_scalar( + self, value: Any, series: PandasLikeSeries + ) -> PandasLikeSeries: + return PandasLikeSeries._from_iterable( + [value] * len(series._native_series), + name=series._native_series.name, + index=series._native_series.index, + implementation=self._implementation, + backend_version=self._backend_version, + ) + def _create_expr_from_series(self, series: PandasLikeSeries) -> PandasLikeExpr: return PandasLikeExpr( lambda _df: [series], @@ -246,3 +257,72 @@ def concat( backend_version=self._backend_version, ) raise NotImplementedError + + def when(self, *predicates: IntoPandasLikeExpr | Iterable[IntoPandasLikeExpr], **conditions: Any) -> PandasWhen: # noqa: ARG002 + plx = self.__class__(self._implementation, self._backend_version) + condition = plx.all_horizontal(*predicates) + return PandasWhen(condition) + +class InnerPandasWhen: + def __init__(self, implementation: Implementation, backend_version: tuple[int, ...], condition: PandasLikeExpr, value: Any, otherise_value: Any = None) -> None: + self._implementation = implementation + self._backend_version = backend_version + self._condition = condition + self._value = value + self._otherwise_value = otherise_value + + def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: + from narwhals._pandas_like.namespace import PandasLikeNamespace + + plx = PandasLikeNamespace(implementation=self._implementation, backend_version=self._backend_version) + + condition = self._condition._call(df)[0] + + value_series = plx._create_broadcast_series_from_scalar(self._value, condition) + none_series = plx._create_broadcast_series_from_scalar(self._otherwise_value, condition) + return [ + value_series.zip_with(condition, none_series) + ] + +class PandasWhen: + def __init__(self, condition: PandasLikeExpr) -> None: + self._condition = condition + + def then(self, value: Any) -> PandasThen: + + return PandasThen( + InnerPandasWhen(self._condition._implementation, self._condition._backend_version, self._condition, value), + depth=0, + function_name="whenthen", + root_names=None, + output_names=None, + implementation=self._condition._implementation, + backend_version=self._condition._backend_version, + ) + +class PandasThen(PandasLikeExpr): + + def __init__( + self, + call: InnerPandasWhen, + *, + depth: int, + function_name: str, + root_names: list[str] | None, + output_names: list[str] | None, + implementation: Implementation, + backend_version: tuple[int, ...], + ) -> None: + self._implementation = implementation + self._backend_version = backend_version + + self._call = call + self._depth = depth + self._function_name = function_name + self._root_names = root_names + self._output_names = output_names + + def otherwise(self, value: Any) -> PandasLikeExpr: + self._call._otherwise_value = value + self._function_name = "whenotherwise" + return self diff --git a/narwhals/expression.py b/narwhals/expression.py index 904d164a9..4d1786f67 100644 --- a/narwhals/expression.py +++ b/narwhals/expression.py @@ -2968,7 +2968,7 @@ def to_string(self, format: str) -> Expr: # noqa: A002 of trailing zeros. Nonetheless, this is probably consistent enough for most applications. - If you have an application where this is not enough, please open an issue + If you have an application here this is not enough, please open an issue and let us know. Examples: @@ -3352,19 +3352,18 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: class When: def __init__(self, condition: Expr) -> None: self._condition = condition + self._then_value = None + self._otehrwise_value = None def then(self, value: Any) -> Then: - return Then(self, value=value) + return Then(lambda plx: plx.when(self._condition._call(plx)).then(value)) class Then(Expr): - def __init__(self, when: When, *, value: Any) -> None: - self._when = when - self._then_value = value - - self._call = lambda plx: plx.when(self._when._condition._call(plx)).then(self._then_value) + def __init__(self, call) -> None: # noqa: ANN001 + self._call = call def otherwise(self, value: Any) -> Expr: - return Expr(lambda plx: plx.when(self._when._condition._call(plx)).then(self._then_value).otherwise(value)) + return Expr(lambda plx: self._call(plx).otherwise(value)) def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: # noqa: ARG001 return When(reduce(lambda a, b: a & b, flatten([predicates]))) diff --git a/tests/test_where.py b/tests/test_where.py new file mode 100644 index 000000000..3661db0c5 --- /dev/null +++ b/tests/test_where.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from typing import Any + +import pytest + +import narwhals.stable.v1 as nw +from narwhals.expression import when +from tests.utils import compare_dicts + +data = { + "a": [1, 1, 2], + "b": ["a", "b", "c"], + "c": [4.1, 5.0, 6.0], + "d": [True, False, True], +} + + +def test_when(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + + df = nw.from_native(constructor(data)) + result = df.with_columns(when(nw.col("a") == 1).then(value=3).alias("a_when")) + expected = { + "a": [1, 1, 2], + "b": ["a", "b", "c"], + "c": [4.1, 5.0, 6.0], + "d": [True, False, True], + "a_when": [3, 3, None], + } + compare_dicts(result, expected) + +def test_when_otherwise(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + + df = nw.from_native(constructor(data)) + result = df.with_columns(when(nw.col("a") == 1).then(3).otherwise(6).alias("a_when")) + expected = { + "a": [1, 1, 2], + "b": ["a", "b", "c"], + "c": [4.1, 5.0, 6.0], + "d": [True, False, True], + "a_when": [3, 3, 6], + } + compare_dicts(result, expected) From a7f442ae497feef623f5e79409d754711a08b231 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 22 Jul 2024 16:49:41 +0300 Subject: [PATCH 07/78] remove unnecessary changes --- narwhals/_pandas_like/expr.py | 5 ++--- narwhals/expr.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 31120a88f..d5ec89c32 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -470,8 +470,8 @@ def total_nanoseconds(self) -> PandasLikeExpr: def to_string(self, format: str) -> PandasLikeExpr: # noqa: A002 return reuse_series_namespace_implementation( - self._expr, "dt", "to_string", format - ) + self._expr, "dt", "to_string", format + ) class PandasLikeExprNameNamespace: def __init__(self: Self, expr: PandasLikeExpr) -> None: @@ -625,4 +625,3 @@ def to_uppercase(self: Self) -> PandasLikeExpr: implementation=self._expr._implementation, backend_version=self._expr._backend_version, ) ->>>>>>> main diff --git a/narwhals/expr.py b/narwhals/expr.py index a489fc0af..3dff0c676 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3016,7 +3016,7 @@ def to_string(self, format: str) -> Expr: # noqa: A002 of trailing zeros. Nonetheless, this is probably consistent enough for most applications. - If you have an application here this is not enough, please open an issue + If you have an application where this is not enough, please open an issue and let us know. Examples: From 7f23f051d9aecdb7dc6d3e84d9a85d36d8e86a5e Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 22 Jul 2024 16:51:44 +0300 Subject: [PATCH 08/78] add back the change in version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 884d4a680..d00b10e89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "narwhals" -version = "1.1.1" +version = "1.1.3" authors = [ { name="Marco Gorelli", email="33491632+MarcoGorelli@users.noreply.github.com" }, ] From 7cc3aad3926d0e448770eeebdf9bf485b42f6a5c Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 22 Jul 2024 17:02:44 +0300 Subject: [PATCH 09/78] fix rename change --- tests/test_where.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_where.py b/tests/test_where.py index 3661db0c5..cc95cc347 100644 --- a/tests/test_where.py +++ b/tests/test_where.py @@ -5,7 +5,7 @@ import pytest import narwhals.stable.v1 as nw -from narwhals.expression import when +from narwhals.expr import when from tests.utils import compare_dicts data = { From ab85e406c69f7ece65da31190900f7fa969f36dd Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 22 Jul 2024 17:03:52 +0300 Subject: [PATCH 10/78] rename test file --- tests/{test_where.py => test_when.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_where.py => test_when.py} (100%) diff --git a/tests/test_where.py b/tests/test_when.py similarity index 100% rename from tests/test_where.py rename to tests/test_when.py From 4a8ac56f9cb340cc8c34fec5db6199f3cb527f2e Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 22 Jul 2024 19:02:14 +0300 Subject: [PATCH 11/78] fix forgotten memeber change --- narwhals/_pandas_like/namespace.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index b62327e86..10a31b19e 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -258,17 +258,17 @@ def concat( ) raise NotImplementedError - def when(self, *predicates: IntoPandasLikeExpr | Iterable[IntoPandasLikeExpr], **conditions: Any) -> PandasWhen: # noqa: ARG002 + def when(self, *predicates: IntoPandasLikeExpr, **conditions: Any) -> PandasWhen: # noqa: ARG002 plx = self.__class__(self._implementation, self._backend_version) condition = plx.all_horizontal(*predicates) - return PandasWhen(condition) + return PandasWhen(condition, self._implementation, self._backend_version) -class InnerPandasWhen: - def __init__(self, implementation: Implementation, backend_version: tuple[int, ...], condition: PandasLikeExpr, value: Any, otherise_value: Any = None) -> None: +class PandasWhen: + def __init__(self, condition: PandasLikeExpr, implementation: Implementation, backend_version: tuple[int, ...], then_value: Any = None, otherise_value: Any = None) -> None: self._implementation = implementation self._backend_version = backend_version self._condition = condition - self._value = value + self._then_value = then_value self._otherwise_value = otherise_value def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: @@ -278,20 +278,18 @@ def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: condition = self._condition._call(df)[0] - value_series = plx._create_broadcast_series_from_scalar(self._value, condition) - none_series = plx._create_broadcast_series_from_scalar(self._otherwise_value, condition) + value_series = plx._create_broadcast_series_from_scalar(self._then_value, condition) + otherwise_series = plx._create_broadcast_series_from_scalar(self._otherwise_value, condition) return [ - value_series.zip_with(condition, none_series) + value_series.zip_with(condition, otherwise_series) ] -class PandasWhen: - def __init__(self, condition: PandasLikeExpr) -> None: - self._condition = condition - def then(self, value: Any) -> PandasThen: + self._then_value = value + return PandasThen( - InnerPandasWhen(self._condition._implementation, self._condition._backend_version, self._condition, value), + self, depth=0, function_name="whenthen", root_names=None, @@ -304,7 +302,7 @@ class PandasThen(PandasLikeExpr): def __init__( self, - call: InnerPandasWhen, + call: PandasWhen, *, depth: int, function_name: str, From 8283f2481294ee5216c9e67e52069b1ab797bd34 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 22 Jul 2024 19:08:48 +0300 Subject: [PATCH 12/78] make api identical --- narwhals/_pandas_like/namespace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 10a31b19e..06af399d3 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -258,7 +258,7 @@ def concat( ) raise NotImplementedError - def when(self, *predicates: IntoPandasLikeExpr, **conditions: Any) -> PandasWhen: # noqa: ARG002 + def when(self, *predicates: IntoPandasLikeExpr, **constraints: Any) -> PandasWhen: # noqa: ARG002 plx = self.__class__(self._implementation, self._backend_version) condition = plx.all_horizontal(*predicates) return PandasWhen(condition, self._implementation, self._backend_version) From f1c667eb57f5b7ea791a811dd74c24903f12f78d Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 22 Jul 2024 19:12:09 +0300 Subject: [PATCH 13/78] remove unnecessary diff --- .gitignore | 2 +- narwhals/_pandas_like/expr.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e3bba127f..3911158a8 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,4 @@ todo.md site/ .coverage.* .nox -docs/api-completeness.md +docs/api-completeness.md \ No newline at end of file diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index d5ec89c32..f846da610 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -473,6 +473,7 @@ def to_string(self, format: str) -> PandasLikeExpr: # noqa: A002 self._expr, "dt", "to_string", format ) + class PandasLikeExprNameNamespace: def __init__(self: Self, expr: PandasLikeExpr) -> None: self._expr = expr From 74937ea9f37d263c3efac218ebd0afbeddc4d9b5 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 23 Jul 2024 12:15:36 +0300 Subject: [PATCH 14/78] add when documentation --- narwhals/expr.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/narwhals/expr.py b/narwhals/expr.py index 3dff0c676..5aba8cc61 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3622,6 +3622,51 @@ def otherwise(self, value: Any) -> Expr: return Expr(lambda plx: self._call(plx).otherwise(value)) def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: # noqa: ARG001 + """ + Start a `when-then-otherwise` expression. + Expression similar to an `if-else` statement in Python. Always initiated by a `pl.when().then()`., and optionally followed by chaining one or more `.when().then()` statements. + Chained when-then operations should be read as Python `if, elif, ... elif` blocks, not as `if, if, ... if`, i.e. the first condition that evaluates to `True` will be picked. + If none of the conditions are `True`, an optional `.otherwise()` can be appended at the end. If not appended, and none of the conditions are `True`, `None` will be returned. + + Parameters: + predicates + Condition(s) that must be met in order to apply the subsequent statement. Accepts one or more boolean expressions, which are implicitly combined with `&`. String input is parsed as a column name. + constraints + Apply conditions as `col_name = value` keyword arguments that are treated as equality matches, such as `x = 123`. As with the predicates parameter, multiple conditions are implicitly combined using `&`. + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) + >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) + + We define a dataframe-agnostic function: + + >>> @nw.narwhalify + ... def func(df_any): + ... from narwhals.expr import when + ... return df_any.with_columns(when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when")) + + We can then pass either pandas or polars to `func`: + + >>> func(df_pd) + a b a_when + 0 1 5 5 + 1 2 10 5 + 2 3 15 6 + >>> func(df_pl) + shape: (3, 3) + ┌─────┬─────┬────────┐ + │ a ┆ b ┆ a_when │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i32 │ + ╞═════╪═════╪════════╡ + │ 1 ┆ 5 ┆ 5 │ + │ 2 ┆ 10 ┆ 5 │ + │ 3 ┆ 15 ┆ 6 │ + └─────┴─────┴────────┘ + """ return When(reduce(lambda a, b: a & b, flatten([predicates]))) From 5b030d660ecbbc38e2e2fcb3b5bafd1a8fb8c9a7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 09:22:15 +0000 Subject: [PATCH 15/78] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- narwhals/_pandas_like/namespace.py | 31 ++++++++++++++++++++---------- narwhals/expr.py | 12 +++++++++--- tests/test_when.py | 1 + 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 06af399d3..51c82648f 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -258,13 +258,21 @@ def concat( ) raise NotImplementedError - def when(self, *predicates: IntoPandasLikeExpr, **constraints: Any) -> PandasWhen: # noqa: ARG002 + def when(self, *predicates: IntoPandasLikeExpr, **constraints: Any) -> PandasWhen: # noqa: ARG002 plx = self.__class__(self._implementation, self._backend_version) condition = plx.all_horizontal(*predicates) return PandasWhen(condition, self._implementation, self._backend_version) + class PandasWhen: - def __init__(self, condition: PandasLikeExpr, implementation: Implementation, backend_version: tuple[int, ...], then_value: Any = None, otherise_value: Any = None) -> None: + def __init__( + self, + condition: PandasLikeExpr, + implementation: Implementation, + backend_version: tuple[int, ...], + then_value: Any = None, + otherise_value: Any = None, + ) -> None: self._implementation = implementation self._backend_version = backend_version self._condition = condition @@ -274,18 +282,21 @@ def __init__(self, condition: PandasLikeExpr, implementation: Implementation, ba def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: from narwhals._pandas_like.namespace import PandasLikeNamespace - plx = PandasLikeNamespace(implementation=self._implementation, backend_version=self._backend_version) + plx = PandasLikeNamespace( + implementation=self._implementation, backend_version=self._backend_version + ) condition = self._condition._call(df)[0] - value_series = plx._create_broadcast_series_from_scalar(self._then_value, condition) - otherwise_series = plx._create_broadcast_series_from_scalar(self._otherwise_value, condition) - return [ - value_series.zip_with(condition, otherwise_series) - ] + value_series = plx._create_broadcast_series_from_scalar( + self._then_value, condition + ) + otherwise_series = plx._create_broadcast_series_from_scalar( + self._otherwise_value, condition + ) + return [value_series.zip_with(condition, otherwise_series)] def then(self, value: Any) -> PandasThen: - self._then_value = value return PandasThen( @@ -298,8 +309,8 @@ def then(self, value: Any) -> PandasThen: backend_version=self._condition._backend_version, ) -class PandasThen(PandasLikeExpr): +class PandasThen(PandasLikeExpr): def __init__( self, call: PandasWhen, diff --git a/narwhals/expr.py b/narwhals/expr.py index 5aba8cc61..6eb0dd135 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3605,6 +3605,7 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ) ) + class When: def __init__(self, condition: Expr) -> None: self._condition = condition @@ -3614,14 +3615,16 @@ def __init__(self, condition: Expr) -> None: def then(self, value: Any) -> Then: return Then(lambda plx: plx.when(self._condition._call(plx)).then(value)) + class Then(Expr): - def __init__(self, call) -> None: # noqa: ANN001 + def __init__(self, call) -> None: # noqa: ANN001 self._call = call def otherwise(self, value: Any) -> Expr: return Expr(lambda plx: self._call(plx).otherwise(value)) -def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: # noqa: ARG001 + +def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: # noqa: ARG001 """ Start a `when-then-otherwise` expression. Expression similar to an `if-else` statement in Python. Always initiated by a `pl.when().then()`., and optionally followed by chaining one or more `.when().then()` statements. @@ -3646,7 +3649,10 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When >>> @nw.narwhalify ... def func(df_any): ... from narwhals.expr import when - ... return df_any.with_columns(when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when")) + ... + ... return df_any.with_columns( + ... when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") + ... ) We can then pass either pandas or polars to `func`: diff --git a/tests/test_when.py b/tests/test_when.py index cc95cc347..90df13180 100644 --- a/tests/test_when.py +++ b/tests/test_when.py @@ -31,6 +31,7 @@ def test_when(request: Any, constructor: Any) -> None: } compare_dicts(result, expected) + def test_when_otherwise(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor): request.applymarker(pytest.mark.xfail) From 7390e1aeacd5034ecf41cbd9214b199003604a2c Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 23 Jul 2024 13:05:29 +0300 Subject: [PATCH 16/78] address mypy issues --- narwhals/_pandas_like/namespace.py | 5 ++++- narwhals/expr.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 51c82648f..cd1deee18 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -332,6 +332,9 @@ def __init__( self._output_names = output_names def otherwise(self, value: Any) -> PandasLikeExpr: - self._call._otherwise_value = value + # type ignore because we are setting the `_call` attribute to a + # callable object of type `PandasWhen`, base class has the attribute as + # only a `Callable` + self._call._otherwise_value = value # type: ignore self._function_name = "whenotherwise" return self diff --git a/narwhals/expr.py b/narwhals/expr.py index 6eb0dd135..2ef1e5308 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3617,7 +3617,7 @@ def then(self, value: Any) -> Then: class Then(Expr): - def __init__(self, call) -> None: # noqa: ANN001 + def __init__(self, call: Callable[[Any], Any]) -> None: self._call = call def otherwise(self, value: Any) -> Expr: From 279e3ad5796ebf50e20967f57114eb82e7df9929 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 10:08:20 +0000 Subject: [PATCH 17/78] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- narwhals/_pandas_like/namespace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index cd1deee18..9411ba500 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -335,6 +335,6 @@ def otherwise(self, value: Any) -> PandasLikeExpr: # type ignore because we are setting the `_call` attribute to a # callable object of type `PandasWhen`, base class has the attribute as # only a `Callable` - self._call._otherwise_value = value # type: ignore + self._call._otherwise_value = value # type: ignore self._function_name = "whenotherwise" return self From 63048ee3752b7c13a7773e6fdbda21bdea478249 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 23 Jul 2024 13:11:57 +0300 Subject: [PATCH 18/78] address ruff type-ignore blanket issue --- narwhals/_pandas_like/namespace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 9411ba500..e05df30e8 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -335,6 +335,6 @@ def otherwise(self, value: Any) -> PandasLikeExpr: # type ignore because we are setting the `_call` attribute to a # callable object of type `PandasWhen`, base class has the attribute as # only a `Callable` - self._call._otherwise_value = value # type: ignore + self._call._otherwise_value = value # type: ignore[attr-defined] self._function_name = "whenotherwise" return self From e96af891e41c163c581db3cce0c1039448b953e1 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 23 Jul 2024 15:00:42 +0300 Subject: [PATCH 19/78] support `Iterable[Expr]` in the pandas api --- narwhals/_pandas_like/namespace.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index e05df30e8..840414525 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -258,9 +258,13 @@ def concat( ) raise NotImplementedError - def when(self, *predicates: IntoPandasLikeExpr, **constraints: Any) -> PandasWhen: # noqa: ARG002 + def when( + self, + *predicates: IntoPandasLikeExpr | Iterable[IntoPandasLikeExpr], + **constraints: Any, # noqa: ARG002 + ) -> PandasWhen: plx = self.__class__(self._implementation, self._backend_version) - condition = plx.all_horizontal(*predicates) + condition = plx.all_horizontal(*flatten(predicates)) return PandasWhen(condition, self._implementation, self._backend_version) From d4f0e9cc78df6a3cf577ce7409ebe4cad83e3848 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 23 Jul 2024 15:24:34 +0300 Subject: [PATCH 20/78] move when test file to a better location --- tests/{ => expr_and_series}/test_when.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{ => expr_and_series}/test_when.py (100%) diff --git a/tests/test_when.py b/tests/expr_and_series/test_when.py similarity index 100% rename from tests/test_when.py rename to tests/expr_and_series/test_when.py From 99d9899e42c63d671e26f6a86ddffa2364477657 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 23 Jul 2024 15:30:06 +0300 Subject: [PATCH 21/78] make when test filename similar to other tests --- tests/expr_and_series/{test_when.py => when_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/expr_and_series/{test_when.py => when_test.py} (100%) diff --git a/tests/expr_and_series/test_when.py b/tests/expr_and_series/when_test.py similarity index 100% rename from tests/expr_and_series/test_when.py rename to tests/expr_and_series/when_test.py From 71e542d4f611d4e2ffeb8a031671b02bc300b887 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Wed, 17 Jul 2024 12:38:19 +0300 Subject: [PATCH 22/78] add simple when --- narwhals/_pandas_like/expr.py | 10 +- narwhals/expr.py | 29 ++ narwhals/expressions/whenthen.py | 0 tests/test_common.py | 718 +++++++++++++++++++++++++++++++ 4 files changed, 756 insertions(+), 1 deletion(-) create mode 100644 narwhals/expressions/whenthen.py create mode 100644 tests/test_common.py diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index f846da610..40dc15232 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -4,11 +4,11 @@ from typing import Any from typing import Callable from typing import Literal +from typing import Iterable from narwhals._expression_parsing import reuse_series_implementation from narwhals._expression_parsing import reuse_series_namespace_implementation from narwhals._pandas_like.series import PandasLikeSeries - if TYPE_CHECKING: from typing_extensions import Self @@ -343,6 +343,14 @@ def cat(self: Self) -> PandasLikeExprCatNamespace: def name(self: Self) -> PandasLikeExprNameNamespace: return PandasLikeExprNameNamespace(self) + def when(self, *predicates: PandasExpr | Iterable[PandasExpr], **conditions: Any) -> PandasWhen: + # TODO: Support conditions + from narwhals._pandas_like.namespace import PandasNamespace + + plx = PandasNamespace(self._implementation) + condition = plx.all_horizontal(*predicates) + return PandasWhen(self, condition) + class PandasLikeExprCatNamespace: def __init__(self, expr: PandasLikeExpr) -> None: diff --git a/narwhals/expr.py b/narwhals/expr.py index 2ef1e5308..c14256936 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -12,6 +12,9 @@ from narwhals.dtypes import translate_dtype from narwhals.utils import flatten + +from functools import reduce + if TYPE_CHECKING: from typing_extensions import Self @@ -3605,6 +3608,32 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ) ) +class When: + def __init__(self, condition: Expr) -> None: + self._condition = condition + + def then(self, value: Any) -> Then: + return Then(self, value=value) + +class Then(Expr): + def __init__(self, when: When, *, value: Any) -> None: + self._when = when + self._then_value = value + + def func(plx): + return plx.when(self._when._condition._call(plx)).then(self._then_value) + + self._call = func + + def otherwise(self, value: Any) -> Expr: + def func(plx): + return plx.when(self._when._condition._call(plx)).then(self._then_value).otherwise(value) + + return Expr(func) + +def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: + return When(reduce(lambda a, b: a & b, flatten([predicates]))) + class When: def __init__(self, condition: Expr) -> None: diff --git a/narwhals/expressions/whenthen.py b/narwhals/expressions/whenthen.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_common.py b/tests/test_common.py new file mode 100644 index 000000000..4162d7b24 --- /dev/null +++ b/tests/test_common.py @@ -0,0 +1,718 @@ +from __future__ import annotations + +import os +import warnings +from typing import Any +from typing import Literal + +import numpy as np +import pandas as pd +import polars as pl +import pytest +from pandas.testing import assert_series_equal as pd_assert_series_equal +from polars.testing import assert_series_equal as pl_assert_series_equal + +import narwhals as nw +from narwhals.utils import parse_version +from tests.utils import compare_dicts + +df_pandas = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) +if parse_version(pd.__version__) >= parse_version("1.5.0"): + df_pandas_pyarrow = pd.DataFrame( + {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + ).astype( + { + "a": "Int64[pyarrow]", + "b": "Int64[pyarrow]", + "z": "Float64[pyarrow]", + } + ) + df_pandas_nullable = pd.DataFrame( + {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + ).astype( + { + "a": "Int64", + "b": "Int64", + "z": "Float64", + } + ) +else: # pragma: no cover + df_pandas_pyarrow = df_pandas + df_pandas_nullable = df_pandas +df_polars = pl.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) +df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) +df_pandas_na = pd.DataFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]}) +df_lazy_na = pl.LazyFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]}) +df_right_pandas = pd.DataFrame({"c": [6, 12, -1], "d": [0, -4, 2]}) +df_right_lazy = pl.LazyFrame({"c": [6, 12, -1], "d": [0, -4, 2]}) + +if os.environ.get("CI", None): + try: + import modin.pandas as mpd + except ImportError: # pragma: no cover + df_mpd = df_pandas.copy() + else: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + df_mpd = mpd.DataFrame( + pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) + ) +else: # pragma: no cover + df_mpd = df_pandas.copy() + + +@pytest.mark.parametrize( + "df_raw", + [df_pandas, df_polars, df_lazy, df_pandas_nullable, df_pandas_pyarrow], +) +def test_sort(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.sort("a", "b") + result_native = nw.to_native(result) + expected = { + "a": [1, 2, 3], + "b": [4, 6, 4], + "z": [7.0, 9.0, 8.0], + } + compare_dicts(result_native, expected) + result = df.sort("a", "b", descending=[True, False]) + result_native = nw.to_native(result) + expected = { + "a": [3, 2, 1], + "b": [4, 6, 4], + "z": [8.0, 9.0, 7.0], + } + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", + [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], +) +def test_filter(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.filter(nw.col("a") > 1) + result_native = nw.to_native(result) + expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]} + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", + [df_pandas, df_polars], +) +def test_filter_series(df_raw: Any) -> None: + df = nw.DataFrame(df_raw).with_columns(mask=nw.col("a") > 1) + result = df.filter(df["mask"]).drop("mask") + result_native = nw.to_native(result) + expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]} + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", + [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], +) +def test_add(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.with_columns( + c=nw.col("a") + nw.col("b"), + d=nw.col("a") - nw.col("a").mean(), + e=nw.col("a") - nw.col("a").std(), + ) + result_native = nw.to_native(result) + expected = { + "a": [1, 3, 2], + "b": [4, 4, 6], + "z": [7.0, 8.0, 9.0], + "c": [5, 7, 8], + "d": [-1.0, 1.0, 0.0], + "e": [0.0, 2.0, 1.0], + } + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", + [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], +) +def test_std(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.select( + nw.col("a").std().alias("a_ddof_default"), + nw.col("a").std(ddof=1).alias("a_ddof_1"), + nw.col("a").std(ddof=0).alias("a_ddof_0"), + nw.col("b").std(ddof=2).alias("b_ddof_2"), + nw.col("z").std(ddof=0).alias("z_ddof_0"), + ) + result_native = nw.to_native(result) + expected = { + "a_ddof_default": [1.0], + "a_ddof_1": [1.0], + "a_ddof_0": [0.816497], + "b_ddof_2": [1.632993], + "z_ddof_0": [0.816497], + } + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", + [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], +) +def test_double(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.with_columns(nw.all() * 2) + result_native = nw.to_native(result) + expected = {"a": [2, 6, 4], "b": [8, 8, 12], "z": [14.0, 16.0, 18.0]} + compare_dicts(result_native, expected) + result = df.with_columns(nw.col("a").alias("o"), nw.all() * 2) + result_native = nw.to_native(result) + expected = {"o": [1, 3, 2], "a": [2, 6, 4], "b": [8, 8, 12], "z": [14.0, 16.0, 18.0]} + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", + [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], +) +def test_select(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.select("a") + result_native = nw.to_native(result) + expected = {"a": [1, 3, 2]} + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize("df_raw", [df_pandas, df_lazy, df_pandas_nullable]) +def test_sumh(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.with_columns(horizonal_sum=nw.sum_horizontal(nw.col("a"), nw.col("b"))) + result_native = nw.to_native(result) + expected = { + "a": [1, 3, 2], + "b": [4, 4, 6], + "z": [7.0, 8.0, 9.0], + "horizonal_sum": [5, 7, 8], + } + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] +) +def test_sumh_literal(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.with_columns(horizonal_sum=nw.sum_horizontal("a", nw.col("b"))) + result_native = nw.to_native(result) + expected = { + "a": [1, 3, 2], + "b": [4, 4, 6], + "z": [7.0, 8.0, 9.0], + "horizonal_sum": [5, 7, 8], + } + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] +) +def test_sum_all(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.select(nw.all().sum()) + result_native = nw.to_native(result) + expected = {"a": [6], "b": [14], "z": [24.0]} + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] +) +def test_double_selected(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.select(nw.col("a", "b") * 2) + result_native = nw.to_native(result) + expected = {"a": [2, 6, 4], "b": [8, 8, 12]} + compare_dicts(result_native, expected) + result = df.select("z", nw.col("a", "b") * 2) + result_native = nw.to_native(result) + expected = {"z": [7, 8, 9], "a": [2, 6, 4], "b": [8, 8, 12]} + compare_dicts(result_native, expected) + result = df.select("a").select(nw.col("a") + nw.all()) + result_native = nw.to_native(result) + expected = {"a": [2, 6, 4]} + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] +) +def test_rename(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.rename({"a": "x", "b": "y"}) + result_native = nw.to_native(result) + expected = {"x": [1, 3, 2], "y": [4, 4, 6], "z": [7.0, 8, 9]} + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] +) +def test_join(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + df_right = df + result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner") + result_native = nw.to_native(result) + expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "z_right": [7.0, 8, 9]} + compare_dicts(result_native, expected) + + with pytest.raises(NotImplementedError): + result = df.join(df_right, left_on="a", right_on="a", how="left") # type: ignore[arg-type] + + result = df.collect().join(df_right.collect(), left_on="a", right_on="a", how="inner") # type: ignore[assignment] + result_native = nw.to_native(result) + expected = { + "a": [1, 3, 2], + "b": [4, 4, 6], + "b_right": [4, 4, 6], + "z": [7.0, 8, 9], + "z_right": [7.0, 8, 9], + } + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] +) +def test_schema(df_raw: Any) -> None: + result = nw.LazyFrame(df_raw).schema + expected = {"a": nw.Int64, "b": nw.Int64, "z": nw.Float64} + assert result == expected + result = nw.LazyFrame(df_raw).collect().schema + expected = {"a": nw.Int64, "b": nw.Int64, "z": nw.Float64} + assert result == expected + result = nw.LazyFrame(df_raw).columns # type: ignore[assignment] + expected = ["a", "b", "z"] # type: ignore[assignment] + assert result == expected + result = nw.LazyFrame(df_raw).collect().columns # type: ignore[assignment] + expected = ["a", "b", "z"] # type: ignore[assignment] + assert result == expected + + +@pytest.mark.parametrize( + "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] +) +def test_columns(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = df.columns + expected = ["a", "b", "z"] + assert result == expected + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) +def test_lazy_instantiation(df_raw: Any) -> None: + result = nw.LazyFrame(df_raw) + result_native = nw.to_native(result) + expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize("df_raw", [df_lazy]) +def test_lazy_instantiation_error(df_raw: Any) -> None: + with pytest.raises( + TypeError, match="Can't instantiate DataFrame from Polars LazyFrame." + ): + _ = nw.DataFrame(df_raw).shape + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) +def test_eager_instantiation(df_raw: Any) -> None: + result = nw.DataFrame(df_raw) + result_native = nw.to_native(result) + expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + compare_dicts(result_native, expected) + + +def test_accepted_dataframes() -> None: + array = np.array([[0, 4.0], [2, 5]]) + with pytest.raises( + TypeError, + match="Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: ", + ): + nw.DataFrame(array) + with pytest.raises( + TypeError, + match="Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: ", + ): + nw.LazyFrame(array) + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) +@pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning") +def test_convert_pandas(df_raw: Any) -> None: + result = nw.from_native(df_raw).to_pandas() # type: ignore[union-attr] + expected = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) + pd.testing.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "df_raw", [df_polars, df_pandas, df_mpd, df_pandas_nullable, df_pandas_pyarrow] +) +@pytest.mark.filterwarnings( + r"ignore:np\.find_common_type is deprecated\.:DeprecationWarning" +) +def test_convert_numpy(df_raw: Any) -> None: + result = nw.DataFrame(df_raw).to_numpy() + expected = np.array([[1, 3, 2], [4, 4, 6], [7.0, 8, 9]]).T + np.testing.assert_array_equal(result, expected) + assert result.dtype == "float64" + result = nw.DataFrame(df_raw).__array__() + np.testing.assert_array_equal(result, expected) + assert result.dtype == "float64" + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) +def test_shape(df_raw: Any) -> None: + result = nw.DataFrame(df_raw).shape + expected = (3, 3) + assert result == expected + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) +def test_expr_binary(df_raw: Any) -> None: + result = nw.LazyFrame(df_raw).with_columns( + a=(1 + 3 * nw.col("a")) * (1 / nw.col("a")), + b=nw.col("z") / (2 - nw.col("b")), + c=nw.col("a") + nw.col("b") / 2, + d=nw.col("a") - nw.col("b"), + e=((nw.col("a") > nw.col("b")) & (nw.col("a") >= nw.col("z"))).cast(nw.Int64), + f=( + (nw.col("a") < nw.col("b")) + | (nw.col("a") <= nw.col("z")) + | (nw.col("a") == 1) + ).cast(nw.Int64), + g=nw.col("a") != 1, + h=(False & (nw.col("a") != 1)), + i=(False | (nw.col("a") != 1)), + j=2 ** nw.col("a"), + k=2 // nw.col("a"), + l=nw.col("a") // 2, + m=nw.col("a") ** 2, + n=nw.col("a") % 2, + o=2 % nw.col("a"), + ) + result_native = nw.to_native(result) + expected = { + "a": [4, 3.333333, 3.5], + "b": [-3.5, -4.0, -2.25], + "z": [7.0, 8.0, 9.0], + "c": [3, 5, 5], + "d": [-3, -1, -4], + "e": [0, 0, 0], + "f": [1, 1, 1], + "g": [False, True, True], + "h": [False, False, False], + "i": [False, True, True], + "j": [2, 8, 4], + "k": [2, 0, 1], + "l": [0, 1, 1], + "m": [1, 9, 4], + "n": [1, 1, 0], + "o": [0, 2, 0], + } + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_lazy]) +def test_expr_unary(df_raw: Any) -> None: + result = ( + nw.from_native(df_raw) + .with_columns( + a_mean=nw.col("a").mean(), + a_sum=nw.col("a").sum(), + b_nunique=nw.col("b").n_unique(), + z_min=nw.col("z").min(), + z_max=nw.col("z").max(), + ) + .select(nw.col("a_mean", "a_sum", "b_nunique", "z_min", "z_max").unique()) + ) + result_native = nw.to_native(result) + expected = {"a_mean": [2], "a_sum": [6], "b_nunique": [2], "z_min": [7], "z_max": [9]} + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) +def test_expr_transform(df_raw: Any) -> None: + result = nw.LazyFrame(df_raw).with_columns( + a=nw.col("a").is_between(-1, 1), b=nw.col("b").is_in([4, 5]) + ) + result_native = nw.to_native(result) + expected = {"a": [True, False, False], "b": [True, True, False], "z": [7, 8, 9]} + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_lazy]) +def test_expr_min_max(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result_min = nw.to_native(df.select(nw.min("a", "b", "z"))) + result_max = nw.to_native(df.select(nw.max("a", "b", "z"))) + expected_min = {"a": [1], "b": [4], "z": [7]} + expected_max = {"a": [3], "b": [6], "z": [9]} + compare_dicts(result_min, expected_min) + compare_dicts(result_max, expected_max) + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) +def test_expr_sample(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result_shape = nw.to_native(df.select(nw.col("a").sample(n=2)).collect()).shape + expected = (2, 1) + assert result_shape == expected + result_shape = nw.to_native(df.collect()["a"].sample(n=2)).shape + expected = (2,) # type: ignore[assignment] + assert result_shape == expected + + +@pytest.mark.parametrize("df_raw", [df_pandas_na, df_lazy_na]) +def test_expr_na(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result_nna = nw.to_native( + df.filter((~nw.col("a").is_null()) & (~df.collect()["z"].is_null())) + ) + expected = {"a": [2], "b": [6], "z": [9]} + compare_dicts(result_nna, expected) + + +@pytest.mark.parametrize( + "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] +) +def test_head(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = nw.to_native(df.head(2)) + expected = {"a": [1, 3], "b": [4, 4], "z": [7.0, 8.0]} + compare_dicts(result, expected) + result = nw.to_native(df.collect().head(2)) + expected = {"a": [1, 3], "b": [4, 4], "z": [7.0, 8.0]} + compare_dicts(result, expected) + + +@pytest.mark.parametrize( + "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] +) +def test_unique(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = nw.to_native(df.unique("b").sort("b")) + expected = {"a": [1, 2], "b": [4, 6], "z": [7.0, 9.0]} + compare_dicts(result, expected) + result = nw.to_native(df.collect().unique("b").sort("b")) + expected = {"a": [1, 2], "b": [4, 6], "z": [7.0, 9.0]} + compare_dicts(result, expected) + + +@pytest.mark.parametrize("df_raw", [df_pandas_na, df_lazy_na]) +def test_drop_nulls(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = nw.to_native(df.select(nw.col("a").drop_nulls())) + expected = {"a": [3, 2]} + compare_dicts(result, expected) + result = nw.to_native(df.select(df.collect()["a"].drop_nulls())) + expected = {"a": [3, 2]} + compare_dicts(result, expected) + + +@pytest.mark.parametrize( + ("df_raw", "df_raw_right"), [(df_pandas, df_right_pandas), (df_lazy, df_right_lazy)] +) +def test_concat_horizontal(df_raw: Any, df_raw_right: Any) -> None: + df_left = nw.LazyFrame(df_raw) + df_right = nw.LazyFrame(df_raw_right) + result = nw.concat([df_left, df_right], how="horizontal") + result_native = nw.to_native(result) + expected = { + "a": [1, 3, 2], + "b": [4, 4, 6], + "z": [7.0, 8, 9], + "c": [6, 12, -1], + "d": [0, -4, 2], + } + compare_dicts(result_native, expected) + + with pytest.raises(ValueError, match="No items"): + nw.concat([]) + + +@pytest.mark.parametrize( + ("df_raw", "df_raw_right"), [(df_pandas, df_right_pandas), (df_lazy, df_right_lazy)] +) +def test_concat_vertical(df_raw: Any, df_raw_right: Any) -> None: + df_left = nw.LazyFrame(df_raw).collect().rename({"a": "c", "b": "d"}).lazy().drop("z") + df_right = nw.LazyFrame(df_raw_right) + result = nw.concat([df_left, df_right], how="vertical") + result_native = nw.to_native(result) + expected = {"c": [1, 3, 2, 6, 12, -1], "d": [4, 4, 6, 0, -4, 2]} + compare_dicts(result_native, expected) + with pytest.raises(ValueError, match="No items"): + nw.concat([], how="vertical") + with pytest.raises(Exception, match="unable to vstack"): + nw.concat([df_left, df_right.rename({"d": "i"})], how="vertical").collect() # type: ignore[union-attr] + + +@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) +def test_lazy(df_raw: Any) -> None: + df = nw.DataFrame(df_raw) + result = df.lazy() + assert isinstance(result, nw.LazyFrame) + + +def test_to_dict() -> None: + df = nw.DataFrame(df_pandas) + result = df.to_dict(as_series=True) + expected = { + "a": pd.Series([1, 3, 2], name="a"), + "b": pd.Series([4, 4, 6], name="b"), + "z": pd.Series([7.0, 8, 9], name="z"), + } + for key in expected: + pd_assert_series_equal(nw.to_native(result[key]), expected[key]) + + df = nw.DataFrame(df_polars) + result = df.to_dict(as_series=True) + expected = { + "a": pl.Series("a", [1, 3, 2]), + "b": pl.Series("b", [4, 4, 6]), + "z": pl.Series("z", [7.0, 8, 9]), + } + for key in expected: + pl_assert_series_equal(nw.to_native(result[key]), expected[key]) + + +@pytest.mark.parametrize( + "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] +) +def test_any_all(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = nw.to_native(df.select((nw.all() > 1).all())) + expected = {"a": [False], "b": [True], "z": [True]} + compare_dicts(result, expected) + result = nw.to_native(df.select((nw.all() > 1).any())) + expected = {"a": [True], "b": [True], "z": [True]} + compare_dicts(result, expected) + + +def test_invalid() -> None: + df = nw.LazyFrame(df_pandas) + with pytest.raises(ValueError, match="Multi-output"): + df.select(nw.all() + nw.all()) + with pytest.raises(TypeError, match="Perhaps you:"): + df.select([pl.col("a")]) # type: ignore[list-item] + with pytest.raises(TypeError, match="Perhaps you:"): + df.select([nw.col("a").cast(pl.Int64)]) + + +@pytest.mark.parametrize("df_raw", [df_pandas]) +def test_reindex(df_raw: Any) -> None: + df = nw.DataFrame(df_raw) + result = df.select("b", df["a"].sort(descending=True)) + expected = {"b": [4, 4, 6], "a": [3, 2, 1]} + compare_dicts(result, expected) + result = df.select("b", nw.col("a").sort(descending=True)) + compare_dicts(result, expected) + + s = df["a"] + result_s = s > s.sort() + assert not result_s[0] + assert result_s[1] + assert not result_s[2] + result = df.with_columns(s.sort()) + expected = {"a": [1, 2, 3], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} # type: ignore[list-item] + compare_dicts(result, expected) + with pytest.raises(ValueError, match="Multi-output expressions are not supported"): + nw.to_native(df.with_columns(nw.all() + nw.all())) + + +@pytest.mark.parametrize( + ("df_raw", "df_raw_right"), + [(df_pandas, df_polars), (df_polars, df_pandas)], +) +def test_library(df_raw: Any, df_raw_right: Any) -> None: + df_left = nw.LazyFrame(df_raw) + df_right = nw.LazyFrame(df_raw_right) + with pytest.raises( + NotImplementedError, match="Cross-library comparisons aren't supported" + ): + nw.concat([df_left, df_right], how="horizontal") + with pytest.raises( + NotImplementedError, match="Cross-library comparisons aren't supported" + ): + nw.concat([df_left, df_right], how="vertical") + with pytest.raises( + NotImplementedError, match="Cross-library comparisons aren't supported" + ): + df_left.join(df_right, left_on=["a"], right_on=["a"], how="inner") + + +@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) +def test_is_duplicated(df_raw: Any) -> None: + df = nw.DataFrame(df_raw) + result = nw.concat([df, df.head(1)]).is_duplicated() # type: ignore [union-attr] + expected = np.array([True, False, False, True]) + assert (result.to_numpy() == expected).all() + + +@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) +@pytest.mark.parametrize(("threshold", "expected"), [(0, False), (10, True)]) +def test_is_empty(df_raw: Any, threshold: Any, expected: Any) -> None: + df = nw.DataFrame(df_raw) + result = df.filter(nw.col("a") > threshold).is_empty() + assert result == expected + + +@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) +def test_is_unique(df_raw: Any) -> None: + df = nw.DataFrame(df_raw) + result = nw.concat([df, df.head(1)]).is_unique() # type: ignore [union-attr] + expected = np.array([False, True, True, False]) + assert (result.to_numpy() == expected).all() + + +@pytest.mark.parametrize("df_raw", [df_pandas_na, df_lazy_na.collect()]) +def test_null_count(df_raw: Any) -> None: + df = nw.DataFrame(df_raw) + result = nw.to_native(df.null_count()) + expected = {"a": [1], "b": [0], "z": [1]} + compare_dicts(result, expected) + + +@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) +@pytest.mark.parametrize( + ("interpolation", "expected"), + [ + ("lower", {"a": [1.0], "b": [4.0], "z": [7.0]}), + ("higher", {"a": [2.0], "b": [4.0], "z": [8.0]}), + ("midpoint", {"a": [1.5], "b": [4.0], "z": [7.5]}), + ("linear", {"a": [1.6], "b": [4.0], "z": [7.6]}), + ("nearest", {"a": [2.0], "b": [4.0], "z": [8.0]}), + ], +) +def test_quantile( + df_raw: Any, + interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"], + expected: dict[str, list[float]], +) -> None: + q = 0.3 + + df = nw.from_native(df_raw) + result = nw.to_native( + df.select(nw.all().quantile(quantile=q, interpolation=interpolation)) + ) + compare_dicts(result, expected) + +@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) +def test_when(df_raw: Any) -> None: + df = nw.DataFrame(df_raw) + result = df.with_columns( + a=nw.when(nw.col("a") > 2, 1).otherwise(0), + b=nw.when(nw.col("a") > 2, 1).when(nw.col("a") < 1, -1).otherwise(0), + ) + expected = {"a": [0, 1, 0], "b": [0, 1, 0], "z": [7.0, 8.0, 9.0]} + compare_dicts(result, expected) From 8b1355a24e6ffbe3d5682f1b4143f3fc16bf08c9 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Wed, 17 Jul 2024 12:44:10 +0300 Subject: [PATCH 23/78] lint with ruff --- narwhals/_pandas_like/expr.py | 9 +++++---- narwhals/expr.py | 3 --- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 40dc15232..58bb46ed7 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -3,12 +3,13 @@ from typing import TYPE_CHECKING from typing import Any from typing import Callable -from typing import Literal from typing import Iterable +from typing import Literal + +from narwhals._pandas_like.series import PandasSeries +from narwhals._pandas_like.utils import reuse_series_implementation +from narwhals._pandas_like.utils import reuse_series_namespace_implementation -from narwhals._expression_parsing import reuse_series_implementation -from narwhals._expression_parsing import reuse_series_namespace_implementation -from narwhals._pandas_like.series import PandasLikeSeries if TYPE_CHECKING: from typing_extensions import Self diff --git a/narwhals/expr.py b/narwhals/expr.py index c14256936..3a876cc27 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -12,9 +12,6 @@ from narwhals.dtypes import translate_dtype from narwhals.utils import flatten - -from functools import reduce - if TYPE_CHECKING: from typing_extensions import Self From eb361649024c0698cab6d52c3a9f6a6e9382c2ed Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Wed, 17 Jul 2024 17:06:18 +0300 Subject: [PATCH 24/78] use lambda expression --- narwhals/expr.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 3a876cc27..6af61ed96 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3617,18 +3617,12 @@ def __init__(self, when: When, *, value: Any) -> None: self._when = when self._then_value = value - def func(plx): - return plx.when(self._when._condition._call(plx)).then(self._then_value) - - self._call = func + self._call = lambda plx: plx.when(self._when._condition._call(plx)).then(self._then_value) def otherwise(self, value: Any) -> Expr: - def func(plx): - return plx.when(self._when._condition._call(plx)).then(self._then_value).otherwise(value) - - return Expr(func) + return Expr(lambda plx: plx.when(self._when._condition._call(plx)).then(self._then_value).otherwise(value)) -def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: +def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: # noqa: ARG001 return When(reduce(lambda a, b: a & b, flatten([predicates]))) From c9b09bfb370ee0e6f5d95f0021ace8bd45fad01d Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 22 Jul 2024 16:38:38 +0300 Subject: [PATCH 25/78] Fix errors from the migration --- narwhals/_pandas_like/expr.py | 64 +++++++++++++++++++++++++++++++++-- narwhals/expr.py | 15 ++++---- tests/test_where.py | 47 +++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 10 deletions(-) create mode 100644 tests/test_where.py diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 58bb46ed7..34ed69fdb 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING from typing import Any from typing import Callable -from typing import Iterable from typing import Literal from narwhals._pandas_like.series import PandasSeries @@ -352,7 +351,6 @@ def when(self, *predicates: PandasExpr | Iterable[PandasExpr], **conditions: Any condition = plx.all_horizontal(*predicates) return PandasWhen(self, condition) - class PandasLikeExprCatNamespace: def __init__(self, expr: PandasLikeExpr) -> None: self._expr = expr @@ -479,6 +477,7 @@ def total_nanoseconds(self) -> PandasLikeExpr: def to_string(self, format: str) -> PandasLikeExpr: # noqa: A002 return reuse_series_namespace_implementation( +<<<<<<< HEAD self._expr, "dt", "to_string", format ) @@ -635,3 +634,64 @@ def to_uppercase(self: Self) -> PandasLikeExpr: implementation=self._expr._implementation, backend_version=self._expr._backend_version, ) +||||||| parent of f3770b7 (Fix errors from the migration) + self._expr, "dt", "to_string", format + ) + +class PandasWhen: + def __init__(self, condition: PandasLikeExpr) -> None: + self._condition = condition + + def then(self, value: Any) -> PandasThen: + return PandasThen(self, value=value, implementation=self._condition._implementation) + +class PandasThen(PandasLikeExpr): + def __init__(self, when: PandasWhen, *, value: Any, implementation: Implementation, backend_version: tuple[int, ...]) -> None: + self._when = when + self._then_value = value + self._implementation = implementation + self.backend_version = backend_version + + def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: + from narwhals._pandas_like.namespace import PandasLikeNamespace + + plx = PandasLikeNamespace(implementation=self._implementation, backend_version=self.backend_version) + + condition = self._when._condition._call(df)[0] + + value_series = plx._create_series_from_scalar(self._then_value, condition) + none_series = plx._create_series_from_scalar(None, condition) + return [ + value_series.zip_with(condition, none_series) + ] + + self._call = func + self._depth = 0 + self._function_name = "whenthen" + self._root_names = None + self._output_names = None + + def otherwise(self, value: Any) -> PandasLikeExpr: + def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: + from narwhals._pandas_like.namespace import PandasLikeNamespace + plx = PandasLikeNamespace(implementation=self._implementation, backend_version=self.backend_version) + condition = self._when._condition._call(df)[0] + value_series = plx._create_series_from_scalar(self._then_value, condition) + otherwise_series = plx._create_series_from_scalar(value, condition) + return [ + value_series.zip_with(condition, otherwise_series) + ] + + return PandasLikeExpr( + func, + depth=0, + function_name="whenthenotherwise", + root_names=None, + output_names=None, + implementation=self._implementation, + backend_version=self.backend_version, + ) +======= + self._expr, "dt", "to_string", format + ) +>>>>>>> f3770b7 (Fix errors from the migration) diff --git a/narwhals/expr.py b/narwhals/expr.py index 6af61ed96..cc394487a 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3016,7 +3016,7 @@ def to_string(self, format: str) -> Expr: # noqa: A002 of trailing zeros. Nonetheless, this is probably consistent enough for most applications. - If you have an application where this is not enough, please open an issue + If you have an application here this is not enough, please open an issue and let us know. Examples: @@ -3608,19 +3608,18 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: class When: def __init__(self, condition: Expr) -> None: self._condition = condition + self._then_value = None + self._otehrwise_value = None def then(self, value: Any) -> Then: - return Then(self, value=value) + return Then(lambda plx: plx.when(self._condition._call(plx)).then(value)) class Then(Expr): - def __init__(self, when: When, *, value: Any) -> None: - self._when = when - self._then_value = value - - self._call = lambda plx: plx.when(self._when._condition._call(plx)).then(self._then_value) + def __init__(self, call) -> None: # noqa: ANN001 + self._call = call def otherwise(self, value: Any) -> Expr: - return Expr(lambda plx: plx.when(self._when._condition._call(plx)).then(self._then_value).otherwise(value)) + return Expr(lambda plx: self._call(plx).otherwise(value)) def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: # noqa: ARG001 return When(reduce(lambda a, b: a & b, flatten([predicates]))) diff --git a/tests/test_where.py b/tests/test_where.py new file mode 100644 index 000000000..3661db0c5 --- /dev/null +++ b/tests/test_where.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from typing import Any + +import pytest + +import narwhals.stable.v1 as nw +from narwhals.expression import when +from tests.utils import compare_dicts + +data = { + "a": [1, 1, 2], + "b": ["a", "b", "c"], + "c": [4.1, 5.0, 6.0], + "d": [True, False, True], +} + + +def test_when(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + + df = nw.from_native(constructor(data)) + result = df.with_columns(when(nw.col("a") == 1).then(value=3).alias("a_when")) + expected = { + "a": [1, 1, 2], + "b": ["a", "b", "c"], + "c": [4.1, 5.0, 6.0], + "d": [True, False, True], + "a_when": [3, 3, None], + } + compare_dicts(result, expected) + +def test_when_otherwise(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + + df = nw.from_native(constructor(data)) + result = df.with_columns(when(nw.col("a") == 1).then(3).otherwise(6).alias("a_when")) + expected = { + "a": [1, 1, 2], + "b": ["a", "b", "c"], + "c": [4.1, 5.0, 6.0], + "d": [True, False, True], + "a_when": [3, 3, 6], + } + compare_dicts(result, expected) From 2b1eabcb0eddccabbe3acc3ccf5810b11ac47fa5 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 22 Jul 2024 16:49:41 +0300 Subject: [PATCH 26/78] remove unnecessary changes --- narwhals/_pandas_like/expr.py | 9 ++++----- narwhals/expr.py | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 34ed69fdb..a16bb768a 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -477,11 +477,9 @@ def total_nanoseconds(self) -> PandasLikeExpr: def to_string(self, format: str) -> PandasLikeExpr: # noqa: A002 return reuse_series_namespace_implementation( -<<<<<<< HEAD self._expr, "dt", "to_string", format ) - class PandasLikeExprNameNamespace: def __init__(self: Self, expr: PandasLikeExpr) -> None: self._expr = expr @@ -634,9 +632,6 @@ def to_uppercase(self: Self) -> PandasLikeExpr: implementation=self._expr._implementation, backend_version=self._expr._backend_version, ) -||||||| parent of f3770b7 (Fix errors from the migration) - self._expr, "dt", "to_string", format - ) class PandasWhen: def __init__(self, condition: PandasLikeExpr) -> None: @@ -695,3 +690,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: self._expr, "dt", "to_string", format ) >>>>>>> f3770b7 (Fix errors from the migration) +||||||| parent of a7f442a (remove unnecessary changes) +>>>>>>> main +======= +>>>>>>> a7f442a (remove unnecessary changes) diff --git a/narwhals/expr.py b/narwhals/expr.py index cc394487a..6997c42f6 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3016,7 +3016,7 @@ def to_string(self, format: str) -> Expr: # noqa: A002 of trailing zeros. Nonetheless, this is probably consistent enough for most applications. - If you have an application here this is not enough, please open an issue + If you have an application where this is not enough, please open an issue and let us know. Examples: From 2ef564e93a8dd14f93b932f0592def68003ffb89 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 22 Jul 2024 19:12:09 +0300 Subject: [PATCH 27/78] remove unnecessary diff --- narwhals/_pandas_like/expr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index a16bb768a..5a8e63d47 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -480,6 +480,7 @@ def to_string(self, format: str) -> PandasLikeExpr: # noqa: A002 self._expr, "dt", "to_string", format ) + class PandasLikeExprNameNamespace: def __init__(self: Self, expr: PandasLikeExpr) -> None: self._expr = expr From 0e4773d51cee4ac4e62fed33f233874b5a6323c7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 12:56:07 +0000 Subject: [PATCH 28/78] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- narwhals/expr.py | 7 +++++-- tests/test_common.py | 1 + tests/test_where.py | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 6997c42f6..61199b5fa 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3605,6 +3605,7 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ) ) + class When: def __init__(self, condition: Expr) -> None: self._condition = condition @@ -3614,14 +3615,16 @@ def __init__(self, condition: Expr) -> None: def then(self, value: Any) -> Then: return Then(lambda plx: plx.when(self._condition._call(plx)).then(value)) + class Then(Expr): - def __init__(self, call) -> None: # noqa: ANN001 + def __init__(self, call) -> None: # noqa: ANN001 self._call = call def otherwise(self, value: Any) -> Expr: return Expr(lambda plx: self._call(plx).otherwise(value)) -def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: # noqa: ARG001 + +def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: # noqa: ARG001 return When(reduce(lambda a, b: a & b, flatten([predicates]))) diff --git a/tests/test_common.py b/tests/test_common.py index 4162d7b24..8abc0b765 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -707,6 +707,7 @@ def test_quantile( ) compare_dicts(result, expected) + @pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) def test_when(df_raw: Any) -> None: df = nw.DataFrame(df_raw) diff --git a/tests/test_where.py b/tests/test_where.py index 3661db0c5..776480ece 100644 --- a/tests/test_where.py +++ b/tests/test_where.py @@ -31,6 +31,7 @@ def test_when(request: Any, constructor: Any) -> None: } compare_dicts(result, expected) + def test_when_otherwise(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor): request.applymarker(pytest.mark.xfail) From 151fe14b3e8c2f96c67365712e01567f6a86a377 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 23 Jul 2024 16:01:50 +0300 Subject: [PATCH 29/78] fix rebase error --- narwhals/_pandas_like/expr.py | 14 +++----------- narwhals/expressions/whenthen.py | 0 2 files changed, 3 insertions(+), 11 deletions(-) delete mode 100644 narwhals/expressions/whenthen.py diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 5a8e63d47..52fdc0068 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -5,9 +5,9 @@ from typing import Callable from typing import Literal -from narwhals._pandas_like.series import PandasSeries -from narwhals._pandas_like.utils import reuse_series_implementation -from narwhals._pandas_like.utils import reuse_series_namespace_implementation +from narwhals._expression_parsing import reuse_series_implementation +from narwhals._expression_parsing import reuse_series_namespace_implementation +from narwhals._pandas_like.series import PandasLikeSeries if TYPE_CHECKING: from typing_extensions import Self @@ -687,11 +687,3 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: implementation=self._implementation, backend_version=self.backend_version, ) -======= - self._expr, "dt", "to_string", format - ) ->>>>>>> f3770b7 (Fix errors from the migration) -||||||| parent of a7f442a (remove unnecessary changes) ->>>>>>> main -======= ->>>>>>> a7f442a (remove unnecessary changes) diff --git a/narwhals/expressions/whenthen.py b/narwhals/expressions/whenthen.py deleted file mode 100644 index e69de29bb..000000000 From add7b896f8996ab97d582f4756658000085eab63 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 23 Jul 2024 16:03:23 +0300 Subject: [PATCH 30/78] remove files left from wrong rebase --- tests/test_common.py | 719 ------------------------------------------- tests/test_where.py | 48 --- 2 files changed, 767 deletions(-) delete mode 100644 tests/test_common.py delete mode 100644 tests/test_where.py diff --git a/tests/test_common.py b/tests/test_common.py deleted file mode 100644 index 8abc0b765..000000000 --- a/tests/test_common.py +++ /dev/null @@ -1,719 +0,0 @@ -from __future__ import annotations - -import os -import warnings -from typing import Any -from typing import Literal - -import numpy as np -import pandas as pd -import polars as pl -import pytest -from pandas.testing import assert_series_equal as pd_assert_series_equal -from polars.testing import assert_series_equal as pl_assert_series_equal - -import narwhals as nw -from narwhals.utils import parse_version -from tests.utils import compare_dicts - -df_pandas = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) -if parse_version(pd.__version__) >= parse_version("1.5.0"): - df_pandas_pyarrow = pd.DataFrame( - {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - ).astype( - { - "a": "Int64[pyarrow]", - "b": "Int64[pyarrow]", - "z": "Float64[pyarrow]", - } - ) - df_pandas_nullable = pd.DataFrame( - {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - ).astype( - { - "a": "Int64", - "b": "Int64", - "z": "Float64", - } - ) -else: # pragma: no cover - df_pandas_pyarrow = df_pandas - df_pandas_nullable = df_pandas -df_polars = pl.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) -df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) -df_pandas_na = pd.DataFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]}) -df_lazy_na = pl.LazyFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]}) -df_right_pandas = pd.DataFrame({"c": [6, 12, -1], "d": [0, -4, 2]}) -df_right_lazy = pl.LazyFrame({"c": [6, 12, -1], "d": [0, -4, 2]}) - -if os.environ.get("CI", None): - try: - import modin.pandas as mpd - except ImportError: # pragma: no cover - df_mpd = df_pandas.copy() - else: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=UserWarning) - df_mpd = mpd.DataFrame( - pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) - ) -else: # pragma: no cover - df_mpd = df_pandas.copy() - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_polars, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_sort(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.sort("a", "b") - result_native = nw.to_native(result) - expected = { - "a": [1, 2, 3], - "b": [4, 6, 4], - "z": [7.0, 9.0, 8.0], - } - compare_dicts(result_native, expected) - result = df.sort("a", "b", descending=[True, False]) - result_native = nw.to_native(result) - expected = { - "a": [3, 2, 1], - "b": [4, 6, 4], - "z": [8.0, 9.0, 7.0], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_filter(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.filter(nw.col("a") > 1) - result_native = nw.to_native(result) - expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_polars], -) -def test_filter_series(df_raw: Any) -> None: - df = nw.DataFrame(df_raw).with_columns(mask=nw.col("a") > 1) - result = df.filter(df["mask"]).drop("mask") - result_native = nw.to_native(result) - expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_add(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.with_columns( - c=nw.col("a") + nw.col("b"), - d=nw.col("a") - nw.col("a").mean(), - e=nw.col("a") - nw.col("a").std(), - ) - result_native = nw.to_native(result) - expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "z": [7.0, 8.0, 9.0], - "c": [5, 7, 8], - "d": [-1.0, 1.0, 0.0], - "e": [0.0, 2.0, 1.0], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_std(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.select( - nw.col("a").std().alias("a_ddof_default"), - nw.col("a").std(ddof=1).alias("a_ddof_1"), - nw.col("a").std(ddof=0).alias("a_ddof_0"), - nw.col("b").std(ddof=2).alias("b_ddof_2"), - nw.col("z").std(ddof=0).alias("z_ddof_0"), - ) - result_native = nw.to_native(result) - expected = { - "a_ddof_default": [1.0], - "a_ddof_1": [1.0], - "a_ddof_0": [0.816497], - "b_ddof_2": [1.632993], - "z_ddof_0": [0.816497], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_double(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.with_columns(nw.all() * 2) - result_native = nw.to_native(result) - expected = {"a": [2, 6, 4], "b": [8, 8, 12], "z": [14.0, 16.0, 18.0]} - compare_dicts(result_native, expected) - result = df.with_columns(nw.col("a").alias("o"), nw.all() * 2) - result_native = nw.to_native(result) - expected = {"o": [1, 3, 2], "a": [2, 6, 4], "b": [8, 8, 12], "z": [14.0, 16.0, 18.0]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", - [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow], -) -def test_select(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.select("a") - result_native = nw.to_native(result) - expected = {"a": [1, 3, 2]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_lazy, df_pandas_nullable]) -def test_sumh(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.with_columns(horizonal_sum=nw.sum_horizontal(nw.col("a"), nw.col("b"))) - result_native = nw.to_native(result) - expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "z": [7.0, 8.0, 9.0], - "horizonal_sum": [5, 7, 8], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_sumh_literal(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.with_columns(horizonal_sum=nw.sum_horizontal("a", nw.col("b"))) - result_native = nw.to_native(result) - expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "z": [7.0, 8.0, 9.0], - "horizonal_sum": [5, 7, 8], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_sum_all(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.select(nw.all().sum()) - result_native = nw.to_native(result) - expected = {"a": [6], "b": [14], "z": [24.0]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_double_selected(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.select(nw.col("a", "b") * 2) - result_native = nw.to_native(result) - expected = {"a": [2, 6, 4], "b": [8, 8, 12]} - compare_dicts(result_native, expected) - result = df.select("z", nw.col("a", "b") * 2) - result_native = nw.to_native(result) - expected = {"z": [7, 8, 9], "a": [2, 6, 4], "b": [8, 8, 12]} - compare_dicts(result_native, expected) - result = df.select("a").select(nw.col("a") + nw.all()) - result_native = nw.to_native(result) - expected = {"a": [2, 6, 4]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_rename(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.rename({"a": "x", "b": "y"}) - result_native = nw.to_native(result) - expected = {"x": [1, 3, 2], "y": [4, 4, 6], "z": [7.0, 8, 9]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_join(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - df_right = df - result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner") - result_native = nw.to_native(result) - expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "z_right": [7.0, 8, 9]} - compare_dicts(result_native, expected) - - with pytest.raises(NotImplementedError): - result = df.join(df_right, left_on="a", right_on="a", how="left") # type: ignore[arg-type] - - result = df.collect().join(df_right.collect(), left_on="a", right_on="a", how="inner") # type: ignore[assignment] - result_native = nw.to_native(result) - expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "b_right": [4, 4, 6], - "z": [7.0, 8, 9], - "z_right": [7.0, 8, 9], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_schema(df_raw: Any) -> None: - result = nw.LazyFrame(df_raw).schema - expected = {"a": nw.Int64, "b": nw.Int64, "z": nw.Float64} - assert result == expected - result = nw.LazyFrame(df_raw).collect().schema - expected = {"a": nw.Int64, "b": nw.Int64, "z": nw.Float64} - assert result == expected - result = nw.LazyFrame(df_raw).columns # type: ignore[assignment] - expected = ["a", "b", "z"] # type: ignore[assignment] - assert result == expected - result = nw.LazyFrame(df_raw).collect().columns # type: ignore[assignment] - expected = ["a", "b", "z"] # type: ignore[assignment] - assert result == expected - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_columns(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = df.columns - expected = ["a", "b", "z"] - assert result == expected - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) -def test_lazy_instantiation(df_raw: Any) -> None: - result = nw.LazyFrame(df_raw) - result_native = nw.to_native(result) - expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize("df_raw", [df_lazy]) -def test_lazy_instantiation_error(df_raw: Any) -> None: - with pytest.raises( - TypeError, match="Can't instantiate DataFrame from Polars LazyFrame." - ): - _ = nw.DataFrame(df_raw).shape - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) -def test_eager_instantiation(df_raw: Any) -> None: - result = nw.DataFrame(df_raw) - result_native = nw.to_native(result) - expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - compare_dicts(result_native, expected) - - -def test_accepted_dataframes() -> None: - array = np.array([[0, 4.0], [2, 5]]) - with pytest.raises( - TypeError, - match="Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: ", - ): - nw.DataFrame(array) - with pytest.raises( - TypeError, - match="Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: ", - ): - nw.LazyFrame(array) - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) -@pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning") -def test_convert_pandas(df_raw: Any) -> None: - result = nw.from_native(df_raw).to_pandas() # type: ignore[union-attr] - expected = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) - pd.testing.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_polars, df_pandas, df_mpd, df_pandas_nullable, df_pandas_pyarrow] -) -@pytest.mark.filterwarnings( - r"ignore:np\.find_common_type is deprecated\.:DeprecationWarning" -) -def test_convert_numpy(df_raw: Any) -> None: - result = nw.DataFrame(df_raw).to_numpy() - expected = np.array([[1, 3, 2], [4, 4, 6], [7.0, 8, 9]]).T - np.testing.assert_array_equal(result, expected) - assert result.dtype == "float64" - result = nw.DataFrame(df_raw).__array__() - np.testing.assert_array_equal(result, expected) - assert result.dtype == "float64" - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) -def test_shape(df_raw: Any) -> None: - result = nw.DataFrame(df_raw).shape - expected = (3, 3) - assert result == expected - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) -def test_expr_binary(df_raw: Any) -> None: - result = nw.LazyFrame(df_raw).with_columns( - a=(1 + 3 * nw.col("a")) * (1 / nw.col("a")), - b=nw.col("z") / (2 - nw.col("b")), - c=nw.col("a") + nw.col("b") / 2, - d=nw.col("a") - nw.col("b"), - e=((nw.col("a") > nw.col("b")) & (nw.col("a") >= nw.col("z"))).cast(nw.Int64), - f=( - (nw.col("a") < nw.col("b")) - | (nw.col("a") <= nw.col("z")) - | (nw.col("a") == 1) - ).cast(nw.Int64), - g=nw.col("a") != 1, - h=(False & (nw.col("a") != 1)), - i=(False | (nw.col("a") != 1)), - j=2 ** nw.col("a"), - k=2 // nw.col("a"), - l=nw.col("a") // 2, - m=nw.col("a") ** 2, - n=nw.col("a") % 2, - o=2 % nw.col("a"), - ) - result_native = nw.to_native(result) - expected = { - "a": [4, 3.333333, 3.5], - "b": [-3.5, -4.0, -2.25], - "z": [7.0, 8.0, 9.0], - "c": [3, 5, 5], - "d": [-3, -1, -4], - "e": [0, 0, 0], - "f": [1, 1, 1], - "g": [False, True, True], - "h": [False, False, False], - "i": [False, True, True], - "j": [2, 8, 4], - "k": [2, 0, 1], - "l": [0, 1, 1], - "m": [1, 9, 4], - "n": [1, 1, 0], - "o": [0, 2, 0], - } - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_lazy]) -def test_expr_unary(df_raw: Any) -> None: - result = ( - nw.from_native(df_raw) - .with_columns( - a_mean=nw.col("a").mean(), - a_sum=nw.col("a").sum(), - b_nunique=nw.col("b").n_unique(), - z_min=nw.col("z").min(), - z_max=nw.col("z").max(), - ) - .select(nw.col("a_mean", "a_sum", "b_nunique", "z_min", "z_max").unique()) - ) - result_native = nw.to_native(result) - expected = {"a_mean": [2], "a_sum": [6], "b_nunique": [2], "z_min": [7], "z_max": [9]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) -def test_expr_transform(df_raw: Any) -> None: - result = nw.LazyFrame(df_raw).with_columns( - a=nw.col("a").is_between(-1, 1), b=nw.col("b").is_in([4, 5]) - ) - result_native = nw.to_native(result) - expected = {"a": [True, False, False], "b": [True, True, False], "z": [7, 8, 9]} - compare_dicts(result_native, expected) - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_lazy]) -def test_expr_min_max(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result_min = nw.to_native(df.select(nw.min("a", "b", "z"))) - result_max = nw.to_native(df.select(nw.max("a", "b", "z"))) - expected_min = {"a": [1], "b": [4], "z": [7]} - expected_max = {"a": [3], "b": [6], "z": [9]} - compare_dicts(result_min, expected_min) - compare_dicts(result_max, expected_max) - - -@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) -def test_expr_sample(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result_shape = nw.to_native(df.select(nw.col("a").sample(n=2)).collect()).shape - expected = (2, 1) - assert result_shape == expected - result_shape = nw.to_native(df.collect()["a"].sample(n=2)).shape - expected = (2,) # type: ignore[assignment] - assert result_shape == expected - - -@pytest.mark.parametrize("df_raw", [df_pandas_na, df_lazy_na]) -def test_expr_na(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result_nna = nw.to_native( - df.filter((~nw.col("a").is_null()) & (~df.collect()["z"].is_null())) - ) - expected = {"a": [2], "b": [6], "z": [9]} - compare_dicts(result_nna, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_head(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = nw.to_native(df.head(2)) - expected = {"a": [1, 3], "b": [4, 4], "z": [7.0, 8.0]} - compare_dicts(result, expected) - result = nw.to_native(df.collect().head(2)) - expected = {"a": [1, 3], "b": [4, 4], "z": [7.0, 8.0]} - compare_dicts(result, expected) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_unique(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = nw.to_native(df.unique("b").sort("b")) - expected = {"a": [1, 2], "b": [4, 6], "z": [7.0, 9.0]} - compare_dicts(result, expected) - result = nw.to_native(df.collect().unique("b").sort("b")) - expected = {"a": [1, 2], "b": [4, 6], "z": [7.0, 9.0]} - compare_dicts(result, expected) - - -@pytest.mark.parametrize("df_raw", [df_pandas_na, df_lazy_na]) -def test_drop_nulls(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = nw.to_native(df.select(nw.col("a").drop_nulls())) - expected = {"a": [3, 2]} - compare_dicts(result, expected) - result = nw.to_native(df.select(df.collect()["a"].drop_nulls())) - expected = {"a": [3, 2]} - compare_dicts(result, expected) - - -@pytest.mark.parametrize( - ("df_raw", "df_raw_right"), [(df_pandas, df_right_pandas), (df_lazy, df_right_lazy)] -) -def test_concat_horizontal(df_raw: Any, df_raw_right: Any) -> None: - df_left = nw.LazyFrame(df_raw) - df_right = nw.LazyFrame(df_raw_right) - result = nw.concat([df_left, df_right], how="horizontal") - result_native = nw.to_native(result) - expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "z": [7.0, 8, 9], - "c": [6, 12, -1], - "d": [0, -4, 2], - } - compare_dicts(result_native, expected) - - with pytest.raises(ValueError, match="No items"): - nw.concat([]) - - -@pytest.mark.parametrize( - ("df_raw", "df_raw_right"), [(df_pandas, df_right_pandas), (df_lazy, df_right_lazy)] -) -def test_concat_vertical(df_raw: Any, df_raw_right: Any) -> None: - df_left = nw.LazyFrame(df_raw).collect().rename({"a": "c", "b": "d"}).lazy().drop("z") - df_right = nw.LazyFrame(df_raw_right) - result = nw.concat([df_left, df_right], how="vertical") - result_native = nw.to_native(result) - expected = {"c": [1, 3, 2, 6, 12, -1], "d": [4, 4, 6, 0, -4, 2]} - compare_dicts(result_native, expected) - with pytest.raises(ValueError, match="No items"): - nw.concat([], how="vertical") - with pytest.raises(Exception, match="unable to vstack"): - nw.concat([df_left, df_right.rename({"d": "i"})], how="vertical").collect() # type: ignore[union-attr] - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -def test_lazy(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = df.lazy() - assert isinstance(result, nw.LazyFrame) - - -def test_to_dict() -> None: - df = nw.DataFrame(df_pandas) - result = df.to_dict(as_series=True) - expected = { - "a": pd.Series([1, 3, 2], name="a"), - "b": pd.Series([4, 4, 6], name="b"), - "z": pd.Series([7.0, 8, 9], name="z"), - } - for key in expected: - pd_assert_series_equal(nw.to_native(result[key]), expected[key]) - - df = nw.DataFrame(df_polars) - result = df.to_dict(as_series=True) - expected = { - "a": pl.Series("a", [1, 3, 2]), - "b": pl.Series("b", [4, 4, 6]), - "z": pl.Series("z", [7.0, 8, 9]), - } - for key in expected: - pl_assert_series_equal(nw.to_native(result[key]), expected[key]) - - -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_any_all(df_raw: Any) -> None: - df = nw.LazyFrame(df_raw) - result = nw.to_native(df.select((nw.all() > 1).all())) - expected = {"a": [False], "b": [True], "z": [True]} - compare_dicts(result, expected) - result = nw.to_native(df.select((nw.all() > 1).any())) - expected = {"a": [True], "b": [True], "z": [True]} - compare_dicts(result, expected) - - -def test_invalid() -> None: - df = nw.LazyFrame(df_pandas) - with pytest.raises(ValueError, match="Multi-output"): - df.select(nw.all() + nw.all()) - with pytest.raises(TypeError, match="Perhaps you:"): - df.select([pl.col("a")]) # type: ignore[list-item] - with pytest.raises(TypeError, match="Perhaps you:"): - df.select([nw.col("a").cast(pl.Int64)]) - - -@pytest.mark.parametrize("df_raw", [df_pandas]) -def test_reindex(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = df.select("b", df["a"].sort(descending=True)) - expected = {"b": [4, 4, 6], "a": [3, 2, 1]} - compare_dicts(result, expected) - result = df.select("b", nw.col("a").sort(descending=True)) - compare_dicts(result, expected) - - s = df["a"] - result_s = s > s.sort() - assert not result_s[0] - assert result_s[1] - assert not result_s[2] - result = df.with_columns(s.sort()) - expected = {"a": [1, 2, 3], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} # type: ignore[list-item] - compare_dicts(result, expected) - with pytest.raises(ValueError, match="Multi-output expressions are not supported"): - nw.to_native(df.with_columns(nw.all() + nw.all())) - - -@pytest.mark.parametrize( - ("df_raw", "df_raw_right"), - [(df_pandas, df_polars), (df_polars, df_pandas)], -) -def test_library(df_raw: Any, df_raw_right: Any) -> None: - df_left = nw.LazyFrame(df_raw) - df_right = nw.LazyFrame(df_raw_right) - with pytest.raises( - NotImplementedError, match="Cross-library comparisons aren't supported" - ): - nw.concat([df_left, df_right], how="horizontal") - with pytest.raises( - NotImplementedError, match="Cross-library comparisons aren't supported" - ): - nw.concat([df_left, df_right], how="vertical") - with pytest.raises( - NotImplementedError, match="Cross-library comparisons aren't supported" - ): - df_left.join(df_right, left_on=["a"], right_on=["a"], how="inner") - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -def test_is_duplicated(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = nw.concat([df, df.head(1)]).is_duplicated() # type: ignore [union-attr] - expected = np.array([True, False, False, True]) - assert (result.to_numpy() == expected).all() - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -@pytest.mark.parametrize(("threshold", "expected"), [(0, False), (10, True)]) -def test_is_empty(df_raw: Any, threshold: Any, expected: Any) -> None: - df = nw.DataFrame(df_raw) - result = df.filter(nw.col("a") > threshold).is_empty() - assert result == expected - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -def test_is_unique(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = nw.concat([df, df.head(1)]).is_unique() # type: ignore [union-attr] - expected = np.array([False, True, True, False]) - assert (result.to_numpy() == expected).all() - - -@pytest.mark.parametrize("df_raw", [df_pandas_na, df_lazy_na.collect()]) -def test_null_count(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = nw.to_native(df.null_count()) - expected = {"a": [1], "b": [0], "z": [1]} - compare_dicts(result, expected) - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -@pytest.mark.parametrize( - ("interpolation", "expected"), - [ - ("lower", {"a": [1.0], "b": [4.0], "z": [7.0]}), - ("higher", {"a": [2.0], "b": [4.0], "z": [8.0]}), - ("midpoint", {"a": [1.5], "b": [4.0], "z": [7.5]}), - ("linear", {"a": [1.6], "b": [4.0], "z": [7.6]}), - ("nearest", {"a": [2.0], "b": [4.0], "z": [8.0]}), - ], -) -def test_quantile( - df_raw: Any, - interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"], - expected: dict[str, list[float]], -) -> None: - q = 0.3 - - df = nw.from_native(df_raw) - result = nw.to_native( - df.select(nw.all().quantile(quantile=q, interpolation=interpolation)) - ) - compare_dicts(result, expected) - - -@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) -def test_when(df_raw: Any) -> None: - df = nw.DataFrame(df_raw) - result = df.with_columns( - a=nw.when(nw.col("a") > 2, 1).otherwise(0), - b=nw.when(nw.col("a") > 2, 1).when(nw.col("a") < 1, -1).otherwise(0), - ) - expected = {"a": [0, 1, 0], "b": [0, 1, 0], "z": [7.0, 8.0, 9.0]} - compare_dicts(result, expected) diff --git a/tests/test_where.py b/tests/test_where.py deleted file mode 100644 index 776480ece..000000000 --- a/tests/test_where.py +++ /dev/null @@ -1,48 +0,0 @@ -from __future__ import annotations - -from typing import Any - -import pytest - -import narwhals.stable.v1 as nw -from narwhals.expression import when -from tests.utils import compare_dicts - -data = { - "a": [1, 1, 2], - "b": ["a", "b", "c"], - "c": [4.1, 5.0, 6.0], - "d": [True, False, True], -} - - -def test_when(request: Any, constructor: Any) -> None: - if "pyarrow_table" in str(constructor): - request.applymarker(pytest.mark.xfail) - - df = nw.from_native(constructor(data)) - result = df.with_columns(when(nw.col("a") == 1).then(value=3).alias("a_when")) - expected = { - "a": [1, 1, 2], - "b": ["a", "b", "c"], - "c": [4.1, 5.0, 6.0], - "d": [True, False, True], - "a_when": [3, 3, None], - } - compare_dicts(result, expected) - - -def test_when_otherwise(request: Any, constructor: Any) -> None: - if "pyarrow_table" in str(constructor): - request.applymarker(pytest.mark.xfail) - - df = nw.from_native(constructor(data)) - result = df.with_columns(when(nw.col("a") == 1).then(3).otherwise(6).alias("a_when")) - expected = { - "a": [1, 1, 2], - "b": ["a", "b", "c"], - "c": [4.1, 5.0, 6.0], - "d": [True, False, True], - "a_when": [3, 3, 6], - } - compare_dicts(result, expected) From fd21c78d8fa8c31fac04c7a920bdfae93b6b8daa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 13:08:47 +0000 Subject: [PATCH 31/78] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- narwhals/_pandas_like/expr.py | 37 ++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 52fdc0068..fa6d4988a 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -343,7 +343,9 @@ def cat(self: Self) -> PandasLikeExprCatNamespace: def name(self: Self) -> PandasLikeExprNameNamespace: return PandasLikeExprNameNamespace(self) - def when(self, *predicates: PandasExpr | Iterable[PandasExpr], **conditions: Any) -> PandasWhen: + def when( + self, *predicates: PandasExpr | Iterable[PandasExpr], **conditions: Any + ) -> PandasWhen: # TODO: Support conditions from narwhals._pandas_like.namespace import PandasNamespace @@ -351,6 +353,7 @@ def when(self, *predicates: PandasExpr | Iterable[PandasExpr], **conditions: Any condition = plx.all_horizontal(*predicates) return PandasWhen(self, condition) + class PandasLikeExprCatNamespace: def __init__(self, expr: PandasLikeExpr) -> None: self._expr = expr @@ -634,15 +637,26 @@ def to_uppercase(self: Self) -> PandasLikeExpr: backend_version=self._expr._backend_version, ) + class PandasWhen: def __init__(self, condition: PandasLikeExpr) -> None: self._condition = condition def then(self, value: Any) -> PandasThen: - return PandasThen(self, value=value, implementation=self._condition._implementation) + return PandasThen( + self, value=value, implementation=self._condition._implementation + ) + class PandasThen(PandasLikeExpr): - def __init__(self, when: PandasWhen, *, value: Any, implementation: Implementation, backend_version: tuple[int, ...]) -> None: + def __init__( + self, + when: PandasWhen, + *, + value: Any, + implementation: Implementation, + backend_version: tuple[int, ...], + ) -> None: self._when = when self._then_value = value self._implementation = implementation @@ -651,15 +665,15 @@ def __init__(self, when: PandasWhen, *, value: Any, implementation: Implementati def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: from narwhals._pandas_like.namespace import PandasLikeNamespace - plx = PandasLikeNamespace(implementation=self._implementation, backend_version=self.backend_version) + plx = PandasLikeNamespace( + implementation=self._implementation, backend_version=self.backend_version + ) condition = self._when._condition._call(df)[0] value_series = plx._create_series_from_scalar(self._then_value, condition) none_series = plx._create_series_from_scalar(None, condition) - return [ - value_series.zip_with(condition, none_series) - ] + return [value_series.zip_with(condition, none_series)] self._call = func self._depth = 0 @@ -670,13 +684,14 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: def otherwise(self, value: Any) -> PandasLikeExpr: def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: from narwhals._pandas_like.namespace import PandasLikeNamespace - plx = PandasLikeNamespace(implementation=self._implementation, backend_version=self.backend_version) + + plx = PandasLikeNamespace( + implementation=self._implementation, backend_version=self.backend_version + ) condition = self._when._condition._call(df)[0] value_series = plx._create_series_from_scalar(self._then_value, condition) otherwise_series = plx._create_series_from_scalar(value, condition) - return [ - value_series.zip_with(condition, otherwise_series) - ] + return [value_series.zip_with(condition, otherwise_series)] return PandasLikeExpr( func, From 2f03bd0dbb4979bbec1e24ef8c4fcc504752e778 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 23 Jul 2024 16:41:54 +0300 Subject: [PATCH 32/78] chore: remove all wrong rebase leftover code --- narwhals/_pandas_like/expr.py | 76 ----------------------------------- narwhals/expr.py | 22 ---------- 2 files changed, 98 deletions(-) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index fa6d4988a..f846da610 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -343,16 +343,6 @@ def cat(self: Self) -> PandasLikeExprCatNamespace: def name(self: Self) -> PandasLikeExprNameNamespace: return PandasLikeExprNameNamespace(self) - def when( - self, *predicates: PandasExpr | Iterable[PandasExpr], **conditions: Any - ) -> PandasWhen: - # TODO: Support conditions - from narwhals._pandas_like.namespace import PandasNamespace - - plx = PandasNamespace(self._implementation) - condition = plx.all_horizontal(*predicates) - return PandasWhen(self, condition) - class PandasLikeExprCatNamespace: def __init__(self, expr: PandasLikeExpr) -> None: @@ -636,69 +626,3 @@ def to_uppercase(self: Self) -> PandasLikeExpr: implementation=self._expr._implementation, backend_version=self._expr._backend_version, ) - - -class PandasWhen: - def __init__(self, condition: PandasLikeExpr) -> None: - self._condition = condition - - def then(self, value: Any) -> PandasThen: - return PandasThen( - self, value=value, implementation=self._condition._implementation - ) - - -class PandasThen(PandasLikeExpr): - def __init__( - self, - when: PandasWhen, - *, - value: Any, - implementation: Implementation, - backend_version: tuple[int, ...], - ) -> None: - self._when = when - self._then_value = value - self._implementation = implementation - self.backend_version = backend_version - - def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: - from narwhals._pandas_like.namespace import PandasLikeNamespace - - plx = PandasLikeNamespace( - implementation=self._implementation, backend_version=self.backend_version - ) - - condition = self._when._condition._call(df)[0] - - value_series = plx._create_series_from_scalar(self._then_value, condition) - none_series = plx._create_series_from_scalar(None, condition) - return [value_series.zip_with(condition, none_series)] - - self._call = func - self._depth = 0 - self._function_name = "whenthen" - self._root_names = None - self._output_names = None - - def otherwise(self, value: Any) -> PandasLikeExpr: - def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: - from narwhals._pandas_like.namespace import PandasLikeNamespace - - plx = PandasLikeNamespace( - implementation=self._implementation, backend_version=self.backend_version - ) - condition = self._when._condition._call(df)[0] - value_series = plx._create_series_from_scalar(self._then_value, condition) - otherwise_series = plx._create_series_from_scalar(value, condition) - return [value_series.zip_with(condition, otherwise_series)] - - return PandasLikeExpr( - func, - depth=0, - function_name="whenthenotherwise", - root_names=None, - output_names=None, - implementation=self._implementation, - backend_version=self.backend_version, - ) diff --git a/narwhals/expr.py b/narwhals/expr.py index 61199b5fa..2ef1e5308 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3616,28 +3616,6 @@ def then(self, value: Any) -> Then: return Then(lambda plx: plx.when(self._condition._call(plx)).then(value)) -class Then(Expr): - def __init__(self, call) -> None: # noqa: ANN001 - self._call = call - - def otherwise(self, value: Any) -> Expr: - return Expr(lambda plx: self._call(plx).otherwise(value)) - - -def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: # noqa: ARG001 - return When(reduce(lambda a, b: a & b, flatten([predicates]))) - - -class When: - def __init__(self, condition: Expr) -> None: - self._condition = condition - self._then_value = None - self._otehrwise_value = None - - def then(self, value: Any) -> Then: - return Then(lambda plx: plx.when(self._condition._call(plx)).then(value)) - - class Then(Expr): def __init__(self, call: Callable[[Any], Any]) -> None: self._call = call From 37cc634e91c0df3aeb63de8adf9e9733f4624029 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:29:54 +0000 Subject: [PATCH 33/78] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- narwhals/_pandas_like/namespace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 840414525..dee544b31 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -261,7 +261,7 @@ def concat( def when( self, *predicates: IntoPandasLikeExpr | Iterable[IntoPandasLikeExpr], - **constraints: Any, # noqa: ARG002 + **constraints: Any, ) -> PandasWhen: plx = self.__class__(self._implementation, self._backend_version) condition = plx.all_horizontal(*flatten(predicates)) From 0454ac4a0b41d785f8d4e75f30c9d5013e1464ec Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Thu, 25 Jul 2024 20:59:18 +0300 Subject: [PATCH 34/78] misc: keep api the same --- narwhals/_pandas_like/namespace.py | 13 ++++++++++++- narwhals/expr.py | 23 +++++++++++++++-------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index dee544b31..48767e80f 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -264,7 +264,18 @@ def when( **constraints: Any, ) -> PandasWhen: plx = self.__class__(self._implementation, self._backend_version) - condition = plx.all_horizontal(*flatten(predicates)) + if predicates: + condition = plx.all_horizontal(*flatten(predicates)) + elif constraints: + import narwhals as nw + + condition = plx.all_horizontal( + *flatten((nw.col(key) == value) for key, value in constraints.items()) + ) + else: + msg = "Must provide either predicates or constraints" + raise ValueError(msg) + return PandasWhen(condition, self._implementation, self._backend_version) diff --git a/narwhals/expr.py b/narwhals/expr.py index 2ef1e5308..3a91faaa8 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -1,6 +1,5 @@ from __future__ import annotations -from functools import reduce from typing import TYPE_CHECKING from typing import Any from typing import Callable @@ -3607,13 +3606,21 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: class When: - def __init__(self, condition: Expr) -> None: - self._condition = condition - self._then_value = None - self._otehrwise_value = None + def __init__( + self, *predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any + ) -> None: + self._predicates = flatten([predicates]) + self._constraints = constraints + + def _extract_predicates(self, plx: Any) -> Any: + return [extract_compliant(plx, v) for v in self._predicates] def then(self, value: Any) -> Then: - return Then(lambda plx: plx.when(self._condition._call(plx)).then(value)) + return Then( + lambda plx: plx.when( + *self._extract_predicates(plx), **self._constraints + ).then(value) + ) class Then(Expr): @@ -3624,7 +3631,7 @@ def otherwise(self, value: Any) -> Expr: return Expr(lambda plx: self._call(plx).otherwise(value)) -def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: # noqa: ARG001 +def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: """ Start a `when-then-otherwise` expression. Expression similar to an `if-else` statement in Python. Always initiated by a `pl.when().then()`., and optionally followed by chaining one or more `.when().then()` statements. @@ -3673,7 +3680,7 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When │ 3 ┆ 15 ┆ 6 │ └─────┴─────┴────────┘ """ - return When(reduce(lambda a, b: a & b, flatten([predicates]))) + return When(*predicates, **constraints) def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: From 4ad28b7b4966a160b8292ce8f96a2d2ad27cf372 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Thu, 25 Jul 2024 21:06:16 +0300 Subject: [PATCH 35/78] test: add test for multiple predicates --- tests/expr_and_series/when_test.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 90df13180..47b19e70b 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -9,7 +9,7 @@ from tests.utils import compare_dicts data = { - "a": [1, 1, 2], + "a": [1, 2, 3], "b": ["a", "b", "c"], "c": [4.1, 5.0, 6.0], "d": [True, False, True], @@ -23,11 +23,11 @@ def test_when(request: Any, constructor: Any) -> None: df = nw.from_native(constructor(data)) result = df.with_columns(when(nw.col("a") == 1).then(value=3).alias("a_when")) expected = { - "a": [1, 1, 2], + "a": [1, 2, 3], "b": ["a", "b", "c"], "c": [4.1, 5.0, 6.0], "d": [True, False, True], - "a_when": [3, 3, None], + "a_when": [3, None, None], } compare_dicts(result, expected) @@ -39,10 +39,28 @@ def test_when_otherwise(request: Any, constructor: Any) -> None: df = nw.from_native(constructor(data)) result = df.with_columns(when(nw.col("a") == 1).then(3).otherwise(6).alias("a_when")) expected = { - "a": [1, 1, 2], + "a": [1, 2, 3], "b": ["a", "b", "c"], "c": [4.1, 5.0, 6.0], "d": [True, False, True], - "a_when": [3, 3, 6], + "a_when": [3, 6, 6], + } + compare_dicts(result, expected) + + +def test_multiple_conditions(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + + df = nw.from_native(constructor(data)) + result = df.with_columns( + when(nw.col("a") < 3, nw.col("c") < 5.0).then(3).alias("a_when") + ) + expected = { + "a": [1, 2, 3], + "b": ["a", "b", "c"], + "c": [4.1, 5.0, 6.0], + "d": [True, False, True], + "a_when": [3, None, None], } compare_dicts(result, expected) From 0ded39307f9294a8faf8c70cc260fdd90a898f23 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 29 Jul 2024 03:53:12 +0300 Subject: [PATCH 36/78] misc: make when stable --- narwhals/stable/v1.py | 74 ++++++++++++++++++++++++++++++ tests/expr_and_series/when_test.py | 2 +- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 18f520bdf..0760f0c8f 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -34,6 +34,9 @@ from narwhals.dtypes import UInt64 from narwhals.dtypes import Unknown from narwhals.expr import Expr as NwExpr +from narwhals.expr import Then as NwThen +from narwhals.expr import When as NwWhen +from narwhals.expr import when as nw_when from narwhals.functions import concat from narwhals.functions import show_versions from narwhals.schema import Schema as NwSchema @@ -1391,6 +1394,76 @@ def get_level( return nw.get_level(obj) +class When(NwWhen): + @classmethod + def from_when(cls, when: NwWhen) -> Self: + return cls(*when._predicates, **when._constraints) + + def then(self, value: Any) -> Then: + return Then( + lambda plx: plx.when( + *self._extract_predicates(plx), **self._constraints + ).then(value) + ) + + +class Then(NwThen): + def otherwise(self, value: Any) -> Expr: + return _stableify(super().otherwise(value)) + + +def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: + """ + Start a `when-then-otherwise` expression. + Expression similar to an `if-else` statement in Python. Always initiated by a `pl.when().then()`., and optionally followed by chaining one or more `.when().then()` statements. + Chained when-then operations should be read as Python `if, elif, ... elif` blocks, not as `if, if, ... if`, i.e. the first condition that evaluates to `True` will be picked. + If none of the conditions are `True`, an optional `.otherwise()` can be appended at the end. If not appended, and none of the conditions are `True`, `None` will be returned. + + Parameters: + predicates + Condition(s) that must be met in order to apply the subsequent statement. Accepts one or more boolean expressions, which are implicitly combined with `&`. String input is parsed as a column name. + constraints + Apply conditions as `col_name = value` keyword arguments that are treated as equality matches, such as `x = 123`. As with the predicates parameter, multiple conditions are implicitly combined using `&`. + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) + >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) + + We define a dataframe-agnostic function: + + >>> @nw.narwhalify + ... def func(df_any): + ... from narwhals.expr import when + ... + ... return df_any.with_columns( + ... when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") + ... ) + + We can then pass either pandas or polars to `func`: + + >>> func(df_pd) + a b a_when + 0 1 5 5 + 1 2 10 5 + 2 3 15 6 + >>> func(df_pl) + shape: (3, 3) + ┌─────┬─────┬────────┐ + │ a ┆ b ┆ a_when │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i32 │ + ╞═════╪═════╪════════╡ + │ 1 ┆ 5 ┆ 5 │ + │ 2 ┆ 10 ┆ 5 │ + │ 3 ┆ 15 ┆ 6 │ + └─────┴─────┴────────┘ + """ + return When.from_when(nw_when(*predicates, **constraints)) + + __all__ = [ "selectors", "concat", @@ -1412,6 +1485,7 @@ def get_level( "mean", "sum", "sum_horizontal", + "when", "DataFrame", "LazyFrame", "Series", diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 47b19e70b..b17d92e61 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -5,7 +5,7 @@ import pytest import narwhals.stable.v1 as nw -from narwhals.expr import when +from narwhals.stable.v1 import when from tests.utils import compare_dicts data = { From 3280a3ca3fad62ed03e16158eadd96b6ee596cc1 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 29 Jul 2024 12:56:46 +0300 Subject: [PATCH 37/78] bug: make stable v1 `Then` a stable expr `Expr` docs: update stable v1 `when` docs to use stable api --- narwhals/stable/v1.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 0760f0c8f..ee025dc1c 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -1407,7 +1407,7 @@ def then(self, value: Any) -> Then: ) -class Then(NwThen): +class Then(NwThen, Expr): def otherwise(self, value: Any) -> Expr: return _stableify(super().otherwise(value)) @@ -1428,7 +1428,7 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When Examples: >>> import pandas as pd >>> import polars as pl - >>> import narwhals as nw + >>> import narwhals.stable.v1 as nw >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) From 5c6deed0332c56f8aca102b8c82ffa7154a52024 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 29 Jul 2024 13:09:12 +0300 Subject: [PATCH 38/78] bug: fix when constraints pandas implementation --- narwhals/_pandas_like/namespace.py | 4 +--- tests/expr_and_series/when_test.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 48767e80f..c46785ae3 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -267,10 +267,8 @@ def when( if predicates: condition = plx.all_horizontal(*flatten(predicates)) elif constraints: - import narwhals as nw - condition = plx.all_horizontal( - *flatten((nw.col(key) == value) for key, value in constraints.items()) + *flatten([plx.col(key) == value for key, value in constraints.items()]) ) else: msg = "Must provide either predicates or constraints" diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index b17d92e61..adbdbe061 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -64,3 +64,19 @@ def test_multiple_conditions(request: Any, constructor: Any) -> None: "a_when": [3, None, None], } compare_dicts(result, expected) + + +def test_when_constraint(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + + df = nw.from_native(constructor(data)) + result = df.with_columns(when(a=1).then(value=3).alias("a_when")) + expected = { + "a": [1, 2, 3], + "b": ["a", "b", "c"], + "c": [4.1, 5.0, 6.0], + "d": [True, False, True], + "a_when": [3, None, None], + } + compare_dicts(result, expected) From 8688491cababc9ee29fc116c8d42b78dcde739de Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 29 Jul 2024 13:27:36 +0300 Subject: [PATCH 39/78] test: stabalise all paths and test error on no arg --- narwhals/_pandas_like/namespace.py | 4 ++-- narwhals/stable/v1.py | 10 +++++----- tests/expr_and_series/when_test.py | 9 +++++++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index f14211ac8..fc1bf8626 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -276,8 +276,8 @@ def when( *flatten([plx.col(key) == value for key, value in constraints.items()]) ) else: - msg = "Must provide either predicates or constraints" - raise ValueError(msg) + msg = "at least one predicate or constraint must be provided" + raise TypeError(msg) return PandasWhen(condition, self._implementation, self._backend_version) diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 62a64a427..778152c74 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -1478,14 +1478,14 @@ def from_when(cls, when: NwWhen) -> Self: return cls(*when._predicates, **when._constraints) def then(self, value: Any) -> Then: - return Then( - lambda plx: plx.when( - *self._extract_predicates(plx), **self._constraints - ).then(value) - ) + return Then.from_then(super().then(value)) class Then(NwThen, Expr): + @classmethod + def from_then(cls, then: NwThen) -> Self: + return cls(then._call) + def otherwise(self, value: Any) -> Expr: return _stableify(super().otherwise(value)) diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index adbdbe061..e8aeb2d3c 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -80,3 +80,12 @@ def test_when_constraint(request: Any, constructor: Any) -> None: "a_when": [3, None, None], } compare_dicts(result, expected) + + +def test_no_arg_when_fail(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + + df = nw.from_native(constructor(data)) + with pytest.raises(TypeError): + df.with_columns(when().then(value=3).alias("a_when")) From 81039bf59cc016d9c30af17f6a1be75cda6408e1 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Mon, 29 Jul 2024 13:32:46 +0300 Subject: [PATCH 40/78] misc: add when to main api --- docs/api-reference/narwhals.md | 1 + narwhals/__init__.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/docs/api-reference/narwhals.md b/docs/api-reference/narwhals.md index 16bc6621c..42c1a2e44 100644 --- a/docs/api-reference/narwhals.md +++ b/docs/api-reference/narwhals.md @@ -27,6 +27,7 @@ Here are the top-level functions available in Narwhals. - narwhalify - sum - sum_horizontal + - when - show_versions - to_native show_source: false diff --git a/narwhals/__init__.py b/narwhals/__init__.py index 3ad1468af..c31459447 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -33,6 +33,7 @@ from narwhals.expr import min from narwhals.expr import sum from narwhals.expr import sum_horizontal +from narwhals.expr import when from narwhals.functions import concat from narwhals.functions import from_dict from narwhals.functions import get_level @@ -73,6 +74,7 @@ "mean", "sum", "sum_horizontal", + "when", "DataFrame", "LazyFrame", "Series", From 1196fab51299a2aadedb45f5c629163ec7e23afb Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 30 Jul 2024 10:33:56 +0300 Subject: [PATCH 41/78] misc: remove constraints --- narwhals/_pandas_like/namespace.py | 7 +------ narwhals/expr.py | 17 ++++------------- narwhals/stable/v1.py | 8 +++----- tests/expr_and_series/when_test.py | 16 ---------------- 4 files changed, 8 insertions(+), 40 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index fc1bf8626..4f9c9fa3e 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -266,17 +266,12 @@ def concat( def when( self, *predicates: IntoPandasLikeExpr | Iterable[IntoPandasLikeExpr], - **constraints: Any, ) -> PandasWhen: plx = self.__class__(self._implementation, self._backend_version) if predicates: condition = plx.all_horizontal(*flatten(predicates)) - elif constraints: - condition = plx.all_horizontal( - *flatten([plx.col(key) == value for key, value in constraints.items()]) - ) else: - msg = "at least one predicate or constraint must be provided" + msg = "at least one predicate needs to be provided" raise TypeError(msg) return PandasWhen(condition, self._implementation, self._backend_version) diff --git a/narwhals/expr.py b/narwhals/expr.py index d99c91068..fa8fa2101 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3644,21 +3644,14 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: class When: - def __init__( - self, *predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any - ) -> None: + def __init__(self, *predicates: IntoExpr | Iterable[IntoExpr]) -> None: self._predicates = flatten([predicates]) - self._constraints = constraints def _extract_predicates(self, plx: Any) -> Any: return [extract_compliant(plx, v) for v in self._predicates] def then(self, value: Any) -> Then: - return Then( - lambda plx: plx.when( - *self._extract_predicates(plx), **self._constraints - ).then(value) - ) + return Then(lambda plx: plx.when(*self._extract_predicates(plx)).then(value)) class Then(Expr): @@ -3669,7 +3662,7 @@ def otherwise(self, value: Any) -> Expr: return Expr(lambda plx: self._call(plx).otherwise(value)) -def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: +def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: """ Start a `when-then-otherwise` expression. Expression similar to an `if-else` statement in Python. Always initiated by a `pl.when().then()`., and optionally followed by chaining one or more `.when().then()` statements. @@ -3679,8 +3672,6 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When Parameters: predicates Condition(s) that must be met in order to apply the subsequent statement. Accepts one or more boolean expressions, which are implicitly combined with `&`. String input is parsed as a column name. - constraints - Apply conditions as `col_name = value` keyword arguments that are treated as equality matches, such as `x = 123`. As with the predicates parameter, multiple conditions are implicitly combined using `&`. Examples: >>> import pandas as pd @@ -3718,7 +3709,7 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When │ 3 ┆ 15 ┆ 6 │ └─────┴─────┴────────┘ """ - return When(*predicates, **constraints) + return When(*predicates) def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 778152c74..a0ae5e01f 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -1475,7 +1475,7 @@ def get_level( class When(NwWhen): @classmethod def from_when(cls, when: NwWhen) -> Self: - return cls(*when._predicates, **when._constraints) + return cls(*when._predicates) def then(self, value: Any) -> Then: return Then.from_then(super().then(value)) @@ -1490,7 +1490,7 @@ def otherwise(self, value: Any) -> Expr: return _stableify(super().otherwise(value)) -def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When: +def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: """ Start a `when-then-otherwise` expression. Expression similar to an `if-else` statement in Python. Always initiated by a `pl.when().then()`., and optionally followed by chaining one or more `.when().then()` statements. @@ -1500,8 +1500,6 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When Parameters: predicates Condition(s) that must be met in order to apply the subsequent statement. Accepts one or more boolean expressions, which are implicitly combined with `&`. String input is parsed as a column name. - constraints - Apply conditions as `col_name = value` keyword arguments that are treated as equality matches, such as `x = 123`. As with the predicates parameter, multiple conditions are implicitly combined using `&`. Examples: >>> import pandas as pd @@ -1539,7 +1537,7 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any) -> When │ 3 ┆ 15 ┆ 6 │ └─────┴─────┴────────┘ """ - return When.from_when(nw_when(*predicates, **constraints)) + return When.from_when(nw_when(*predicates)) def from_dict( diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index e8aeb2d3c..8d8d554b4 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -66,22 +66,6 @@ def test_multiple_conditions(request: Any, constructor: Any) -> None: compare_dicts(result, expected) -def test_when_constraint(request: Any, constructor: Any) -> None: - if "pyarrow_table" in str(constructor): - request.applymarker(pytest.mark.xfail) - - df = nw.from_native(constructor(data)) - result = df.with_columns(when(a=1).then(value=3).alias("a_when")) - expected = { - "a": [1, 2, 3], - "b": ["a", "b", "c"], - "c": [4.1, 5.0, 6.0], - "d": [True, False, True], - "a_when": [3, None, None], - } - compare_dicts(result, expected) - - def test_no_arg_when_fail(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor): request.applymarker(pytest.mark.xfail) From beba175fd3a371f9bcfbcc73a060b9ffaaa8a1d2 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 30 Jul 2024 10:47:07 +0300 Subject: [PATCH 42/78] docs: remove wrong import in stable --- narwhals/stable/v1.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index a0ae5e01f..572760478 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -1512,10 +1512,8 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: >>> @nw.narwhalify ... def func(df_any): - ... from narwhals.expr import when - ... ... return df_any.with_columns( - ... when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") + ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") ... ) We can then pass either pandas or polars to `func`: From 45684d4d76b322816f3cc345dd1306284a390918 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 30 Jul 2024 10:52:37 +0300 Subject: [PATCH 43/78] docs: remove wrong import in main docstring --- narwhals/expr.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index fa8fa2101..22473a86b 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3684,10 +3684,8 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: >>> @nw.narwhalify ... def func(df_any): - ... from narwhals.expr import when - ... ... return df_any.with_columns( - ... when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") + ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") ... ) We can then pass either pandas or polars to `func`: From 21e33844d4ff7af99f3dc7004b2a86660c21dc47 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 30 Jul 2024 16:54:54 +0300 Subject: [PATCH 44/78] feat: add series and expression support for when then --- narwhals/_pandas_like/namespace.py | 53 +++++++++++++++++++++++++----- narwhals/expr.py | 8 +++-- tests/expr_and_series/when_test.py | 52 +++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 10 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 4f9c9fa3e..a0461c8da 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -277,6 +277,42 @@ def when( return PandasWhen(condition, self._implementation, self._backend_version) +def _when_then_value_arg_process( + plx: PandasLikeNamespace, + value: PandasLikeExpr | PandasLikeSeries | Any, + *, + shape: tuple[int] | None = None, + series_with_shape: PandasLikeSeries | None = None, +) -> PandasLikeExpr: + from narwhals.dependencies import get_numpy + + np = get_numpy() + + if not np: + raise ImportError("numpy is required for this function") + if isinstance(value, PandasLikeExpr): + return value + elif isinstance(value, PandasLikeSeries): + return plx._create_expr_from_series(value) + elif isinstance(value, np.ndarray) and not isinstance(value, str): + return plx._create_expr_from_series(plx._create_compliant_series(value)) + elif series_with_shape is not None: + return plx._create_expr_from_series( + plx._create_compliant_series( + [value] * len(series_with_shape._native_series) + ) + ) + elif shape is not None: + if len(shape) != 1: + raise ValueError("shape must be a tuple of a single integer") + + return plx._create_expr_from_series( + plx._create_compliant_series([value] * shape[0]) + ) + else: + raise TypeError("shape or series_with_shape must be provided") + + class PandasWhen: def __init__( self, @@ -301,15 +337,16 @@ def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: condition = self._condition._call(df)[0] - value_series = plx._create_broadcast_series_from_scalar( - self._then_value, condition - ) - otherwise_series = plx._create_broadcast_series_from_scalar( - self._otherwise_value, condition - ) + value_series = _when_then_value_arg_process( + plx, self._then_value, shape=condition.shape + )._call(df)[0] + otherwise_series = _when_then_value_arg_process( + plx, self._otherwise_value, shape=condition.shape + )._call(df)[0] + return [value_series.zip_with(condition, otherwise_series)] - def then(self, value: Any) -> PandasThen: + def then(self, value: PandasLikeExpr | PandasLikeSeries | Any) -> PandasThen: self._then_value = value return PandasThen( @@ -344,7 +381,7 @@ def __init__( self._root_names = root_names self._output_names = output_names - def otherwise(self, value: Any) -> PandasLikeExpr: + def otherwise(self, value: PandasLikeExpr | PandasLikeSeries | Any) -> PandasLikeExpr: # type ignore because we are setting the `_call` attribute to a # callable object of type `PandasWhen`, base class has the attribute as # only a `Callable` diff --git a/narwhals/expr.py b/narwhals/expr.py index 22473a86b..526581938 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3651,7 +3651,11 @@ def _extract_predicates(self, plx: Any) -> Any: return [extract_compliant(plx, v) for v in self._predicates] def then(self, value: Any) -> Then: - return Then(lambda plx: plx.when(*self._extract_predicates(plx)).then(value)) + return Then( + lambda plx: plx.when(*self._extract_predicates(plx)).then( + extract_compliant(plx, value) + ) + ) class Then(Expr): @@ -3659,7 +3663,7 @@ def __init__(self, call: Callable[[Any], Any]) -> None: self._call = call def otherwise(self, value: Any) -> Expr: - return Expr(lambda plx: self._call(plx).otherwise(value)) + return Expr(lambda plx: self._call(plx).otherwise(extract_compliant(plx, value))) def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 8d8d554b4..e567575d3 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -73,3 +73,55 @@ def test_no_arg_when_fail(request: Any, constructor: Any) -> None: df = nw.from_native(constructor(data)) with pytest.raises(TypeError): df.with_columns(when().then(value=3).alias("a_when")) + + +def test_value_numpy_array(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor(data)) + import numpy as np + + result = df.with_columns( + when(nw.col("a") == 1).then(np.asanyarray([3, 4, 5])).alias("a_when") + ) + expected = { + "a": [1, 2, 3], + "b": ["a", "b", "c"], + "c": [4.1, 5.0, 6.0], + "d": [True, False, True], + "a_when": [3, None, None], + } + compare_dicts(result, expected) + + +def test_value_series(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor(data)) + s_data = {"s": [3, 4, 5]} + s = nw.from_native(constructor(s_data))["s"] + assert isinstance(s, nw.Series) + result = df.with_columns(when(nw.col("a") == 1).then(s).alias("a_when")) + expected = { + "a": [1, 2, 3], + "b": ["a", "b", "c"], + "c": [4.1, 5.0, 6.0], + "d": [True, False, True], + "a_when": [3, None, None], + } + compare_dicts(result, expected) + + +def test_value_expression(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor(data)) + result = df.with_columns(when(nw.col("a") == 1).then(nw.col("a") + 9).alias("a_when")) + expected = { + "a": [1, 2, 3], + "b": ["a", "b", "c"], + "c": [4.1, 5.0, 6.0], + "d": [True, False, True], + "a_when": [10, None, None], + } + compare_dicts(result, expected) From e2e0b92d5d6bbef77fca1dcc6e8c5551af5b0cba Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:55:56 +0000 Subject: [PATCH 45/78] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- narwhals/_pandas_like/namespace.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index a0461c8da..958f4de3b 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -298,9 +298,7 @@ def _when_then_value_arg_process( return plx._create_expr_from_series(plx._create_compliant_series(value)) elif series_with_shape is not None: return plx._create_expr_from_series( - plx._create_compliant_series( - [value] * len(series_with_shape._native_series) - ) + plx._create_compliant_series([value] * len(series_with_shape._native_series)) ) elif shape is not None: if len(shape) != 1: From ee2d9ba99cb7656c6415218b1c6ef25b22567cc8 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 30 Jul 2024 17:03:18 +0300 Subject: [PATCH 46/78] misc: simplify condition --- narwhals/_pandas_like/namespace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 958f4de3b..661662adc 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -294,7 +294,7 @@ def _when_then_value_arg_process( return value elif isinstance(value, PandasLikeSeries): return plx._create_expr_from_series(value) - elif isinstance(value, np.ndarray) and not isinstance(value, str): + elif isinstance(value, np.ndarray): return plx._create_expr_from_series(plx._create_compliant_series(value)) elif series_with_shape is not None: return plx._create_expr_from_series( From e7f31bf3389e86101373222b0028d757a74f43e9 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 30 Jul 2024 17:09:16 +0300 Subject: [PATCH 47/78] test: skip lazy test on series when --- tests/expr_and_series/when_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index e567575d3..6bc850062 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -94,12 +94,12 @@ def test_value_numpy_array(request: Any, constructor: Any) -> None: compare_dicts(result, expected) -def test_value_series(request: Any, constructor: Any) -> None: - if "pyarrow_table" in str(constructor): +def test_value_series(request: Any, constructor_eager: Any) -> None: + if "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor_eager(data)) s_data = {"s": [3, 4, 5]} - s = nw.from_native(constructor(s_data))["s"] + s = nw.from_native(constructor_eager(s_data))["s"] assert isinstance(s, nw.Series) result = df.with_columns(when(nw.col("a") == 1).then(s).alias("a_when")) expected = { From c79a24cf1ed5c6483988f2a0257eafe42370ebb9 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 30 Jul 2024 17:11:51 +0300 Subject: [PATCH 48/78] misc: fix ruff check --- narwhals/_pandas_like/namespace.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 661662adc..cb4b2187d 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -289,7 +289,8 @@ def _when_then_value_arg_process( np = get_numpy() if not np: - raise ImportError("numpy is required for this function") + msg = "numpy is required for this function" + raise ImportError(msg) if isinstance(value, PandasLikeExpr): return value elif isinstance(value, PandasLikeSeries): @@ -302,13 +303,15 @@ def _when_then_value_arg_process( ) elif shape is not None: if len(shape) != 1: - raise ValueError("shape must be a tuple of a single integer") + msg = "shape must be a tuple of a single integer" + raise ValueError(msg) return plx._create_expr_from_series( plx._create_compliant_series([value] * shape[0]) ) else: - raise TypeError("shape or series_with_shape must be provided") + msg = "shape or series_with_shape must be provided" + raise TypeError(msg) class PandasWhen: From f1db4c4313e930a5d437e147a03de06f5d289f65 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 30 Jul 2024 17:13:53 +0300 Subject: [PATCH 49/78] misc: remove dead code --- narwhals/_pandas_like/namespace.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index cb4b2187d..20564a534 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -86,17 +86,6 @@ def _create_series_from_scalar( backend_version=self._backend_version, ) - def _create_broadcast_series_from_scalar( - self, value: Any, series: PandasLikeSeries - ) -> PandasLikeSeries: - return PandasLikeSeries._from_iterable( - [value] * len(series._native_series), - name=series._native_series.name, - index=series._native_series.index, - implementation=self._implementation, - backend_version=self._backend_version, - ) - def _create_expr_from_series(self, series: PandasLikeSeries) -> PandasLikeExpr: return PandasLikeExpr( lambda _df: [series], From 06a3a97a43396791e58b2774a1b925acd7933a3c Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Wed, 31 Jul 2024 07:11:40 +0300 Subject: [PATCH 50/78] misc: remove unused code --- narwhals/_pandas_like/namespace.py | 16 ++-------------- tests/expr_and_series/when_test.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 20564a534..7d2bb751f 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -270,8 +270,7 @@ def _when_then_value_arg_process( plx: PandasLikeNamespace, value: PandasLikeExpr | PandasLikeSeries | Any, *, - shape: tuple[int] | None = None, - series_with_shape: PandasLikeSeries | None = None, + shape: tuple[int], ) -> PandasLikeExpr: from narwhals.dependencies import get_numpy @@ -286,21 +285,10 @@ def _when_then_value_arg_process( return plx._create_expr_from_series(value) elif isinstance(value, np.ndarray): return plx._create_expr_from_series(plx._create_compliant_series(value)) - elif series_with_shape is not None: - return plx._create_expr_from_series( - plx._create_compliant_series([value] * len(series_with_shape._native_series)) - ) - elif shape is not None: - if len(shape) != 1: - msg = "shape must be a tuple of a single integer" - raise ValueError(msg) - + else: return plx._create_expr_from_series( plx._create_compliant_series([value] * shape[0]) ) - else: - msg = "shape or series_with_shape must be provided" - raise TypeError(msg) class PandasWhen: diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 6bc850062..3b6766160 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -125,3 +125,22 @@ def test_value_expression(request: Any, constructor: Any) -> None: "a_when": [10, None, None], } compare_dicts(result, expected) + + +def test_numpy_not_available(request: Any, constructor: Any, monkeypatch: Any) -> None: + df = nw.from_native(constructor(data)) + context_manager = monkeypatch.context() + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor): + + def no_numpy() -> None: + return None + + from narwhals import dependencies + + monkeypatch.setattr(dependencies, "get_numpy", no_numpy) + context_manager = pytest.raises(ImportError) + + with context_manager: + df.with_columns(when(nw.col("a") == 1).then(9).alias("a_when")) From bba70ea25b9c7af9dd6d5e16e9663c8b61df75ad Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Wed, 31 Jul 2024 07:21:13 +0300 Subject: [PATCH 51/78] misc: use available impl/backend version in object --- narwhals/_pandas_like/namespace.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 7d2bb751f..b2820ed47 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -333,8 +333,8 @@ def then(self, value: PandasLikeExpr | PandasLikeSeries | Any) -> PandasThen: function_name="whenthen", root_names=None, output_names=None, - implementation=self._condition._implementation, - backend_version=self._condition._backend_version, + implementation=self._implementation, + backend_version=self._backend_version, ) From e21243aa7dd249fcedae0ea776a13217d5f3f0ac Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Tue, 6 Aug 2024 03:42:48 +0300 Subject: [PATCH 52/78] test: fix python 3.8 failing test docs: fix doc build failing argument --- narwhals/expr.py | 5 ++--- narwhals/stable/v1.py | 5 ++--- tests/expr_and_series/when_test.py | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 526581938..bf754875a 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3673,9 +3673,8 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: Chained when-then operations should be read as Python `if, elif, ... elif` blocks, not as `if, if, ... if`, i.e. the first condition that evaluates to `True` will be picked. If none of the conditions are `True`, an optional `.otherwise()` can be appended at the end. If not appended, and none of the conditions are `True`, `None` will be returned. - Parameters: - predicates - Condition(s) that must be met in order to apply the subsequent statement. Accepts one or more boolean expressions, which are implicitly combined with `&`. String input is parsed as a column name. + Arguments: + predicates: Condition(s) that must be met in order to apply the subsequent statement. Accepts one or more boolean expressions, which are implicitly combined with `&`. String input is parsed as a column name. Examples: >>> import pandas as pd diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 572760478..27b64fe4a 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -1497,9 +1497,8 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: Chained when-then operations should be read as Python `if, elif, ... elif` blocks, not as `if, if, ... if`, i.e. the first condition that evaluates to `True` will be picked. If none of the conditions are `True`, an optional `.otherwise()` can be appended at the end. If not appended, and none of the conditions are `True`, `None` will be returned. - Parameters: - predicates - Condition(s) that must be met in order to apply the subsequent statement. Accepts one or more boolean expressions, which are implicitly combined with `&`. String input is parsed as a column name. + Arguments: + predicates: Condition(s) that must be met in order to apply the subsequent statement. Accepts one or more boolean expressions, which are implicitly combined with `&`. String input is parsed as a column name. Examples: >>> import pandas as pd diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 3b6766160..a5606f8be 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -71,7 +71,7 @@ def test_no_arg_when_fail(request: Any, constructor: Any) -> None: request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) - with pytest.raises(TypeError): + with pytest.raises((TypeError, ValueError)): df.with_columns(when().then(value=3).alias("a_when")) From ada958823f79ad8cec65241edab30a526737c013 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Thu, 8 Aug 2024 02:01:46 +0300 Subject: [PATCH 53/78] test: ignore dask --- tests/expr_and_series/when_test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index a5606f8be..1d04abc9d 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -17,7 +17,7 @@ def test_when(request: Any, constructor: Any) -> None: - if "pyarrow_table" in str(constructor): + if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) @@ -33,7 +33,7 @@ def test_when(request: Any, constructor: Any) -> None: def test_when_otherwise(request: Any, constructor: Any) -> None: - if "pyarrow_table" in str(constructor): + if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) @@ -49,7 +49,7 @@ def test_when_otherwise(request: Any, constructor: Any) -> None: def test_multiple_conditions(request: Any, constructor: Any) -> None: - if "pyarrow_table" in str(constructor): + if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) @@ -67,7 +67,7 @@ def test_multiple_conditions(request: Any, constructor: Any) -> None: def test_no_arg_when_fail(request: Any, constructor: Any) -> None: - if "pyarrow_table" in str(constructor): + if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) @@ -76,7 +76,7 @@ def test_no_arg_when_fail(request: Any, constructor: Any) -> None: def test_value_numpy_array(request: Any, constructor: Any) -> None: - if "pyarrow_table" in str(constructor): + if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) import numpy as np @@ -113,7 +113,7 @@ def test_value_series(request: Any, constructor_eager: Any) -> None: def test_value_expression(request: Any, constructor: Any) -> None: - if "pyarrow_table" in str(constructor): + if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) result = df.with_columns(when(nw.col("a") == 1).then(nw.col("a") + 9).alias("a_when")) @@ -130,7 +130,7 @@ def test_value_expression(request: Any, constructor: Any) -> None: def test_numpy_not_available(request: Any, constructor: Any, monkeypatch: Any) -> None: df = nw.from_native(constructor(data)) context_manager = monkeypatch.context() - if "pyarrow_table" in str(constructor): + if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): From ead6649d7cd5396e01f16e97309ef7ec0496feb8 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov <54221777+aivanoved@users.noreply.github.com> Date: Thu, 8 Aug 2024 20:14:47 +0300 Subject: [PATCH 54/78] Update namespace.py Co-authored-by: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> --- narwhals/_pandas_like/namespace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index b2820ed47..682deae8f 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -287,7 +287,7 @@ def _when_then_value_arg_process( return plx._create_expr_from_series(plx._create_compliant_series(value)) else: return plx._create_expr_from_series( - plx._create_compliant_series([value] * shape[0]) + plx._create_compliant_series(np.full(shape[0], value)) ) From bc129ee21a6722143de225817d3d2eb6e8a323e0 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 9 Aug 2024 21:47:20 +0100 Subject: [PATCH 55/78] simplify a bit --- narwhals/_pandas_like/namespace.py | 11 ++++------- tests/expr_and_series/when_test.py | 19 ------------------- 2 files changed, 4 insertions(+), 26 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 682deae8f..4d36a69da 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -15,7 +15,6 @@ from narwhals._pandas_like.utils import create_native_series from narwhals._pandas_like.utils import horizontal_concat from narwhals._pandas_like.utils import vertical_concat -from narwhals.utils import flatten if TYPE_CHECKING: from narwhals._pandas_like.typing import IntoPandasLikeExpr @@ -254,11 +253,11 @@ def concat( def when( self, - *predicates: IntoPandasLikeExpr | Iterable[IntoPandasLikeExpr], + *predicates: IntoPandasLikeExpr, ) -> PandasWhen: plx = self.__class__(self._implementation, self._backend_version) if predicates: - condition = plx.all_horizontal(*flatten(predicates)) + condition = plx.all_horizontal(*predicates) else: msg = "at least one predicate needs to be provided" raise TypeError(msg) @@ -274,16 +273,14 @@ def _when_then_value_arg_process( ) -> PandasLikeExpr: from narwhals.dependencies import get_numpy + # NumPy is a required dependency of pandas np = get_numpy() - if not np: - msg = "numpy is required for this function" - raise ImportError(msg) if isinstance(value, PandasLikeExpr): return value elif isinstance(value, PandasLikeSeries): return plx._create_expr_from_series(value) - elif isinstance(value, np.ndarray): + elif (np := get_numpy()) is not None and isinstance(value, np.ndarray): return plx._create_expr_from_series(plx._create_compliant_series(value)) else: return plx._create_expr_from_series( diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 1d04abc9d..1398e41e8 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -125,22 +125,3 @@ def test_value_expression(request: Any, constructor: Any) -> None: "a_when": [10, None, None], } compare_dicts(result, expected) - - -def test_numpy_not_available(request: Any, constructor: Any, monkeypatch: Any) -> None: - df = nw.from_native(constructor(data)) - context_manager = monkeypatch.context() - if "pyarrow_table" in str(constructor) or "dask" in str(constructor): - request.applymarker(pytest.mark.xfail) - if "pandas" in str(constructor): - - def no_numpy() -> None: - return None - - from narwhals import dependencies - - monkeypatch.setattr(dependencies, "get_numpy", no_numpy) - context_manager = pytest.raises(ImportError) - - with context_manager: - df.with_columns(when(nw.col("a") == 1).then(9).alias("a_when")) From ba3d1580d9cdd0ae46fdd5fc07919beb7548dd67 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Fri, 16 Aug 2024 16:24:35 +0300 Subject: [PATCH 56/78] tests: clean up tests --- tests/expr_and_series/when_test.py | 51 ++++++++---------------------- 1 file changed, 13 insertions(+), 38 deletions(-) diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 1398e41e8..418cb7c57 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -2,6 +2,7 @@ from typing import Any +import numpy as np import pytest import narwhals.stable.v1 as nw @@ -21,13 +22,9 @@ def test_when(request: Any, constructor: Any) -> None: request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) - result = df.with_columns(when(nw.col("a") == 1).then(value=3).alias("a_when")) + result = df.select(when(nw.col("a") == 1).then(value=3).alias("a_when")) expected = { - "a": [1, 2, 3], - "b": ["a", "b", "c"], - "c": [4.1, 5.0, 6.0], - "d": [True, False, True], - "a_when": [3, None, None], + "a_when": [3, np.nan, np.nan], } compare_dicts(result, expected) @@ -37,12 +34,8 @@ def test_when_otherwise(request: Any, constructor: Any) -> None: request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) - result = df.with_columns(when(nw.col("a") == 1).then(3).otherwise(6).alias("a_when")) + result = df.select(when(nw.col("a") == 1).then(3).otherwise(6).alias("a_when")) expected = { - "a": [1, 2, 3], - "b": ["a", "b", "c"], - "c": [4.1, 5.0, 6.0], - "d": [True, False, True], "a_when": [3, 6, 6], } compare_dicts(result, expected) @@ -53,15 +46,9 @@ def test_multiple_conditions(request: Any, constructor: Any) -> None: request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) - result = df.with_columns( - when(nw.col("a") < 3, nw.col("c") < 5.0).then(3).alias("a_when") - ) + result = df.select(when(nw.col("a") < 3, nw.col("c") < 5.0).then(3).alias("a_when")) expected = { - "a": [1, 2, 3], - "b": ["a", "b", "c"], - "c": [4.1, 5.0, 6.0], - "d": [True, False, True], - "a_when": [3, None, None], + "a_when": [3, np.nan, np.nan], } compare_dicts(result, expected) @@ -72,7 +59,7 @@ def test_no_arg_when_fail(request: Any, constructor: Any) -> None: df = nw.from_native(constructor(data)) with pytest.raises((TypeError, ValueError)): - df.with_columns(when().then(value=3).alias("a_when")) + df.select(when().then(value=3).alias("a_when")) def test_value_numpy_array(request: Any, constructor: Any) -> None: @@ -81,15 +68,11 @@ def test_value_numpy_array(request: Any, constructor: Any) -> None: df = nw.from_native(constructor(data)) import numpy as np - result = df.with_columns( + result = df.select( when(nw.col("a") == 1).then(np.asanyarray([3, 4, 5])).alias("a_when") ) expected = { - "a": [1, 2, 3], - "b": ["a", "b", "c"], - "c": [4.1, 5.0, 6.0], - "d": [True, False, True], - "a_when": [3, None, None], + "a_when": [3, np.nan, np.nan], } compare_dicts(result, expected) @@ -101,13 +84,9 @@ def test_value_series(request: Any, constructor_eager: Any) -> None: s_data = {"s": [3, 4, 5]} s = nw.from_native(constructor_eager(s_data))["s"] assert isinstance(s, nw.Series) - result = df.with_columns(when(nw.col("a") == 1).then(s).alias("a_when")) + result = df.select(when(nw.col("a") == 1).then(s).alias("a_when")) expected = { - "a": [1, 2, 3], - "b": ["a", "b", "c"], - "c": [4.1, 5.0, 6.0], - "d": [True, False, True], - "a_when": [3, None, None], + "a_when": [3, np.nan, np.nan], } compare_dicts(result, expected) @@ -116,12 +95,8 @@ def test_value_expression(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) - result = df.with_columns(when(nw.col("a") == 1).then(nw.col("a") + 9).alias("a_when")) + result = df.select(when(nw.col("a") == 1).then(nw.col("a") + 9).alias("a_when")) expected = { - "a": [1, 2, 3], - "b": ["a", "b", "c"], - "c": [4.1, 5.0, 6.0], - "d": [True, False, True], - "a_when": [10, None, None], + "a_when": [10, np.nan, np.nan], } compare_dicts(result, expected) From 7cbc759afa502dd2cf7312ff13373b9e9453fd4a Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Fri, 16 Aug 2024 16:25:24 +0300 Subject: [PATCH 57/78] bug: keep `pandas` impl of `zip_with` type safe --- narwhals/_pandas_like/series.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 8cb98a9d1..14d823861 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -597,7 +597,17 @@ def quantile( def zip_with(self: Self, mask: Any, other: Any) -> PandasLikeSeries: ser = self._native_series - res = ser.where(mask._native_series, other._native_series) + other_ser = other._native_series + + null_mask = self.is_null() & mask + null_mask |= other.is_null() & ~mask + + other_sanitized = other_ser.where(~other.is_null()._native_series, ser).astype( + ser.dtype + ) + + res = ser.where(mask._native_series, other_sanitized).astype(ser.dtype) + res = res.where(~null_mask._native_series) return self._from_native_series(res) def head(self: Self, n: int) -> Self: From 28f55fb3461d5868727816703510a159a693a95b Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Fri, 16 Aug 2024 16:26:42 +0300 Subject: [PATCH 58/78] misc: add additional type safety in `pandas` impl of `when` --- narwhals/_pandas_like/namespace.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 4d36a69da..ac9b14845 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -319,7 +319,9 @@ def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: plx, self._otherwise_value, shape=condition.shape )._call(df)[0] - return [value_series.zip_with(condition, otherwise_series)] + return [ + value_series.zip_with(condition, otherwise_series).cast(value_series.dtype) + ] def then(self, value: PandasLikeExpr | PandasLikeSeries | Any) -> PandasThen: self._then_value = value From ee835c284df3bbbefd41c1491e99d57c15f64f61 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Fri, 16 Aug 2024 16:34:48 +0300 Subject: [PATCH 59/78] misc: fix typos --- narwhals/_pandas_like/namespace.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index d4367fd1d..65df8af1a 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -283,13 +283,13 @@ def __init__( implementation: Implementation, backend_version: tuple[int, ...], then_value: Any = None, - otherise_value: Any = None, + otherwise_value: Any = None, ) -> None: self._implementation = implementation self._backend_version = backend_version self._condition = condition self._then_value = then_value - self._otherwise_value = otherise_value + self._otherwise_value = otherwise_value def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: from narwhals._pandas_like.namespace import PandasLikeNamespace From 08e689ae5e47334fc1203f492097ab02cf091b05 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Fri, 16 Aug 2024 16:38:43 +0300 Subject: [PATCH 60/78] misc: remove unneeded assignment --- narwhals/_pandas_like/namespace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 65df8af1a..602efc887 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -268,7 +268,7 @@ def _when_then_value_arg_process( return value elif isinstance(value, PandasLikeSeries): return plx._create_expr_from_series(value) - elif (np := get_numpy()) is not None and isinstance(value, np.ndarray): + elif np is not None and isinstance(value, np.ndarray): return plx._create_expr_from_series(plx._create_compliant_series(value)) else: return plx._create_expr_from_series( From d8bc8b750e03323c954b93b6252f638fc38adcaa Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Fri, 16 Aug 2024 16:49:39 +0300 Subject: [PATCH 61/78] bug: integer type casting is harder, revert 28f55fb --- narwhals/_pandas_like/namespace.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 602efc887..f7641d4d8 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -307,9 +307,7 @@ def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: plx, self._otherwise_value, shape=condition.shape )._call(df)[0] - return [ - value_series.zip_with(condition, otherwise_series).cast(value_series.dtype) - ] + return [value_series.zip_with(condition, otherwise_series)] def then(self, value: PandasLikeExpr | PandasLikeSeries | Any) -> PandasThen: self._then_value = value From affcd7f09027cb16fd510d86572b4f13dad33b1f Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Fri, 16 Aug 2024 17:01:43 +0300 Subject: [PATCH 62/78] tests: add otherwise iterable tests --- tests/expr_and_series/when_test.py | 45 ++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 418cb7c57..005a78747 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -100,3 +100,48 @@ def test_value_expression(request: Any, constructor: Any) -> None: "a_when": [10, np.nan, np.nan], } compare_dicts(result, expected) + + +def test_otherwise_numpy_array(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor) or "dask" in str(constructor): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor(data)) + import numpy as np + + result = df.select( + when(nw.col("a") == 1) + .then(-1) + .otherwise(np.asanyarray([0, 9, 10])) + .alias("a_when") + ) + expected = { + "a_when": [-1, 9, 10], + } + compare_dicts(result, expected) + + +def test_otherwise_series(request: Any, constructor_eager: Any) -> None: + if "pyarrow_table" in str(constructor_eager): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor_eager(data)) + s_data = {"s": [0, 9, 10]} + s = nw.from_native(constructor_eager(s_data))["s"] + assert isinstance(s, nw.Series) + result = df.select(when(nw.col("a") == 1).then(-1).otherwise(s).alias("a_when")) + expected = { + "a_when": [-1, 9, 10], + } + compare_dicts(result, expected) + + +def test_otherwise_expression(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor) or "dask" in str(constructor): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor(data)) + result = df.select( + when(nw.col("a") == 1).then(-1).otherwise(nw.col("a") + 7).alias("a_when") + ) + expected = { + "a_when": [-1, 9, 10], + } + compare_dicts(result, expected) From 3b085cfea80d88c84bf5e88cccc788611cbafaaf Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Fri, 16 Aug 2024 18:30:59 +0300 Subject: [PATCH 63/78] tests: disable failing modin --- tests/expr_and_series/when_test.py | 33 ++++++++++++++++++++++++++++++ tests/series_only/zip_with_test.py | 7 ++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 005a78747..28d7a4e45 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -20,6 +20,9 @@ def test_when(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) + if "modin" in str(constructor): + msg = "modin has a known issue with casting #7364" + request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor(data)) result = df.select(when(nw.col("a") == 1).then(value=3).alias("a_when")) @@ -32,6 +35,9 @@ def test_when(request: Any, constructor: Any) -> None: def test_when_otherwise(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) + if "modin" in str(constructor): + msg = "modin has a known issue with casting #7364" + request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor(data)) result = df.select(when(nw.col("a") == 1).then(3).otherwise(6).alias("a_when")) @@ -44,6 +50,9 @@ def test_when_otherwise(request: Any, constructor: Any) -> None: def test_multiple_conditions(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) + if "modin" in str(constructor): + msg = "modin has a known issue with casting #7364" + request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor(data)) result = df.select(when(nw.col("a") < 3, nw.col("c") < 5.0).then(3).alias("a_when")) @@ -65,6 +74,10 @@ def test_no_arg_when_fail(request: Any, constructor: Any) -> None: def test_value_numpy_array(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) + if "modin" in str(constructor): + msg = "modin has a known issue with casting #7364" + request.applymarker(pytest.mark.xfail(reason=msg)) + df = nw.from_native(constructor(data)) import numpy as np @@ -80,6 +93,10 @@ def test_value_numpy_array(request: Any, constructor: Any) -> None: def test_value_series(request: Any, constructor_eager: Any) -> None: if "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) + if "modin" in str(constructor_eager): + msg = "modin has a known issue with casting #7364" + request.applymarker(pytest.mark.xfail(reason=msg)) + df = nw.from_native(constructor_eager(data)) s_data = {"s": [3, 4, 5]} s = nw.from_native(constructor_eager(s_data))["s"] @@ -94,6 +111,10 @@ def test_value_series(request: Any, constructor_eager: Any) -> None: def test_value_expression(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) + if "modin" in str(constructor): + msg = "modin has a known issue with casting #7364" + request.applymarker(pytest.mark.xfail(reason=msg)) + df = nw.from_native(constructor(data)) result = df.select(when(nw.col("a") == 1).then(nw.col("a") + 9).alias("a_when")) expected = { @@ -105,6 +126,10 @@ def test_value_expression(request: Any, constructor: Any) -> None: def test_otherwise_numpy_array(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) + if "modin" in str(constructor): + msg = "modin has a known issue with casting #7364" + request.applymarker(pytest.mark.xfail(reason=msg)) + df = nw.from_native(constructor(data)) import numpy as np @@ -123,6 +148,10 @@ def test_otherwise_numpy_array(request: Any, constructor: Any) -> None: def test_otherwise_series(request: Any, constructor_eager: Any) -> None: if "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) + if "modin" in str(constructor_eager): + msg = "modin has a known issue with casting #7364" + request.applymarker(pytest.mark.xfail(reason=msg)) + df = nw.from_native(constructor_eager(data)) s_data = {"s": [0, 9, 10]} s = nw.from_native(constructor_eager(s_data))["s"] @@ -137,6 +166,10 @@ def test_otherwise_series(request: Any, constructor_eager: Any) -> None: def test_otherwise_expression(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) + if "modin" in str(constructor): + msg = "modin has a known issue with casting #7364" + request.applymarker(pytest.mark.xfail(reason=msg)) + df = nw.from_native(constructor(data)) result = df.select( when(nw.col("a") == 1).then(-1).otherwise(nw.col("a") + 7).alias("a_when") diff --git a/tests/series_only/zip_with_test.py b/tests/series_only/zip_with_test.py index af7b26f12..f5f0cec1c 100644 --- a/tests/series_only/zip_with_test.py +++ b/tests/series_only/zip_with_test.py @@ -2,10 +2,15 @@ from typing import Any +import pytest + import narwhals.stable.v1 as nw -def test_zip_with(constructor_eager: Any) -> None: +def test_zip_with(request: Any, constructor_eager: Any) -> None: + if "modin" in str(constructor_eager): + msg = "modin has a known issue with casting #7364" + request.applymarker(pytest.mark.xfail(reason=msg)) series1 = nw.from_native(constructor_eager({"a": [1, 3, 2]}), eager_only=True)["a"] series2 = nw.from_native(constructor_eager({"a": [4, 4, 6]}), eager_only=True)["a"] mask = nw.from_native(constructor_eager({"a": [True, False, True]}), eager_only=True)[ From 1983a94eed2447958eab305c577eec733c4fc886 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Fri, 16 Aug 2024 22:49:36 +0300 Subject: [PATCH 64/78] misc: remove unnecesary `zip_with` fix --- narwhals/_pandas_like/namespace.py | 3 ++- narwhals/_pandas_like/series.py | 12 +----------- narwhals/utils.py | 22 ++++++++++++++++++---- tests/expr_and_series/when_test.py | 27 --------------------------- tests/series_only/zip_with_test.py | 7 +------ 5 files changed, 22 insertions(+), 49 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index f7641d4d8..ad81b58c3 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -7,6 +7,7 @@ from typing import Iterable from narwhals import dtypes +from narwhals import maybe_convert_dtypes from narwhals._expression_parsing import parse_into_exprs from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.expr import PandasLikeExpr @@ -307,7 +308,7 @@ def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: plx, self._otherwise_value, shape=condition.shape )._call(df)[0] - return [value_series.zip_with(condition, otherwise_series)] + return [maybe_convert_dtypes(value_series.zip_with(condition, otherwise_series))] def then(self, value: PandasLikeExpr | PandasLikeSeries | Any) -> PandasThen: self._then_value = value diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 2e1851dca..90434ebd5 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -581,17 +581,7 @@ def quantile( def zip_with(self: Self, mask: Any, other: Any) -> PandasLikeSeries: ser = self._native_series - other_ser = other._native_series - - null_mask = self.is_null() & mask - null_mask |= other.is_null() & ~mask - - other_sanitized = other_ser.where(~other.is_null()._native_series, ser).astype( - ser.dtype - ) - - res = ser.where(mask._native_series, other_sanitized).astype(ser.dtype) - res = res.where(~null_mask._native_series) + res = ser.where(mask._native_series, other._native_series) return self._from_native_series(res) def head(self: Self, n: int) -> Self: diff --git a/narwhals/utils.py b/narwhals/utils.py index cc2a482c4..a4d222d0a 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -302,10 +302,24 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: df_any = cast(Any, df) if isinstance(getattr(df_any, "_compliant_frame", None), PandasLikeDataFrame): - return df_any._from_compliant_dataframe( # type: ignore[no-any-return] - df_any._compliant_frame._from_native_frame( - df_any._compliant_frame._native_frame.convert_dtypes(*args, **kwargs) - ) + return cast( + T, + df_any._from_compliant_dataframe( + df_any._compliant_frame._from_native_frame( + df_any._compliant_frame._native_frame.convert_dtypes(*args, **kwargs) + ) + ), + ) + if isinstance(getattr(df_any, "_compliant_series", None), PandasLikeDataFrame): + return cast( + T, + df_any._compliant_series( + df_any._compliant_series._from_native_series( + df_any._compliant_series._native_series.convert_dtypes( + *args, **kwargs + ) + ) + ), ) return df diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 28d7a4e45..8cb9429ee 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -20,9 +20,6 @@ def test_when(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) - if "modin" in str(constructor): - msg = "modin has a known issue with casting #7364" - request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor(data)) result = df.select(when(nw.col("a") == 1).then(value=3).alias("a_when")) @@ -35,9 +32,6 @@ def test_when(request: Any, constructor: Any) -> None: def test_when_otherwise(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) - if "modin" in str(constructor): - msg = "modin has a known issue with casting #7364" - request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor(data)) result = df.select(when(nw.col("a") == 1).then(3).otherwise(6).alias("a_when")) @@ -50,9 +44,6 @@ def test_when_otherwise(request: Any, constructor: Any) -> None: def test_multiple_conditions(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) - if "modin" in str(constructor): - msg = "modin has a known issue with casting #7364" - request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor(data)) result = df.select(when(nw.col("a") < 3, nw.col("c") < 5.0).then(3).alias("a_when")) @@ -74,9 +65,6 @@ def test_no_arg_when_fail(request: Any, constructor: Any) -> None: def test_value_numpy_array(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) - if "modin" in str(constructor): - msg = "modin has a known issue with casting #7364" - request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor(data)) import numpy as np @@ -93,9 +81,6 @@ def test_value_numpy_array(request: Any, constructor: Any) -> None: def test_value_series(request: Any, constructor_eager: Any) -> None: if "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - if "modin" in str(constructor_eager): - msg = "modin has a known issue with casting #7364" - request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor_eager(data)) s_data = {"s": [3, 4, 5]} @@ -111,9 +96,6 @@ def test_value_series(request: Any, constructor_eager: Any) -> None: def test_value_expression(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) - if "modin" in str(constructor): - msg = "modin has a known issue with casting #7364" - request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor(data)) result = df.select(when(nw.col("a") == 1).then(nw.col("a") + 9).alias("a_when")) @@ -126,9 +108,6 @@ def test_value_expression(request: Any, constructor: Any) -> None: def test_otherwise_numpy_array(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) - if "modin" in str(constructor): - msg = "modin has a known issue with casting #7364" - request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor(data)) import numpy as np @@ -148,9 +127,6 @@ def test_otherwise_numpy_array(request: Any, constructor: Any) -> None: def test_otherwise_series(request: Any, constructor_eager: Any) -> None: if "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - if "modin" in str(constructor_eager): - msg = "modin has a known issue with casting #7364" - request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor_eager(data)) s_data = {"s": [0, 9, 10]} @@ -166,9 +142,6 @@ def test_otherwise_series(request: Any, constructor_eager: Any) -> None: def test_otherwise_expression(request: Any, constructor: Any) -> None: if "pyarrow_table" in str(constructor) or "dask" in str(constructor): request.applymarker(pytest.mark.xfail) - if "modin" in str(constructor): - msg = "modin has a known issue with casting #7364" - request.applymarker(pytest.mark.xfail(reason=msg)) df = nw.from_native(constructor(data)) result = df.select( diff --git a/tests/series_only/zip_with_test.py b/tests/series_only/zip_with_test.py index f5f0cec1c..af7b26f12 100644 --- a/tests/series_only/zip_with_test.py +++ b/tests/series_only/zip_with_test.py @@ -2,15 +2,10 @@ from typing import Any -import pytest - import narwhals.stable.v1 as nw -def test_zip_with(request: Any, constructor_eager: Any) -> None: - if "modin" in str(constructor_eager): - msg = "modin has a known issue with casting #7364" - request.applymarker(pytest.mark.xfail(reason=msg)) +def test_zip_with(constructor_eager: Any) -> None: series1 = nw.from_native(constructor_eager({"a": [1, 3, 2]}), eager_only=True)["a"] series2 = nw.from_native(constructor_eager({"a": [4, 4, 6]}), eager_only=True)["a"] mask = nw.from_native(constructor_eager({"a": [True, False, True]}), eager_only=True)[ From a2c6661ed9f2048bb407db086001bdcae6a97552 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Fri, 16 Aug 2024 23:13:48 +0300 Subject: [PATCH 65/78] feat: `maybe_convert_dtypes` now can take as `Series` --- narwhals/utils.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index a4d222d0a..6feb13dba 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -275,7 +275,7 @@ def maybe_set_index(df: T, column_names: str | list[str]) -> T: def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: """ - Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like. + Convert columns or series to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like. Notes: For non-pandas-like inputs, this is a no-op. @@ -299,6 +299,7 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: dtype: object """ from narwhals._pandas_like.dataframe import PandasLikeDataFrame + from narwhals._pandas_like.series import PandasLikeSeries df_any = cast(Any, df) if isinstance(getattr(df_any, "_compliant_frame", None), PandasLikeDataFrame): @@ -310,15 +311,12 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: ) ), ) - if isinstance(getattr(df_any, "_compliant_series", None), PandasLikeDataFrame): + + if isinstance(df_any, PandasLikeSeries): return cast( T, - df_any._compliant_series( - df_any._compliant_series._from_native_series( - df_any._compliant_series._native_series.convert_dtypes( - *args, **kwargs - ) - ) + df_any._from_native_series( + df_any._native_series.convert_dtypes(*args, **kwargs) ), ) return df From 0201428047eab8ed16af08614af63aaaaeb2c020 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Fri, 16 Aug 2024 23:25:30 +0300 Subject: [PATCH 66/78] docs: update stable api --- narwhals/stable/v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 3794cdfcb..1fc0a6fe7 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -1350,7 +1350,7 @@ def maybe_align_index(lhs: T, rhs: Series | DataFrame[Any] | LazyFrame[Any]) -> def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: """ - Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like. + Convert columns or series to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like. Notes: For non-pandas-like inputs, this is a no-op. From e6d6117b5fa92ba76d0992069124dee0bc776cae Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Sat, 17 Aug 2024 00:09:42 +0300 Subject: [PATCH 67/78] misc: resolve old version issue --- narwhals/utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 6feb13dba..8c05bb00a 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -313,11 +313,16 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: ) if isinstance(df_any, PandasLikeSeries): + from importlib.metadata import version + + pd_version = version("pandas").split(".") + if int(pd_version[0]) < 1: + ser = df_any._native_series.apply(lambda x: x) + else: + ser = df_any._native_series.convert_dtypes(*args, **kwargs) return cast( T, - df_any._from_native_series( - df_any._native_series.convert_dtypes(*args, **kwargs) - ), + df_any._from_native_series(ser), ) return df From dbf5b6e23aa8e83dacb32e7ab13b1c2139600927 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Sat, 17 Aug 2024 00:12:57 +0300 Subject: [PATCH 68/78] mics: improve coverage, slow perf on maybe convert --- narwhals/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 8c05bb00a..864c27733 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -316,9 +316,8 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: from importlib.metadata import version pd_version = version("pandas").split(".") - if int(pd_version[0]) < 1: - ser = df_any._native_series.apply(lambda x: x) - else: + ser = df_any._native_series.apply(lambda x: x) + if int(pd_version[0]) > 1: ser = df_any._native_series.convert_dtypes(*args, **kwargs) return cast( T, From fde2e9afc758ff4dc79c37324d164e88ef4974e8 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Sat, 17 Aug 2024 00:34:42 +0300 Subject: [PATCH 69/78] misc: reformat to not use importlib --- narwhals/utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 864c27733..929f77d98 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -313,16 +313,18 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: ) if isinstance(df_any, PandasLikeSeries): - from importlib.metadata import version + native_ser = df_any._native_series + ser = ( + native_ser.convert_dtypes(*args, **kwargs) + if getattr(native_ser, "convert_dtypes", None) + else native_ser.apply(lambda x: x) + ) - pd_version = version("pandas").split(".") - ser = df_any._native_series.apply(lambda x: x) - if int(pd_version[0]) > 1: - ser = df_any._native_series.convert_dtypes(*args, **kwargs) return cast( T, df_any._from_native_series(ser), ) + return df From 8efb260a19530bee4e83971e1f830a8de981c707 Mon Sep 17 00:00:00 2001 From: Alexander Ivanov Date: Wed, 21 Aug 2024 03:09:24 +0300 Subject: [PATCH 70/78] misc: ignore coverage --- narwhals/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 929f77d98..98c744d79 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -314,7 +314,7 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: if isinstance(df_any, PandasLikeSeries): native_ser = df_any._native_series - ser = ( + ser = ( # pragma: no cover native_ser.convert_dtypes(*args, **kwargs) if getattr(native_ser, "convert_dtypes", None) else native_ser.apply(lambda x: x) From 0599200a4f2a8b13d90a7d97e5f549b71c8b48bb Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 23 Aug 2024 16:08:12 +0100 Subject: [PATCH 71/78] wip --- narwhals/_pandas_like/namespace.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index fa22108cb..94c876de6 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -7,7 +7,6 @@ from typing import Iterable from narwhals import dtypes -from narwhals import maybe_convert_dtypes from narwhals._expression_parsing import parse_into_exprs from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.expr import PandasLikeExpr @@ -280,9 +279,10 @@ def _when_then_value_arg_process( elif np is not None and isinstance(value, np.ndarray): return plx._create_expr_from_series(plx._create_compliant_series(value)) else: - return plx._create_expr_from_series( - plx._create_compliant_series(np.full(shape[0], value)) + msg = ( + "Cannot pass a scalar value to the `then` predicate of `when-then-otherwise`" ) + raise TypeError(msg) class PandasWhen: @@ -301,22 +301,33 @@ def __init__( self._otherwise_value = otherwise_value def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: + from narwhals._expression_parsing import parse_into_expr from narwhals._pandas_like.namespace import PandasLikeNamespace plx = PandasLikeNamespace( implementation=self._implementation, backend_version=self._backend_version ) + # what if `condition` was a `Series`? do we handle that? condition = self._condition._call(df)[0] - value_series = _when_then_value_arg_process( - plx, self._then_value, shape=condition.shape - )._call(df)[0] + # strategy: + # - if it's a string, then parse it as `nw.col(self._then_value)` + # - if it's a scalar (e.g. int), then just leave it as a scalar + # - if it's an expression, then...resolve it! + breakpoint() + value_series = parse_into_expr(self._then_value, namespace=plx)._call(df)[0] + if self._otherwise_value is None: + return [ + value_series._from_native_series( + value_series._native_series.where(condition._native_series) + ) + ] otherwise_series = _when_then_value_arg_process( plx, self._otherwise_value, shape=condition.shape )._call(df)[0] - return [maybe_convert_dtypes(value_series.zip_with(condition, otherwise_series))] + return [value_series.zip_with(condition, otherwise_series)] def then(self, value: PandasLikeExpr | PandasLikeSeries | Any) -> PandasThen: self._then_value = value From a90fbde88d4853ab58f6a16ef184607cefa6d85b Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 23 Aug 2024 16:26:37 +0100 Subject: [PATCH 72/78] wip --- narwhals/_pandas_like/namespace.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 94c876de6..afed5fb61 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -315,7 +315,7 @@ def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: # - if it's a string, then parse it as `nw.col(self._then_value)` # - if it's a scalar (e.g. int), then just leave it as a scalar # - if it's an expression, then...resolve it! - breakpoint() + # todo: raise if the evaluated expression has multiple outputs value_series = parse_into_expr(self._then_value, namespace=plx)._call(df)[0] if self._otherwise_value is None: return [ @@ -323,9 +323,7 @@ def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: value_series._native_series.where(condition._native_series) ) ] - otherwise_series = _when_then_value_arg_process( - plx, self._otherwise_value, shape=condition.shape - )._call(df)[0] + breakpoint() return [value_series.zip_with(condition, otherwise_series)] From 3bb2629b8b8f7647ad005892c43171950bdd51e8 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 23 Aug 2024 21:31:19 +0100 Subject: [PATCH 73/78] wip --- narwhals/_pandas_like/namespace.py | 59 ++++++++++++------------------ 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 238f89e7b..ed7d4264d 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -270,30 +270,6 @@ def when( return PandasWhen(condition, self._implementation, self._backend_version) -def _when_then_value_arg_process( - plx: PandasLikeNamespace, - value: PandasLikeExpr | PandasLikeSeries | Any, - *, - shape: tuple[int], -) -> PandasLikeExpr: - from narwhals.dependencies import get_numpy - - # NumPy is a required dependency of pandas - np = get_numpy() - - if isinstance(value, PandasLikeExpr): - return value - elif isinstance(value, PandasLikeSeries): - return plx._create_expr_from_series(value) - elif np is not None and isinstance(value, np.ndarray): - return plx._create_expr_from_series(plx._create_compliant_series(value)) - else: - msg = ( - "Cannot pass a scalar value to the `then` predicate of `when-then-otherwise`" - ) - raise TypeError(msg) - - class PandasWhen: def __init__( self, @@ -312,29 +288,42 @@ def __init__( def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: from narwhals._expression_parsing import parse_into_expr from narwhals._pandas_like.namespace import PandasLikeNamespace + from narwhals._pandas_like.utils import broadcast_series plx = PandasLikeNamespace( implementation=self._implementation, backend_version=self._backend_version ) - # what if `condition` was a `Series`? do we handle that? - condition = self._condition._call(df)[0] + condition = parse_into_expr(self._condition, namespace=plx)._call(df)[0] + try: + value_series = parse_into_expr(self._then_value, namespace=plx)._call(df)[0] + except TypeError: + # `self._otherwise_value` is a scalar and can't be converted to an expression + value_series = condition.__class__._from_iterable( + [self._then_value]*len(condition), + name='literal', + index=condition._native_series.index, + implementation=self._implementation, + backend_version=self._backend_version, + ) - # strategy: - # - if it's a string, then parse it as `nw.col(self._then_value)` - # - if it's a scalar (e.g. int), then just leave it as a scalar - # - if it's an expression, then...resolve it! - # TODO: raise if the evaluated expression has multiple outputs - value_series = parse_into_expr(self._then_value, namespace=plx)._call(df)[0] if self._otherwise_value is None: return [ value_series._from_native_series( value_series._native_series.where(condition._native_series) ) ] - breakpoint() - - return [value_series.zip_with(condition, otherwise_series)] + try: + otherwise_series = parse_into_expr(self._otherwise_value, namespace=plx, pass_through=True)._call(df)[0] + except TypeError: + # `self._otherwise_value` is a scalar and can't be converted to an expression + return [ + value_series._from_native_series( + value_series._native_series.where(condition._native_series, self._otherwise_value) + ) + ] + else: + return [value_series.zip_with(condition, otherwise_series)] def then(self, value: PandasLikeExpr | PandasLikeSeries | Any) -> PandasThen: self._then_value = value From eb81758e48f00144a9cfc411b18600f896f51295 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 23 Aug 2024 21:45:09 +0100 Subject: [PATCH 74/78] wip --- narwhals/_pandas_like/namespace.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index ed7d4264d..17b40a73e 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -288,7 +288,7 @@ def __init__( def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: from narwhals._expression_parsing import parse_into_expr from narwhals._pandas_like.namespace import PandasLikeNamespace - from narwhals._pandas_like.utils import broadcast_series + from narwhals._pandas_like.utils import validate_column_comparand plx = PandasLikeNamespace( implementation=self._implementation, backend_version=self._backend_version @@ -300,26 +300,31 @@ def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: except TypeError: # `self._otherwise_value` is a scalar and can't be converted to an expression value_series = condition.__class__._from_iterable( - [self._then_value]*len(condition), - name='literal', + [self._then_value] * len(condition), + name="literal", index=condition._native_series.index, implementation=self._implementation, backend_version=self._backend_version, ) + value_series_native = value_series._native_series + condition_native = validate_column_comparand(value_series_native.index, condition) + if self._otherwise_value is None: return [ value_series._from_native_series( - value_series._native_series.where(condition._native_series) + value_series_native.where(condition_native) ) ] try: - otherwise_series = parse_into_expr(self._otherwise_value, namespace=plx, pass_through=True)._call(df)[0] + otherwise_series = parse_into_expr( + self._otherwise_value, namespace=plx, pass_through=True + )._call(df)[0] except TypeError: # `self._otherwise_value` is a scalar and can't be converted to an expression return [ value_series._from_native_series( - value_series._native_series.where(condition._native_series, self._otherwise_value) + value_series_native.where(condition_native, self._otherwise_value) ) ] else: From 9654147bac9eea2ad0e79a2d09a6f67f6b4ff1cb Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 23 Aug 2024 21:52:49 +0100 Subject: [PATCH 75/78] fixup tests --- narwhals/_pandas_like/namespace.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 17b40a73e..ac19ec18a 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -5,6 +5,7 @@ from typing import Any from typing import Callable from typing import Iterable +from typing import cast from narwhals import dtypes from narwhals._expression_parsing import parse_into_exprs @@ -294,18 +295,19 @@ def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: implementation=self._implementation, backend_version=self._backend_version ) - condition = parse_into_expr(self._condition, namespace=plx)._call(df)[0] + condition = parse_into_expr(self._condition, namespace=plx)._call(df)[0] # type: ignore[arg-type] try: - value_series = parse_into_expr(self._then_value, namespace=plx)._call(df)[0] + value_series = parse_into_expr(self._then_value, namespace=plx)._call(df)[0] # type: ignore[arg-type] except TypeError: # `self._otherwise_value` is a scalar and can't be converted to an expression - value_series = condition.__class__._from_iterable( + value_series = condition.__class__._from_iterable( # type: ignore[call-arg] [self._then_value] * len(condition), name="literal", index=condition._native_series.index, implementation=self._implementation, backend_version=self._backend_version, ) + value_series = cast(PandasLikeSeries, value_series) value_series_native = value_series._native_series condition_native = validate_column_comparand(value_series_native.index, condition) @@ -318,8 +320,8 @@ def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]: ] try: otherwise_series = parse_into_expr( - self._otherwise_value, namespace=plx, pass_through=True - )._call(df)[0] + self._otherwise_value, namespace=plx + )._call(df)[0] # type: ignore[arg-type] except TypeError: # `self._otherwise_value` is a scalar and can't be converted to an expression return [ From 2bc5cc0534bcdb491d0742e9cdaed53908deec64 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 23 Aug 2024 21:56:13 +0100 Subject: [PATCH 76/78] nw.when --- narwhals/expr.py | 1 + narwhals/stable/v1.py | 1 + tests/expr_and_series/when_test.py | 23 ++++++++++++----------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 5e2a5ae88..cc8b86956 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3980,6 +3980,7 @@ def otherwise(self, value: Any) -> Expr: def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: """ Start a `when-then-otherwise` expression. + Expression similar to an `if-else` statement in Python. Always initiated by a `pl.when().then()`., and optionally followed by chaining one or more `.when().then()` statements. Chained when-then operations should be read as Python `if, elif, ... elif` blocks, not as `if, if, ... if`, i.e. the first condition that evaluates to `True` will be picked. If none of the conditions are `True`, an optional `.otherwise()` can be appended at the end. If not appended, and none of the conditions are `True`, `None` will be returned. diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 264395687..9c7c2d155 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -1517,6 +1517,7 @@ def otherwise(self, value: Any) -> Expr: def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: """ Start a `when-then-otherwise` expression. + Expression similar to an `if-else` statement in Python. Always initiated by a `pl.when().then()`., and optionally followed by chaining one or more `.when().then()` statements. Chained when-then operations should be read as Python `if, elif, ... elif` blocks, not as `if, if, ... if`, i.e. the first condition that evaluates to `True` will be picked. If none of the conditions are `True`, an optional `.otherwise()` can be appended at the end. If not appended, and none of the conditions are `True`, `None` will be returned. diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 8cb9429ee..023b10bc2 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -6,7 +6,6 @@ import pytest import narwhals.stable.v1 as nw -from narwhals.stable.v1 import when from tests.utils import compare_dicts data = { @@ -22,7 +21,7 @@ def test_when(request: Any, constructor: Any) -> None: request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) - result = df.select(when(nw.col("a") == 1).then(value=3).alias("a_when")) + result = df.select(nw.when(nw.col("a") == 1).then(value=3).alias("a_when")) expected = { "a_when": [3, np.nan, np.nan], } @@ -34,7 +33,7 @@ def test_when_otherwise(request: Any, constructor: Any) -> None: request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) - result = df.select(when(nw.col("a") == 1).then(3).otherwise(6).alias("a_when")) + result = df.select(nw.when(nw.col("a") == 1).then(3).otherwise(6).alias("a_when")) expected = { "a_when": [3, 6, 6], } @@ -46,7 +45,9 @@ def test_multiple_conditions(request: Any, constructor: Any) -> None: request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) - result = df.select(when(nw.col("a") < 3, nw.col("c") < 5.0).then(3).alias("a_when")) + result = df.select( + nw.when(nw.col("a") < 3, nw.col("c") < 5.0).then(3).alias("a_when") + ) expected = { "a_when": [3, np.nan, np.nan], } @@ -59,7 +60,7 @@ def test_no_arg_when_fail(request: Any, constructor: Any) -> None: df = nw.from_native(constructor(data)) with pytest.raises((TypeError, ValueError)): - df.select(when().then(value=3).alias("a_when")) + df.select(nw.when().then(value=3).alias("a_when")) def test_value_numpy_array(request: Any, constructor: Any) -> None: @@ -70,7 +71,7 @@ def test_value_numpy_array(request: Any, constructor: Any) -> None: import numpy as np result = df.select( - when(nw.col("a") == 1).then(np.asanyarray([3, 4, 5])).alias("a_when") + nw.when(nw.col("a") == 1).then(np.asanyarray([3, 4, 5])).alias("a_when") ) expected = { "a_when": [3, np.nan, np.nan], @@ -86,7 +87,7 @@ def test_value_series(request: Any, constructor_eager: Any) -> None: s_data = {"s": [3, 4, 5]} s = nw.from_native(constructor_eager(s_data))["s"] assert isinstance(s, nw.Series) - result = df.select(when(nw.col("a") == 1).then(s).alias("a_when")) + result = df.select(nw.when(nw.col("a") == 1).then(s).alias("a_when")) expected = { "a_when": [3, np.nan, np.nan], } @@ -98,7 +99,7 @@ def test_value_expression(request: Any, constructor: Any) -> None: request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) - result = df.select(when(nw.col("a") == 1).then(nw.col("a") + 9).alias("a_when")) + result = df.select(nw.when(nw.col("a") == 1).then(nw.col("a") + 9).alias("a_when")) expected = { "a_when": [10, np.nan, np.nan], } @@ -113,7 +114,7 @@ def test_otherwise_numpy_array(request: Any, constructor: Any) -> None: import numpy as np result = df.select( - when(nw.col("a") == 1) + nw.when(nw.col("a") == 1) .then(-1) .otherwise(np.asanyarray([0, 9, 10])) .alias("a_when") @@ -132,7 +133,7 @@ def test_otherwise_series(request: Any, constructor_eager: Any) -> None: s_data = {"s": [0, 9, 10]} s = nw.from_native(constructor_eager(s_data))["s"] assert isinstance(s, nw.Series) - result = df.select(when(nw.col("a") == 1).then(-1).otherwise(s).alias("a_when")) + result = df.select(nw.when(nw.col("a") == 1).then(-1).otherwise(s).alias("a_when")) expected = { "a_when": [-1, 9, 10], } @@ -145,7 +146,7 @@ def test_otherwise_expression(request: Any, constructor: Any) -> None: df = nw.from_native(constructor(data)) result = df.select( - when(nw.col("a") == 1).then(-1).otherwise(nw.col("a") + 7).alias("a_when") + nw.when(nw.col("a") == 1).then(-1).otherwise(nw.col("a") + 7).alias("a_when") ) expected = { "a_when": [-1, 9, 10], From 54d34d7d2208e7a6297400d91ac62918ff122476 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Aug 2024 09:45:57 +0100 Subject: [PATCH 77/78] drive-by: remove python_version from pyproject.toml for optional dependencies --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a9a1c5394..b43184559 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,12 +26,12 @@ exclude = [ ] [project.optional-dependencies] -cudf = ["cudf>=23.08.00; python_version >= '3.9'"] +cudf = ["cudf>=23.08.00"] modin = ["modin"] pandas = ["pandas>=0.25.3"] polars = ["polars>=0.20.3"] pyarrow = ["pyarrow>=11.0.0"] -dask = ["dask[dataframe]>=2024.7; python_version >= '3.9'"] +dask = ["dask[dataframe]>=2024.7"] [project.urls] "Homepage" = "https://github.com/narwhals-dev/narwhals" From de4abfa73e731da034be4df68eac799bcb91172b Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Aug 2024 09:49:04 +0100 Subject: [PATCH 78/78] add test which covers into_expr --- tests/expr_and_series/when_test.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 023b10bc2..741c676a8 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -13,6 +13,7 @@ "b": ["a", "b", "c"], "c": [4.1, 5.0, 6.0], "d": [True, False, True], + "e": [7.0, 2.0, 1.1], } @@ -152,3 +153,13 @@ def test_otherwise_expression(request: Any, constructor: Any) -> None: "a_when": [-1, 9, 10], } compare_dicts(result, expected) + + +def test_when_then_otherwise_into_expr(request: Any, constructor: Any) -> None: + if "pyarrow_table" in str(constructor) or "dask" in str(constructor): + request.applymarker(pytest.mark.xfail) + + df = nw.from_native(constructor(data)) + result = df.select(nw.when(nw.col("a") > 1).then("c").otherwise("e")) + expected = {"c": [7, 5, 6]} + compare_dicts(result, expected)