Merge pull request #10 from raisadz/increase-coverage

MarcoGorelli · web-flow · commit 2e8db110d376 · 2024-03-16T20:34:01.000Z
Increase coverage for narwhals.dataframe
diff --git a/tests/test_common.py b/tests/test_common.py
@@ -2,6 +2,7 @@
 
 from typing import Any
 
+import numpy as np
 import pandas as pd
 import polars as pl
 import pytest
@@ -11,11 +12,12 @@
 
 df_pandas = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
 df_polars = pl.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
+df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
 
 
 @pytest.mark.parametrize(
     "df_raw",
-    [df_pandas, df_polars],
+    [df_pandas, df_polars, df_lazy],
 )
 def test_sort(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
@@ -31,7 +33,7 @@ def test_sort(df_raw: Any) -> None:
 
 @pytest.mark.parametrize(
     "df_raw",
-    [df_pandas, df_polars],
+    [df_pandas, df_polars, df_lazy],
 )
 def test_filter(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
@@ -43,7 +45,7 @@ def test_filter(df_raw: Any) -> None:
 
 @pytest.mark.parametrize(
     "df_raw",
-    [df_pandas, df_polars],
+    [df_pandas, df_polars, df_lazy],
 )
 def test_add(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
@@ -64,7 +66,7 @@ def test_add(df_raw: Any) -> None:
 
 @pytest.mark.parametrize(
     "df_raw",
-    [df_pandas, df_polars],
+    [df_pandas, df_polars, df_lazy],
 )
 def test_double(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
@@ -74,7 +76,7 @@ def test_double(df_raw: Any) -> None:
     compare_dicts(result_native, expected)
 
 
-@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
 def test_sumh(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
     result = df.with_columns(horizonal_sum=nw.sum_horizontal(nw.col("a"), nw.col("b")))
@@ -88,7 +90,7 @@ def test_sumh(df_raw: Any) -> None:
     compare_dicts(result_native, expected)
 
 
-@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
 def test_sumh_literal(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
     result = df.with_columns(horizonal_sum=nw.sum_horizontal("a", nw.col("b")))
@@ -102,7 +104,7 @@ def test_sumh_literal(df_raw: Any) -> None:
     compare_dicts(result_native, expected)
 
 
-@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
 def test_sum_all(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
     result = df.select(nw.all().sum())
@@ -111,10 +113,55 @@ def test_sum_all(df_raw: Any) -> None:
     compare_dicts(result_native, expected)
 
 
-@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
 def test_double_selected(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
     result = df.select(nw.col("a", "b") * 2)
     result_native = nw.to_native(result)
     expected = {"a": [2, 6, 4], "b": [8, 8, 12]}
     compare_dicts(result_native, expected)
+
+
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])  # pandas, polars eager and polars lazy inputs
+def test_rename(df_raw: Any) -> None:  # rename() with a partial old-name -> new-name mapping
+    df = nw.DataFrame(df_raw)
+    result = df.rename({"a": "x", "b": "y"})  # "z" is deliberately absent from the mapping
+    result_native = nw.to_native(result)
+    expected = {"x": [1, 3, 2], "y": [4, 4, 6], "z": [7.0, 8, 9]}  # same data; only "a" and "b" change name
+    compare_dicts(result_native, expected)
+
+
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])  # pandas, polars eager and polars lazy inputs
+def test_join(df_raw: Any) -> None:  # inner join of a frame with a renamed copy of itself
+    df = nw.DataFrame(df_raw)
+    df_right = df.rename({"z": "z_right"})  # same rows as df, with "z" exposed as "z_right"
+    result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner")  # keys are the pair ("a", "b")
+    result_native = nw.to_native(result)
+    expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "z_right": [7.0, 8, 9]}  # "z_right" mirrors "z"
+    compare_dicts(result_native, expected)
+
+
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])  # pandas, polars eager and polars lazy inputs
+def test_schema(df_raw: Any) -> None:  # .schema maps column names to narwhals dtypes
+    df = nw.DataFrame(df_raw)
+    result = df.schema
+    expected = {"a": nw.dtypes.Int64, "b": nw.dtypes.Int64, "z": nw.dtypes.Float64}  # one expectation for all three inputs
+    assert result == expected
+
+
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])  # pandas, polars eager and polars lazy inputs
+def test_columns(df_raw: Any) -> None:  # .columns yields the column names in order
+    df = nw.DataFrame(df_raw)
+    result = df.columns
+    expected = ["a", "b", "z"]
+    assert len(result) == len(expected)  # checked separately: the zip() below would not notice a length mismatch
+    assert all(x == y for x, y in zip(result, expected))  # pairwise, order-sensitive comparison
+
+
+def test_accepted_dataframes() -> None:  # constructing nw.DataFrame from a NumPy array must raise TypeError
+    array = np.array([[0, 4.0], [2, 5]])  # 2x2 ndarray used as the rejected input
+    with pytest.raises(
+        TypeError,
+        match="Expected pandas or Polars dataframe or lazyframe, got: <class 'numpy.ndarray'>",  # `match` is applied as a regex search on the message
+    ):
+        nw.DataFrame(array)  # the call under test
diff --git a/tests/utils.py b/tests/utils.py
@@ -1,12 +1,16 @@
-from __future__ import annotations
-
-from typing import Any
-
-
-def compare_dicts(result: dict[str, Any], expected: dict[str, Any]) -> None:
-    for key in expected:
-        for lhs, rhs in zip(result[key], expected[key]):
-            if isinstance(lhs, float):
-                assert abs(lhs - rhs) < 1e-6
-            else:
-                assert lhs == rhs
+from __future__ import annotations
+
+from typing import Any
+
+import polars as pl
+
+
+def compare_dicts(result: dict[str, Any], expected: dict[str, Any]) -> None:  # assert each column named in `expected` matches `result`
+    if isinstance(result, pl.LazyFrame):  # NOTE(review): annotation says dict, yet a polars LazyFrame is handled here
+        result = result.collect()  # collect the lazy frame before the result[key] lookups below
+    for key in expected:  # only keys of `expected` are checked; other keys in `result` are ignored
+        for lhs, rhs in zip(result[key], expected[key]):  # NOTE(review): zip stops at the shorter side; length mismatches pass silently
+            if isinstance(lhs, float):
+                assert abs(lhs - rhs) < 1e-6  # floats: absolute tolerance of 1e-6
+            else:
+                assert lhs == rhs  # non-floats: exact equality