Skip to content

Commit f5ec176

Browse files
committed
this it?
1 parent 2bf5c2b commit f5ec176

File tree

8 files changed

+77
-102
lines changed

8 files changed

+77
-102
lines changed

demo.py

+10-11
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,24 @@
66

77

88
def func(df_raw: Any) -> Any:
9-
df = nw.NarwhalsFrame(df_raw)
10-
11-
print(df)
9+
df = nw.DataFrame(df_raw)
1210
res = df.with_columns(
1311
d=nw.col("a") + 1,
1412
e=nw.col("a") + nw.col("b"),
1513
)
16-
17-
res = res.group_by("a").agg(nw.col("b").sum())
18-
print(res)
19-
14+
res = res.group_by(["a"]).agg(
15+
nw.col("b").sum(),
16+
d=nw.col("c").sum(),
17+
# e=nw.len(),
18+
)
2019
return nw.to_native(res)
2120

2221

2322
import pandas as pd
2423

25-
# df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
26-
# print(func(df))
27-
df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
24+
df = pd.DataFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
25+
print(func(df))
26+
df = pl.DataFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
2827
print(func(df))
29-
df = pl.LazyFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
28+
df = pl.LazyFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
3029
print(func(df).collect())

narwhals/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from narwhals.containers import is_pandas
44
from narwhals.containers import is_polars
55
from narwhals.containers import is_series
6-
from narwhals.dataframe import NarwhalsFrame
6+
from narwhals.dataframe import DataFrame
77
from narwhals.expression import col
88
from narwhals.expression import len
99
from narwhals.translate import get_namespace
@@ -27,5 +27,5 @@
2727
"to_native",
2828
"col",
2929
"len",
30-
"NarwhalsFrame",
30+
"DataFrame",
3131
]

narwhals/dataframe.py

+43-65
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
from __future__ import annotations
22

3-
from narwhals.pandas_like.utils import evaluate_into_exprs
3+
from narwhals.pandas_like.dataframe import PandasDataFrame
4+
from narwhals.polars import PolarsDataFrame
45
from narwhals.translate import get_pandas
56
from narwhals.translate import get_polars
67

78

89
def extract_native(obj: Any, implementation) -> Any:
910
from narwhals.expression import NarwhalsExpr
11+
from narwhals.series import Series
1012

1113
# if isinstance(obj, NarwhalsExpr):
1214
# return obj._call(pl.col)
@@ -17,14 +19,16 @@ def extract_native(obj: Any, implementation) -> Any:
1719
return obj._call(pl.col)
1820
# if isinstance(obj, DType):
1921
# return obj._dtype
20-
if isinstance(obj, NarwhalsFrame):
22+
if isinstance(obj, DataFrame):
2123
return obj._dataframe
24+
if isinstance(obj, Series):
25+
return obj._series
2226
# if isinstance(obj, PolarsSeries):
2327
# return obj._series
2428
return obj
2529

2630

27-
class NarwhalsFrame:
31+
class DataFrame:
2832
def __init__(
2933
self, df, *, is_eager=False, is_lazy=False, implementation: str | None = None
3034
):
@@ -35,24 +39,14 @@ def __init__(
3539
self._implementation = implementation
3640
return
3741
if (pl := get_polars()) is not None:
38-
if isinstance(df, pl.DataFrame):
39-
if is_lazy:
40-
raise ValueError(
41-
"can't instantiate with `is_lazy` if you pass a polars DataFrame"
42-
)
43-
self._dataframe = df
44-
self._implementation = "polars"
45-
return
46-
elif isinstance(df, pl.LazyFrame):
47-
if is_eager:
48-
raise ValueError(
49-
"can't instantiate with `is_eager` if you pass a polars LazyFrame"
50-
)
51-
self._dataframe = df
42+
if isinstance(df, (pl.DataFrame, pl.LazyFrame)):
43+
self._dataframe = PolarsDataFrame(df, is_eager=is_eager, is_lazy=is_lazy)
5244
self._implementation = "polars"
5345
return
5446
if (pd := get_pandas()) is not None and isinstance(df, pd.DataFrame):
55-
self._dataframe = df
47+
self._dataframe = PandasDataFrame(
48+
df, is_eager=is_eager, is_lazy=is_lazy, implementation="pandas"
49+
)
5650
self._implementation = "pandas"
5751
return
5852
raise TypeError(
@@ -68,73 +62,57 @@ def _from_dataframe(self, df: Any) -> Self:
6862
implementation=self._implementation,
6963
)
7064

71-
def _extract_native(self, obj):
72-
return extract_native(obj, implementation=self._implementation)
73-
7465
def with_columns(
7566
self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
7667
) -> Self:
77-
if self._implementation == "polars":
78-
return self._from_dataframe(
79-
self._dataframe.with_columns(
80-
*[self._extract_native(v) for v in exprs],
81-
**{
82-
key: self._extract_native(value)
83-
for key, value in named_exprs.items()
84-
},
85-
)
86-
)
87-
elif self._implementation == "pandas":
88-
new_series = evaluate_into_exprs(self, *exprs, **named_exprs)
89-
df = self._dataframe.assign(
90-
**{series.name: series._series for series in new_series}
91-
)
92-
return self._from_dataframe(df)
68+
return self._from_dataframe(
69+
self._dataframe.with_columns(*exprs, **named_exprs),
70+
)
9371

9472
def filter(self, *predicates: IntoExpr | Iterable[IntoExpr]) -> Self:
9573
return self._from_dataframe(
96-
self._dataframe.filter(*[self._extract_native(v) for v in predicates])
74+
self._dataframe.filter(*predicates),
9775
)
9876

9977
def group_by(self, *keys: str | Iterable[str]) -> GroupBy:
10078
from narwhals.group_by import NarwhalsGroupBy
10179

102-
return NarwhalsGroupBy(
103-
self,
104-
*keys,
105-
is_eager=self._is_eager,
106-
is_lazy=self._is_lazy,
107-
)
80+
return NarwhalsGroupBy(self, *keys)
10881

10982
def sort(
11083
self,
11184
by: str | Iterable[str],
11285
*more_by: str,
11386
descending: bool | Sequence[bool] = False,
11487
) -> Self:
115-
if self._implementation == "polars":
116-
return self._from_dataframe(
117-
self._dataframe.sort(by, *more_by, descending=descending)
118-
)
88+
return self._from_dataframe(
89+
self._dataframe.sort(by, *more_by, descending=descending)
90+
)
11991

12092
def collect(self) -> Self:
121-
if not self._is_lazy:
122-
raise RuntimeError(
123-
"DataFrame.collect can only be called if frame was instantiated with `is_lazy=True`"
124-
)
125-
if self._implementation == "polars":
126-
import polars as pl
127-
128-
assert isinstance(self._dataframe, pl.LazyFrame)
129-
return self.__class__(self._dataframe.collect(), is_eager=True, is_lazy=False)
93+
return self.__class__(
94+
self._dataframe.collect(),
95+
is_eager=True,
96+
is_lazy=False,
97+
implementation=self._implementation,
98+
)
13099

131100
def to_dict(self, *, as_series: bool = True) -> dict[str, Any]:
132-
if not self._is_eager:
133-
raise RuntimeError(
134-
"DataFrame.to_dict can only be called if frame was instantiated with `is_eager=True`"
135-
)
136-
if self._implementation == "polars":
137-
import polars as pl
101+
return self._dataframe.to_dict(as_series=as_series)
138102

139-
assert isinstance(self._dataframe, pl.DataFrame)
140-
return self._dataframe.to_dict(as_series=as_series)
103+
def join(
    self,
    other: Self,
    *,
    how: Literal["inner"] = "inner",
    left_on: str | list[str],
    right_on: str | list[str],
) -> Self:
    """Join this frame with ``other`` and wrap the result.

    The work is delegated to the ``join`` method of the object stored in
    ``self._dataframe``; the right-hand operand handed to it is
    ``other._dataframe``, not ``other`` itself.

    Args:
        other: Frame to join against. Only its ``_dataframe`` attribute
            is read.
        how: Join strategy, forwarded unchanged. The signature declares
            ``"inner"`` as the only accepted value.
        left_on: Column name(s) on this frame, forwarded unchanged.
        right_on: Column name(s) on ``other``, forwarded unchanged.

    Returns:
        Whatever ``self._from_dataframe`` returns for the joined result.
    """
    # `how` must be spelled Literal["inner"]: a bare `inner` inside the
    # subscript is an undefined name and cannot be resolved by type
    # checkers or by runtime annotation introspection.
    joined = self._dataframe.join(
        other._dataframe,
        how=how,
        left_on=left_on,
        right_on=right_on,
    )
    return self._from_dataframe(joined)

narwhals/expression.py

+15-17
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
from typing import Any
44

5-
from narwhals.translate import get_polars
6-
75

86
def extract_native(expr, other: Any) -> Any:
97
if isinstance(other, NarwhalsExpr):
@@ -97,46 +95,46 @@ def sum(self) -> Expr:
9795
return self.__class__(lambda expr: self._call(expr).sum())
9896

9997
def min(self) -> Expr:
100-
return self.__class__(self._expr.min())
98+
return self.__class__(lambda expr: self._call(expr).min())
10199

102100
def max(self) -> Expr:
103-
return self.__class__(self._expr.max())
101+
return self.__class__(lambda expr: self._call(expr).max())
104102

105103
def n_unique(self) -> Expr:
106-
return self.__class__(self._expr.n_unique())
104+
return self.__class__(lambda expr: self._call(expr).n_unique())
107105

108106
def unique(self) -> Expr:
109-
return self.__class__(self._expr.unique())
107+
return self.__class__(lambda expr: self._call(expr).unique())
110108

111109
# --- transform ---
112110
def is_between(
113111
self, lower_bound: Any, upper_bound: Any, closed: str = "both"
114112
) -> Expr:
115-
return self.__class__(self._expr.is_between(lower_bound, upper_bound, closed)) # type: ignore[arg-type]
113+
return self.__class__(
114+
lambda expr: self._call(expr).is_between(lower_bound, upper_bound, closed)
115+
) # type: ignore[arg-type]
116116

117117
def is_in(self, other: Any) -> Expr:
118-
return self.__class__(self._expr.is_in(other))
118+
return self.__class__(lambda expr: self._call(expr).is_in(other))
119119

120120
def is_null(self) -> Expr:
121-
return self.__class__(self._expr.is_null())
121+
return self.__class__(lambda expr: self._call(expr).is_null())
122122

123123
# --- partial reduction ---
124124
def drop_nulls(self) -> Expr:
125-
return self.__class__(self._expr.drop_nulls())
125+
return self.__class__(lambda expr: self._call(expr).drop_nulls())
126126

127127
def sample(self, n: int, fraction: float, *, with_replacement: bool) -> Expr:
128128
return self.__class__(
129-
self._expr.sample(n, fraction=fraction, with_replacement=with_replacement)
129+
lambda expr: self._call(expr).sample(
130+
n, fraction=fraction, with_replacement=with_replacement
131+
)
130132
)
131133

132134

133135
def col(col_name: str):
134-
return NarwhalsExpr(lambda expr: expr(col_name))
136+
return NarwhalsExpr(lambda plx: plx.col(col_name))
135137

136138

137139
def len():
138-
def func(expr):
139-
if (pl := get_polars()) is not None and issubclass(expr, pl.col):
140-
return pl.len()
141-
142-
return NarwhalsExpr(func)
140+
return NarwhalsExpr(lambda plx: plx.len())

narwhals/pandas_like/utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def parse_into_expr(implementation: str, into_expr: IntoExpr) -> Expr:
105105
plx = Namespace(implementation=implementation)
106106

107107
if isinstance(into_expr, NarwhalsExpr):
108-
return into_expr._call(plx.col)
108+
return into_expr._call(plx)
109109
if isinstance(into_expr, str):
110110
return plx.col(into_expr)
111111
if isinstance(into_expr, Expr):

narwhals/polars.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def extract_native(obj: Any) -> Any:
2929
from narwhals.expression import NarwhalsExpr
3030

3131
if isinstance(obj, NarwhalsExpr):
32-
return obj._call(pl.col)
32+
return obj._call(pl)
3333
if isinstance(obj, Expr):
3434
return obj._expr
3535
if isinstance(obj, DType):

narwhals/translate.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -175,14 +175,14 @@ def get_namespace(obj: Any) -> Namespace:
175175

176176

177177
def to_native(obj: Any) -> Any:
178-
from narwhals.dataframe import NarwhalsFrame
178+
from narwhals.dataframe import DataFrame
179179
from narwhals.pandas_like.dataframe import PandasDataFrame
180180
from narwhals.pandas_like.series import PandasSeries
181181
from narwhals.polars import PolarsDataFrame
182182
from narwhals.polars import PolarsSeries
183183

184-
if isinstance(obj, NarwhalsFrame):
185-
return obj._dataframe
184+
if isinstance(obj, DataFrame):
185+
return obj._dataframe._dataframe
186186
if isinstance(obj, PandasDataFrame):
187187
return obj._dataframe
188188
if isinstance(obj, PandasSeries):

tests/tpch_q1_test.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,13 @@
1313
@pytest.mark.parametrize(
1414
"df_raw",
1515
[
16-
# (polars.read_parquet("tests/data/lineitem.parquet").to_pandas()),
16+
(polars.read_parquet("tests/data/lineitem.parquet").to_pandas()),
1717
polars.scan_parquet("tests/data/lineitem.parquet"),
1818
],
1919
)
2020
def test_q1(df_raw: Any) -> None:
2121
var_1 = datetime(1998, 9, 2)
22-
df = nw.NarwhalsFrame(df_raw, is_lazy=True)
22+
df = nw.DataFrame(df_raw, is_lazy=True)
2323
query_result = (
2424
df.filter(nw.col("l_shipdate") <= var_1)
2525
.group_by(["l_returnflag", "l_linestatus"])

0 commit comments

Comments
 (0)