Skip to content

Commit 4a4fb58

Browse files
committed
redesign
1 parent fa93e43 commit 4a4fb58

27 files changed

+1207
-1338
lines changed

README.md

+15-16
Original file line numberDiff line numberDiff line change
@@ -44,41 +44,40 @@ There are three steps to writing dataframe-agnostic code using Narwhals:
4444
Here's an example of a dataframe-agnostic function:
4545

4646
```python
47-
from typing import TypeVar
47+
from typing import Any
4848
import pandas as pd
4949
import polars as pl
5050

51-
from narwhals import translate_frame, get_namespace, to_native
52-
53-
AnyDataFrame = TypeVar("AnyDataFrame")
51+
import narwhals as nw
5452

5553

5654
def my_agnostic_function(
57-
suppliers_native: AnyDataFrame,
58-
parts_native: AnyDataFrame,
59-
) -> AnyDataFrame:
60-
suppliers = translate_frame(suppliers_native)
61-
parts = translate_frame(parts_native)
62-
pl = get_namespace(suppliers)
55+
suppliers_native,
56+
parts_native,
57+
):
58+
suppliers = nw.DataFrame(suppliers_native)
59+
parts = nw.DataFrame(parts_native)
6360

6461
result = (
6562
suppliers.join(parts, left_on="city", right_on="city")
6663
.filter(
67-
pl.col("color").is_in(["Red", "Green"]),
68-
pl.col("weight") > 14,
64+
nw.col("color").is_in(["Red", "Green"]),
65+
nw.col("weight") > 14,
6966
)
7067
.group_by("s", "p")
7168
.agg(
72-
weight_mean=pl.col("weight").mean(),
73-
weight_max=pl.col("weight").max(),
69+
weight_mean=nw.col("weight").mean(),
70+
weight_max=nw.col("weight").max(),
7471
)
75-
)
76-
return to_native(result)
72+
).with_columns(nw.col("weight_max").cast(nw.Int64))
73+
return nw.to_native(result)
74+
7775
```
7876
You can pass in a pandas or Polars dataframe, and the output will be the same!
7977
Let's try it out:
8078

8179
```python
80+
8281
suppliers = {
8382
"s": ["S1", "S2", "S3", "S4", "S5"],
8483
"sname": ["Smith", "Jones", "Blake", "Clark", "Adams"],

demo.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# ruff: noqa
2+
from typing import Any
3+
import polars as pl
4+
5+
import narwhals as nw
6+
7+
8+
def func(df_raw: Any) -> Any:
9+
df = nw.DataFrame(df_raw)
10+
res = df.with_columns(
11+
d=nw.col("a") + 1,
12+
e=nw.col("a") + nw.col("b"),
13+
)
14+
res = res.group_by(["a"]).agg(
15+
nw.col("b").sum(),
16+
d=nw.col("c").sum(),
17+
# e=nw.len(),
18+
)
19+
return nw.to_native(res)
20+
21+
22+
import pandas as pd
23+
24+
df = pd.DataFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
25+
print(func(df))
26+
df = pl.DataFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
27+
print(func(df))
28+
df = pl.LazyFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
29+
print(func(df).collect())

design.md

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Design
2+
3+
Let's do this differently.
4+
5+
Here's what I'd like to get to:
6+
7+
import narwhals as nw
8+
from narwhals.translate import (
9+
translate_frame,
10+
translate_series,
11+
to_native,
12+
)
13+
14+
dfpd = ...
15+
df = nw.DataFrame(df_any)
16+
17+
df = df.with_columns(c = nw.col('a') + nw.col('b'))
18+
19+
result = to_native(df)
20+
21+
---
22+
23+
we need to just have a single class. can't have all this nonsense...
24+
25+
then, we don't even need a spec...
26+
27+
we can still define entrypoints though?
28+
29+
---
30+
31+
where should extract native happen?

f.py

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# ruff: noqa
2+
# type: ignore
3+
from typing import Any
4+
import pandas as pd
5+
import polars as pl
6+
7+
import narwhals as nw
8+
9+
10+
def my_agnostic_function(
11+
suppliers_native,
12+
parts_native,
13+
):
14+
suppliers = nw.DataFrame(suppliers_native)
15+
parts = nw.DataFrame(parts_native)
16+
17+
result = (
18+
suppliers.join(parts, left_on="city", right_on="city")
19+
.filter(
20+
nw.col("color").is_in(["Red", "Green"]),
21+
nw.col("weight") > 14,
22+
)
23+
.group_by("s", "p")
24+
.agg(
25+
weight_mean=nw.col("weight").mean(),
26+
weight_max=nw.col("weight").max(),
27+
)
28+
).with_columns(nw.col("weight_max").cast(nw.Int64))
29+
return nw.to_native(result)
30+
31+
32+
suppliers = {
33+
"s": ["S1", "S2", "S3", "S4", "S5"],
34+
"sname": ["Smith", "Jones", "Blake", "Clark", "Adams"],
35+
"status": [20, 10, 30, 20, 30],
36+
"city": ["London", "Paris", "Paris", "London", "Athens"],
37+
}
38+
parts = {
39+
"p": ["P1", "P2", "P3", "P4", "P5", "P6"],
40+
"pname": ["Nut", "Bolt", "Screw", "Screw", "Cam", "Cog"],
41+
"color": ["Red", "Green", "Blue", "Red", "Blue", "Red"],
42+
"weight": [12.0, 17.0, 17.0, 14.0, 12.0, 19.0],
43+
"city": ["London", "Paris", "Oslo", "London", "Paris", "London"],
44+
}
45+
46+
print("pandas output:")
47+
print(
48+
my_agnostic_function(
49+
pd.DataFrame(suppliers),
50+
pd.DataFrame(parts),
51+
)
52+
)
53+
print("\nPolars output:")
54+
print(
55+
my_agnostic_function(
56+
pl.DataFrame(suppliers),
57+
pl.DataFrame(parts),
58+
)
59+
)
60+
print("\nPolars lazy output:")
61+
print(
62+
my_agnostic_function(
63+
pl.LazyFrame(suppliers),
64+
pl.LazyFrame(parts),
65+
).collect()
66+
)

narwhals/__init__.py

+21-8
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,36 @@
33
from narwhals.containers import is_pandas
44
from narwhals.containers import is_polars
55
from narwhals.containers import is_series
6-
from narwhals.translate import get_namespace
6+
from narwhals.dataframe import DataFrame
7+
from narwhals.dtypes import * # noqa: F403
8+
from narwhals.expression import all
9+
from narwhals.expression import col
10+
from narwhals.expression import len
11+
from narwhals.expression import max
12+
from narwhals.expression import mean
13+
from narwhals.expression import min
14+
from narwhals.expression import sum
15+
from narwhals.expression import sum_horizontal
16+
from narwhals.series import Series
717
from narwhals.translate import to_native
8-
from narwhals.translate import translate_any
9-
from narwhals.translate import translate_frame
10-
from narwhals.translate import translate_series
1118

1219
__version__ = "0.3.0"
1320

1421
__all__ = [
15-
"translate_frame",
16-
"translate_series",
17-
"translate_any",
1822
"is_dataframe",
1923
"is_series",
2024
"is_polars",
2125
"is_pandas",
2226
"get_implementation",
23-
"get_namespace",
2427
"to_native",
28+
"all",
29+
"col",
30+
"len",
31+
"min",
32+
"max",
33+
"mean",
34+
"sum",
35+
"sum_horizontal",
36+
"DataFrame",
37+
"Series",
2538
]

0 commit comments

Comments
 (0)