Skip to content

Commit 0b333f9

Browse files
authored
RFC, feat: add .select (by str) for duckdb and ibis backend (#1266)
1 parent 425dbe4 commit 0b333f9

File tree

5 files changed

+100
-1
lines changed

5 files changed

+100
-1
lines changed

docs/extending.md

+13
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def func(df: FrameT) -> FrameT:
3737
b_std=nw.col("b").std(),
3838
)
3939
```
40+
4041
will work for any of pandas, Polars, cuDF, Modin, and PyArrow.
4142

4243
However, sometimes you don't need to do complex operations on dataframes - all you need
@@ -57,9 +58,21 @@ def func(df: Any) -> Schema:
5758
df = nw.from_native(df, eager_or_interchange_only=True)
5859
return df.schema
5960
```
61+
6062
is also supported, meaning that, in addition to the libraries mentioned above, you can
6163
also pass Ibis, DuckDB, Vaex, and any library which implements the protocol.
6264

65+
#### Interchange-only support
66+
67+
While libraries for which we have full support can benefit from the whole Narwhals API,
68+
libraries which have interchange-only support can access the following methods after
69+
converting to a Narwhals DataFrame:
70+
71+
- `.schema`, hence column names via `.schema.names()` and column types via `.schema.dtypes()`
72+
- `.to_pandas()` and `.to_arrow()`, for converting to pandas and PyArrow, respectively.
73+
- `.select(names)` (Ibis and DuckDB), where `names` is a list of (string) column names. This is useful for
74+
selecting columns before converting to another library.
75+
6376
### Extending Narwhals
6477

6578
If you want your own library to be recognised too, you're welcome to open a PR (with tests)!

docs/index.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Extremely lightweight and extensible compatibility layer between dataframe libra
66

77
- **Full API support**: cuDF, Modin, pandas, Polars, PyArrow
88
- **Lazy-only support**: Dask
9-
- **Interchange-level support**: Ibis, Vaex, anything else which implements the DataFrame Interchange Protocol
9+
- **Interchange-level support**: Ibis, DuckDB, Vaex, anything else which implements the DataFrame Interchange Protocol
1010

1111
Seamlessly support all, without depending on any!
1212

narwhals/_duckdb/dataframe.py

+19
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,22 @@ def __getitem__(self, item: str) -> DuckDBInterchangeSeries:
9090
self._native_frame.select(item), dtypes=self._dtypes
9191
)
9292

93+
def select(
    self: Self,
    *exprs: Any,
    **named_exprs: Any,
) -> Self:
    """Project the native DuckDB frame onto the given column names.

    Only selection by plain string names is accepted: every positional
    argument must be a ``str`` and no keyword (named) expressions may be
    supplied.

    Returns:
        A new frame produced by ``_from_native_frame`` from the result of
        the native frame's own ``select``.

    Raises:
        NotImplementedError: If any keyword expression is passed, or any
            positional argument is not a string.
    """
    selects_only_names = not named_exprs and all(
        isinstance(name, str) for name in exprs
    )
    if not selects_only_names:  # pragma: no cover
        raise NotImplementedError(
            "`select`-ing not by name is not supported for DuckDB backend.\n\n"
            "If you would like to see this kind of object better supported in "
            "Narwhals, please open a feature request "
            "at https://github.com/narwhals-dev/narwhals/issues."
        )

    projected = self._native_frame.select(*exprs)
    return self._from_native_frame(projected)
108+
93109
def __getattr__(self, attr: str) -> Any:
94110
if attr == "schema":
95111
return {
@@ -120,3 +136,6 @@ def to_pandas(self: Self) -> pd.DataFrame:
120136

121137
def to_arrow(self: Self) -> pa.Table:
    """Return the result of calling ``arrow()`` on the wrapped native frame."""
    native = self._native_frame
    return native.arrow()
139+
140+
def _from_native_frame(self: Self, df: Any) -> Self:
    """Wrap ``df`` in a new instance of this frame's class.

    The new instance is constructed with ``df`` as its first argument and
    this frame's ``_dtypes`` passed through as the ``dtypes`` keyword.
    """
    frame_cls = self.__class__
    return frame_cls(df, dtypes=self._dtypes)

narwhals/_ibis/dataframe.py

+21
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,24 @@ def to_pandas(self: Self) -> pd.DataFrame:
8585
def to_arrow(self: Self) -> pa.Table:
    """Return the result of calling ``to_pyarrow()`` on the wrapped native frame."""
    native = self._native_frame
    return native.to_pyarrow()
8787

88+
def select(
    self: Self,
    *exprs: Any,
    **named_exprs: Any,
) -> Self:
    """Project the native Ibis frame onto the given column names.

    Only selection by plain string names is accepted: every positional
    argument must be a ``str`` and no keyword (named) expressions may be
    supplied.

    Returns:
        A new frame produced by ``_from_native_frame`` from the native
        frame's ``select`` applied to an ``ibis.selectors.cols`` selector
        built from the names.

    Raises:
        NotImplementedError: If any keyword expression is passed, or any
            positional argument is not a string.
    """
    selects_only_names = not named_exprs and all(
        isinstance(name, str) for name in exprs
    )
    if not selects_only_names:  # pragma: no cover
        raise NotImplementedError(
            "`select`-ing not by name is not supported for Ibis backend.\n\n"
            "If you would like to see this kind of object better supported in "
            "Narwhals, please open a feature request "
            "at https://github.com/narwhals-dev/narwhals/issues."
        )

    # Imported only after validation, so the rejection path above never
    # requires ibis to be importable.
    import ibis.selectors as ibis_selectors

    by_name = ibis_selectors.cols(*exprs)
    return self._from_native_frame(self._native_frame.select(by_name))
105+
88106
def __getattr__(self, attr: str) -> Any:
89107
if attr == "schema":
90108
return {
@@ -98,3 +116,6 @@ def __getattr__(self, attr: str) -> Any:
98116
"at https://github.com/narwhals-dev/narwhals/issues."
99117
)
100118
raise NotImplementedError(msg)
119+
120+
def _from_native_frame(self: Self, df: Any) -> Self:
    """Wrap ``df`` in a new instance of this frame's class.

    The new instance is constructed with ``df`` as its first argument and
    this frame's ``_dtypes`` passed through as the ``dtypes`` keyword.
    """
    frame_cls = self.__class__
    return frame_cls(df, dtypes=self._dtypes)
+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from __future__ import annotations
2+
3+
import duckdb
4+
import polars as pl
5+
import pytest
6+
7+
import narwhals.stable.v1 as nw
8+
9+
# Shared sample columns used by the tests in this module:
# integer column "a", float column "b" and string column "z".
data = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]}
10+
11+
12+
def test_interchange() -> None:
    """`select` on a frame built from a Polars `__dataframe__()` object must raise."""
    interchange_obj = pl.DataFrame(data).__dataframe__()
    frame = nw.from_native(interchange_obj, eager_or_interchange_only=True)

    expected_message = "Attribute select is not supported for metadata-only dataframes"
    with pytest.raises(NotImplementedError, match=expected_message):
        frame.select("a", "z")
20+
21+
22+
def test_interchange_ibis(
    tmpdir: pytest.TempdirFactory,
) -> None:  # pragma: no cover
    """Selecting by name on an Ibis table keeps exactly the requested columns.

    The sample data is written to a parquet file under ``tmpdir`` and read
    back through Ibis; the test is skipped when ibis cannot be imported.
    """
    ibis = pytest.importorskip("ibis")

    parquet_path = str(tmpdir / "file.parquet")  # type: ignore[operator]
    pl.DataFrame(data).write_parquet(parquet_path)

    table = ibis.read_parquet(parquet_path)
    selected = nw.from_native(table, eager_or_interchange_only=True).select("a", "z")

    assert selected.schema.names() == ["a", "z"]
37+
38+
39+
def test_interchange_duckdb() -> None:
    """Selecting by name on a DuckDB relation keeps exactly the requested columns."""
    # NOTE: do not rename `df_pl` — the SQL text below refers to it by name
    # (presumably resolved by DuckDB from this local variable; verify against
    # the DuckDB docs), which is why the unused-variable lint is silenced.
    df_pl = pl.DataFrame(data)  # noqa: F841
    relation = duckdb.sql("select * from df_pl")

    selected = nw.from_native(relation, eager_or_interchange_only=True).select("a", "z")

    assert selected.schema.names() == ["a", "z"]

0 commit comments

Comments
 (0)