Skip to content

Commit bc5f854

Browse files
authored
Merge branch 'main' into feat/pyarrow-to-datetime-infer
2 parents 3a26b96 + e980483 commit bc5f854

File tree

129 files changed

+885
-430
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

129 files changed

+885
-430
lines changed

.github/workflows/bump-version.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
python utils/bump_version.py ${{ github.event.inputs.release_type }}
3434
3535
- name: Create pull request
36-
uses: actions/github-script@v6
36+
uses: actions/github-script@v7
3737
if: github.actor == 'MarcoGorelli' || github.actor == 'FBruzzesi'
3838
with:
3939
script: |

.github/workflows/extremes.yml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,6 @@ jobs:
5959
run: uv pip freeze
6060
- name: Run pytest
6161
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow
62-
- name: Run doctests
63-
run: pytest narwhals --doctest-modules
6462

6563
not_so_old_versions:
6664
strategy:
@@ -88,13 +86,11 @@ jobs:
8886
run: uv pip freeze
8987
- name: Run pytest
9088
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow
91-
- name: Run doctests
92-
run: pytest narwhals --doctest-modules
9389

9490
nightlies:
9591
strategy:
9692
matrix:
97-
python-version: ["3.11"]
93+
python-version: ["3.12"]
9894
os: [ubuntu-latest]
9995
if: github.event.pull_request.head.repo.full_name == github.repository
10096
runs-on: ${{ matrix.os }}

.github/workflows/pytest.yml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@ jobs:
3030
run: uv pip freeze
3131
- name: Run pytest
3232
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=85
33-
- name: Run doctests
34-
if: startsWith(matrix.os, 'windows') != true
35-
run: pytest narwhals --doctest-modules
3633

3734
pytest-windows:
3835
strategy:
@@ -60,8 +57,6 @@ jobs:
6057
run: uv pip freeze
6158
- name: Run pytest
6259
run: pytest tests --cov=narwhals --cov=tests --runslow --cov-fail-under=95
63-
- name: Run doctests
64-
run: pytest narwhals --doctest-modules
6560

6661
pytest-coverage:
6762
strategy:
@@ -95,4 +90,5 @@ jobs:
9590
- name: Run pytest
9691
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100 --runslow
9792
- name: Run doctests
93+
if: matrix.python-version == '3.12'
9894
run: pytest narwhals --doctest-modules

CONTRIBUTING.md

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,20 @@ Here's how you can set up your local development environment to contribute.
5151

5252
#### Option 1: Use UV (recommended)
5353

54-
1. Make sure you have Python3.8+ installed (for example, Python 3.11), create a virtual environment,
54+
1. Make sure you have Python 3.12 installed, create a virtual environment,
5555
and activate it. If you're new to this, here's one way that we recommend:
5656
1. Install uv: https://github.com/astral-sh/uv?tab=readme-ov-file#getting-started
57-
2. Install some version of Python greater than Python3.8. For example, to install
58-
Python3.11:
57+
or make sure it is up-to-date with:
5958
```
60-
uv python install 3.11
59+
uv self update
60+
```
61+
2. Install Python 3.12:
62+
```
63+
uv python install 3.12
6164
```
6265
3. Create a virtual environment:
6366
```
64-
uv venv -p 3.11 --seed
67+
uv venv -p 3.12 --seed
6568
```
6669
4. Activate it. On Linux, this is `. .venv/bin/activate`, on Windows `.\.venv\Scripts\activate`.
6770
2. Install Narwhals: `uv pip install -e .`

docs/api-reference/dtypes.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
members:
77
- Array
88
- List
9-
- Struct
109
- Int64
1110
- Int32
1211
- Int16
@@ -15,12 +14,14 @@
1514
- UInt32
1615
- UInt16
1716
- UInt8
17+
- Field
1818
- Float64
1919
- Float32
2020
- Boolean
2121
- Categorical
2222
- Enum
2323
- String
24+
- Struct
2425
- Date
2526
- Datetime
2627
- Duration

docs/api-reference/narwhals.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Here are the top-level functions available in Narwhals.
1414
- concat_str
1515
- from_dict
1616
- from_native
17+
- from_arrow
1718
- get_level
1819
- get_native_namespace
1920
- is_ordered_categorical

narwhals/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from narwhals.dtypes import Datetime
1111
from narwhals.dtypes import Duration
1212
from narwhals.dtypes import Enum
13+
from narwhals.dtypes import Field
1314
from narwhals.dtypes import Float32
1415
from narwhals.dtypes import Float64
1516
from narwhals.dtypes import Int8
@@ -44,6 +45,7 @@
4445
from narwhals.expr import sum_horizontal
4546
from narwhals.expr import when
4647
from narwhals.functions import concat
48+
from narwhals.functions import from_arrow
4749
from narwhals.functions import from_dict
4850
from narwhals.functions import get_level
4951
from narwhals.functions import new_series
@@ -68,6 +70,7 @@
6870
"selectors",
6971
"concat",
7072
"from_dict",
73+
"from_arrow",
7174
"get_level",
7275
"new_series",
7376
"to_native",
@@ -118,6 +121,7 @@
118121
"String",
119122
"Datetime",
120123
"Duration",
124+
"Field",
121125
"Struct",
122126
"Array",
123127
"List",

narwhals/_arrow/utils.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,16 @@ def native_to_narwhals_dtype(dtype: Any, dtypes: DTypes) -> DType:
5656
if pa.types.is_dictionary(dtype):
5757
return dtypes.Categorical()
5858
if pa.types.is_struct(dtype):
59-
return dtypes.Struct()
59+
return dtypes.Struct(
60+
[
61+
dtypes.Field(
62+
dtype.field(i).name,
63+
native_to_narwhals_dtype(dtype.field(i).type, dtypes),
64+
)
65+
for i in range(dtype.num_fields)
66+
]
67+
)
68+
6069
if pa.types.is_list(dtype) or pa.types.is_large_list(dtype):
6170
return dtypes.List(native_to_narwhals_dtype(dtype.value_type, dtypes))
6271
if pa.types.is_fixed_size_list(dtype):

narwhals/_duckdb/dataframe.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,16 @@ def map_duckdb_dtype_to_narwhals_dtype(duckdb_dtype: Any, dtypes: DTypes) -> DTy
5252
if duckdb_dtype == "INTERVAL":
5353
return dtypes.Duration()
5454
if duckdb_dtype.startswith("STRUCT"):
55-
return dtypes.Struct()
55+
matchstruc_ = re.findall(r"(\w+)\s+(\w+)", duckdb_dtype)
56+
return dtypes.Struct(
57+
[
58+
dtypes.Field(
59+
matchstruc_[i][0],
60+
map_duckdb_dtype_to_narwhals_dtype(matchstruc_[i][1], dtypes),
61+
)
62+
for i in range(len(matchstruc_))
63+
]
64+
)
5665
if match_ := re.match(r"(.*)\[\]$", duckdb_dtype):
5766
return dtypes.List(map_duckdb_dtype_to_narwhals_dtype(match_.group(1), dtypes))
5867
if match_ := re.match(r"(\w+)\[(\d+)\]", duckdb_dtype):

narwhals/_ibis/dataframe.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,15 @@ def map_ibis_dtype_to_narwhals_dtype(ibis_dtype: Any, dtypes: DTypes) -> DType:
5151
map_ibis_dtype_to_narwhals_dtype(ibis_dtype.value_type, dtypes)
5252
)
5353
if ibis_dtype.is_struct():
54-
return dtypes.Struct()
54+
return dtypes.Struct(
55+
[
56+
dtypes.Field(
57+
ibis_dtype_name,
58+
map_ibis_dtype_to_narwhals_dtype(ibis_dtype_field, dtypes),
59+
)
60+
for ibis_dtype_name, ibis_dtype_field in ibis_dtype.items()
61+
]
62+
)
5563
return dtypes.Unknown() # pragma: no cover
5664

5765

narwhals/_pandas_like/series.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -619,9 +619,7 @@ def quantile(
619619

620620
def zip_with(self: Self, mask: Any, other: Any) -> PandasLikeSeries:
621621
ser = self._native_series
622-
mask = validate_column_comparand(
623-
ser.index, mask, treat_length_one_as_scalar=False
624-
)
622+
mask = validate_column_comparand(ser.index, mask)
625623
other = validate_column_comparand(ser.index, other)
626624
res = ser.where(mask, other)
627625
return self._from_native_series(res)

narwhals/_pandas_like/utils.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,7 @@
3232
}
3333

3434

35-
def validate_column_comparand(
36-
index: Any, other: Any, *, treat_length_one_as_scalar: bool = True
37-
) -> Any:
35+
def validate_column_comparand(index: Any, other: Any) -> Any:
3836
"""Validate RHS of binary operation.
3937
4038
If the comparison isn't supported, return `NotImplemented` so that the
@@ -55,9 +53,10 @@ def validate_column_comparand(
5553
if isinstance(other, PandasLikeDataFrame):
5654
return NotImplemented
5755
if isinstance(other, PandasLikeSeries):
58-
if other.len() == 1 and treat_length_one_as_scalar:
56+
if other.len() == 1:
5957
# broadcast
60-
return other.item()
58+
s = other._native_series
59+
return s.__class__(s.iloc[0], index=index, dtype=s.dtype)
6160
if other._native_series.index is not index:
6261
return set_axis(
6362
other._native_series,
@@ -83,7 +82,8 @@ def validate_dataframe_comparand(index: Any, other: Any) -> Any:
8382
if isinstance(other, PandasLikeSeries):
8483
if other.len() == 1:
8584
# broadcast
86-
return other._native_series.iloc[0]
85+
s = other._native_series
86+
return s.__class__(s.iloc[0], index=index, dtype=s.dtype)
8787
if other._native_series.index is not index:
8888
return set_axis(
8989
other._native_series,
@@ -294,7 +294,7 @@ def native_to_narwhals_dtype(native_column: Any, dtypes: DTypes) -> DType:
294294
native_column.dtype.pyarrow_dtype.list_size,
295295
)
296296
if dtype.startswith("struct"):
297-
return dtypes.Struct()
297+
return arrow_native_to_narwhals_dtype(native_column.dtype.pyarrow_dtype, dtypes)
298298
if dtype == "object":
299299
if ( # pragma: no cover TODO(unassigned): why does this show as uncovered?
300300
idx := getattr(native_column, "first_valid_index", lambda: None)()

narwhals/_polars/utils.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,12 @@ def native_to_narwhals_dtype(dtype: Any, dtypes: DTypes) -> DType:
7575
du_time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us")
7676
return dtypes.Duration(time_unit=du_time_unit)
7777
if dtype == pl.Struct:
78-
return dtypes.Struct()
78+
return dtypes.Struct(
79+
[
80+
dtypes.Field(field_name, native_to_narwhals_dtype(field_type, dtypes))
81+
for field_name, field_type in dtype
82+
]
83+
)
7984
if dtype == pl.List:
8085
return dtypes.List(native_to_narwhals_dtype(dtype.inner, dtypes))
8186
if dtype == pl.Array:

narwhals/dataframe.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -546,12 +546,12 @@ def write_csv(self, file: str | Path | BytesIO | None = None) -> Any:
546546
547547
We can pass any supported library such as pandas, Polars or PyArrow to `func`:
548548
549-
>>> func(df_pd) # doctest: +SKIP
550-
'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
551-
>>> func(df_pl) # doctest: +SKIP
549+
>>> func(df_pd)
552550
'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
553-
>>> func(df_pa) # doctest: +SKIP
551+
>>> func(df_pl)
554552
'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
553+
>>> func(df_pa)
554+
'"foo","bar","ham"\n1,6,"a"\n2,7,"b"\n3,8,"c"\n'
555555
556556
If we had passed a file name to `write_csv`, it would have been
557557
written to that file.
@@ -582,9 +582,9 @@ def write_parquet(self, file: str | Path | BytesIO) -> Any:
582582
583583
We can then pass either pandas, Polars or PyArrow to `func`:
584584
585-
>>> func(df_pd) # doctest:+SKIP
586-
>>> func(df_pl) # doctest:+SKIP
587-
>>> func(df_pa) # doctest:+SKIP
585+
>>> func(df_pd)
586+
>>> func(df_pl)
587+
>>> func(df_pa)
588588
"""
589589
self._compliant_frame.write_parquet(file)
590590

@@ -1116,12 +1116,12 @@ def schema(self) -> Schema:
11161116
You can pass either pandas or Polars to `func`:
11171117
11181118
>>> df_pd_schema = func(df_pd)
1119-
>>> df_pd_schema # doctest:+SKIP
1120-
Schema({'foo': Int64, 'bar': Float64, 'ham', String})
1119+
>>> df_pd_schema
1120+
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
11211121
11221122
>>> df_pl_schema = func(df_pl)
1123-
>>> df_pl_schema # doctest:+SKIP
1124-
Schema({'foo': Int64, 'bar': Float64, 'ham', String})
1123+
>>> df_pl_schema
1124+
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
11251125
"""
11261126
return super().schema
11271127

@@ -1150,12 +1150,12 @@ def collect_schema(self: Self) -> Schema:
11501150
You can pass either pandas or Polars to `func`:
11511151
11521152
>>> df_pd_schema = func(df_pd)
1153-
>>> df_pd_schema # doctest:+SKIP
1154-
Schema({'foo': Int64, 'bar': Float64, 'ham', String})
1153+
>>> df_pd_schema
1154+
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
11551155
11561156
>>> df_pl_schema = func(df_pl)
1157-
>>> df_pl_schema # doctest:+SKIP
1158-
Schema({'foo': Int64, 'bar': Float64, 'ham', String})
1157+
>>> df_pl_schema
1158+
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
11591159
"""
11601160
return super().collect_schema()
11611161

@@ -2478,8 +2478,8 @@ def item(self: Self, row: int | None = None, column: int | str | None = None) ->
24782478
24792479
We can then pass either pandas or Polars to `func`:
24802480
2481-
>>> func(df_pd, 1, 1), func(df_pd, 2, "b") # doctest:+SKIP
2482-
(5, 6)
2481+
>>> func(df_pd, 1, 1), func(df_pd, 2, "b")
2482+
(np.int64(5), np.int64(6))
24832483
24842484
>>> func(df_pl, 1, 1), func(df_pl, 2, "b")
24852485
(5, 6)
@@ -2581,7 +2581,7 @@ def to_arrow(self: Self) -> pa.Table:
25812581
... def func(df):
25822582
... return df.to_arrow()
25832583
2584-
>>> func(df_pd) # doctest:+SKIP
2584+
>>> func(df_pd)
25852585
pyarrow.Table
25862586
foo: int64
25872587
bar: string
@@ -3010,7 +3010,7 @@ def schema(self) -> Schema:
30103010
... }
30113011
... )
30123012
>>> lf = nw.from_native(lf_pl)
3013-
>>> lf.schema # doctest:+SKIP
3013+
>>> lf.schema # doctest: +SKIP
30143014
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
30153015
"""
30163016
return super().schema
@@ -3030,8 +3030,8 @@ def collect_schema(self: Self) -> Schema:
30303030
... }
30313031
... )
30323032
>>> lf = nw.from_native(lf_pl)
3033-
>>> lf.collect_schema() # doctest:+SKIP
3034-
Schema({'foo': Int64, 'bar': Float64, 'ham', String})
3033+
>>> lf.collect_schema()
3034+
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
30353035
"""
30363036
return super().collect_schema()
30373037

0 commit comments

Comments
 (0)