Skip to content

Commit 8978e18

Browse files
authored
fix: Panic in to_physical for series of arrays and lists (#21289)
1 parent ab1f3c4 commit 8978e18

File tree

3 files changed

+47
-8
lines changed

3 files changed

+47
-8
lines changed

crates/polars-core/src/chunked_array/array/mod.rs

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ mod iterator;
44

55
use std::borrow::Cow;
66

7+
use either::Either;
8+
79
use crate::prelude::*;
810

911
impl ArrayChunked {
@@ -37,13 +39,24 @@ impl ArrayChunked {
3739
return Cow::Borrowed(self);
3840
};
3941

40-
assert_eq!(self.chunks().len(), physical_repr.chunks().len());
42+
let chunk_len_validity_iter =
43+
if physical_repr.chunks().len() == 1 && self.chunks().len() > 1 {
44+
// Physical repr got rechunked, rechunk our validity as well.
45+
Either::Left(std::iter::once((self.len(), self.rechunk_validity())))
46+
} else {
47+
// No rechunking, expect the same number of chunks.
48+
assert_eq!(self.chunks().len(), physical_repr.chunks().len());
49+
Either::Right(
50+
self.chunks()
51+
.iter()
52+
.map(|c| (c.len(), c.validity().cloned())),
53+
)
54+
};
4155

4256
let width = self.width();
43-
let chunks: Vec<_> = self
44-
.downcast_iter()
57+
let chunks: Vec<_> = chunk_len_validity_iter
4558
.zip(physical_repr.into_chunks())
46-
.map(|(chunk, values)| {
59+
.map(|((len, validity), values)| {
4760
FixedSizeListArray::new(
4861
ArrowDataType::FixedSizeList(
4962
Box::new(ArrowField::new(
@@ -53,9 +66,9 @@ impl ArrayChunked {
5366
)),
5467
width,
5568
),
56-
chunk.len(),
69+
len,
5770
values,
58-
chunk.validity().cloned(),
71+
validity,
5972
)
6073
.to_boxed()
6174
})

crates/polars-core/src/chunked_array/list/mod.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,16 @@ impl ListChunked {
4444
return Cow::Borrowed(self);
4545
};
4646

47-
assert_eq!(self.chunks().len(), physical_repr.chunks().len());
47+
let ca = if physical_repr.chunks().len() == 1 && self.chunks().len() > 1 {
48+
// Physical repr got rechunked, rechunk self as well.
49+
self.rechunk()
50+
} else {
51+
Cow::Borrowed(self)
52+
};
4853

49-
let chunks: Vec<_> = self
54+
assert_eq!(ca.chunks().len(), physical_repr.chunks().len());
55+
56+
let chunks: Vec<_> = ca
5057
.downcast_iter()
5158
.zip(physical_repr.into_chunks())
5259
.map(|(chunk, values)| {

py-polars/tests/unit/series/test_series.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1672,6 +1672,25 @@ def test_to_physical() -> None:
16721672
assert_series_equal(s.to_physical(), expected)
16731673

16741674

1675+
def test_to_physical_rechunked_21285() -> None:
1676+
# A series with multiple chunks, dtype is array or list of structs with a
1677+
# null field (causes rechunking) and a field with a different physical and
1678+
# logical repr (causes the full body of `to_physical_repr` to run).
1679+
arr_dtype = pl.Array(pl.Struct({"f0": pl.Time, "f1": pl.Null}), shape=(1,))
1680+
s = pl.Series("a", [None], arr_dtype) # content doesn't matter
1681+
s = s.append(s)
1682+
expected_arr_dtype = pl.Array(pl.Struct({"f0": Int64, "f1": pl.Null}), shape=(1,))
1683+
expected = pl.Series("a", [None, None], expected_arr_dtype)
1684+
assert_series_equal(s.to_physical(), expected)
1685+
1686+
list_dtype = pl.List(pl.Struct({"f0": pl.Time, "f1": pl.Null}))
1687+
s = pl.Series("a", [None], list_dtype) # content doesn't matter
1688+
s = s.append(s)
1689+
expected_list_dtype = pl.List(pl.Struct({"f0": Int64, "f1": pl.Null}))
1690+
expected = pl.Series("a", [None, None], expected_list_dtype)
1691+
assert_series_equal(s.to_physical(), expected)
1692+
1693+
16751694
def test_is_between_datetime() -> None:
16761695
s = pl.Series("a", [datetime(2020, 1, 1, 10, 0, 0), datetime(2020, 1, 1, 20, 0, 0)])
16771696
start = datetime(2020, 1, 1, 12, 0, 0)

0 commit comments

Comments
 (0)