Skip to content

Commit

Permalink
fix: Panic in to_physical for series of arrays and lists (#21289)
Browse files Browse the repository at this point in the history
  • Loading branch information
JakubValtar authored Feb 17, 2025
1 parent ab1f3c4 commit 8978e18
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 8 deletions.
25 changes: 19 additions & 6 deletions crates/polars-core/src/chunked_array/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ mod iterator;

use std::borrow::Cow;

use either::Either;

use crate::prelude::*;

impl ArrayChunked {
Expand Down Expand Up @@ -37,13 +39,24 @@ impl ArrayChunked {
return Cow::Borrowed(self);
};

assert_eq!(self.chunks().len(), physical_repr.chunks().len());
let chunk_len_validity_iter =
if physical_repr.chunks().len() == 1 && self.chunks().len() > 1 {
// Physical repr got rechunked, rechunk our validity as well.
Either::Left(std::iter::once((self.len(), self.rechunk_validity())))
} else {
// No rechunking, expect the same number of chunks.
assert_eq!(self.chunks().len(), physical_repr.chunks().len());
Either::Right(
self.chunks()
.iter()
.map(|c| (c.len(), c.validity().cloned())),
)
};

let width = self.width();
let chunks: Vec<_> = self
.downcast_iter()
let chunks: Vec<_> = chunk_len_validity_iter
.zip(physical_repr.into_chunks())
.map(|(chunk, values)| {
.map(|((len, validity), values)| {
FixedSizeListArray::new(
ArrowDataType::FixedSizeList(
Box::new(ArrowField::new(
Expand All @@ -53,9 +66,9 @@ impl ArrayChunked {
)),
width,
),
chunk.len(),
len,
values,
chunk.validity().cloned(),
validity,
)
.to_boxed()
})
Expand Down
11 changes: 9 additions & 2 deletions crates/polars-core/src/chunked_array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,16 @@ impl ListChunked {
return Cow::Borrowed(self);
};

assert_eq!(self.chunks().len(), physical_repr.chunks().len());
let ca = if physical_repr.chunks().len() == 1 && self.chunks().len() > 1 {
// Physical repr got rechunked, rechunk self as well.
self.rechunk()
} else {
Cow::Borrowed(self)
};

let chunks: Vec<_> = self
assert_eq!(ca.chunks().len(), physical_repr.chunks().len());

let chunks: Vec<_> = ca
.downcast_iter()
.zip(physical_repr.into_chunks())
.map(|(chunk, values)| {
Expand Down
19 changes: 19 additions & 0 deletions py-polars/tests/unit/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1672,6 +1672,25 @@ def test_to_physical() -> None:
assert_series_equal(s.to_physical(), expected)


def test_to_physical_rechunked_21285() -> None:
# A series with multiple chunks, dtype is array or list of structs with a
# null field (causes rechunking) and a field with a different physical and
# logical repr (causes the full body of `to_physical_repr` to run).
arr_dtype = pl.Array(pl.Struct({"f0": pl.Time, "f1": pl.Null}), shape=(1,))
s = pl.Series("a", [None], arr_dtype) # content doesn't matter
s = s.append(s)
expected_arr_dtype = pl.Array(pl.Struct({"f0": Int64, "f1": pl.Null}), shape=(1,))
expected = pl.Series("a", [None, None], expected_arr_dtype)
assert_series_equal(s.to_physical(), expected)

list_dtype = pl.List(pl.Struct({"f0": pl.Time, "f1": pl.Null}))
s = pl.Series("a", [None], list_dtype) # content doesn't matter
s = s.append(s)
expected_list_dtype = pl.List(pl.Struct({"f0": Int64, "f1": pl.Null}))
expected = pl.Series("a", [None, None], expected_list_dtype)
assert_series_equal(s.to_physical(), expected)


def test_is_between_datetime() -> None:
s = pl.Series("a", [datetime(2020, 1, 1, 10, 0, 0), datetime(2020, 1, 1, 20, 0, 0)])
start = datetime(2020, 1, 1, 12, 0, 0)
Expand Down

0 comments on commit 8978e18

Please sign in to comment.