Skip to content

Commit 936303e

Browse files
committed
Handle empty filtered aggregate results
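Return 0 for sum() on an empty column or an empty filtered view (sum() no longer raises on empty input). Return NaN for mean() and std() on empty filtered views; they still raise on an empty base table. Keep min()/max() raising because they have no identity element. Add tests for empty filtered aggregate behavior.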
1 parent 7ef6329 commit 936303e

2 files changed

Lines changed: 25 additions & 7 deletions

File tree

src/blosc2/ctable.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,6 +1317,8 @@ def _sum_lazy_fastpath(self, acc_dtype, where=None):
13171317
def sum(self, dtype=None, *, where=None):
13181318
"""Sum of all live, non-null values.
13191319
1320+
Returns zero for an empty column or filtered view.
1321+
13201322
Supported dtypes: bool, int, uint, float, complex.
13211323
Bool values are counted as 0 / 1.
13221324
Null sentinel values are skipped.
@@ -1351,8 +1353,6 @@ def sum(self, dtype=None, *, where=None):
13511353
"""
13521354
self._require_kind("biufc", "sum")
13531355
where = self._normalize_sum_where(where)
1354-
if where is None:
1355-
self._require_nonempty("sum")
13561356
# Use a wide accumulator to reduce overflow risk
13571357
acc_dtype = np.dtype(dtype).type if dtype is not None else None
13581358
if acc_dtype is None:
@@ -1478,7 +1478,9 @@ def mean(self, *, where=None) -> float:
14781478
"""
14791479
self._require_kind("biuf", "mean")
14801480
where = self._normalize_sum_where(where)
1481-
if where is None:
1481+
if where is None and len(self) == 0:
1482+
if self._table.base is not None:
1483+
return float("nan")
14821484
self._require_nonempty("mean")
14831485
fast = self._lazy_aggregate_fastpath("mean", where=where)
14841486
if fast is not NotImplemented:
@@ -1512,7 +1514,9 @@ def std(self, ddof: int = 0, *, where=None) -> float:
15121514
"""
15131515
self._require_kind("biuf", "std")
15141516
where = self._normalize_sum_where(where)
1515-
if where is None:
1517+
if where is None and len(self) == 0:
1518+
if self._table.base is not None:
1519+
return float("nan")
15161520
self._require_nonempty("std")
15171521
fast = self._lazy_aggregate_fastpath("std", where=where, ddof=ddof)
15181522
if fast is not NotImplemented:

tests/ctable/test_column.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -374,10 +374,14 @@ def test_sum_skips_deleted_rows():
374374
assert t["id"].sum() == sum(range(1, 20))
375375

376376

377-
def test_sum_empty_raises():
377+
def test_sum_empty_returns_zero():
378378
t = CTable(Row)
379-
with pytest.raises(ValueError, match="empty"):
380-
t["id"].sum()
379+
assert t["id"].sum() == 0
380+
381+
382+
def test_sum_empty_filtered_view_returns_zero():
383+
t = CTable(Row, new_data=DATA20)
384+
assert t[t.id < 0]["id"].sum() == 0
381385

382386

383387
def test_sum_wrong_type_raises():
@@ -466,6 +470,11 @@ def test_mean_empty_raises():
466470
t["id"].mean()
467471

468472

473+
def test_mean_empty_filtered_view_is_nan():
474+
t = CTable(Row, new_data=DATA20)
475+
assert np.isnan(t[t.id < 0]["id"].mean())
476+
477+
469478
# -------------------------------------------------------------------
470479
# Aggregates: std
471480
# -------------------------------------------------------------------
@@ -499,6 +508,11 @@ def test_std_empty_raises():
499508
t["id"].std()
500509

501510

511+
def test_std_empty_filtered_view_is_nan():
512+
t = CTable(Row, new_data=DATA20)
513+
assert np.isnan(t[t.id < 0]["id"].std())
514+
515+
502516
# -------------------------------------------------------------------
503517
# Aggregates: any / all
504518
# -------------------------------------------------------------------

0 commit comments

Comments
 (0)