Skip to content

Commit 420b8bc

Browse files
committed
Remove explicit default parameter from CTable.add_column. Fixes #631.
1 parent 7b21b3e commit 420b8bc

7 files changed

Lines changed: 86 additions & 29 deletions

File tree

doc/getting_started/tutorials/13.ctable-basics.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1526,7 +1526,7 @@
15261526
],
15271527
"source": [
15281528
"# Add a 'feels_like' column: temperature adjusted for wind chill (simplified)\n",
1529-
"climate.add_column(\"feels_like\", blosc2.float32(), default=0.0)\n",
1529+
"climate.add_column(\"feels_like\", blosc2.field(blosc2.float32(), default=0.0))\n",
15301530
"\n",
15311531
"temp = climate.temperature[:]\n",
15321532
"wind = climate[\"wind_speed\"][:]\n",

examples/ctable/mutations.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class Employee:
5353
print(t)
5454

5555
# -- add_column(): new column filled with a default -------------------------
56-
t.add_column("bonus", blosc2.float64(ge=0), default=0.0)
56+
t.add_column("bonus", blosc2.field(blosc2.float64(ge=0), default=0.0))
5757
print("After add_column('bonus'):")
5858
print(t)
5959

src/blosc2/ctable.py

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import ast
1414
import contextlib
1515
import contextvars
16+
import copy
1617
import dataclasses
1718
import itertools
1819
import os
@@ -68,6 +69,7 @@
6869
_validate_column_name,
6970
compile_schema,
7071
compute_display_width,
72+
get_blosc2_field_metadata,
7173
schema_from_dict,
7274
schema_to_dict,
7375
)
@@ -3688,32 +3690,60 @@ def from_csv(
36883690
# Schema mutations: add / drop / rename columns
36893691
# ------------------------------------------------------------------
36903692

3693+
@staticmethod
3694+
def _column_spec_default_and_cparams(
3695+
spec_or_field: SchemaSpec | dataclasses.Field,
3696+
) -> tuple[SchemaSpec, Any, dict | None]:
3697+
"""Extract the schema spec, default and cparams for ``add_column()``."""
3698+
if isinstance(spec_or_field, dataclasses.Field):
3699+
meta = get_blosc2_field_metadata(spec_or_field)
3700+
if meta is None:
3701+
raise TypeError("add_column() field descriptors must be created with blosc2.field().")
3702+
spec = copy.deepcopy(meta["spec"])
3703+
if spec_or_field.default is not MISSING:
3704+
default = spec_or_field.default
3705+
elif spec_or_field.default_factory is not MISSING: # type: ignore[misc]
3706+
default = spec_or_field.default_factory()
3707+
else:
3708+
default = MISSING
3709+
cparams = meta.get("cparams")
3710+
else:
3711+
spec = spec_or_field
3712+
default = MISSING
3713+
cparams = None
3714+
3715+
if not isinstance(spec, SchemaSpec):
3716+
raise TypeError(f"add_column() requires a SchemaSpec, got {type(spec)!r}.")
3717+
return spec, default, cparams
3718+
36913719
def add_column(
36923720
self,
36933721
name: str,
3694-
spec: SchemaSpec,
3695-
default,
3722+
spec: SchemaSpec | dataclasses.Field,
36963723
*,
36973724
cparams: dict | None = None,
36983725
) -> None:
3699-
"""Add a new column filled with *default* for every existing live row.
3726+
"""Add a new column filled from the default declared in *spec*.
37003727
37013728
Parameters
37023729
----------
37033730
name:
37043731
Column name. Must follow the same naming rules as schema fields.
37053732
spec:
3706-
A schema descriptor such as ``b2.int64(ge=0)`` or ``b2.string()``.
3707-
default:
3708-
Value written to every existing live row. Must be coercible to
3709-
*spec*'s dtype.
3733+
A schema descriptor such as ``b2.int64(ge=0)`` or a field
3734+
descriptor such as ``b2.field(b2.int64(ge=0), default=0)``.
3735+
A default is required when the table already has live rows, so
3736+
those rows can be backfilled.
37103737
cparams:
3711-
Optional compression parameters for this column's NDArray.
3738+
Optional compression parameters for this column's NDArray. When
3739+
*spec* is a :func:`blosc2.field` descriptor, its compression
3740+
parameters are used unless this argument is provided.
37123741
37133742
Raises
37143743
------
37153744
ValueError
3716-
If the table is read-only, is a view, or the column already exists.
3745+
If the table is read-only, is a view, the column already exists,
3746+
or a non-empty table is given a column without a default value.
37173747
TypeError
37183748
If the default declared in *spec* cannot be coerced to *spec*'s dtype.
37193749
"""
@@ -3727,6 +3757,17 @@ def add_column(
37273757
if name in self._computed_cols:
37283758
raise ValueError(f"A computed column named {name!r} already exists.")
37293759

3760+
spec, default, field_cparams = self._column_spec_default_and_cparams(spec)
3761+
if cparams is None:
3762+
cparams = field_cparams
3763+
3764+
live_pos = np.where(self._valid_rows[:])[0]
3765+
if default is MISSING and len(live_pos) > 0:
3766+
raise ValueError(
3767+
"add_column() requires a default declared as blosc2.field(..., default=...) "
3768+
"when the table has live rows."
3769+
)
3770+
37303771
compiled_col = self._compiled_column_from_spec(name, spec)
37313772
self._resolve_nullable_specs(
37323773
CompiledSchema(row_cls=None, columns=[compiled_col], columns_by_name={name: compiled_col}),
@@ -3737,7 +3778,6 @@ def add_column(
37373778
if self._is_varlen_scalar_column(compiled_col):
37383779
# Varlen scalar columns don't use fixed-width NDArray storage.
37393780
new_col = self._storage.create_varlen_scalar_column(name, spec=spec, cparams=cparams)
3740-
live_pos = np.where(self._valid_rows[:])[0]
37413781
for _ in live_pos:
37423782
new_col.append(default)
37433783
new_col.flush()
@@ -3746,10 +3786,15 @@ def add_column(
37463786
"add_column() does not support list columns; use the constructor with a full schema."
37473787
)
37483788
else:
3749-
try:
3750-
default_val = spec.dtype.type(default)
3751-
except (ValueError, OverflowError) as exc:
3752-
raise TypeError(f"Cannot coerce default {default!r} to dtype {spec.dtype!r}: {exc}") from exc
3789+
if default is not MISSING:
3790+
try:
3791+
default_val = spec.dtype.type(default)
3792+
except (ValueError, OverflowError) as exc:
3793+
raise TypeError(
3794+
f"Cannot coerce default {default!r} to dtype {spec.dtype!r}: {exc}"
3795+
) from exc
3796+
else:
3797+
default_val = None
37533798

37543799
capacity = len(self._valid_rows)
37553800
default_chunks, default_blocks = compute_chunks_blocks((capacity,))
@@ -3762,7 +3807,6 @@ def add_column(
37623807
cparams=cparams,
37633808
dparams=None,
37643809
)
3765-
live_pos = np.where(self._valid_rows[:])[0]
37663810
if len(live_pos) > 0:
37673811
new_col[live_pos] = default_val
37683812

tests/ctable/test_ctable_computed_cols.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ def test_add_stored_collision_with_computed():
376376
t = _make_invoice_table()
377377
t.add_computed_column("total", lambda cols: cols["price"] * cols["qty"])
378378
with pytest.raises(ValueError, match="already exists"):
379-
t.add_column("total", blosc2.float64(), default=0.0)
379+
t.add_column("total", blosc2.field(blosc2.float64(), default=0.0))
380380

381381

382382
# ---------------------------------------------------------------------------

tests/ctable/test_nullable.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ class Row:
147147
def test_add_column_nullable_true_uses_null_policy():
148148
t = CTable(IntRow)
149149
with blosc2.null_policy(blosc2.NullPolicy(signed_int_strategy="max")):
150-
t.add_column("extra", blosc2.int32(nullable=True), 0)
150+
t.add_column("extra", blosc2.field(blosc2.int32(nullable=True), default=0))
151151

152152
assert t["extra"].null_value == np.iinfo(np.int32).max
153153

tests/ctable/test_schema_mutations.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -203,34 +203,47 @@ def test_assign_respects_deleted_rows():
203203

204204
def test_add_column_appears_in_col_names():
205205
t = CTable(Row, new_data=DATA10)
206-
t.add_column("weight", blosc2.float64(), 0.0)
206+
t.add_column("weight", blosc2.field(blosc2.float64(), default=0.0))
207207
assert "weight" in t.col_names
208208

209209

210210
def test_add_column_fills_default_for_existing_rows():
211211
t = CTable(Row, new_data=DATA10)
212-
t.add_column("weight", blosc2.float64(), 5.5)
212+
t.add_column("weight", blosc2.field(blosc2.float64(), default=5.5))
213213
np.testing.assert_array_equal(t["weight"][:], np.full(10, 5.5))
214214

215215

216+
def test_add_column_without_default_allowed_for_empty_table():
217+
t = CTable(Row)
218+
t.add_column("weight", blosc2.float64())
219+
t.append((1, 2.0, True, 3.0))
220+
assert t["weight"][0] == pytest.approx(3.0)
221+
222+
223+
def test_add_column_without_default_on_non_empty_table_raises():
224+
t = CTable(Row, new_data=DATA10)
225+
with pytest.raises(ValueError, match="requires a default"):
226+
t.add_column("weight", blosc2.float64())
227+
228+
216229
def test_add_column_new_rows_can_use_it():
217230
t = CTable(Row, new_data=DATA10)
218-
t.add_column("weight", blosc2.float64(), 0.0)
231+
t.add_column("weight", blosc2.field(blosc2.float64(), default=0.0))
219232
# After adding, extend doesn't know about weight — add manually
220233
t["weight"].assign(np.ones(10) * 2.0)
221234
assert t["weight"].mean() == pytest.approx(2.0)
222235

223236

224237
def test_add_column_schema_updated():
225238
t = CTable(Row, new_data=DATA10)
226-
t.add_column("weight", blosc2.float64(), 0.0)
239+
t.add_column("weight", blosc2.field(blosc2.float64(), default=0.0))
227240
assert "weight" in t.schema.columns_by_name
228241

229242

230243
def test_add_column_persists_on_disk():
231244
path = table_path("add_col")
232245
t = CTable(Row, urlpath=path, mode="w", new_data=DATA10)
233-
t.add_column("weight", blosc2.float64(), 7.0)
246+
t.add_column("weight", blosc2.field(blosc2.float64(), default=7.0))
234247
t.close()
235248
t2 = CTable.open(path)
236249
assert "weight" in t2.col_names
@@ -241,25 +254,25 @@ def test_add_column_view_raises():
241254
t = CTable(Row, new_data=DATA10)
242255
view = t.where(t["id"] > 4)
243256
with pytest.raises(ValueError, match="view"):
244-
view.add_column("weight", blosc2.float64(), 0.0)
257+
view.add_column("weight", blosc2.field(blosc2.float64(), default=0.0))
245258

246259

247260
def test_add_column_duplicate_raises():
248261
t = CTable(Row, new_data=DATA10)
249262
with pytest.raises(ValueError, match="already exists"):
250-
t.add_column("score", blosc2.float64(), 0.0)
263+
t.add_column("score", blosc2.field(blosc2.float64(), default=0.0))
251264

252265

253266
def test_add_column_bad_default_raises():
254267
t = CTable(Row, new_data=DATA10)
255268
with pytest.raises(TypeError):
256-
t.add_column("flag", blosc2.int8(), "not_a_number")
269+
t.add_column("flag", blosc2.field(blosc2.int8(), default="not_a_number"))
257270

258271

259272
def test_add_column_skips_deleted_rows():
260273
t = CTable(Row, new_data=DATA10)
261274
t.delete([0, 1]) # 8 live rows
262-
t.add_column("weight", blosc2.float64(), 3.0)
275+
t.add_column("weight", blosc2.field(blosc2.float64(), default=3.0))
263276
vals = t["weight"][:]
264277
assert len(vals) == 8
265278
assert all(v == 3.0 for v in vals)

tests/ctable/test_vlstring_vlbytes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ class SimpleRow:
539539

540540
def test_ctable_add_vlstring_column():
541541
ct = blosc2.CTable(SimpleRow, new_data=[(i,) for i in range(5)])
542-
ct.add_column("label", blosc2.vlstring(), default="unknown")
542+
ct.add_column("label", blosc2.field(blosc2.vlstring(), default="unknown"))
543543
assert "label" in ct.col_names
544544
assert ct.label[0] == "unknown"
545545
assert ct.label[4] == "unknown"

0 commit comments

Comments
 (0)