Skip to content

Commit 0d4a7f9

Browse files
committed
Preserve no-default semantics in CTable schemas
Omit "default" during schema serialization when no default is declared, so explicit default=None can round-trip distinctly from dataclasses.MISSING. Also improve CTable append/extend behavior and errors for omitted columns: columns with declared defaults are filled automatically, while columns with no default declared produce clearer ValueError messages (in append, nullable columns are exempt from this check).
1 parent 3d6f2b1 commit 0d4a7f9

4 files changed

Lines changed: 79 additions & 16 deletions

File tree

src/blosc2/ctable.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3735,16 +3735,16 @@ def add_column(
37353735
spec:
37363736
A schema descriptor such as ``b2.int64(ge=0)`` or a field
37373737
descriptor such as ``b2.field(b2.int64(ge=0), default=0)``.
3738-
A default is required when the table already has live rows, so
3739-
those rows can be backfilled.
3738+
When the table already has live rows, use ``blosc2.field(...)``
3739+
with a default declared so those rows can be backfilled.
37403740
37413741
Raises
37423742
------
37433743
ValueError
37443744
If the table is read-only, is a view, the column already exists,
3745-
or a non-empty table is given a column without a default value.
3745+
or a non-empty table is given a column with no default declared.
37463746
TypeError
3747-
If *default* cannot be coerced to *spec*'s dtype.
3747+
If a declared default cannot be coerced to *spec*'s dtype.
37483748
"""
37493749
if self._read_only:
37503750
raise ValueError("Table is read-only (opened with mode='r').")
@@ -4111,6 +4111,28 @@ def _autofill_materialized_row_values(self, row: dict[str, Any]) -> dict[str, An
41114111
row[name] = np.asarray(values, dtype=meta["dtype"])[0]
41124112
return row
41134113

4114+
def _validate_no_default_columns_present(self, row: dict[str, Any]) -> None:
4115+
"""Raise a clear error when a row omits a column with no default declared."""
4116+
for col in self._schema.columns:
4117+
if col.name in row:
4118+
continue
4119+
is_nullable = getattr(col.spec, "null_value", None) is not None or bool(
4120+
getattr(col.spec, "nullable", False)
4121+
)
4122+
if col.default is MISSING and not is_nullable:
4123+
raise ValueError(f"Column {col.name!r} has no default declared; a value must be provided.")
4124+
4125+
def _fill_default_batch_columns(self, raw_columns: dict[str, Any], row_count: int) -> dict[str, Any]:
4126+
"""Fill omitted batch columns from defaults, or raise if no default is declared."""
4127+
raw_columns = dict(raw_columns)
4128+
for col in self._schema.columns:
4129+
if col.name in raw_columns:
4130+
continue
4131+
if col.default is MISSING:
4132+
raise ValueError(f"Column {col.name!r} has no default declared; values must be provided.")
4133+
raw_columns[col.name] = [col.default] * row_count
4134+
return raw_columns
4135+
41144136
def _autofill_materialized_batch_columns(
41154137
self, raw_columns: dict[str, Any], row_count: int, *, provided_names: set[str]
41164138
) -> dict[str, Any]:
@@ -5974,6 +5996,7 @@ def append(self, data: list | np.void | np.ndarray) -> None:
59745996
# Normalize → validate → coerce
59755997
row = self._normalize_row_input(data)
59765998
row = self._autofill_materialized_row_values(row)
5999+
self._validate_no_default_columns_present(row)
59776000
if self._validate:
59786001
from blosc2.schema_validation import validate_row
59796002

@@ -6033,7 +6056,8 @@ def extend(self, data: list | CTable | Any, *, validate: bool | None = None) ->
60336056
*data* may be:
60346057
60356058
* a **dict of arrays** ``{"col": array, ...}`` — all arrays must have
6036-
the same length; missing columns are filled with their default value;
6059+
the same length; omitted columns are filled from their declared default;
6060+
columns with no default declared must be provided;
60376061
* a **list of rows**, each compatible with :meth:`append`;
60386062
* another **CTable** — columns are matched by name.
60396063
@@ -6103,6 +6127,7 @@ def extend(self, data: list | CTable | Any, *, validate: bool | None = None) ->
61036127
raw_columns = self._autofill_materialized_batch_columns(
61046128
raw_columns, new_nrows, provided_names=provided_names
61056129
)
6130+
raw_columns = self._fill_default_batch_columns(raw_columns, new_nrows)
61066131

61076132
# Validate constraints column-by-column before writing
61086133
if do_validate:

src/blosc2/schema_compiler.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ class CompiledColumn:
149149
py_type: Any
150150
spec: SchemaSpec
151151
dtype: np.dtype | None
152-
default: Any # MISSING means required (no default)
152+
default: Any # MISSING means no default declared
153153
config: ColumnConfig
154154
display_width: int = 20 # terminal column width for __str__ / info()
155155

@@ -360,9 +360,9 @@ def _json_to_bytes(value: dict[str, Any]) -> bytes:
360360

361361

362362
def _default_to_json(value: Any) -> Any:
363-
"""Convert a field default to a JSON-compatible value."""
363+
"""Convert a declared field default to a JSON-compatible value."""
364364
if value is MISSING:
365-
return None
365+
raise ValueError("Cannot serialize MISSING as a declared default.")
366366
if isinstance(value, complex):
367367
return {"__complex__": True, "real": value.real, "imag": value.imag}
368368
if isinstance(value, bytes):
@@ -372,8 +372,6 @@ def _default_to_json(value: Any) -> Any:
372372

373373
def _default_from_json(value: Any) -> Any:
374374
"""Reverse of :func:`_default_to_json`."""
375-
if value is None:
376-
return MISSING
377375
if isinstance(value, dict) and value.get("__complex__"):
378376
return complex(value["real"], value["imag"])
379377
if isinstance(value, dict) and value.get("__bytes__"):
@@ -411,7 +409,7 @@ def schema_to_dict(schema: CompiledSchema) -> dict[str, Any]:
411409
"version": 1,
412410
"row_cls": "Row",
413411
"columns": [
414-
{"name": "id", "kind": "int64", "ge": 0, "default": null},
412+
{"name": "id", "kind": "int64", "ge": 0},
415413
{"name": "score", "kind": "float64", "ge": 0, "le": 100, "default": 0.0},
416414
{"name": "active", "kind": "bool", "default": true},
417415
]
@@ -423,7 +421,8 @@ def schema_to_dict(schema: CompiledSchema) -> dict[str, Any]:
423421
entry.update(col.spec.to_metadata_dict()) # adds "kind" + constraints
424422
if isinstance(entry.get("null_value"), bytes):
425423
entry["null_value"] = _bytes_to_json(entry["null_value"])
426-
entry["default"] = _default_to_json(col.default)
424+
if col.default is not MISSING:
425+
entry["default"] = _default_to_json(col.default)
427426
if col.config.cparams is not None:
428427
entry["cparams"] = col.config.cparams
429428
if col.config.dparams is not None:
@@ -465,7 +464,7 @@ def schema_from_dict(data: dict[str, Any]) -> CompiledSchema:
465464
entry = dict(entry) # don't mutate caller's data
466465
name = entry.pop("name")
467466
kind = entry.pop("kind")
468-
default = _default_from_json(entry.pop("default", None))
467+
default = _default_from_json(entry.pop("default")) if "default" in entry else MISSING
469468
cparams = entry.pop("cparams", None)
470469
dparams = entry.pop("dparams", None)
471470
chunks = tuple(entry.pop("chunks")) if "chunks" in entry else None

tests/ctable/test_schema_compiler.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ class WithComplex:
4646
c_val: complex = blosc2.field(blosc2.complex128(), default=0j)
4747

4848

49+
@dataclass
50+
class WithNoneDefault:
51+
id: int = blosc2.field(blosc2.int64())
52+
note: str = blosc2.field(blosc2.vlstring(nullable=True), default=None)
53+
54+
4955
# -------------------------------------------------------------------
5056
# compile_schema — explicit b2.field()
5157
# -------------------------------------------------------------------
@@ -84,7 +90,7 @@ def test_compile_column_specs():
8490

8591
def test_compile_defaults():
8692
s = compile_schema(Simple)
87-
assert s.columns_by_name["id"].default is MISSING # required
93+
assert s.columns_by_name["id"].default is MISSING # no default declared
8894
assert s.columns_by_name["score"].default == 0.0
8995
assert s.columns_by_name["active"].default is True
9096

@@ -193,7 +199,7 @@ def test_schema_to_dict_column_fields():
193199
id_col = next(c for c in d["columns"] if c["name"] == "id")
194200
assert id_col["kind"] == "int64"
195201
assert id_col["ge"] == 0
196-
assert id_col["default"] is None # MISSING → None
202+
assert "default" not in id_col # no default declared omits the key
197203

198204

199205
def test_schema_to_dict_default_values():
@@ -213,6 +219,20 @@ def test_schema_to_dict_complex_default():
213219
assert c_col["default"]["imag"] == 0.0
214220

215221

222+
def test_schema_to_dict_none_default():
223+
d = schema_to_dict(compile_schema(WithNoneDefault))
224+
id_col = next(c for c in d["columns"] if c["name"] == "id")
225+
note_col = next(c for c in d["columns"] if c["name"] == "note")
226+
assert "default" not in id_col
227+
assert note_col["default"] is None
228+
229+
230+
def test_schema_roundtrip_none_default():
231+
restored = schema_from_dict(schema_to_dict(compile_schema(WithNoneDefault)))
232+
assert restored.columns_by_name["id"].default is MISSING
233+
assert restored.columns_by_name["note"].default is None
234+
235+
216236
def test_schema_roundtrip():
217237
"""schema_from_dict(schema_to_dict(s)) reproduces the same column structure."""
218238
original = compile_schema(Simple)

tests/ctable/test_schema_validation.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,18 @@ def test_append_boundary_values():
6060
def test_append_default_fill():
6161
"""Fields with defaults can be omitted from a tuple — Pydantic fills them in."""
6262
t = CTable(Row, expected_size=5)
63-
# Only id is required; score and active have defaults
63+
# Only id has no default declared; score and active have defaults
6464
t.append((5,)) # score=0.0, active=True filled by defaults
6565
assert len(t) == 1
6666
assert t[0].id == 5
6767

6868

69+
def test_append_omitted_no_default_column_raises_clear_error():
70+
t = CTable(Row, expected_size=5)
71+
with pytest.raises(ValueError, match="no default declared"):
72+
t.append(())
73+
74+
6975
def test_append_validate_false():
7076
"""validate=False skips constraint checks — invalid data is stored silently."""
7177
t = CTable(Row, expected_size=5, validate=False)
@@ -103,6 +109,19 @@ def test_extend_le_violation():
103109
t.extend(data)
104110

105111

112+
def test_extend_omitted_columns_with_defaults_are_filled():
113+
t = CTable(Row, expected_size=10)
114+
t.extend({"id": [1, 2]})
115+
assert list(t["score"][:]) == [0.0, 0.0]
116+
assert list(t["active"][:]) == [True, True]
117+
118+
119+
def test_extend_omitted_no_default_column_raises_clear_error():
120+
t = CTable(Row, expected_size=10)
121+
with pytest.raises(ValueError, match="no default declared"):
122+
t.extend({"score": [1.0, 2.0]})
123+
124+
106125
def test_extend_validate_false():
107126
"""validate=False on the table skips bulk constraint checks."""
108127
t = CTable(Row, expected_size=10, validate=False)

0 commit comments

Comments
 (0)