1313import ast
1414import contextlib
1515import contextvars
16+ import copy
1617import dataclasses
1718import itertools
1819import os
6869 _validate_column_name ,
6970 compile_schema ,
7071 compute_display_width ,
72+ get_blosc2_field_metadata ,
7173 schema_from_dict ,
7274 schema_to_dict ,
7375)
@@ -3688,32 +3690,60 @@ def from_csv(
36883690 # Schema mutations: add / drop / rename columns
36893691 # ------------------------------------------------------------------
36903692
@staticmethod
def _column_spec_default_and_cparams(
    spec_or_field: SchemaSpec | dataclasses.Field,
) -> tuple[SchemaSpec, Any, dict | None]:
    """Split an ``add_column()`` argument into spec, default and cparams.

    Parameters
    ----------
    spec_or_field:
        Either a schema spec, or a ``dataclasses.Field`` for which
        ``get_blosc2_field_metadata()`` returns metadata.

    Returns
    -------
    tuple
        ``(spec, default, cparams)``.  For a plain spec the default is
        ``MISSING`` and cparams is ``None``.  For a field, the spec is a
        deep copy of the metadata's ``"spec"`` entry, the default is the
        field's ``default`` (or the result of calling its
        ``default_factory``, or ``MISSING`` when neither is set), and
        cparams is the metadata's ``"cparams"`` entry if present.

    Raises
    ------
    TypeError
        If a field carries no blosc2 metadata, or if the resolved spec is
        not a ``SchemaSpec`` instance.
    """
    # Start from the "plain spec" outcome; the field branch overrides it.
    resolved_spec = spec_or_field
    resolved_default = MISSING
    resolved_cparams = None

    if isinstance(spec_or_field, dataclasses.Field):
        field_meta = get_blosc2_field_metadata(spec_or_field)
        if field_meta is None:
            raise TypeError("add_column() field descriptors must be created with blosc2.field().")

        # Deep copy -- presumably so the new column does not share the spec
        # object held by the field descriptor (confirm against callers).
        resolved_spec = copy.deepcopy(field_meta["spec"])

        # An explicit default wins over a factory; with neither, the
        # default stays MISSING.
        if spec_or_field.default is not MISSING:
            resolved_default = spec_or_field.default
        elif spec_or_field.default_factory is not MISSING:  # type: ignore[misc]
            resolved_default = spec_or_field.default_factory()

        resolved_cparams = field_meta.get("cparams")

    # Validate last so both the plain and the field-derived spec are checked.
    if isinstance(resolved_spec, SchemaSpec):
        return resolved_spec, resolved_default, resolved_cparams
    raise TypeError(f"add_column() requires a SchemaSpec, got {type(resolved_spec)!r}.")
3718+
36913719 def add_column (
36923720 self ,
36933721 name : str ,
3694- spec : SchemaSpec ,
3695- default ,
3722+ spec : SchemaSpec | dataclasses .Field ,
36963723 * ,
36973724 cparams : dict | None = None ,
36983725 ) -> None :
3699- """Add a new column filled with * default* for every existing live row .
3726+ """Add a new column filled from the default declared in *spec* .
37003727
37013728 Parameters
37023729 ----------
37033730 name:
37043731 Column name. Must follow the same naming rules as schema fields.
37053732 spec:
3706- A schema descriptor such as ``b2.int64(ge=0)`` or ``b2.string()``.
3707- default:
3708- Value written to every existing live row. Must be coercible to
3709- *spec*'s dtype .
3733+ A schema descriptor such as ``b2.int64(ge=0)`` or a field
3734+ descriptor such as ``b2.field(b2.int64(ge=0), default=0)``.
3735+ A default is required when the table already has live rows, so
3736+ those rows can be backfilled .
37103737 cparams:
3711- Optional compression parameters for this column's NDArray.
3738+ Optional compression parameters for this column's NDArray. When
3739+ *spec* is a :func:`blosc2.field` descriptor, its compression
3740+ parameters are used unless this argument is provided.
37123741
37133742 Raises
37143743 ------
37153744 ValueError
3716- If the table is read-only, is a view, or the column already exists.
3745+ If the table is read-only, is a view, the column already exists,
3746+ or a non-empty table is given a column without a default value.
37173747 TypeError
37183748 If *default* cannot be coerced to *spec*'s dtype.
37193749 """
@@ -3727,6 +3757,17 @@ def add_column(
37273757 if name in self ._computed_cols :
37283758 raise ValueError (f"A computed column named { name !r} already exists." )
37293759
3760+ spec , default , field_cparams = self ._column_spec_default_and_cparams (spec )
3761+ if cparams is None :
3762+ cparams = field_cparams
3763+
3764+ live_pos = np .where (self ._valid_rows [:])[0 ]
3765+ if default is MISSING and len (live_pos ) > 0 :
3766+ raise ValueError (
3767+ "add_column() requires a default declared as blosc2.field(..., default=...) "
3768+ "when the table has live rows."
3769+ )
3770+
37303771 compiled_col = self ._compiled_column_from_spec (name , spec )
37313772 self ._resolve_nullable_specs (
37323773 CompiledSchema (row_cls = None , columns = [compiled_col ], columns_by_name = {name : compiled_col }),
@@ -3737,7 +3778,6 @@ def add_column(
37373778 if self ._is_varlen_scalar_column (compiled_col ):
37383779 # Varlen scalar columns don't use fixed-width NDArray storage.
37393780 new_col = self ._storage .create_varlen_scalar_column (name , spec = spec , cparams = cparams )
3740- live_pos = np .where (self ._valid_rows [:])[0 ]
37413781 for _ in live_pos :
37423782 new_col .append (default )
37433783 new_col .flush ()
@@ -3746,10 +3786,15 @@ def add_column(
37463786 "add_column() does not support list columns; use the constructor with a full schema."
37473787 )
37483788 else :
3749- try :
3750- default_val = spec .dtype .type (default )
3751- except (ValueError , OverflowError ) as exc :
3752- raise TypeError (f"Cannot coerce default { default !r} to dtype { spec .dtype !r} : { exc } " ) from exc
3789+ if default is not MISSING :
3790+ try :
3791+ default_val = spec .dtype .type (default )
3792+ except (ValueError , OverflowError ) as exc :
3793+ raise TypeError (
3794+ f"Cannot coerce default { default !r} to dtype { spec .dtype !r} : { exc } "
3795+ ) from exc
3796+ else :
3797+ default_val = None
37533798
37543799 capacity = len (self ._valid_rows )
37553800 default_chunks , default_blocks = compute_chunks_blocks ((capacity ,))
@@ -3762,7 +3807,6 @@ def add_column(
37623807 cparams = cparams ,
37633808 dparams = None ,
37643809 )
3765- live_pos = np .where (self ._valid_rows [:])[0 ]
37663810 if len (live_pos ) > 0 :
37673811 new_col [live_pos ] = default_val
37683812
0 commit comments