@@ -535,7 +535,7 @@ def is_list(self) -> bool:
535535 def is_varlen_scalar (self ) -> bool :
536536 """True if this column holds variable-length scalar strings or bytes."""
537537 col = self ._table ._schema .columns_by_name .get (self ._col_name )
538- return col is not None and isinstance (col .spec , (VLStringSpec , VLBytesSpec ))
538+ return col is not None and isinstance (col .spec , (VLStringSpec , VLBytesSpec , StructSpec ))
539539
540540 @property
541541 def _valid_rows (self ):
@@ -1581,7 +1581,7 @@ def _is_list_column(col: CompiledColumn) -> bool:
15811581
15821582 @staticmethod
15831583 def _is_varlen_scalar_column (col : CompiledColumn ) -> bool :
1584- return isinstance (col .spec , (VLStringSpec , VLBytesSpec ))
1584+ return isinstance (col .spec , (VLStringSpec , VLBytesSpec , StructSpec ))
15851585
15861586 @staticmethod
15871587 def _is_list_spec (spec : SchemaSpec ) -> bool :
@@ -1644,7 +1644,7 @@ def _resolve_nullable_specs(
16441644 for col in schema .columns :
16451645 spec = col .spec
16461646 if (
1647- isinstance (spec , (ListSpec , VLStringSpec , VLBytesSpec ))
1647+ isinstance (spec , (ListSpec , VLStringSpec , VLBytesSpec , StructSpec ))
16481648 or getattr (spec , "null_value" , None ) is not None
16491649 ):
16501650 continue
@@ -2913,14 +2913,21 @@ def _arrow_type_to_spec( # noqa: C901
29132913 for field in pa_type :
29142914 child_col = None
29152915 if arrow_col is not None :
2916- child_col = arrow_col .field (field .name )
2916+ combined = (
2917+ arrow_col .combine_chunks () if hasattr (arrow_col , "combine_chunks" ) else arrow_col
2918+ )
2919+ child_col = combined .field (field .name )
29172920 child_string_max_length = string_max_length
29182921 if field .type in (pa .string (), pa .large_string (), pa .utf8 (), pa .large_utf8 ()):
29192922 child_string_max_length = max (string_max_length or 1 , 1_000_000 )
29202923 fields [field .name ] = CTable ._arrow_type_to_spec (
2921- pa , field .type , child_col , string_max_length = child_string_max_length
2924+ pa ,
2925+ field .type ,
2926+ child_col ,
2927+ string_max_length = child_string_max_length ,
2928+ nullable = field .nullable ,
29222929 )
2923- return b2s .struct (fields )
2930+ return b2s .struct (fields , nullable = nullable )
29242931
29252932 if pa_type in (pa .string (), pa .large_string (), pa .utf8 (), pa .large_utf8 ()):
29262933 if string_max_length is None :
@@ -2938,7 +2945,7 @@ def _arrow_type_to_spec( # noqa: C901
29382945
29392946 raise TypeError (
29402947 f"No blosc2 spec for Arrow type { pa_type !r} . Supported: int8/16/32/64, "
2941- "uint8/16/32/64, float32/64, bool, string, binary, and list ."
2948+ "uint8/16/32/64, float32/64, bool, string, binary, list, and struct ."
29422949 )
29432950
29442951 @staticmethod
@@ -3197,8 +3204,11 @@ def from_arrow(
31973204 When *string_max_length* is ``None`` (the default), scalar Arrow
31983205 ``string`` / ``large_string`` columns are imported as
31993206 :func:`~blosc2.vlstring` columns and ``binary`` / ``large_binary``
3200- columns are imported as :func:`~blosc2.vlbytes` columns. Null values
3201- are represented as native ``None`` with no sentinel needed.
3207+ columns are imported as :func:`~blosc2.vlbytes` columns. Arrow
3208+ ``struct`` columns are imported as :func:`~blosc2.struct` columns backed
3209+ by batched variable-length storage. Null values for these
3210+ variable-length scalar columns are represented as native ``None``
3211+ with no sentinel needed.
32023212
32033213 When *string_max_length* is set to a positive integer, scalar string
32043214 and binary columns are imported as fixed-width
@@ -3208,9 +3218,10 @@ def from_arrow(
32083218 :func:`~blosc2.vlstring` / :func:`~blosc2.vlbytes` columns.
32093219
32103220 ``blosc2_batch_size`` controls how many rows are buffered before
3211- BatchArray-backed imported columns (list columns and varlen scalar
3212- columns) are flushed to their backend. Set it to ``None`` to keep
3213- those columns pending until the final flush.
3221+ BatchArray-backed imported columns (list columns and variable-length
3222+ scalar columns such as ``vlstring``, ``vlbytes``, and ``struct``) are
3223+ flushed to their backend. Set it to ``None`` to keep those columns
3224+ pending until the final flush.
32143225 """
32153226 pa = cls ._require_pyarrow ("from_arrow()" )
32163227 if blosc2_batch_size is not None and blosc2_batch_size <= 0 :
@@ -3314,7 +3325,9 @@ def from_parquet(
33143325
33153326 This method delegates the actual table construction to
33163327 :meth:`CTable.from_arrow`, so Arrow schema handling, nullable-column support,
3317- and Blosc2 write tuning follow the same rules as that method.
3328+ and Blosc2 write tuning follow the same rules as that method. Top-level
3329+ Arrow ``struct<...>`` columns are imported as :func:`~blosc2.struct`
3330+ columns backed by batched variable-length storage.
33183331
33193332 Parameters
33203333 ----------
@@ -3900,7 +3913,7 @@ def _fetch_col_at_positions(self, name: str, positions: np.ndarray):
39003913 )
39013914 col = self ._cols [name ]
39023915 spec = self ._schema .columns_by_name [name ].spec
3903- if self ._is_list_spec (spec ) or isinstance (spec , (VLStringSpec , VLBytesSpec )):
3916+ if self ._is_list_spec (spec ) or isinstance (spec , (VLStringSpec , VLBytesSpec , StructSpec )):
39043917 return col [positions ]
39053918 return col [positions ]
39063919
@@ -4473,7 +4486,7 @@ def _normalise_sort_keys(
44734486 dtype = self ._col_dtype (name )
44744487 if dtype is None :
44754488 cc = self ._schema .columns_by_name .get (name )
4476- if cc is not None and isinstance (cc .spec , (VLStringSpec , VLBytesSpec )):
4489+ if cc is not None and isinstance (cc .spec , (VLStringSpec , VLBytesSpec , StructSpec )):
44774490 raise TypeError (
44784491 f"Column { name !r} is a varlen scalar column and does not support sort ordering."
44794492 )
@@ -5364,10 +5377,10 @@ def create_index( # noqa: C901
53645377 col_arr = self ._cols [col_name ]
53655378 if isinstance (self ._schema .columns_by_name [col_name ].spec , ListSpec ):
53665379 raise ValueError (f"Cannot create an index on list column { col_name !r} in V1." )
5367- if isinstance (self ._schema .columns_by_name [col_name ].spec , (VLStringSpec , VLBytesSpec )):
5380+ if isinstance (self ._schema .columns_by_name [col_name ].spec , (VLStringSpec , VLBytesSpec , StructSpec )):
53685381 raise NotImplementedError (
5369- f"Cannot create an index on varlen scalar column { col_name !r} : "
5370- "indexing for vlstring/vlbytes columns is not supported yet."
5382+ f"Cannot create an index on variable-length scalar column { col_name !r} : "
5383+ "indexing for vlstring/vlbytes/struct columns is not supported yet."
53715384 )
53725385 is_persistent = self ._storage .index_anchor_path (col_name ) is not None
53735386
@@ -5756,6 +5769,8 @@ def _dtype_info_label(dtype: np.dtype | None, spec: SchemaSpec | None = None) ->
57565769 return "vlstring"
57575770 if isinstance (spec , VLBytesSpec ):
57585771 return "vlbytes"
5772+ if isinstance (spec , StructSpec ):
5773+ return spec .display_label ()
57595774 if isinstance (spec , ListSpec ):
57605775 return spec .display_label ()
57615776 if dtype is None :
@@ -6010,7 +6025,7 @@ def _guard_varlen_scalar_expression(self, expr: str) -> None:
60106025 rf"(?<!\w){ re .escape (col .name )} (?!\w)" , expr
60116026 ):
60126027 raise NotImplementedError (
6013- f"Column { col .name !r} is a vlstring/vlbytes column ; "
6028+ f"Column { col .name !r} is a variable-length scalar column ( vlstring/vlbytes/struct) ; "
60146029 "lazy expressions are not supported yet."
60156030 )
60166031
0 commit comments