Skip to content

Commit f8bc315

Browse files
d-v-b and dcherian authored
create_array creates explicit groups (#2795)
* refactor create_array tests and add failing test for implicit -> explicit groups
* clean up array config parsing, and modify init_array to create parent groups and return an asyncarray instead of metadata
* typecheck tests
* remove comment
* release notes
* add type: ignore statement
* fix unbound local error in test
* remove type:ignore
* Add property test
* fix test
* Update tests/test_array.py

---------

Co-authored-by: Deepak Cherian <[email protected]>
1 parent 3c25dac commit f8bc315

File tree

7 files changed

+429
-413
lines changed

7 files changed

+429
-413
lines changed

Diff for: changes/2795.bugfix.rst

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Alters the behavior of ``create_array`` to ensure that any groups implied by the array's name are created if they do not already exist. Also simplifies the type signature for any function that takes an ArrayConfig-like object.

Diff for: src/zarr/api/asynchronous.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from typing_extensions import deprecated
1111

1212
from zarr.core.array import Array, AsyncArray, create_array, get_array_metadata
13-
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike
13+
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
1414
from zarr.core.buffer import NDArrayLike
1515
from zarr.core.common import (
1616
JSON,
@@ -856,7 +856,7 @@ async def create(
856856
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
857857
dimension_names: Iterable[str] | None = None,
858858
storage_options: dict[str, Any] | None = None,
859-
config: ArrayConfig | ArrayConfigLike | None = None,
859+
config: ArrayConfigLike | None = None,
860860
**kwargs: Any,
861861
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
862862
"""Create an array.
@@ -1018,7 +1018,7 @@ async def create(
10181018
mode = "a"
10191019
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
10201020

1021-
config_dict: ArrayConfigLike = {}
1021+
config_dict: ArrayConfigParams = {}
10221022

10231023
if write_empty_chunks is not None:
10241024
if config is not None:

Diff for: src/zarr/api/synchronous.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
SerializerLike,
2626
ShardsLike,
2727
)
28-
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike
28+
from zarr.core.array_spec import ArrayConfigLike
2929
from zarr.core.buffer import NDArrayLike
3030
from zarr.core.chunk_key_encodings import ChunkKeyEncoding, ChunkKeyEncodingLike
3131
from zarr.core.common import (
@@ -625,7 +625,7 @@ def create(
625625
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
626626
dimension_names: Iterable[str] | None = None,
627627
storage_options: dict[str, Any] | None = None,
628-
config: ArrayConfig | ArrayConfigLike | None = None,
628+
config: ArrayConfigLike | None = None,
629629
**kwargs: Any,
630630
) -> Array:
631631
"""Create an array.
@@ -695,7 +695,7 @@ def create(
695695
storage_options : dict
696696
If using an fsspec URL to create the store, these will be passed to
697697
the backend implementation. Ignored otherwise.
698-
config : ArrayConfig or ArrayConfigLike, optional
698+
config : ArrayConfigLike, optional
699699
Runtime configuration of the array. If provided, will override the
700700
default values from `zarr.config.array`.
701701
@@ -761,7 +761,7 @@ def create_array(
761761
dimension_names: Iterable[str] | None = None,
762762
storage_options: dict[str, Any] | None = None,
763763
overwrite: bool = False,
764-
config: ArrayConfig | ArrayConfigLike | None = None,
764+
config: ArrayConfigLike | None = None,
765765
) -> Array:
766766
"""Create an array.
767767
@@ -853,7 +853,7 @@ def create_array(
853853
Ignored otherwise.
854854
overwrite : bool, default False
855855
Whether to overwrite an array with the same name in the store, if one exists.
856-
config : ArrayConfig or ArrayConfigLike, optional
856+
config : ArrayConfigLike, optional
857857
Runtime configuration for the array.
858858
859859
Returns

Diff for: src/zarr/core/array.py

+28-32
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ class AsyncArray(Generic[T_ArrayMetadata]):
221221
The metadata of the array.
222222
store_path : StorePath
223223
The path to the Zarr store.
224-
config : ArrayConfig, optional
224+
config : ArrayConfigLike, optional
225225
The runtime configuration of the array, by default None.
226226
227227
Attributes
@@ -246,22 +246,22 @@ def __init__(
246246
self: AsyncArray[ArrayV2Metadata],
247247
metadata: ArrayV2Metadata | ArrayV2MetadataDict,
248248
store_path: StorePath,
249-
config: ArrayConfig | None = None,
249+
config: ArrayConfigLike | None = None,
250250
) -> None: ...
251251

252252
@overload
253253
def __init__(
254254
self: AsyncArray[ArrayV3Metadata],
255255
metadata: ArrayV3Metadata | ArrayV3MetadataDict,
256256
store_path: StorePath,
257-
config: ArrayConfig | None = None,
257+
config: ArrayConfigLike | None = None,
258258
) -> None: ...
259259

260260
def __init__(
261261
self,
262262
metadata: ArrayMetadata | ArrayMetadataDict,
263263
store_path: StorePath,
264-
config: ArrayConfig | None = None,
264+
config: ArrayConfigLike | None = None,
265265
) -> None:
266266
if isinstance(metadata, dict):
267267
zarr_format = metadata["zarr_format"]
@@ -275,12 +275,11 @@ def __init__(
275275
raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3")
276276

277277
metadata_parsed = parse_array_metadata(metadata)
278-
279-
config = ArrayConfig.from_dict({}) if config is None else config
278+
config_parsed = parse_array_config(config)
280279

281280
object.__setattr__(self, "metadata", metadata_parsed)
282281
object.__setattr__(self, "store_path", store_path)
283-
object.__setattr__(self, "_config", config)
282+
object.__setattr__(self, "_config", config_parsed)
284283
object.__setattr__(self, "codec_pipeline", create_codec_pipeline(metadata=metadata_parsed))
285284

286285
# this overload defines the function signature when zarr_format is 2
@@ -304,7 +303,7 @@ async def create(
304303
# runtime
305304
overwrite: bool = False,
306305
data: npt.ArrayLike | None = None,
307-
config: ArrayConfig | ArrayConfigLike | None = None,
306+
config: ArrayConfigLike | None = None,
308307
) -> AsyncArray[ArrayV2Metadata]: ...
309308

310309
# this overload defines the function signature when zarr_format is 3
@@ -333,7 +332,7 @@ async def create(
333332
# runtime
334333
overwrite: bool = False,
335334
data: npt.ArrayLike | None = None,
336-
config: ArrayConfig | ArrayConfigLike | None = None,
335+
config: ArrayConfigLike | None = None,
337336
) -> AsyncArray[ArrayV3Metadata]: ...
338337

339338
@overload
@@ -361,7 +360,7 @@ async def create(
361360
# runtime
362361
overwrite: bool = False,
363362
data: npt.ArrayLike | None = None,
364-
config: ArrayConfig | ArrayConfigLike | None = None,
363+
config: ArrayConfigLike | None = None,
365364
) -> AsyncArray[ArrayV3Metadata]: ...
366365

367366
@overload
@@ -395,7 +394,7 @@ async def create(
395394
# runtime
396395
overwrite: bool = False,
397396
data: npt.ArrayLike | None = None,
398-
config: ArrayConfig | ArrayConfigLike | None = None,
397+
config: ArrayConfigLike | None = None,
399398
) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]: ...
400399

401400
@classmethod
@@ -430,7 +429,7 @@ async def create(
430429
# runtime
431430
overwrite: bool = False,
432431
data: npt.ArrayLike | None = None,
433-
config: ArrayConfig | ArrayConfigLike | None = None,
432+
config: ArrayConfigLike | None = None,
434433
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
435434
"""Method to create a new asynchronous array instance.
436435
@@ -508,7 +507,7 @@ async def create(
508507
Whether to raise an error if the store already exists (default is False).
509508
data : npt.ArrayLike, optional
510509
The data to be inserted into the array (default is None).
511-
config : ArrayConfig or ArrayConfigLike, optional
510+
config : ArrayConfigLike, optional
512511
Runtime configuration for the array.
513512
514513
Returns
@@ -571,7 +570,7 @@ async def _create(
571570
# runtime
572571
overwrite: bool = False,
573572
data: npt.ArrayLike | None = None,
574-
config: ArrayConfig | ArrayConfigLike | None = None,
573+
config: ArrayConfigLike | None = None,
575574
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
576575
"""Method to create a new asynchronous array instance.
577576
See :func:`AsyncArray.create` for more details.
@@ -1745,7 +1744,7 @@ def create(
17451744
compressor: dict[str, JSON] | None = None,
17461745
# runtime
17471746
overwrite: bool = False,
1748-
config: ArrayConfig | ArrayConfigLike | None = None,
1747+
config: ArrayConfigLike | None = None,
17491748
) -> Array:
17501749
"""Creates a new Array instance from an initialized store.
17511750
@@ -1874,7 +1873,7 @@ def _create(
18741873
compressor: dict[str, JSON] | None = None,
18751874
# runtime
18761875
overwrite: bool = False,
1877-
config: ArrayConfig | ArrayConfigLike | None = None,
1876+
config: ArrayConfigLike | None = None,
18781877
) -> Array:
18791878
"""Creates a new Array instance from an initialized store.
18801879
See :func:`Array.create` for more details.
@@ -3814,7 +3813,8 @@ async def init_array(
38143813
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
38153814
dimension_names: Iterable[str] | None = None,
38163815
overwrite: bool = False,
3817-
) -> ArrayV3Metadata | ArrayV2Metadata:
3816+
config: ArrayConfigLike | None,
3817+
) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]:
38183818
"""Create and persist an array metadata document.
38193819
38203820
Parameters
@@ -3893,11 +3893,13 @@ async def init_array(
38933893
Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
38943894
overwrite : bool, default False
38953895
Whether to overwrite an array with the same name in the store, if one exists.
3896+
config : ArrayConfigLike or None, optional
3897+
Configuration for this array.
38963898
38973899
Returns
38983900
-------
3899-
ArrayV3Metadata | ArrayV2Metadata
3900-
The array metadata document.
3901+
AsyncArray
3902+
The AsyncArray.
39013903
"""
39023904

39033905
if zarr_format is None:
@@ -3997,14 +3999,9 @@ async def init_array(
39973999
attributes=attributes,
39984000
)
39994001

4000-
# save the metadata to disk
4001-
# TODO: make this easier -- it should be a simple function call that takes a {key: buffer}
4002-
coros = (
4003-
(store_path / key).set(value)
4004-
for key, value in meta.to_buffer_dict(default_buffer_prototype()).items()
4005-
)
4006-
await gather(*coros)
4007-
return meta
4002+
arr = AsyncArray(metadata=meta, store_path=store_path, config=config)
4003+
await arr._save_metadata(meta, ensure_parents=True)
4004+
return arr
40084005

40094006

40104007
async def create_array(
@@ -4027,7 +4024,7 @@ async def create_array(
40274024
dimension_names: Iterable[str] | None = None,
40284025
storage_options: dict[str, Any] | None = None,
40294026
overwrite: bool = False,
4030-
config: ArrayConfig | ArrayConfigLike | None = None,
4027+
config: ArrayConfigLike | None = None,
40314028
write_data: bool = True,
40324029
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
40334030
"""Create an array.
@@ -4117,7 +4114,7 @@ async def create_array(
41174114
Ignored otherwise.
41184115
overwrite : bool, default False
41194116
Whether to overwrite an array with the same name in the store, if one exists.
4120-
config : ArrayConfig or ArrayConfigLike, optional
4117+
config : ArrayConfigLike, optional
41214118
Runtime configuration for the array.
41224119
write_data : bool
41234120
If a pre-existing array-like object was provided to this function via the ``data`` parameter
@@ -4143,13 +4140,12 @@ async def create_array(
41434140
<AsyncArray memory://140349042942400 shape=(100, 100) dtype=int32>
41444141
"""
41454142
mode: Literal["a"] = "a"
4146-
config_parsed = parse_array_config(config)
41474143
store_path = await make_store_path(store, path=name, mode=mode, storage_options=storage_options)
41484144

41494145
data_parsed, shape_parsed, dtype_parsed = _parse_data_params(
41504146
data=data, shape=shape, dtype=dtype
41514147
)
4152-
meta = await init_array(
4148+
result = await init_array(
41534149
store_path=store_path,
41544150
shape=shape_parsed,
41554151
dtype=dtype_parsed,
@@ -4165,9 +4161,9 @@ async def create_array(
41654161
chunk_key_encoding=chunk_key_encoding,
41664162
dimension_names=dimension_names,
41674163
overwrite=overwrite,
4164+
config=config,
41684165
)
41694166

4170-
result = AsyncArray(metadata=meta, store_path=store_path, config=config_parsed)
41714167
if write_data is True and data_parsed is not None:
41724168
await result._set_selection(
41734169
BasicIndexer(..., shape=result.shape, chunk_grid=result.metadata.chunk_grid),

Diff for: src/zarr/core/array_spec.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from zarr.core.common import ChunkCoords
2222

2323

24-
class ArrayConfigLike(TypedDict):
24+
class ArrayConfigParams(TypedDict):
2525
"""
2626
A TypedDict model of the attributes of an ArrayConfig class, but with no required fields.
2727
This allows for partial construction of an ArrayConfig, with the assumption that the unset
@@ -56,13 +56,13 @@ def __init__(self, order: MemoryOrder, write_empty_chunks: bool) -> None:
5656
object.__setattr__(self, "write_empty_chunks", write_empty_chunks_parsed)
5757

5858
@classmethod
59-
def from_dict(cls, data: ArrayConfigLike) -> Self:
59+
def from_dict(cls, data: ArrayConfigParams) -> Self:
6060
"""
6161
Create an ArrayConfig from a dict. The keys of that dict are a subset of the
6262
attributes of the ArrayConfig class. Any keys missing from that dict will be set to the
6363
values in the ``array`` namespace of ``zarr.config``.
6464
"""
65-
kwargs_out: ArrayConfigLike = {}
65+
kwargs_out: ArrayConfigParams = {}
6666
for f in fields(ArrayConfig):
6767
field_name = cast(Literal["order", "write_empty_chunks"], f.name)
6868
if field_name not in data:
@@ -72,7 +72,10 @@ def from_dict(cls, data: ArrayConfigLike) -> Self:
7272
return cls(**kwargs_out)
7373

7474

75-
def parse_array_config(data: ArrayConfig | ArrayConfigLike | None) -> ArrayConfig:
75+
ArrayConfigLike = ArrayConfig | ArrayConfigParams
76+
77+
78+
def parse_array_config(data: ArrayConfigLike | None) -> ArrayConfig:
7679
"""
7780
Convert various types of data to an ArrayConfig.
7881
"""

0 commit comments

Comments
 (0)