diff --git a/changes/2819.chore.rst b/changes/2819.chore.rst new file mode 100644 index 0000000000..f9a3358309 --- /dev/null +++ b/changes/2819.chore.rst @@ -0,0 +1,4 @@ +Ensure that invocations of ``create_array`` use consistent keyword arguments, with consistent defaults. +Specifically, ``zarr.api.synchronous.create_array`` now takes a ``write_data`` keyword argument; the +``create_array`` method on ``zarr.Group`` takes ``data`` and ``write_data`` keyword arguments. The ``fill_value`` +keyword argument of the various invocations of ``create_array`` has been consistently set to ``None``, where previously it was either ``None`` or ``0``. \ No newline at end of file diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 6059893920..05fa5eea03 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -830,7 +830,7 @@ async def open_group( async def create( shape: ChunkCoords | int, *, # Note: this is a change from v2 - chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True + chunks: ChunkCoords | int | bool | None = None, dtype: npt.DTypeLike | None = None, compressor: dict[str, JSON] | None = None, # TODO: default and type change fill_value: Any | None = 0, # TODO: need type diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 9424ae1fde..7e5cfc4554 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -759,11 +759,12 @@ def create_array( order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, - chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None, + chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfigLike | None = None, + write_data: bool = True, ) -> Array: """Create an array. 
@@ -857,6 +858,11 @@ def create_array( Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfigLike, optional Runtime configuration for the array. + write_data : bool + If a pre-existing array-like object was provided to this function via the ``data`` parameter + then ``write_data`` determines whether the values in that array-like object should be + written to the Zarr array created by this function. If ``write_data`` is ``False``, then the + array will be left empty. Returns ------- @@ -897,6 +903,7 @@ def create_array( storage_options=storage_options, overwrite=overwrite, config=config, + write_data=write_data, ) ) ) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 8e7f7f3474..47eb80adf4 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -987,22 +987,24 @@ async def create_array( self, name: str, *, - shape: ShapeLike, - dtype: npt.DTypeLike, + shape: ShapeLike | None = None, + dtype: npt.DTypeLike | None = None, + data: np.ndarray[Any, np.dtype[Any]] | None = None, chunks: ChunkCoords | Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", compressor: CompressorLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = 0, + fill_value: Any | None = None, order: MemoryOrder | None = None, attributes: dict[str, JSON] | None = None, - chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None, + chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, + write_data: bool = True, ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Create an array within this group. 
@@ -1090,6 +1092,11 @@ async def create_array( Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfig or ArrayConfigLike, optional Runtime configuration for the array. + write_data : bool + If a pre-existing array-like object was provided to this function via the ``data`` parameter + then ``write_data`` determines whether the values in that array-like object should be + written to the Zarr array created by this function. If ``write_data`` is ``False``, then the + array will be left empty. Returns ------- @@ -1104,6 +1111,7 @@ async def create_array( name=name, shape=shape, dtype=dtype, + data=data, chunks=chunks, shards=shards, filters=filters, @@ -1118,6 +1126,7 @@ async def create_array( storage_options=storage_options, overwrite=overwrite, config=config, + write_data=write_data, ) @deprecated("Use AsyncGroup.create_array instead.") @@ -2358,22 +2367,24 @@ def create_array( self, name: str, *, - shape: ShapeLike, - dtype: npt.DTypeLike, + shape: ShapeLike | None = None, + dtype: npt.DTypeLike | None = None, + data: np.ndarray[Any, np.dtype[Any]] | None = None, chunks: ChunkCoords | Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", compressor: CompressorLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = 0, - order: MemoryOrder | None = "C", + fill_value: Any | None = None, + order: MemoryOrder | None = None, attributes: dict[str, JSON] | None = None, - chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None, + chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, + write_data: bool = True, ) -> Array: """Create an array within this group. 
@@ -2384,10 +2395,13 @@ def create_array( name : str The name of the array relative to the group. If ``path`` is ``None``, the array will be located at the root of the store. - shape : ChunkCoords - Shape of the array. - dtype : npt.DTypeLike - Data type of the array. + shape : ChunkCoords, optional + Shape of the array. Can be ``None`` if ``data`` is provided. + dtype : npt.DTypeLike | None + Data type of the array. Can be ``None`` if ``data`` is provided. + data : np.ndarray, optional + Array-like data used to initialize the array. If provided, ``shape`` and ``dtype`` must each + be ``None`` or identical to ``data.shape`` and ``data.dtype``. chunks : ChunkCoords, optional Chunk shape of the array. If not specified, default are guessed based on the shape and dtype. @@ -2461,6 +2475,11 @@ def create_array( Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfig or ArrayConfigLike, optional Runtime configuration for the array. + write_data : bool + If a pre-existing array-like object was provided to this function via the ``data`` parameter + then ``write_data`` determines whether the values in that array-like object should be + written to the Zarr array created by this function. If ``write_data`` is ``False``, then the + array will be left empty. 
Returns ------- @@ -2475,6 +2494,7 @@ def create_array( name=name, shape=shape, dtype=dtype, + data=data, chunks=chunks, shards=shards, fill_value=fill_value, @@ -2488,6 +2508,7 @@ def create_array( overwrite=overwrite, storage_options=storage_options, config=config, + write_data=write_data, ) ) ) diff --git a/tests/test_api.py b/tests/test_api.py index 94140ac784..ec8dfcff2a 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,9 +1,12 @@ from __future__ import annotations +import inspect +import pathlib from typing import TYPE_CHECKING if TYPE_CHECKING: import pathlib + from collections.abc import Callable from zarr.abc.store import Store from zarr.core.common import JSON, MemoryOrder, ZarrFormat @@ -1138,6 +1141,28 @@ def test_open_array_with_mode_r_plus(store: Store) -> None: z2[:] = 3 +@pytest.mark.parametrize( + ("a_func", "b_func"), + [ + (zarr.api.asynchronous.create_array, zarr.api.synchronous.create_array), + (zarr.api.asynchronous.save, zarr.api.synchronous.save), + (zarr.api.asynchronous.save_array, zarr.api.synchronous.save_array), + (zarr.api.asynchronous.save_group, zarr.api.synchronous.save_group), + (zarr.api.asynchronous.open_group, zarr.api.synchronous.open_group), + (zarr.api.asynchronous.create, zarr.api.synchronous.create), + ], +) +def test_consistent_signatures( + a_func: Callable[[object], object], b_func: Callable[[object], object] +) -> None: + """ + Ensure that pairs of functions have the same signature + """ + base_sig = inspect.signature(a_func) + test_sig = inspect.signature(b_func) + assert test_sig.parameters == base_sig.parameters + + def test_api_exports() -> None: """ Test that the sync API and the async API export the same objects diff --git a/tests/test_array.py b/tests/test_array.py index efcf8a6bf9..ec31206a0e 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1,4 +1,5 @@ import dataclasses +import inspect import json import math import multiprocessing as mp @@ -932,6 +933,42 @@ def 
test_auto_partition_auto_shards( assert auto_shards == expected_shards +def test_chunks_and_shards() -> None: + store = StorePath(MemoryStore()) + shape = (100, 100) + chunks = (5, 5) + shards = (10, 10) + + arr_v3 = zarr.create_array(store=store / "v3", shape=shape, chunks=chunks, dtype="i4") + assert arr_v3.chunks == chunks + assert arr_v3.shards is None + + arr_v3_sharding = zarr.create_array( + store=store / "v3_sharding", + shape=shape, + chunks=chunks, + shards=shards, + dtype="i4", + ) + assert arr_v3_sharding.chunks == chunks + assert arr_v3_sharding.shards == shards + + arr_v2 = zarr.create_array( + store=store / "v2", shape=shape, chunks=chunks, zarr_format=2, dtype="i4" + ) + assert arr_v2.chunks == chunks + assert arr_v2.shards is None + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize( + ("dtype", "fill_value_expected"), [("<U4", ""), ("<S4", b""), ("i", 0), ("f", 0.0)] +) +def test_default_fill_value(dtype: str, fill_value_expected: object, store: Store) -> None: + a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype) + assert a.fill_value == fill_value_expected + + @pytest.mark.parametrize("store", ["memory"], indirect=True) class TestCreateArray: @staticmethod @@ -1423,6 +1460,25 @@ def test_multiprocessing(store: Store, method: Literal["fork", "spawn", "forkser assert all(np.array_equal(r, data) for r in results) +def test_create_array_method_signature() -> None: + """ + Test that the signature of the ``AsyncGroup.create_array`` function has nearly the same signature + as the ``create_array`` function. ``AsyncGroup.create_array`` should take all of the same keyword + arguments as ``create_array`` except ``store``. + """ + + base_sig = inspect.signature(create_array) + meth_sig = inspect.signature(AsyncGroup.create_array) + # ignore keyword arguments that are either missing or have different semantics when + # create_array is invoked as a group method + ignore_kwargs = {"zarr_format", "store", "name"} + # TODO: make this test stronger. 
right now, it only checks that all the parameters in the + # function signature are used in the method signature. we can be more strict and check that + # the method signature uses no extra parameters. + base_params = dict(filter(lambda kv: kv[0] not in ignore_kwargs, base_sig.parameters.items())) + assert (set(base_params.items()) - set(meth_sig.parameters.items())) == set() + + async def test_sharding_coordinate_selection() -> None: store = MemoryStore() g = zarr.open_group(store, mode="w") diff --git a/tests/test_group.py b/tests/test_group.py index 1e4f31b5d6..9ab6630da1 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -1530,6 +1530,7 @@ def test_create_nodes_concurrency_limit(store: MemoryStore) -> None: @pytest.mark.parametrize( ("a_func", "b_func"), [ + (zarr.core.group.AsyncGroup.create_array, zarr.core.group.Group.create_array), (zarr.core.group.AsyncGroup.create_hierarchy, zarr.core.group.Group.create_hierarchy), (zarr.core.group.create_hierarchy, zarr.core.sync_group.create_hierarchy), (zarr.core.group.create_nodes, zarr.core.sync_group.create_nodes), diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index c1ff2e130a..85b3e99646 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -521,7 +521,7 @@ async def test_consolidated_metadata_v2(self): dtype=dtype, attributes={"key": "a"}, chunks=(1,), - fill_value=0, + fill_value=None, compressor=Blosc(), order="C", ),