From 210f4e9434615c8d587be7fc114b647e90722665 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 12 Feb 2025 15:42:55 +0100 Subject: [PATCH 1/6] add signature tests for async / sync api, and fix mismatched signatures --- src/zarr/api/asynchronous.py | 2 +- src/zarr/api/synchronous.py | 9 ++++++++- tests/test_api.py | 25 +++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 0584f19c3f..d916ffa218 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -824,7 +824,7 @@ async def open_group( async def create( shape: ChunkCoords | int, *, # Note: this is a change from v2 - chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True + chunks: ChunkCoords | int | bool | None = None, dtype: npt.DTypeLike | None = None, compressor: dict[str, JSON] | None = None, # TODO: default and type change fill_value: Any | None = 0, # TODO: need type diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index fe68981cb9..c3953a97cd 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -757,11 +757,12 @@ def create_array( order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, - chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None, + chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfig | ArrayConfigLike | None = None, + write_data: bool = True, ) -> Array: """Create an array. @@ -855,6 +856,11 @@ def create_array( Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfig or ArrayConfigLike, optional Runtime configuration for the array. + write_data : bool + If a pre-existing array-like object was provided to this function via the ``data`` parameter + then ``write_data`` determines whether the values in that array-like object should be + written to the Zarr array created by this function. If ``write_data`` is ``False``, then the + array will be left empty. Returns ------- @@ -895,6 +901,7 @@ def create_array( storage_options=storage_options, overwrite=overwrite, config=config, + write_data=write_data, ) ) ) diff --git a/tests/test_api.py b/tests/test_api.py index aacd558f2a..ac5c72634e 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,5 +1,7 @@ +import inspect import pathlib import warnings +from collections.abc import Callable from typing import Literal import numpy as np @@ -8,6 +10,7 @@ import zarr import zarr.api.asynchronous +import zarr.api.synchronous import zarr.core.group from zarr import Array, Group from zarr.abc.store import Store @@ -1121,3 +1124,25 @@ def test_open_array_with_mode_r_plus(store: Store) -> None: assert isinstance(z2, Array) assert (z2[:] == 1).all() z2[:] = 3 + + +@pytest.mark.parametrize( + ("a_func", "b_func"), + [ + (zarr.api.asynchronous.create_array, zarr.api.synchronous.create_array), + (zarr.api.asynchronous.save, zarr.api.synchronous.save), + (zarr.api.asynchronous.save_array, zarr.api.synchronous.save_array), + (zarr.api.asynchronous.save_group, zarr.api.synchronous.save_group), + (zarr.api.asynchronous.open_group, zarr.api.synchronous.open_group), + (zarr.api.asynchronous.create, zarr.api.synchronous.create), + ], +) +def test_consistent_signatures( + a_func: Callable[[object], object], b_func: Callable[[object], object] +) -> None: + """ + Ensure that pairs of functions have the same signature + """ + base_sig = inspect.signature(a_func) + test_sig = inspect.signature(b_func) + assert test_sig.parameters == base_sig.parameters From 4c966c01b838bce30757749b0f4213308fb5c77a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 12 Feb 2025 17:13:14 +0100 Subject: [PATCH 2/6] test for consistent signatures, and make array default fill value consistently 0 --- src/zarr/api/synchronous.py | 2 +- src/zarr/core/array.py | 2 +- src/zarr/core/group.py | 15 ++++++++++++--- tests/test_array.py | 22 +++++++++++++++++++++- 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index c3953a97cd..2596a70bc0 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -753,7 +753,7 @@ def create_array( filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = None, + fill_value: Any | None = 0, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4c444a81fa..05d3d227be 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -4015,7 +4015,7 @@ async def create_array( filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = None, + fill_value: Any | None = 0, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 1f5d57c0ab..fd9d4cf171 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -1005,8 +1005,9 @@ async def create_array( self, name: str, *, - shape: ShapeLike, - dtype: npt.DTypeLike, + shape: ShapeLike | None = None, + dtype: npt.DTypeLike | None = None, + data: np.ndarray[Any, np.dtype[Any]] | None = None, chunks: ChunkCoords | Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", @@ -1016,11 +1017,12 @@ async def create_array( fill_value: Any | None = 0, order: MemoryOrder | None = None, attributes: dict[str, JSON] | None = None, - chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None, + chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfig | ArrayConfigLike | None = None, + write_data: bool = True, ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Create an array within this group. @@ -1108,6 +1110,11 @@ async def create_array( Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfig or ArrayConfigLike, optional Runtime configuration for the array. + write_data : bool + If a pre-existing array-like object was provided to this function via the ``data`` parameter + then ``write_data`` determines whether the values in that array-like object should be + written to the Zarr array created by this function. If ``write_data`` is ``False``, then the + array will be left empty. Returns ------- @@ -1122,6 +1129,7 @@ async def create_array( name=name, shape=shape, dtype=dtype, + data=data, chunks=chunks, shards=shards, filters=filters, @@ -1136,6 +1144,7 @@ async def create_array( storage_options=storage_options, overwrite=overwrite, config=config, + write_data=write_data, ) @deprecated("Use AsyncGroup.create_array instead.") diff --git a/tests/test_array.py b/tests/test_array.py index 1b84d1d061..f1777e511c 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1,4 +1,5 @@ import dataclasses +import inspect import json import math import multiprocessing as mp @@ -1021,7 +1022,7 @@ def test_chunks_and_shards() -> None: def test_create_array_default_fill_values() -> None: a = zarr.create_array(MemoryStore(), shape=(5,), chunks=(5,), dtype=" None: + """ + Test that the signature of the ``AsyncGroup.create_array`` function has nearly the same signature + as the ``create_array`` function. ``AsyncGroup.create_array`` should take all of the same keyword + arguments as ``create_array`` except ``store``. + """ + + base_sig = inspect.signature(create_array) + meth_sig = inspect.signature(AsyncGroup.create_array) + # ignore keyword arguments that are either missing or have different semantics when + # create_array is invoked as a group method + ignore_kwargs = {"zarr_format", "store", "name"} + # TODO: make this test stronger. right now, it only checks that all the parameters in the + # function signature are used in the method signature. we can be more strict and check that + # the method signature uses no extra parameters. + base_params = dict(filter(lambda kv: kv[0] not in ignore_kwargs, base_sig.parameters.items())) + assert (set(base_params.items()) - set(meth_sig.parameters.items())) == set() From 665eebb8843e9ae84e193cbf8098db4ea7a33b76 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 12 Feb 2025 17:29:49 +0100 Subject: [PATCH 3/6] test for async group / group methods --- src/zarr/core/group.py | 28 ++++++++++++++++++++-------- tests/test_group.py | 18 ++++++++++++++++++ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index fd9d4cf171..9de5162fc6 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -2249,8 +2249,9 @@ def create_array( self, name: str, *, - shape: ShapeLike, - dtype: npt.DTypeLike, + shape: ShapeLike | None = None, + dtype: npt.DTypeLike | None = None, + data: np.ndarray[Any, np.dtype[Any]] | None = None, chunks: ChunkCoords | Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", @@ -2258,13 +2259,14 @@ def create_array( compressor: CompressorLike = "auto", serializer: SerializerLike = "auto", fill_value: Any | None = 0, - order: MemoryOrder | None = "C", + order: MemoryOrder | None = None, attributes: dict[str, JSON] | None = None, - chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None, + chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfig | ArrayConfigLike | None = None, + write_data: bool = True, ) -> Array: """Create an array within this group. @@ -2275,10 +2277,13 @@ def create_array( name : str The name of the array relative to the group. If ``path`` is ``None``, the array will be located at the root of the store. - shape : ChunkCoords - Shape of the array. - dtype : npt.DTypeLike - Data type of the array. + shape : ChunkCoords, optional + Shape of the array. Can be ``None`` if ``data`` is provided. + dtype : npt.DTypeLike | None + Data type of the array. Can be ``None`` if ``data`` is provided. + data : Array-like data to use for initializing the array. If this parameter is provided, the + ``shape`` and ``dtype`` parameters must be identical to ``data.shape`` and ``data.dtype``, + or ``None``. chunks : ChunkCoords, optional Chunk shape of the array. If not specified, default are guessed based on the shape and dtype. @@ -2352,6 +2357,11 @@ def create_array( Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfig or ArrayConfigLike, optional Runtime configuration for the array. + write_data : bool + If a pre-existing array-like object was provided to this function via the ``data`` parameter + then ``write_data`` determines whether the values in that array-like object should be + written to the Zarr array created by this function. If ``write_data`` is ``False``, then the + array will be left empty. Returns ------- @@ -2366,6 +2376,7 @@ def create_array( name=name, shape=shape, dtype=dtype, + data=data, chunks=chunks, shards=shards, fill_value=fill_value, @@ -2379,6 +2390,7 @@ def create_array( overwrite=overwrite, storage_options=storage_options, config=config, + write_data=write_data, ) ) ) diff --git a/tests/test_group.py b/tests/test_group.py index 144054605e..b19f0257f1 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -1,6 +1,7 @@ from __future__ import annotations import contextlib +import inspect import operator import pickle import time @@ -29,6 +30,8 @@ from .conftest import parse_store if TYPE_CHECKING: + from collections.abc import Callable + from _pytest.compat import LEGACY_PATH from zarr.core.common import JSON, ZarrFormat @@ -1505,3 +1508,18 @@ def test_group_members_concurrency_limit(store: MemoryStore) -> None: elapsed = time.time() - start assert elapsed > num_groups * get_latency + + +@pytest.mark.parametrize( + ("a_func", "b_func"), + [(zarr.core.group.AsyncGroup.create_array, zarr.core.group.Group.create_array)], +) +def test_consistent_signatures( + a_func: Callable[[object], object], b_func: Callable[[object], object] +) -> None: + """ + Ensure that pairs of functions have consistent signatures + """ + base_sig = inspect.signature(a_func) + test_sig = inspect.signature(b_func) + assert test_sig.parameters == base_sig.parameters From 918f86a2bdd4f9dd8984a8487abf5f78eca1e6bb Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 12 Feb 2025 17:40:46 +0100 Subject: [PATCH 4/6] release notes --- changes/2819.chore.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/2819.chore.rst diff --git a/changes/2819.chore.rst b/changes/2819.chore.rst new file mode 100644 index 0000000000..b5dbacf7fb --- /dev/null +++ b/changes/2819.chore.rst @@ -0,0 +1 @@ +Ensure that invocations of ``create_array`` use consistent keyword arguments. \ No newline at end of file From 642272dcaf0ae313034046a494225ae4a87d6d31 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 27 Feb 2025 11:01:04 +0100 Subject: [PATCH 5/6] default fill value is None --- src/zarr/api/synchronous.py | 2 +- src/zarr/core/array.py | 2 +- src/zarr/core/group.py | 8 ++++---- tests/test_array.py | 19 +++++++------------ tests/test_metadata/test_consolidated.py | 2 +- 5 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 557877a7e3..7e5cfc4554 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -755,7 +755,7 @@ def create_array( filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = 0, + fill_value: Any | None = None, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index bc7d378068..9c2f8a7260 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -4016,7 +4016,7 @@ async def create_array( filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = 0, + fill_value: Any | None = None, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index dab5f7f119..1a43d6b941 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -996,14 +996,14 @@ async def create_array( compressors: CompressorsLike = "auto", compressor: CompressorLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = 0, + fill_value: Any | None = None, order: MemoryOrder | None = None, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, write_data: bool = True, ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Create an array within this group. @@ -2378,14 +2378,14 @@ def create_array( compressors: CompressorsLike = "auto", compressor: CompressorLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = 0, + fill_value: Any | None = None, order: MemoryOrder | None = None, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, write_data: bool = True, ) -> Array: """Create an array within this group. diff --git a/tests/test_array.py b/tests/test_array.py index 93abcd7180..ec31206a0e 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -960,18 +960,13 @@ def test_chunks_and_shards() -> None: assert arr_v2.shards is None -def test_create_array_default_fill_values() -> None: - a = zarr.create_array(MemoryStore(), shape=(5,), chunks=(5,), dtype=" None: + a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype) + assert a.fill_value == fill_value_expected @pytest.mark.parametrize("store", ["memory"], indirect=True) diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index c1ff2e130a..85b3e99646 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -521,7 +521,7 @@ async def test_consolidated_metadata_v2(self): dtype=dtype, attributes={"key": "a"}, chunks=(1,), - fill_value=0, + fill_value=None, compressor=Blosc(), order="C", ), From 3fa6526511200c25256c0c9abc5afac95803ff47 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 27 Feb 2025 20:15:08 +0100 Subject: [PATCH 6/6] expand changelog --- changes/2819.chore.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/changes/2819.chore.rst b/changes/2819.chore.rst index b5dbacf7fb..f9a3358309 100644 --- a/changes/2819.chore.rst +++ b/changes/2819.chore.rst @@ -1 +1,4 @@ -Ensure that invocations of ``create_array`` use consistent keyword arguments. \ No newline at end of file +Ensure that invocations of ``create_array`` use consistent keyword arguments, with consistent defaults. +Specifically, ``zarr.api.synchronous.create_array`` now takes a ``write_data`` keyword argument; The +``create_array`` method on ``zarr.Group`` takes ``data`` and ``write_data`` keyword arguments. The ``fill_value`` +keyword argument of the various invocations of ``create_array`` has been consistently set to ``None``, where previously it was either ``None`` or ``0``. \ No newline at end of file