From d4be9738f7df2fd42c6068e16cf6666b611dd192 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Feb 2025 09:25:45 -0600 Subject: [PATCH 1/7] Public API for buffer objects This moves the public imports from buffer things out of `zarr.core`. Abstract stuff is available under `zarr.abc.buffer`. Concrete implementations are available under `zarr.buffer.{cpu,gpu}`. --- docs/user-guide/extending.rst | 5 ++++- src/zarr/abc/buffer.py | 9 +++++++++ src/zarr/buffer/__init__.py | 12 ++++++++++++ src/zarr/buffer/cpu.py | 15 +++++++++++++++ src/zarr/buffer/gpu.py | 7 +++++++ tests/test_buffer.py | 3 ++- 6 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 src/zarr/abc/buffer.py create mode 100644 src/zarr/buffer/__init__.py create mode 100644 src/zarr/buffer/cpu.py create mode 100644 src/zarr/buffer/gpu.py diff --git a/docs/user-guide/extending.rst b/docs/user-guide/extending.rst index 7647703fbb..fd16f5221e 100644 --- a/docs/user-guide/extending.rst +++ b/docs/user-guide/extending.rst @@ -83,7 +83,10 @@ Coming soon. Custom array buffers -------------------- -Coming soon. +zarr-python provides control where and how arrays stored in memory through +:mod:`zarr.buffer`. Currently both CPU (the default) and GPU implementations are +provided (see :ref:`user-guide-gpu` for more). You can implement your own buffer +classes by implementing the interface defined in :mod:`zarr.abc.buffer`. 
Other extensions ---------------- diff --git a/src/zarr/abc/buffer.py b/src/zarr/abc/buffer.py new file mode 100644 index 0000000000..3d5ac07157 --- /dev/null +++ b/src/zarr/abc/buffer.py @@ -0,0 +1,9 @@ +from zarr.core.buffer.core import ArrayLike, Buffer, BufferPrototype, NDArrayLike, NDBuffer + +__all__ = [ + "ArrayLike", + "Buffer", + "BufferPrototype", + "NDArrayLike", + "NDBuffer", +] diff --git a/src/zarr/buffer/__init__.py b/src/zarr/buffer/__init__.py new file mode 100644 index 0000000000..8c0e92aa27 --- /dev/null +++ b/src/zarr/buffer/__init__.py @@ -0,0 +1,12 @@ +""" +Public API for implementations of the Zarr Buffer interface. + +See Also +======== +arr.abc.buffer: Abstract base class for the Zarr Buffer interface. +""" + +from ..core.buffer import default_buffer_prototype +from . import cpu, gpu + +__all__ = ["cpu", "default_buffer_prototype", "gpu"] diff --git a/src/zarr/buffer/cpu.py b/src/zarr/buffer/cpu.py new file mode 100644 index 0000000000..5307927c06 --- /dev/null +++ b/src/zarr/buffer/cpu.py @@ -0,0 +1,15 @@ +from zarr.core.buffer.cpu import ( + Buffer, + NDBuffer, + as_numpy_array_wrapper, + buffer_prototype, + numpy_buffer_prototype, +) + +__all__ = [ + "Buffer", + "NDBuffer", + "as_numpy_array_wrapper", + "buffer_prototype", + "numpy_buffer_prototype", +] diff --git a/src/zarr/buffer/gpu.py b/src/zarr/buffer/gpu.py new file mode 100644 index 0000000000..dbdc1b1357 --- /dev/null +++ b/src/zarr/buffer/gpu.py @@ -0,0 +1,7 @@ +from zarr.core.buffer.gpu import Buffer, NDBuffer, buffer_prototype + +__all__ = [ + "Buffer", + "NDBuffer", + "buffer_prototype", +] diff --git a/tests/test_buffer.py b/tests/test_buffer.py index baef0b8109..5cd1450f23 100644 --- a/tests/test_buffer.py +++ b/tests/test_buffer.py @@ -6,12 +6,13 @@ import pytest import zarr +from zarr.abc.buffer import ArrayLike, BufferPrototype, NDArrayLike +from zarr.buffer import cpu, gpu from zarr.codecs.blosc import BloscCodec from zarr.codecs.crc32c_ import Crc32cCodec from 
zarr.codecs.gzip import GzipCodec from zarr.codecs.transpose import TransposeCodec from zarr.codecs.zstd import ZstdCodec -from zarr.core.buffer import ArrayLike, BufferPrototype, NDArrayLike, cpu, gpu from zarr.storage import MemoryStore, StorePath from zarr.testing.buffer import ( NDBufferUsingTestNDArrayLike, From efa674d7b921c814a858566cd9196b7c25574bdf Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Feb 2025 19:37:25 -0600 Subject: [PATCH 2/7] changelog --- changes/2871.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/2871.feature.rst diff --git a/changes/2871.feature.rst b/changes/2871.feature.rst new file mode 100644 index 0000000000..0908cd5d62 --- /dev/null +++ b/changes/2871.feature.rst @@ -0,0 +1 @@ +Add public :mod:`zarr.buffer` API for controlling how data is stored in memory. \ No newline at end of file From 4ff4f7efc6795c45095a8bc69b852519575e0d55 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Mar 2025 09:23:17 -0500 Subject: [PATCH 3/7] absolute imports --- src/zarr/buffer/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/zarr/buffer/__init__.py b/src/zarr/buffer/__init__.py index 8c0e92aa27..db393f66c7 100644 --- a/src/zarr/buffer/__init__.py +++ b/src/zarr/buffer/__init__.py @@ -1,12 +1,12 @@ """ -Public API for implementations of the Zarr Buffer interface. +Implementations of the Zarr Buffer interface. See Also ======== -arr.abc.buffer: Abstract base class for the Zarr Buffer interface. +zarr.abc.buffer: Abstract base class for the Zarr Buffer interface. """ -from ..core.buffer import default_buffer_prototype -from . 
import cpu, gpu +from zarr.buffer import cpu, gpu +from zarr.core.buffer import default_buffer_prototype __all__ = ["cpu", "default_buffer_prototype", "gpu"] From 7946745fc4bf4d9e827dffdb7d774b1e838ce582 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Mar 2025 09:25:28 -0500 Subject: [PATCH 4/7] fixed warning in doc build --- src/zarr/core/group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 8e7f7f3474..6f5e2c740a 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -1429,7 +1429,7 @@ async def create_hierarchy( group already exists at path ``a``, then this function will leave the group at ``a`` as-is. Yields - ------- + ------ tuple[str, AsyncArray | AsyncGroup]. """ # check that all the nodes have the same zarr_format as Self From 5cf1bdef5fb5ff030ab087d96b55e5843ea87d5f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Mar 2025 13:01:25 -0500 Subject: [PATCH 5/7] Updated config --- docs/user-guide/config.rst | 4 ++-- src/zarr/core/config.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/user-guide/config.rst b/docs/user-guide/config.rst index 91ffe50b91..c45bc25251 100644 --- a/docs/user-guide/config.rst +++ b/docs/user-guide/config.rst @@ -72,7 +72,7 @@ This is the current default configuration:: 'string': {'name': 'vlen-utf8'}}, 'write_empty_chunks': False}, 'async': {'concurrency': 10, 'timeout': None}, - 'buffer': 'zarr.core.buffer.cpu.Buffer', + 'buffer': 'zarr.buffer.cpu.Buffer', 'codec_pipeline': {'batch_size': 1, 'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'}, 'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec', @@ -87,5 +87,5 @@ This is the current default configuration:: 'zstd': 'zarr.codecs.zstd.ZstdCodec'}, 'default_zarr_format': 3, 'json_indent': 2, - 'ndbuffer': 'zarr.core.buffer.cpu.NDBuffer', + 'ndbuffer': 'zarr.buffer.cpu.NDBuffer', 'threading': {'max_workers': None}} diff --git 
a/src/zarr/core/config.py b/src/zarr/core/config.py index c565cb0708..ecc73b78b0 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -125,8 +125,8 @@ def enable_gpu(self) -> ConfigSet: "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec", "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec", }, - "buffer": "zarr.core.buffer.cpu.Buffer", - "ndbuffer": "zarr.core.buffer.cpu.NDBuffer", + "buffer": "zarr.buffer.cpu.Buffer", + "ndbuffer": "zarr.buffer.cpu.NDBuffer", } ], ) From 50792a4952821d83df015e9f670f872e3d191066 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Mar 2025 13:11:30 -0500 Subject: [PATCH 6/7] Updated config --- src/zarr/core/buffer/cpu.py | 4 ++-- src/zarr/registry.py | 14 ++++++++------ tests/test_config.py | 10 ++-------- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index 225adb6f5c..26e2d3c167 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -223,5 +223,5 @@ def numpy_buffer_prototype() -> core.BufferPrototype: return core.BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) -register_buffer(Buffer) -register_ndbuffer(NDBuffer) +register_buffer(Buffer, qualname="zarr.buffer.cpu.Buffer") +register_ndbuffer(NDBuffer, qualname="zarr.buffer.cpu.NDBuffer") diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 704db3f704..d50ee15d31 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -45,8 +45,10 @@ def lazy_load(self) -> None: self.register(e.load()) self.lazy_load_list.clear() - def register(self, cls: type[T]) -> None: - self[fully_qualified_name(cls)] = cls + def register(self, cls: type[T], qualname: str | None = None) -> None: + if qualname is None: + qualname = fully_qualified_name(cls) + self[qualname] = cls __codec_registries: dict[str, Registry[Codec]] = defaultdict(Registry) @@ -123,12 +125,12 @@ def register_pipeline(pipe_cls: type[CodecPipeline]) -> None: __pipeline_registry.register(pipe_cls) 
-def register_ndbuffer(cls: type[NDBuffer]) -> None: - __ndbuffer_registry.register(cls) +def register_ndbuffer(cls: type[NDBuffer], qualname: str | None = None) -> None: + __ndbuffer_registry.register(cls, qualname) -def register_buffer(cls: type[Buffer]) -> None: - __buffer_registry.register(cls) +def register_buffer(cls: type[Buffer], qualname: str | None = None) -> None: + __buffer_registry.register(cls, qualname) def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]: diff --git a/tests/test_config.py b/tests/test_config.py index 1a2453d646..21d85776b0 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -88,8 +88,8 @@ def test_config_defaults_set() -> None: "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", "batch_size": 1, }, - "buffer": "zarr.core.buffer.cpu.Buffer", - "ndbuffer": "zarr.core.buffer.cpu.NDBuffer", + "buffer": "zarr.buffer.cpu.Buffer", + "ndbuffer": "zarr.buffer.cpu.NDBuffer", "codecs": { "blosc": "zarr.codecs.blosc.BloscCodec", "gzip": "zarr.codecs.gzip.GzipCodec", @@ -223,9 +223,6 @@ class NewBloscCodec(BloscCodec): @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_config_ndbuffer_implementation(store: Store) -> None: - # has default value - assert fully_qualified_name(get_ndbuffer_class()) == config.defaults[0]["ndbuffer"] - # set custom ndbuffer with TestNDArrayLike implementation register_ndbuffer(NDBufferUsingTestNDArrayLike) with config.set({"ndbuffer": fully_qualified_name(NDBufferUsingTestNDArrayLike)}): @@ -242,9 +239,6 @@ def test_config_ndbuffer_implementation(store: Store) -> None: def test_config_buffer_implementation() -> None: - # has default value - assert fully_qualified_name(get_buffer_class()) == config.defaults[0]["buffer"] - arr = zeros(shape=(100), store=StoreExpectingTestBuffer()) # AssertionError of StoreExpectingTestBuffer when not using my buffer From 674ca514d9fc272b304253d3fe94378df42d2be6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 
28 Mar 2025 14:20:34 -0500 Subject: [PATCH 7/7] wording --- docs/user-guide/extending.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/extending.rst b/docs/user-guide/extending.rst index fd16f5221e..4487e07ddf 100644 --- a/docs/user-guide/extending.rst +++ b/docs/user-guide/extending.rst @@ -83,7 +83,7 @@ Coming soon. Custom array buffers -------------------- -zarr-python provides control where and how arrays stored in memory through +Zarr-python provides control over where and how arrays are stored in memory through :mod:`zarr.buffer`. Currently both CPU (the default) and GPU implementations are provided (see :ref:`user-guide-gpu` for more). You can implement your own buffer classes by implementing the interface defined in :mod:`zarr.abc.buffer`.