diff --git a/changes/2871.feature.rst b/changes/2871.feature.rst new file mode 100644 index 0000000000..0908cd5d62 --- /dev/null +++ b/changes/2871.feature.rst @@ -0,0 +1 @@ +Add public :mod:`zarr.buffer` API for controlling how data is stored in memory. \ No newline at end of file diff --git a/docs/user-guide/config.rst b/docs/user-guide/config.rst index 91ffe50b91..c45bc25251 100644 --- a/docs/user-guide/config.rst +++ b/docs/user-guide/config.rst @@ -72,7 +72,7 @@ This is the current default configuration:: 'string': {'name': 'vlen-utf8'}}, 'write_empty_chunks': False}, 'async': {'concurrency': 10, 'timeout': None}, - 'buffer': 'zarr.core.buffer.cpu.Buffer', + 'buffer': 'zarr.buffer.cpu.Buffer', 'codec_pipeline': {'batch_size': 1, 'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'}, 'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec', @@ -87,5 +87,5 @@ This is the current default configuration:: 'zstd': 'zarr.codecs.zstd.ZstdCodec'}, 'default_zarr_format': 3, 'json_indent': 2, - 'ndbuffer': 'zarr.core.buffer.cpu.NDBuffer', + 'ndbuffer': 'zarr.buffer.cpu.NDBuffer', 'threading': {'max_workers': None}} diff --git a/docs/user-guide/extending.rst b/docs/user-guide/extending.rst index 7647703fbb..4487e07ddf 100644 --- a/docs/user-guide/extending.rst +++ b/docs/user-guide/extending.rst @@ -83,7 +83,10 @@ Coming soon. Custom array buffers -------------------- -Coming soon. +Zarr-python provides control over where and how arrays are stored in memory through +:mod:`zarr.buffer`. Currently both CPU (the default) and GPU implementations are +provided (see :ref:`user-guide-gpu` for more). You can implement your own buffer +classes by implementing the interface defined in :mod:`zarr.abc.buffer`. 
Other extensions ---------------- diff --git a/src/zarr/abc/buffer.py b/src/zarr/abc/buffer.py new file mode 100644 index 0000000000..3d5ac07157 --- /dev/null +++ b/src/zarr/abc/buffer.py @@ -0,0 +1,9 @@ +from zarr.core.buffer.core import ArrayLike, Buffer, BufferPrototype, NDArrayLike, NDBuffer + +__all__ = [ + "ArrayLike", + "Buffer", + "BufferPrototype", + "NDArrayLike", + "NDBuffer", +] diff --git a/src/zarr/buffer/__init__.py b/src/zarr/buffer/__init__.py new file mode 100644 index 0000000000..db393f66c7 --- /dev/null +++ b/src/zarr/buffer/__init__.py @@ -0,0 +1,12 @@ +""" +Implementations of the Zarr Buffer interface. + +See Also +======== +zarr.abc.buffer: Abstract base class for the Zarr Buffer interface. +""" + +from zarr.buffer import cpu, gpu +from zarr.core.buffer import default_buffer_prototype + +__all__ = ["cpu", "default_buffer_prototype", "gpu"] diff --git a/src/zarr/buffer/cpu.py b/src/zarr/buffer/cpu.py new file mode 100644 index 0000000000..5307927c06 --- /dev/null +++ b/src/zarr/buffer/cpu.py @@ -0,0 +1,15 @@ +from zarr.core.buffer.cpu import ( + Buffer, + NDBuffer, + as_numpy_array_wrapper, + buffer_prototype, + numpy_buffer_prototype, +) + +__all__ = [ + "Buffer", + "NDBuffer", + "as_numpy_array_wrapper", + "buffer_prototype", + "numpy_buffer_prototype", +] diff --git a/src/zarr/buffer/gpu.py b/src/zarr/buffer/gpu.py new file mode 100644 index 0000000000..dbdc1b1357 --- /dev/null +++ b/src/zarr/buffer/gpu.py @@ -0,0 +1,7 @@ +from zarr.core.buffer.gpu import Buffer, NDBuffer, buffer_prototype + +__all__ = [ + "Buffer", + "NDBuffer", + "buffer_prototype", +] diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index 8464518818..593ff6ad67 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -223,5 +223,5 @@ def numpy_buffer_prototype() -> core.BufferPrototype: return core.BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) -register_buffer(Buffer) -register_ndbuffer(NDBuffer) +register_buffer(Buffer, 
qualname="zarr.buffer.cpu.Buffer") +register_ndbuffer(NDBuffer, qualname="zarr.buffer.cpu.NDBuffer") diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index c565cb0708..ecc73b78b0 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -125,8 +125,8 @@ def enable_gpu(self) -> ConfigSet: "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec", "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec", }, - "buffer": "zarr.core.buffer.cpu.Buffer", - "ndbuffer": "zarr.core.buffer.cpu.NDBuffer", + "buffer": "zarr.buffer.cpu.Buffer", + "ndbuffer": "zarr.buffer.cpu.NDBuffer", } ], ) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 925252ccf0..47e46f6618 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -1429,7 +1429,7 @@ async def create_hierarchy( group already exists at path ``a``, then this function will leave the group at ``a`` as-is. Yields - ------- + ------ tuple[str, AsyncArray | AsyncGroup]. """ # check that all the nodes have the same zarr_format as Self diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 704db3f704..d50ee15d31 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -45,8 +45,10 @@ def lazy_load(self) -> None: self.register(e.load()) self.lazy_load_list.clear() - def register(self, cls: type[T]) -> None: - self[fully_qualified_name(cls)] = cls + def register(self, cls: type[T], qualname: str | None = None) -> None: + if qualname is None: + qualname = fully_qualified_name(cls) + self[qualname] = cls __codec_registries: dict[str, Registry[Codec]] = defaultdict(Registry) @@ -123,12 +125,12 @@ def register_pipeline(pipe_cls: type[CodecPipeline]) -> None: __pipeline_registry.register(pipe_cls) -def register_ndbuffer(cls: type[NDBuffer]) -> None: - __ndbuffer_registry.register(cls) +def register_ndbuffer(cls: type[NDBuffer], qualname: str | None = None) -> None: + __ndbuffer_registry.register(cls, qualname) -def register_buffer(cls: type[Buffer]) -> None: - 
__buffer_registry.register(cls) +def register_buffer(cls: type[Buffer], qualname: str | None = None) -> None: + __buffer_registry.register(cls, qualname) def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]: diff --git a/tests/test_buffer.py b/tests/test_buffer.py index 33ac0266eb..e25f6f9ea5 100644 --- a/tests/test_buffer.py +++ b/tests/test_buffer.py @@ -6,12 +6,13 @@ import pytest import zarr +from zarr.abc.buffer import ArrayLike, BufferPrototype, NDArrayLike +from zarr.buffer import cpu, gpu from zarr.codecs.blosc import BloscCodec from zarr.codecs.crc32c_ import Crc32cCodec from zarr.codecs.gzip import GzipCodec from zarr.codecs.transpose import TransposeCodec from zarr.codecs.zstd import ZstdCodec -from zarr.core.buffer import ArrayLike, BufferPrototype, NDArrayLike, cpu, gpu from zarr.storage import MemoryStore, StorePath from zarr.testing.buffer import ( NDBufferUsingTestNDArrayLike, diff --git a/tests/test_config.py b/tests/test_config.py index 1a2453d646..21d85776b0 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -88,8 +88,8 @@ def test_config_defaults_set() -> None: "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", "batch_size": 1, }, - "buffer": "zarr.core.buffer.cpu.Buffer", - "ndbuffer": "zarr.core.buffer.cpu.NDBuffer", + "buffer": "zarr.buffer.cpu.Buffer", + "ndbuffer": "zarr.buffer.cpu.NDBuffer", "codecs": { "blosc": "zarr.codecs.blosc.BloscCodec", "gzip": "zarr.codecs.gzip.GzipCodec", @@ -223,9 +223,6 @@ class NewBloscCodec(BloscCodec): @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_config_ndbuffer_implementation(store: Store) -> None: - # has default value - assert fully_qualified_name(get_ndbuffer_class()) == config.defaults[0]["ndbuffer"] - # set custom ndbuffer with TestNDArrayLike implementation register_ndbuffer(NDBufferUsingTestNDArrayLike) with config.set({"ndbuffer": fully_qualified_name(NDBufferUsingTestNDArrayLike)}): @@ -242,9 +239,6 @@ def 
test_config_ndbuffer_implementation(store: Store) -> None: def test_config_buffer_implementation() -> None: - # has default value - assert fully_qualified_name(get_buffer_class()) == config.defaults[0]["buffer"] - arr = zeros(shape=(100), store=StoreExpectingTestBuffer()) # AssertionError of StoreExpectingTestBuffer when not using my buffer