diff --git a/changes/2972.misc.rst b/changes/2972.misc.rst
new file mode 100644
index 0000000000..f0258c1d05
--- /dev/null
+++ b/changes/2972.misc.rst
@@ -0,0 +1 @@
+Avoid an unnecessary memory copy when writing Zarr with obstore
diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py
index 1318f868a0..eaafa4c714 100644
--- a/src/zarr/core/buffer/core.py
+++ b/src/zarr/core/buffer/core.py
@@ -255,6 +255,19 @@ def as_numpy_array(self) -> npt.NDArray[Any]:
         """
         ...
 
+    def as_buffer_like(self) -> BytesLike:
+        """Returns the buffer as an object that implements the Python buffer protocol.
+
+        Notes
+        -----
+        Might have to copy data, since the implementation uses `.as_numpy_array()`.
+
+        Returns
+        -------
+        An object that implements the Python buffer protocol
+        """
+        return memoryview(self.as_numpy_array())  # type: ignore[arg-type]
+
     def to_bytes(self) -> bytes:
         """Returns the buffer as `bytes` (host memory).
 
diff --git a/src/zarr/storage/_local.py b/src/zarr/storage/_local.py
index bd5bfc1da2..b46c263333 100644
--- a/src/zarr/storage/_local.py
+++ b/src/zarr/storage/_local.py
@@ -52,10 +52,10 @@ def _put(
         with path.open("r+b") as f:
             f.seek(start)
             # write takes any object supporting the buffer protocol
-            f.write(value.as_numpy_array())  # type: ignore[arg-type]
+            f.write(value.as_buffer_like())
         return None
     else:
-        view = memoryview(value.as_numpy_array())  # type: ignore[arg-type]
+        view = value.as_buffer_like()
         if exclusive:
             mode = "xb"
         else:
diff --git a/src/zarr/storage/_obstore.py b/src/zarr/storage/_obstore.py
index 4381acb2ae..e3d8d9cee6 100644
--- a/src/zarr/storage/_obstore.py
+++ b/src/zarr/storage/_obstore.py
@@ -160,7 +160,7 @@ async def set(self, key: str, value: Buffer) -> None:
 
         self._check_writable()
 
-        buf = value.to_bytes()
+        buf = value.as_buffer_like()
         await obs.put_async(self.store, key, buf)
 
     async def set_if_not_exists(self, key: str, value: Buffer) -> None:
@@ -168,7 +168,7 @@ async def set_if_not_exists(self, key: str, value: Buffer) -> None:
         import obstore as obs
 
         self._check_writable()
-        buf = value.to_bytes()
+        buf = value.as_buffer_like()
         with contextlib.suppress(obs.exceptions.AlreadyExistsError):
             await obs.put_async(self.store, key, buf, mode="create")
 