Use unsigned bytes to back Buffer

QuLogic · QuLogic · commit 01c6e3503fda · 2025-01-21T04:24:12.000-05:00
This makes compressors consistent with v2 and seems more correct than signed bytes. Fixes #2735.
diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py
@@ -114,7 +114,7 @@ async def _encode_single(
 
         nd_array = chunk_array.as_ndarray_like()
         # Flatten the nd-array (only copy if needed) and reinterpret as bytes
-        nd_array = nd_array.ravel().view(dtype="b")
+        nd_array = nd_array.ravel().view(dtype="B")
         return chunk_spec.prototype.buffer.from_array_like(nd_array)
 
     def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py
@@ -139,7 +139,7 @@ class Buffer(ABC):
     def __init__(self, array_like: ArrayLike) -> None:
         if array_like.ndim != 1:
             raise ValueError("array_like: only 1-dim allowed")
-        if array_like.dtype != np.dtype("b"):
+        if array_like.dtype != np.dtype("B"):
             raise ValueError("array_like: only byte dtype allowed")
         self._data = array_like
 
@@ -302,7 +302,7 @@ class NDBuffer:
     Notes
     -----
     The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
-    is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+    is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
     in order to use Python's type system to differentiate between the contiguous
     Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
     two classes separate.
diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py
@@ -49,7 +49,7 @@ def __init__(self, array_like: ArrayLike) -> None:
 
     @classmethod
     def create_zero_length(cls) -> Self:
-        return cls(np.array([], dtype="b"))
+        return cls(np.array([], dtype="B"))
 
     @classmethod
     def from_buffer(cls, buffer: core.Buffer) -> Self:
@@ -92,7 +92,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self:
         -------
             New buffer representing `bytes_like`
         """
-        return cls.from_array_like(np.frombuffer(bytes_like, dtype="b"))
+        return cls.from_array_like(np.frombuffer(bytes_like, dtype="B"))
 
     def as_numpy_array(self) -> npt.NDArray[Any]:
         """Returns the buffer as a NumPy array (host memory).
@@ -111,7 +111,7 @@ def __add__(self, other: core.Buffer) -> Self:
         """Concatenate two buffers"""
 
         other_array = other.as_array_like()
-        assert other_array.dtype == np.dtype("b")
+        assert other_array.dtype == np.dtype("B")
         return self.__class__(
             np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array)))
         )
@@ -131,7 +131,7 @@ class NDBuffer(core.NDBuffer):
     Notes
     -----
     The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
-    is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+    is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
     in order to use Python's type system to differentiate between the contiguous
     Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
     two classes separate.
diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py
@@ -55,7 +55,7 @@ def __init__(self, array_like: ArrayLike) -> None:
 
         if array_like.ndim != 1:
             raise ValueError("array_like: only 1-dim allowed")
-        if array_like.dtype != np.dtype("b"):
+        if array_like.dtype != np.dtype("B"):
             raise ValueError("array_like: only byte dtype allowed")
 
         if not hasattr(array_like, "__cuda_array_interface__"):
@@ -80,7 +80,7 @@ def create_zero_length(cls) -> Self:
         -------
             New empty 0-length buffer
         """
-        return cls(cp.array([], dtype="b"))
+        return cls(cp.array([], dtype="B"))
 
     @classmethod
     def from_buffer(cls, buffer: core.Buffer) -> Self:
@@ -96,14 +96,14 @@ def from_buffer(cls, buffer: core.Buffer) -> Self:
 
     @classmethod
     def from_bytes(cls, bytes_like: BytesLike) -> Self:
-        return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b"))
+        return cls.from_array_like(cp.frombuffer(bytes_like, dtype="B"))
 
     def as_numpy_array(self) -> npt.NDArray[Any]:
         return cast(npt.NDArray[Any], cp.asnumpy(self._data))
 
     def __add__(self, other: core.Buffer) -> Self:
         other_array = other.as_array_like()
-        assert other_array.dtype == np.dtype("b")
+        assert other_array.dtype == np.dtype("B")
         gpu_other = Buffer(other_array)
         gpu_other_array = gpu_other.as_array_like()
         return self.__class__(
@@ -125,7 +125,7 @@ class NDBuffer(core.NDBuffer):
     Notes
     -----
     The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
-    is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+    is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
     in order to use Python's type system to differentiate between the contiguous
     Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
     two classes separate.