Skip to content

Commit 01c6e35

Browse files
committed
Use unsigned bytes to back Buffer
This makes compressors consistent with v2, and seems more correct than signed bytes. Fixes #2735
1 parent e9772ac commit 01c6e35

File tree

4 files changed

+12 / -12 lines changed

4 files changed

+12 / -12 lines changed

src/zarr/codecs/bytes.py

+1 / -1
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ async def _encode_single(
114114

115115
nd_array = chunk_array.as_ndarray_like()
116116
# Flatten the nd-array (only copy if needed) and reinterpret as bytes
117-
nd_array = nd_array.ravel().view(dtype="b")
117+
nd_array = nd_array.ravel().view(dtype="B")
118118
return chunk_spec.prototype.buffer.from_array_like(nd_array)
119119

120120
def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:

src/zarr/core/buffer/core.py

+2 / -2
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ class Buffer(ABC):
139139
def __init__(self, array_like: ArrayLike) -> None:
140140
if array_like.ndim != 1:
141141
raise ValueError("array_like: only 1-dim allowed")
142-
if array_like.dtype != np.dtype("b"):
142+
if array_like.dtype != np.dtype("B"):
143143
raise ValueError("array_like: only byte dtype allowed")
144144
self._data = array_like
145145

@@ -302,7 +302,7 @@ class NDBuffer:
302302
Notes
303303
-----
304304
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
305-
is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
305+
is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
306306
in order to use Python's type system to differentiate between the contiguous
307307
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
308308
two classes separate.

src/zarr/core/buffer/cpu.py

+4 / -4
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ def __init__(self, array_like: ArrayLike) -> None:
4949

5050
@classmethod
5151
def create_zero_length(cls) -> Self:
52-
return cls(np.array([], dtype="b"))
52+
return cls(np.array([], dtype="B"))
5353

5454
@classmethod
5555
def from_buffer(cls, buffer: core.Buffer) -> Self:
@@ -92,7 +92,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self:
9292
-------
9393
New buffer representing `bytes_like`
9494
"""
95-
return cls.from_array_like(np.frombuffer(bytes_like, dtype="b"))
95+
return cls.from_array_like(np.frombuffer(bytes_like, dtype="B"))
9696

9797
def as_numpy_array(self) -> npt.NDArray[Any]:
9898
"""Returns the buffer as a NumPy array (host memory).
@@ -111,7 +111,7 @@ def __add__(self, other: core.Buffer) -> Self:
111111
"""Concatenate two buffers"""
112112

113113
other_array = other.as_array_like()
114-
assert other_array.dtype == np.dtype("b")
114+
assert other_array.dtype == np.dtype("B")
115115
return self.__class__(
116116
np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array)))
117117
)
@@ -131,7 +131,7 @@ class NDBuffer(core.NDBuffer):
131131
Notes
132132
-----
133133
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
134-
is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
134+
is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
135135
in order to use Python's type system to differentiate between the contiguous
136136
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
137137
two classes separate.

src/zarr/core/buffer/gpu.py

+5 / -5
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ def __init__(self, array_like: ArrayLike) -> None:
5555

5656
if array_like.ndim != 1:
5757
raise ValueError("array_like: only 1-dim allowed")
58-
if array_like.dtype != np.dtype("b"):
58+
if array_like.dtype != np.dtype("B"):
5959
raise ValueError("array_like: only byte dtype allowed")
6060

6161
if not hasattr(array_like, "__cuda_array_interface__"):
@@ -80,7 +80,7 @@ def create_zero_length(cls) -> Self:
8080
-------
8181
New empty 0-length buffer
8282
"""
83-
return cls(cp.array([], dtype="b"))
83+
return cls(cp.array([], dtype="B"))
8484

8585
@classmethod
8686
def from_buffer(cls, buffer: core.Buffer) -> Self:
@@ -96,14 +96,14 @@ def from_buffer(cls, buffer: core.Buffer) -> Self:
9696

9797
@classmethod
9898
def from_bytes(cls, bytes_like: BytesLike) -> Self:
99-
return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b"))
99+
return cls.from_array_like(cp.frombuffer(bytes_like, dtype="B"))
100100

101101
def as_numpy_array(self) -> npt.NDArray[Any]:
102102
return cast(npt.NDArray[Any], cp.asnumpy(self._data))
103103

104104
def __add__(self, other: core.Buffer) -> Self:
105105
other_array = other.as_array_like()
106-
assert other_array.dtype == np.dtype("b")
106+
assert other_array.dtype == np.dtype("B")
107107
gpu_other = Buffer(other_array)
108108
gpu_other_array = gpu_other.as_array_like()
109109
return self.__class__(
@@ -125,7 +125,7 @@ class NDBuffer(core.NDBuffer):
125125
Notes
126126
-----
127127
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
128-
is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
128+
is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
129129
in order to use Python's type system to differentiate between the contiguous
130130
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
131131
two classes separate.

0 commit comments

Comments
 (0)