Skip to content

Commit 1abffd3

Browse files
committed
Fixes
1 parent 723df80 commit 1abffd3

File tree

3 files changed

+890
-65
lines changed

3 files changed

+890
-65
lines changed

numcodecs/blosc.py

+36-5
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,14 @@ def _check_buffer_size(buf, max_buffer_size):
9595
raise ValueError(msg)
9696

9797

98-
def compress(source, cname: str, clevel: int, shuffle: int = SHUFFLE, blocksize=AUTOBLOCKS):
98+
def compress(
99+
source,
100+
cname: str,
101+
clevel: int,
102+
shuffle: int = SHUFFLE,
103+
blocksize=AUTOBLOCKS,
104+
typesize: int = 8,
105+
):
99106
"""
100107
Compress data.
101108
@@ -127,19 +134,28 @@ def compress(source, cname: str, clevel: int, shuffle: int = SHUFFLE, blocksize=
127134
shuffle = BITSHUFFLE
128135
else:
129136
shuffle = SHUFFLE
137+
130138
blosc.set_blocksize(blocksize)
131139
if isinstance(source, np.ndarray):
140+
print('hi')
141+
if typesize is None:
142+
typesize = source.dtype.itemsize
132143
_check_not_object_array(source)
133144
result = blosc.compress_ptr(
134145
source.ctypes.data,
135146
source.size,
136-
source.dtype.itemsize,
147+
typesize,
137148
cname=cname,
138149
clevel=clevel,
139150
shuffle=shuffle,
140151
)
141152
else:
142-
result = blosc.compress(source, cname=cname, clevel=clevel, shuffle=shuffle)
153+
if typesize is None:
154+
# Same default as blosc
155+
typesize = 8
156+
result = blosc.compress(
157+
source, cname=cname, clevel=clevel, shuffle=shuffle, typesize=typesize
158+
)
143159
blosc.set_blocksize(AUTOBLOCKS)
144160
return result
145161

@@ -205,7 +221,16 @@ class Blosc(Codec):
205221
AUTOSHUFFLE = AUTOSHUFFLE
206222
max_buffer_size = 2**31 - 1
207223

208-
def __init__(self, cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=AUTOBLOCKS):
224+
def __init__(
225+
self,
226+
cname='lz4',
227+
clevel=5,
228+
shuffle=SHUFFLE,
229+
blocksize=AUTOBLOCKS,
230+
typesize: int | None = None,
231+
):
232+
if isinstance(typesize, int) and typesize < 1:
233+
raise ValueError(f"Cannot use typesize {typesize} less than 1.")
209234
self.cname = cname
210235
if isinstance(cname, str):
211236
self._cname_bytes = cname.encode('ascii')
@@ -214,11 +239,17 @@ def __init__(self, cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=AUTOBLOCKS)
214239
self.clevel = clevel
215240
self.shuffle = shuffle
216241
self.blocksize = blocksize
242+
self.typesize = typesize
217243

218244
def encode(self, buf):
219245
_check_buffer_size(buf, self.max_buffer_size)
220246
return compress(
221-
buf, self.cname, clevel=self.clevel, shuffle=self.shuffle, blocksize=self.blocksize
247+
buf,
248+
self.cname,
249+
clevel=self.clevel,
250+
shuffle=self.shuffle,
251+
blocksize=self.blocksize,
252+
typesize=self.typesize,
222253
)
223254

224255
def decode(self, buf, out=None):

numcodecs/tests/test_blosc.py

+6-60
Original file line numberDiff line numberDiff line change
@@ -103,33 +103,6 @@ def test_eq():
103103
assert Blosc(cname='lz4') != 'foo'
104104

105105

106-
def test_compress_blocksize_default(use_threads):
107-
arr = np.arange(1000, dtype='i4')
108-
109-
blosc.use_threads = use_threads
110-
111-
# default blocksize
112-
enc = blosc.compress(arr, b'lz4', 1, Blosc.NOSHUFFLE)
113-
_, _, blocksize = blosc._cbuffer_sizes(enc)
114-
assert blocksize > 0
115-
116-
# explicit default blocksize
117-
enc = blosc.compress(arr, b'lz4', 1, Blosc.NOSHUFFLE, 0)
118-
_, _, blocksize = blosc._cbuffer_sizes(enc)
119-
assert blocksize > 0
120-
121-
122-
@pytest.mark.parametrize('bs', [2**7, 2**8])
123-
def test_compress_blocksize(use_threads, bs):
124-
arr = np.arange(1000, dtype='i4')
125-
126-
blosc.use_threads = use_threads
127-
128-
enc = blosc.compress(arr, b'lz4', 1, Blosc.NOSHUFFLE, bs)
129-
_, _, blocksize = blosc._cbuffer_sizes(enc)
130-
assert blocksize == bs
131-
132-
133106
def test_compress_complib(use_threads):
134107
arr = np.arange(1000, dtype='i4')
135108
expected_complibs = {
@@ -141,43 +114,16 @@ def test_compress_complib(use_threads):
141114
}
142115
blosc.use_threads = use_threads
143116
for cname in blosc.list_compressors():
144-
enc = blosc.compress(arr, cname.encode(), 1, Blosc.NOSHUFFLE)
117+
enc = blosc.compress(arr, cname, 1, Blosc.NOSHUFFLE)
145118
complib = blosc.cbuffer_complib(enc)
146119
expected_complib = expected_complibs[cname]
147120
assert complib == expected_complib
148121
with pytest.raises(ValueError):
149122
# capitalized cname
150-
blosc.compress(arr, b'LZ4', 1)
123+
blosc.compress(arr, 'LZ4', 1)
151124
with pytest.raises(ValueError):
152125
# bad cname
153-
blosc.compress(arr, b'foo', 1)
154-
155-
156-
@pytest.mark.parametrize('dtype', ['i1', 'i2', 'i4', 'i8'])
157-
def test_compress_metainfo(dtype, use_threads):
158-
arr = np.arange(1000, dtype=dtype)
159-
for shuffle in Blosc.NOSHUFFLE, Blosc.SHUFFLE, Blosc.BITSHUFFLE:
160-
blosc.use_threads = use_threads
161-
for cname in blosc.list_compressors():
162-
enc = blosc.compress(arr, cname.encode(), 1, shuffle)
163-
typesize, did_shuffle, _ = blosc._cbuffer_metainfo(enc)
164-
assert typesize == arr.dtype.itemsize
165-
assert did_shuffle == shuffle
166-
167-
168-
def test_compress_autoshuffle(use_threads):
169-
arr = np.arange(8000)
170-
for dtype in 'i1', 'i2', 'i4', 'i8', 'f2', 'f4', 'f8', 'bool', 'S10':
171-
varr = arr.view(dtype)
172-
blosc.use_threads = use_threads
173-
for cname in blosc.list_compressors():
174-
enc = blosc.compress(varr, cname.encode(), 1, Blosc.AUTOSHUFFLE)
175-
typesize, did_shuffle, _ = blosc._cbuffer_metainfo(enc)
176-
assert typesize == varr.dtype.itemsize
177-
if typesize == 1:
178-
assert did_shuffle == Blosc.BITSHUFFLE
179-
else:
180-
assert did_shuffle == Blosc.SHUFFLE
126+
blosc.compress(arr, 'foo', 1)
181127

182128

183129
def test_config_blocksize():
@@ -269,16 +215,16 @@ def test_typesize_explicit():
269215
encoded_without_itemsize = codec_no_type_size.encode(arr.tobytes())
270216
encoded_with_itemsize = codec_itemsize.encode(arr.tobytes())
271217
# the fourth byte (index 3) of the Blosc header encodes the `typesize`
272-
assert encoded_without_itemsize[3] == 1 # inferred from bytes i.e., 1
218+
assert encoded_without_itemsize[3] == 8 # default blosc itemsize
273219
assert encoded_with_itemsize[3] == itemsize # given as a constructor argument
274220

275221

276222
def test_typesize_less_than_1():
277-
with pytest.raises(ValueError, match=r"Cannot use typesize"):
223+
with pytest.raises(ValueError, match=r"Cannot use typesize 0 less than 1"):
278224
Blosc(shuffle=Blosc.SHUFFLE, typesize=0)
279225
compressor = Blosc(shuffle=Blosc.SHUFFLE)
280226
# not really something that should be done in practice, but good for testing.
281227
compressor.typesize = 0
282228
arr = np.arange(100)
283-
with pytest.raises(ValueError, match=r"Cannot use typesize"):
229+
with pytest.raises(ValueError, match=r"typesize can only be in the 1-255 range"):
284230
compressor.encode(arr.tobytes())

0 commit comments

Comments
 (0)