Skip to content

Commit 0afbf7e

Browse files
committed
Parse 0 fill value as "" for str dtype
1 parent 87557e3 commit 0afbf7e

File tree

5 files changed

+25
-7
lines changed

5 files changed

+25
-7
lines changed

changes/2798.bugfix.rst

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Ensure that a fill value of ``0`` for the ``str`` dtype is parsed as ``""``.

src/zarr/core/array.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
_warn_order_kwarg,
5959
concurrent_map,
6060
parse_dtype,
61+
parse_fill_value,
6162
parse_order,
6263
parse_shapelike,
6364
product,
@@ -3901,6 +3902,7 @@ async def init_array(
39013902

39023903
from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation
39033904

3905+
fill_value_parsed = parse_fill_value(fill_value, dtype, zarr_format)
39043906
dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format)
39053907
shape_parsed = parse_shapelike(shape)
39063908
chunk_key_encoding_parsed = _parse_chunk_key_encoding(
@@ -3947,7 +3949,7 @@ async def init_array(
39473949
dtype=dtype_parsed,
39483950
chunks=chunk_shape_parsed,
39493951
dimension_separator=chunk_key_encoding_parsed.separator,
3950-
fill_value=fill_value,
3952+
fill_value=fill_value_parsed,
39513953
order=order_parsed,
39523954
filters=filters_parsed,
39533955
compressor=compressor_parsed,
@@ -3985,7 +3987,7 @@ async def init_array(
39853987
meta = AsyncArray._create_metadata_v3(
39863988
shape=shape_parsed,
39873989
dtype=dtype_parsed,
3988-
fill_value=fill_value,
3990+
fill_value=fill_value_parsed,
39893991
chunk_shape=chunks_out,
39903992
chunk_key_encoding=chunk_key_encoding_parsed,
39913993
codecs=codecs_out,

src/zarr/core/array_spec.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from zarr.core.common import (
99
MemoryOrder,
1010
parse_bool,
11-
parse_fill_value,
1211
parse_order,
1312
parse_shapelike,
1413
)
@@ -102,11 +101,10 @@ def __init__(
102101
) -> None:
103102
shape_parsed = parse_shapelike(shape)
104103
dtype_parsed = np.dtype(dtype)
105-
fill_value_parsed = parse_fill_value(fill_value)
106104

107105
object.__setattr__(self, "shape", shape_parsed)
108106
object.__setattr__(self, "dtype", dtype_parsed)
109-
object.__setattr__(self, "fill_value", fill_value_parsed)
107+
object.__setattr__(self, "fill_value", fill_value)
110108
object.__setattr__(self, "config", config)
111109
object.__setattr__(self, "prototype", prototype)
112110

src/zarr/core/common.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,11 @@ def parse_shapelike(data: int | Iterable[int]) -> tuple[int, ...]:
150150
return data_tuple
151151

152152

153-
def parse_fill_value(data: Any) -> Any:
153+
def parse_fill_value(fill_value: Any, dtype: Any, zarr_format: ZarrFormat) -> Any:
154+
if zarr_format == 2 and (dtype is str or dtype == "str") and fill_value == 0:
155+
fill_value = ""
154156
# todo: real validation
155-
return data
157+
return fill_value
156158

157159

158160
def parse_order(data: Any) -> Literal["C", "F"]:

tests/test_metadata/test_v2.py

+15
Original file line numberDiff line numberDiff line change
@@ -298,3 +298,18 @@ def test_zstd_checksum() -> None:
298298
arr.metadata.to_buffer_dict(default_buffer_prototype())[".zarray"].to_bytes()
299299
)
300300
assert "checksum" not in metadata["compressor"]
301+
302+
303+
def test_0_fill_str_type():
304+
array = zarr.create_array(
305+
store=zarr.storage.MemoryStore(),
306+
dtype=str,
307+
shape=(5,),
308+
chunks=(2,),
309+
fill_value=0,
310+
zarr_format=2,
311+
overwrite=True,
312+
)
313+
314+
# Ensure the array initializes correctly with the fill value
315+
np.testing.assert_array_equal(array[:], ["", "", "", "", ""])

0 commit comments

Comments
 (0)