Skip to content

Commit 4187921

Browse files
authored
Merge branch 'main' into fill_value_default
2 parents ea99951 + a52048d commit 4187921

File tree

5 files changed

+55
-2
lines changed

5 files changed

+55
-2
lines changed

changes/2778.bugfix.rst

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Use `removeprefix` rather than `replace` when removing filename prefixes in `FsspecStore.list`.

changes/2801.bugfix.rst

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Ensure UTF-8-compliant strings are used to construct NumPy arrays in property-based tests.

src/zarr/storage/_fsspec.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ async def set_partial_values(
341341
async def list(self) -> AsyncIterator[str]:
342342
# docstring inherited
343343
allfiles = await self.fs._find(self.path, detail=False, withdirs=False)
344-
for onefile in (a.replace(self.path + "/", "") for a in allfiles):
344+
for onefile in (a.removeprefix(self.path + "/") for a in allfiles):
345345
yield onefile
346346

347347
async def list_dir(self, prefix: str) -> AsyncIterator[str]:

src/zarr/testing/store.py

+31
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,37 @@ async def test_list_prefix(self, store: S) -> None:
400400
expected = tuple(sorted(expected))
401401
assert observed == expected
402402

403+
async def test_list_empty_path(self, store: S) -> None:
404+
"""
405+
Verify that list and list_prefix work correctly when path is an empty string,
406+
i.e. no unwanted replacement occurs.
407+
"""
408+
data = self.buffer_cls.from_bytes(b"")
409+
store_dict = {
410+
"foo/bar/zarr.json": data,
411+
"foo/bar/c/1": data,
412+
"foo/baz/c/0": data,
413+
}
414+
await store._set_many(store_dict.items())
415+
416+
# Test list()
417+
observed_list = await _collect_aiterator(store.list())
418+
observed_list_sorted = sorted(observed_list)
419+
expected_list_sorted = sorted(store_dict.keys())
420+
assert observed_list_sorted == expected_list_sorted
421+
422+
# Test list_prefix() with an empty prefix
423+
observed_prefix_empty = await _collect_aiterator(store.list_prefix(""))
424+
observed_prefix_empty_sorted = sorted(observed_prefix_empty)
425+
expected_prefix_empty_sorted = sorted(store_dict.keys())
426+
assert observed_prefix_empty_sorted == expected_prefix_empty_sorted
427+
428+
# Test list_prefix() with a non-empty prefix
429+
observed_prefix = await _collect_aiterator(store.list_prefix("foo/bar/"))
430+
observed_prefix_sorted = sorted(observed_prefix)
431+
expected_prefix_sorted = sorted(k for k in store_dict if k.startswith("foo/bar/"))
432+
assert observed_prefix_sorted == expected_prefix_sorted
433+
403434
async def test_list_dir(self, store: S) -> None:
404435
root = "foo"
405436
store_dict = {

src/zarr/testing/strategies.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,21 @@ def v2_dtypes() -> st.SearchStrategy[np.dtype]:
5151
)
5252

5353

54+
def safe_unicode_for_dtype(dtype: np.dtype[np.str_]) -> st.SearchStrategy[str]:
55+
"""Generate UTF-8-safe text constrained to max_len of dtype."""
56+
# account for utf-32 encoding (i.e. 4 bytes/character)
57+
max_len = max(1, dtype.itemsize // 4)
58+
59+
return st.text(
60+
alphabet=st.characters(
61+
blacklist_categories=["Cs"], # Avoid *technically allowed* surrogates
62+
min_codepoint=32,
63+
),
64+
min_size=1,
65+
max_size=max_len,
66+
)
67+
68+
5469
# From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names
5570
# 1. must not be the empty string ("")
5671
# 2. must not include the character "/"
@@ -86,7 +101,12 @@ def numpy_arrays(
86101
Generate numpy arrays that can be saved in the provided Zarr format.
87102
"""
88103
zarr_format = draw(zarr_formats)
89-
return draw(npst.arrays(dtype=v3_dtypes() if zarr_format == 3 else v2_dtypes(), shape=shapes))
104+
dtype = draw(v3_dtypes() if zarr_format == 3 else v2_dtypes())
105+
if np.issubdtype(dtype, np.str_):
106+
safe_unicode_strings = safe_unicode_for_dtype(dtype)
107+
return draw(npst.arrays(dtype=dtype, shape=shapes, elements=safe_unicode_strings))
108+
109+
return draw(npst.arrays(dtype=dtype, shape=shapes))
90110

91111

92112
@st.composite # type: ignore[misc]

0 commit comments

Comments
 (0)