From d740c7aa4273f634918d334b95a20f2f92fc664d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 5 Feb 2025 21:13:06 -0700 Subject: [PATCH 1/8] Restrict stateful tests to slow hypothesis CI workflow. --- pyproject.toml | 5 +++-- src/zarr/testing/stateful.py | 3 +++ tests/conftest.py | 18 ++++++++++++++++++ tests/test_store/test_stateful.py | 21 +++++++-------------- 4 files changed, 31 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6fbcd1991e..c9ec1c4fa2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -161,7 +161,7 @@ run = "run-coverage --no-cov" run-pytest = "run" run-verbose = "run-coverage --verbose" run-mypy = "mypy src" -run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*" +run-hypothesis = "pytest --hypothesis-profile ci --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*" list-env = "pip list" [tool.hatch.envs.doctest] @@ -398,7 +398,8 @@ filterwarnings = [ "ignore:.*is currently not part in the Zarr format 3 specification.*:UserWarning", ] markers = [ - "gpu: mark a test as requiring CuPy and GPU" + "gpu: mark a test as requiring CuPy and GPU", + "slow_hypothesis: slow hypothesis tests", ] [tool.repo-review] diff --git a/src/zarr/testing/stateful.py b/src/zarr/testing/stateful.py index 1a1ef0e3a3..f2c616cb34 100644 --- a/src/zarr/testing/stateful.py +++ b/src/zarr/testing/stateful.py @@ -25,6 +25,9 @@ MAX_BINARY_SIZE = 100 +# Handle possible case-insensitive file systems (e.g. MacOS) +node_names = node_names.map(lambda x: x.lower()) + def split_prefix_name(path: str) -> tuple[str, str]: split = path.rsplit("/", maxsplit=1) diff --git a/tests/conftest.py b/tests/conftest.py index e9cd2b8120..9be675cb20 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -147,6 +147,24 @@ def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat: raise ValueError(msg) +def pytest_addoption(parser: Any) -> None: + parser.addoption( + "--run-slow-hypothesis", + action="store_true", + default=False, + help="run slow hypothesis tests", + ) + + +def pytest_collection_modifyitems(config: Any, items: Any) -> None: + if config.getoption("--run-slow-hypothesis"): + return + skip_slow_hyp = pytest.mark.skip(reason="need --run-slow-hypothesis option to run") + for item in items: + if "slow_hypothesis" in item.keywords: + item.add_marker(skip_slow_hyp) + + settings.register_profile( "ci", max_examples=1000, diff --git a/tests/test_store/test_stateful.py b/tests/test_store/test_stateful.py index 637eea882d..2360099ad9 100644 --- a/tests/test_store/test_stateful.py +++ b/tests/test_store/test_stateful.py @@ -1,14 +1,15 @@ # Stateful tests for arbitrary Zarr stores. import pytest from hypothesis.stateful import ( - Settings, run_state_machine_as_test, ) from zarr.abc.store import Store -from zarr.storage import LocalStore, MemoryStore, ZipStore +from zarr.storage import ZipStore from zarr.testing.stateful import ZarrHierarchyStateMachine, ZarrStoreStateMachine +pytestmark = pytest.mark.slow_hypothesis + def test_zarr_hierarchy(sync_store: Store): def mk_test_instance_sync() -> ZarrHierarchyStateMachine: @@ -16,10 +17,8 @@ def mk_test_instance_sync() -> ZarrHierarchyStateMachine: if isinstance(sync_store, ZipStore): pytest.skip(reason="ZipStore does not support delete") - if isinstance(sync_store, MemoryStore): - run_state_machine_as_test( - mk_test_instance_sync, settings=Settings(report_multiple_bugs=False, max_examples=50) - ) + + run_state_machine_as_test(mk_test_instance_sync) def test_zarr_store(sync_store: Store) -> None: @@ -28,11 +27,5 @@ def mk_test_instance_sync() -> None: if isinstance(sync_store, ZipStore): pytest.skip(reason="ZipStore does not support delete") - elif isinstance(sync_store, LocalStore): - pytest.skip(reason="This test has errors") - elif isinstance(sync_store, MemoryStore): - run_state_machine_as_test(mk_test_instance_sync, settings=Settings(max_examples=50)) - else: - run_state_machine_as_test( - mk_test_instance_sync, settings=Settings(report_multiple_bugs=True) - ) + + run_state_machine_as_test(mk_test_instance_sync) From 83a91c96f924ce595f356110653479cee134a918 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 5 Feb 2025 21:17:57 -0700 Subject: [PATCH 2/8] cleanuip --- src/zarr/testing/stateful.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/zarr/testing/stateful.py b/src/zarr/testing/stateful.py index f2c616cb34..dac6eb71a5 100644 --- a/src/zarr/testing/stateful.py +++ b/src/zarr/testing/stateful.py @@ -25,9 +25,6 @@ MAX_BINARY_SIZE = 100 -# Handle possible case-insensitive file systems (e.g. MacOS) -node_names = node_names.map(lambda x: x.lower()) - def split_prefix_name(path: str) -> tuple[str, str]: split = path.rsplit("/", maxsplit=1) @@ -71,6 +68,9 @@ def can_add(self, path: str) -> bool: # -------------------- store operations ----------------------- @rule(name=node_names, data=st.data()) def add_group(self, name: str, data: DataObject) -> None: + # Handle possible case-insensitive file systems (e.g. MacOS) + if isinstance(self.store, LocalStore): + name = name.lower() if self.all_groups: parent = data.draw(st.sampled_from(sorted(self.all_groups)), label="Group parent") else: @@ -93,6 +93,9 @@ def add_array( name: str, array_and_chunks: tuple[np.ndarray[Any, Any], tuple[int, ...]], ) -> None: + # Handle possible case-insensitive file systems (e.g. MacOS) + if isinstance(self.store, LocalStore): + name = name.lower() array, chunks = array_and_chunks fill_value = data.draw(npst.from_dtype(array.dtype)) if self.all_groups: From 01edfcacc5c6437030681f46c3c2541f01b4bfa2 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 5 Feb 2025 18:15:23 -0700 Subject: [PATCH 3/8] Strategy updates --- src/zarr/testing/stateful.py | 7 +++++++ src/zarr/testing/strategies.py | 11 ++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/zarr/testing/stateful.py b/src/zarr/testing/stateful.py index dac6eb71a5..3e8dbcdf04 100644 --- a/src/zarr/testing/stateful.py +++ b/src/zarr/testing/stateful.py @@ -141,6 +141,7 @@ def add_array( # self.model.rename(from_group, new_path) # self.repo.store.rename(from_group, new_path) + @precondition(lambda self: self.store.supports_deletes) @precondition(lambda self: len(self.all_arrays) >= 1) @rule(data=st.data()) def delete_array_using_del(self, data: DataObject) -> None: @@ -155,6 +156,7 @@ def delete_array_using_del(self, data: DataObject) -> None: del group[array_name] self.all_arrays.remove(array_path) + @precondition(lambda self: self.store.supports_deletes) @precondition(lambda self: len(self.all_groups) >= 2) # fixme don't delete root @rule(data=st.data()) def delete_group_using_del(self, data: DataObject) -> None: @@ -290,6 +292,10 @@ def supports_partial_writes(self) -> bool: def supports_writes(self) -> bool: return self.store.supports_writes + @property + def supports_deletes(self) -> bool: + return self.store.supports_deletes + class ZarrStoreStateMachine(RuleBasedStateMachine): """ " @@ -372,6 +378,7 @@ def get_partial_values(self, data: DataObject) -> None: model_vals_ls, ) + @precondition(lambda self: self.store.supports_deletes) @precondition(lambda self: len(self.model.keys()) > 0) @rule(data=st.data()) def delete(self, data: DataObject) -> None: diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 81bb482b06..5722f3c99e 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -1,3 +1,4 @@ +import sys from typing import Any import hypothesis.extra.numpy as npst @@ -209,7 +210,7 @@ def basic_indices(draw: st.DrawFn, *, shape: tuple[int], **kwargs: Any) -> Any: def key_ranges( - keys: SearchStrategy = node_names, max_size: int | None = None + keys: SearchStrategy = node_names, max_size: int = sys.maxsize ) -> SearchStrategy[list[int]]: """ Function to generate key_ranges strategy for get_partial_values() @@ -218,10 +219,14 @@ def key_ranges( [(key, (range_start, range_end)), (key, (range_start, range_end)),...] """ + + def make_request(start: int, length: int) -> RangeByteRequest: + return RangeByteRequest(start, end=min(start + length, max_size)) + byte_ranges = st.builds( - RangeByteRequest, + make_request, start=st.integers(min_value=0, max_value=max_size), - end=st.integers(min_value=0, max_value=max_size), + length=st.integers(min_value=0, max_value=max_size), ) key_tuple = st.tuples(keys, byte_ranges) return st.lists(key_tuple, min_size=1, max_size=10) From 457188437fba18ccb88fbfab4774172381125cee Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 5 Feb 2025 21:29:54 -0700 Subject: [PATCH 4/8] Fix LocalStore test --- src/zarr/storage/_local.py | 7 +++++++ tests/test_store/test_stateful.py | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/zarr/storage/_local.py b/src/zarr/storage/_local.py index 1defea26b4..10af5a958b 100644 --- a/src/zarr/storage/_local.py +++ b/src/zarr/storage/_local.py @@ -208,6 +208,13 @@ async def delete(self, key: str) -> None: else: await asyncio.to_thread(path.unlink, True) # Q: we may want to raise if path is missing + async def delete_dir(self, prefix: str) -> None: + # docstring inherited + self._check_writable() + path = self.root / prefix + if path.is_dir(): + shutil.rmtree(path) + async def exists(self, key: str) -> bool: # docstring inherited path = self.root / key diff --git a/tests/test_store/test_stateful.py b/tests/test_store/test_stateful.py index 2360099ad9..e428ea60fd 100644 --- a/tests/test_store/test_stateful.py +++ b/tests/test_store/test_stateful.py @@ -5,7 +5,7 @@ ) from zarr.abc.store import Store -from zarr.storage import ZipStore +from zarr.storage import LocalStore, ZipStore from zarr.testing.stateful import ZarrHierarchyStateMachine, ZarrStoreStateMachine pytestmark = pytest.mark.slow_hypothesis @@ -28,4 +28,6 @@ def mk_test_instance_sync() -> None: if isinstance(sync_store, ZipStore): pytest.skip(reason="ZipStore does not support delete") + if isinstance(sync_store, LocalStore): + pytest.skip(reason="Test isn't suitable for LocalStore.") run_state_machine_as_test(mk_test_instance_sync) From d6fe75edbbc7f3c8ec750c32572c18a65c6c1ec5 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 5 Feb 2025 21:47:59 -0700 Subject: [PATCH 5/8] add changes --- changes/2804.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/2804.feature.rst diff --git a/changes/2804.feature.rst b/changes/2804.feature.rst new file mode 100644 index 0000000000..24280ce0eb --- /dev/null +++ b/changes/2804.feature.rst @@ -0,0 +1 @@ +:py:class:`LocalStore` learned to ``delete_dir``. From b5cbf257918985a57f20ea45f59c4f0ec358a77f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 5 Feb 2025 22:00:23 -0700 Subject: [PATCH 6/8] add coverage to hypothesis workflow --- .github/workflows/hypothesis.yaml | 6 ++++++ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/hypothesis.yaml b/.github/workflows/hypothesis.yaml index 1029063ef4..0a320de00b 100644 --- a/.github/workflows/hypothesis.yaml +++ b/.github/workflows/hypothesis.yaml @@ -69,6 +69,12 @@ jobs: path: .hypothesis/ key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }} + - name: Upload coverage + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + verbose: true # optional (default = false) + - name: Generate and publish the report if: | failure() diff --git a/pyproject.toml b/pyproject.toml index c9ec1c4fa2..ab285ff7ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -161,7 +161,7 @@ run = "run-coverage --no-cov" run-pytest = "run" run-verbose = "run-coverage --verbose" run-mypy = "mypy src" -run-hypothesis = "pytest --hypothesis-profile ci --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*" +run-hypothesis = "run-coverage --hypothesis-profile ci --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*" list-env = "pip list" [tool.hatch.envs.doctest] From 950f6d831e42ba3b7d2f778e24b688a0a757bc23 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 6 Feb 2025 07:56:32 -0700 Subject: [PATCH 7/8] review comments --- changes/2804.feature.rst | 2 +- tests/test_store/test_stateful.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/changes/2804.feature.rst b/changes/2804.feature.rst index 24280ce0eb..5a707752a0 100644 --- a/changes/2804.feature.rst +++ b/changes/2804.feature.rst @@ -1 +1 @@ -:py:class:`LocalStore` learned to ``delete_dir``. +:py:class:`LocalStore` learned to ``delete_dir``. This makes array and group deletes more efficient. diff --git a/tests/test_store/test_stateful.py b/tests/test_store/test_stateful.py index e428ea60fd..63df814ac9 100644 --- a/tests/test_store/test_stateful.py +++ b/tests/test_store/test_stateful.py @@ -29,5 +29,8 @@ def mk_test_instance_sync() -> None: pytest.skip(reason="ZipStore does not support delete") if isinstance(sync_store, LocalStore): + # This test uses arbitrary keys, which are passed to `set` and `delete`. + # It assumes that `set` and `delete` are the only two operations that modify state. + # But LocalStore, directories can hang around even after a key is delete-d. pytest.skip(reason="Test isn't suitable for LocalStore.") run_state_machine_as_test(mk_test_instance_sync) From 46382fc3ce8deb5b422cdc3ac6d0bd4209d945ae Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 6 Feb 2025 08:00:12 -0700 Subject: [PATCH 8/8] Review comments --- src/zarr/storage/_local.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/zarr/storage/_local.py b/src/zarr/storage/_local.py index 10af5a958b..b20a601bed 100644 --- a/src/zarr/storage/_local.py +++ b/src/zarr/storage/_local.py @@ -214,6 +214,12 @@ async def delete_dir(self, prefix: str) -> None: path = self.root / prefix if path.is_dir(): shutil.rmtree(path) + elif path.is_file(): + raise ValueError(f"delete_dir was passed a {prefix=!r} that is a file") + else: + # Non-existent directory + # This path is tested by test_group:test_create_creates_parents for one + pass async def exists(self, key: str) -> bool: # docstring inherited