diff --git a/fsspec/implementations/dbfs.py b/fsspec/implementations/dbfs.py index d6ca450ab..137558918 100644 --- a/fsspec/implementations/dbfs.py +++ b/fsspec/implementations/dbfs.py @@ -118,6 +118,7 @@ def makedirs(self, path, exist_ok=True): raise FileExistsError(e.message) raise e + self.invalidate_cache(self._parent(path)) def mkdir(self, path, create_parents=True, **kwargs): """ @@ -163,6 +164,7 @@ def rm(self, path, recursive=False): raise OSError(e.message) raise e + self.invalidate_cache(self._parent(path)) def mv(self, source_path, destination_path, recursive=False, maxdepth=None): """ @@ -205,6 +207,8 @@ def mv(self, source_path, destination_path, recursive=False, maxdepth=None): raise FileExistsError(e.message) raise e + self.invalidate_cache(self._parent(source_path)) + self.invalidate_cache(self._parent(destination_path)) def _open(self, path, mode="rb", block_size="default", **kwargs): """ @@ -364,6 +368,13 @@ def _get_data(self, path, start, end): raise e + def invalidate_cache(self, path=None): + if path is None: + self.dircache.clear() + else: + self.dircache.pop(path, None) + super().invalidate_cache(path) + class DatabricksFile(AbstractBufferedFile): """ diff --git a/fsspec/implementations/tests/cassettes/test_dbfs_write_and_read.yaml b/fsspec/implementations/tests/cassettes/test_dbfs_write_and_read.yaml index 54c943a9e..9a1e65c53 100644 --- a/fsspec/implementations/tests/cassettes/test_dbfs_write_and_read.yaml +++ b/fsspec/implementations/tests/cassettes/test_dbfs_write_and_read.yaml @@ -670,4 +670,47 @@ interactions: status: code: 200 message: OK +- request: + body: '{"path": "/FileStore/"}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '23' + Content-Type: + - application/json + User-Agent: + - python-requests/2.25.1 + authorization: + - DUMMY + method: GET + uri: https://my_instance.com/api/2.0/dbfs/list + response: + body: + string: !!binary | + H4sIAAAAAAAEAxzLQQqAIBAF0Lv8tZFBRHqALtAyQqKUBjJDx0VFdy/aPng3HG02QQ83jolXaJTd + Jz2HaEt/FomzcxCgZBaK0ByzFX8yiS4LLQV8WMjRPDGF3TD5T6umqltVK9VKKZ/xeQEAAP//AwC1 + 7zK3aAAAAA== + headers: + content-encoding: + - gzip + content-type: + - application/json + server: + - databricks + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + vary: + - Accept-Encoding + x-content-type-options: + - nosniff + status: + code: 200 + message: OK version: 1 diff --git a/fsspec/spec.py b/fsspec/spec.py index 1e039fee3..354972939 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -340,7 +340,7 @@ def _ls_from_cache(self, path): """ parent = self._parent(path) if path.rstrip("/") in self.dircache: - return self.dircache[path] + return self.dircache[path.rstrip("/")] try: files = [ f diff --git a/fsspec/tests/test_spec.py b/fsspec/tests/test_spec.py index f5f8fc076..dc71f3ee2 100644 --- a/fsspec/tests/test_spec.py +++ b/fsspec/tests/test_spec.py @@ -57,19 +57,21 @@ def __getitem__(self, name): return item raise IndexError("{name} not found!".format(name=name)) - def ls(self, path, detail=True, **kwargs): - path = self._strip_protocol(path) - - files = { - file["name"]: file - for file in self._fs_contents - if path == self._parent(file["name"]) - } + def ls(self, path, detail=True, refresh=True, **kwargs): + if kwargs.pop("strip_proto", True): + path = self._strip_protocol(path) + + files = not refresh and self._ls_from_cache(path) + if not files: + files = [ + file for file in self._fs_contents if path == self._parent(file["name"]) + ] + files.sort(key=lambda file: file["name"]) + self.dircache[path.rstrip("/")] = files if detail: - return [files[name] for name in sorted(files)] - - return list(sorted(files)) + return files + return [file["name"] for file in files] @classmethod def get_test_paths(cls, start_with=""): @@ -296,6 +298,23 @@ def test_json(): assert DummyTestFS.from_json(outb) is b +def test_ls_from_cache(): + fs = DummyTestFS() + uncached_results = fs.ls("top_level/second_level/", refresh=True) + + assert fs.ls("top_level/second_level/", refresh=False) == uncached_results + + # _strip_protocol removes everything by default though + # for the sake of testing the _ls_from_cache interface + # directly, we need run one time more without that call + # to actually verify that our stripping in the client + # function works. + assert ( + fs.ls("top_level/second_level/", refresh=False, strip_proto=False) + == uncached_results + ) + + @pytest.mark.parametrize( "dt", [ diff --git a/tox.ini b/tox.ini index 42df3940f..d8d52ad8c 100644 --- a/tox.ini +++ b/tox.ini @@ -60,7 +60,7 @@ conda_deps= {[core]conda_deps} httpretty aiobotocore - moto + "moto<2.0" flask changedir=.tox/s3fs/tmp whitelist_externals=