Skip to content

Commit 2209529

Browse files
authored
Merge pull request #557 from isidentical/normalization-ls-from-cache
spec: normalize the key for _ls_from_cache
2 parents 64868f1 + 8f0aecb commit 2209529

File tree

5 files changed

+86
-13
lines changed

5 files changed

+86
-13
lines changed

fsspec/implementations/dbfs.py

+11
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ def makedirs(self, path, exist_ok=True):
118118
raise FileExistsError(e.message)
119119

120120
raise e
121+
self.invalidate_cache(self._parent(path))
121122

122123
def mkdir(self, path, create_parents=True, **kwargs):
123124
"""
@@ -163,6 +164,7 @@ def rm(self, path, recursive=False):
163164
raise OSError(e.message)
164165

165166
raise e
167+
self.invalidate_cache(self._parent(path))
166168

167169
def mv(self, source_path, destination_path, recursive=False, maxdepth=None):
168170
"""
@@ -205,6 +207,8 @@ def mv(self, source_path, destination_path, recursive=False, maxdepth=None):
205207
raise FileExistsError(e.message)
206208

207209
raise e
210+
self.invalidate_cache(self._parent(source_path))
211+
self.invalidate_cache(self._parent(destination_path))
208212

209213
def _open(self, path, mode="rb", block_size="default", **kwargs):
210214
"""
@@ -364,6 +368,13 @@ def _get_data(self, path, start, end):
364368

365369
raise e
366370

371+
def invalidate_cache(self, path=None):
    """Discard cached directory listings held in ``self.dircache``.

    Parameters
    ----------
    path : str or None
        When ``None``, every cached listing is dropped. Otherwise only
        the entry stored under exactly ``path`` is removed; a missing
        entry is ignored rather than raising.

    The parent class's ``invalidate_cache`` is always called afterwards
    with the same ``path``.
    """
    if path is not None:
        # pop() with a default so an uncached path is not an error.
        self.dircache.pop(path, None)
    else:
        self.dircache.clear()
    super().invalidate_cache(path)
377+
367378

368379
class DatabricksFile(AbstractBufferedFile):
369380
"""

fsspec/implementations/tests/cassettes/test_dbfs_write_and_read.yaml

+43
Original file line numberDiff line numberDiff line change
@@ -670,4 +670,47 @@ interactions:
670670
status:
671671
code: 200
672672
message: OK
673+
- request:
674+
body: '{"path": "/FileStore/"}'
675+
headers:
676+
Accept:
677+
- '*/*'
678+
Accept-Encoding:
679+
- gzip, deflate
680+
Connection:
681+
- keep-alive
682+
Content-Length:
683+
- '23'
684+
Content-Type:
685+
- application/json
686+
User-Agent:
687+
- python-requests/2.25.1
688+
authorization:
689+
- DUMMY
690+
method: GET
691+
uri: https://my_instance.com/api/2.0/dbfs/list
692+
response:
693+
body:
694+
string: !!binary |
695+
H4sIAAAAAAAEAxzLQQqAIBAF0Lv8tZFBRHqALtAyQqKUBjJDx0VFdy/aPng3HG02QQ83jolXaJTd
696+
Jz2HaEt/FomzcxCgZBaK0ByzFX8yiS4LLQV8WMjRPDGF3TD5T6umqltVK9VKKZ/xeQEAAP//AwC1
697+
7zK3aAAAAA==
698+
headers:
699+
content-encoding:
700+
- gzip
701+
content-type:
702+
- application/json
703+
server:
704+
- databricks
705+
strict-transport-security:
706+
- max-age=31536000; includeSubDomains; preload
707+
transfer-encoding:
708+
- chunked
709+
vary:
710+
- Accept-Encoding
711+
x-content-type-options:
712+
- nosniff
713+
status:
714+
code: 200
715+
message: OK
673716
version: 1

fsspec/spec.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ def _ls_from_cache(self, path):
340340
"""
341341
parent = self._parent(path)
342342
if path.rstrip("/") in self.dircache:
343-
return self.dircache[path]
343+
return self.dircache[path.rstrip("/")]
344344
try:
345345
files = [
346346
f

fsspec/tests/test_spec.py

+30-11
Original file line numberDiff line numberDiff line change
@@ -57,19 +57,21 @@ def __getitem__(self, name):
5757
return item
5858
raise IndexError("{name} not found!".format(name=name))
5959

60-
def ls(self, path, detail=True, **kwargs):
61-
path = self._strip_protocol(path)
62-
63-
files = {
64-
file["name"]: file
65-
for file in self._fs_contents
66-
if path == self._parent(file["name"])
67-
}
60+
def ls(self, path, detail=True, refresh=True, **kwargs):
61+
if kwargs.pop("strip_proto", True):
62+
path = self._strip_protocol(path)
63+
64+
files = not refresh and self._ls_from_cache(path)
65+
if not files:
66+
files = [
67+
file for file in self._fs_contents if path == self._parent(file["name"])
68+
]
69+
files.sort(key=lambda file: file["name"])
70+
self.dircache[path.rstrip("/")] = files
6871

6972
if detail:
70-
return [files[name] for name in sorted(files)]
71-
72-
return list(sorted(files))
73+
return files
74+
return [file["name"] for file in files]
7375

7476
@classmethod
7577
def get_test_paths(cls, start_with=""):
@@ -296,6 +298,23 @@ def test_json():
296298
assert DummyTestFS.from_json(outb) is b
297299

298300

301+
def test_ls_from_cache():
    """A cached listing must equal the freshly computed one."""
    fs = DummyTestFS()
    uncached_results = fs.ls("top_level/second_level/", refresh=True)

    cached_results = fs.ls("top_level/second_level/", refresh=False)
    assert cached_results == uncached_results

    # By default ``ls`` runs the path through ``_strip_protocol`` before
    # consulting the cache. To exercise the key handling inside
    # ``_ls_from_cache`` itself, run once more with that call disabled
    # (``strip_proto=False``) so the path is handed over unmodified.
    unstripped_results = fs.ls(
        "top_level/second_level/", refresh=False, strip_proto=False
    )
    assert unstripped_results == uncached_results
316+
317+
299318
@pytest.mark.parametrize(
300319
"dt",
301320
[

tox.ini

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ conda_deps=
6060
{[core]conda_deps}
6161
httpretty
6262
aiobotocore
63-
moto
63+
"moto<2.0"
6464
flask
6565
changedir=.tox/s3fs/tmp
6666
whitelist_externals=

0 commit comments

Comments
 (0)