Skip to content

spec: normalize the key for _ls_from_cache #557

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions fsspec/implementations/dbfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def makedirs(self, path, exist_ok=True):
raise FileExistsError(e.message)

raise e
self.invalidate_cache(self._parent(path))

def mkdir(self, path, create_parents=True, **kwargs):
"""
Expand Down Expand Up @@ -163,6 +164,7 @@ def rm(self, path, recursive=False):
raise OSError(e.message)

raise e
self.invalidate_cache(self._parent(path))

def mv(self, source_path, destination_path, recursive=False, maxdepth=None):
"""
Expand Down Expand Up @@ -205,6 +207,8 @@ def mv(self, source_path, destination_path, recursive=False, maxdepth=None):
raise FileExistsError(e.message)

raise e
self.invalidate_cache(self._parent(source_path))
self.invalidate_cache(self._parent(destination_path))

def _open(self, path, mode="rb", block_size="default", **kwargs):
"""
Expand Down Expand Up @@ -364,6 +368,13 @@ def _get_data(self, path, start, end):

raise e

def invalidate_cache(self, path=None):
    """
    Discard cached directory listings.

    Parameters
    ----------
    path: str or None
        Key to remove from ``self.dircache``. When ``None``, the whole
        ``dircache`` is emptied instead.
    """
    if path is not None:
        # A missing key is not an error: pop with a default never raises.
        self.dircache.pop(path, None)
    else:
        self.dircache.clear()
    # Always delegate to the parent implementation with the same argument.
    super().invalidate_cache(path)


class DatabricksFile(AbstractBufferedFile):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -670,4 +670,47 @@ interactions:
status:
code: 200
message: OK
- request:
body: '{"path": "/FileStore/"}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '23'
Content-Type:
- application/json
User-Agent:
- python-requests/2.25.1
authorization:
- DUMMY
method: GET
uri: https://my_instance.com/api/2.0/dbfs/list
response:
body:
string: !!binary |
H4sIAAAAAAAEAxzLQQqAIBAF0Lv8tZFBRHqALtAyQqKUBjJDx0VFdy/aPng3HG02QQ83jolXaJTd
Jz2HaEt/FomzcxCgZBaK0ByzFX8yiS4LLQV8WMjRPDGF3TD5T6umqltVK9VKKZ/xeQEAAP//AwC1
7zK3aAAAAA==
headers:
content-encoding:
- gzip
content-type:
- application/json
server:
- databricks
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
vary:
- Accept-Encoding
x-content-type-options:
- nosniff
status:
code: 200
message: OK
version: 1
2 changes: 1 addition & 1 deletion fsspec/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def _ls_from_cache(self, path):
"""
parent = self._parent(path)
if path.rstrip("/") in self.dircache:
return self.dircache[path]
return self.dircache[path.rstrip("/")]
try:
files = [
f
Expand Down
41 changes: 30 additions & 11 deletions fsspec/tests/test_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,21 @@ def __getitem__(self, name):
return item
raise IndexError("{name} not found!".format(name=name))

def ls(self, path, detail=True, **kwargs):
path = self._strip_protocol(path)

files = {
file["name"]: file
for file in self._fs_contents
if path == self._parent(file["name"])
}
def ls(self, path, detail=True, refresh=True, **kwargs):
if kwargs.pop("strip_proto", True):
path = self._strip_protocol(path)

files = not refresh and self._ls_from_cache(path)
if not files:
files = [
file for file in self._fs_contents if path == self._parent(file["name"])
]
files.sort(key=lambda file: file["name"])
self.dircache[path.rstrip("/")] = files

if detail:
return [files[name] for name in sorted(files)]

return list(sorted(files))
return files
return [file["name"] for file in files]

@classmethod
def get_test_paths(cls, start_with=""):
Expand Down Expand Up @@ -296,6 +298,23 @@ def test_json():
assert DummyTestFS.from_json(outb) is b


def test_ls_from_cache():
    """Cached listings must equal a fresh listing of the same directory."""
    directory = "top_level/second_level/"
    fs = DummyTestFS()

    # A refreshing call provides the reference result for the comparisons.
    expected = fs.ls(directory, refresh=True)

    cached = fs.ls(directory, refresh=False)
    assert cached == expected

    # By default ``ls`` runs the path through ``_strip_protocol`` before the
    # cache lookup. To exercise the ``_ls_from_cache`` interface directly,
    # repeat the lookup with that call disabled, which verifies that the
    # stripping done inside the cache lookup itself works.
    cached_unstripped = fs.ls(directory, refresh=False, strip_proto=False)
    assert cached_unstripped == expected


@pytest.mark.parametrize(
"dt",
[
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ conda_deps=
{[core]conda_deps}
httpretty
aiobotocore
moto
"moto<2.0"
flask
changedir=.tox/s3fs/tmp
whitelist_externals=
Expand Down