Skip to content

Commit f8399bf

Browse files
authored
Merge pull request #569 from juliospain/chained_url_fix
Chained url fix
2 parents 553d07c + ff3ccdb commit f8399bf

File tree

2 files changed

+136
-73
lines changed

2 files changed

+136
-73
lines changed

fsspec/core.py

+67-71
Original file line numberDiff line numberDiff line change
@@ -332,48 +332,48 @@ def _un_chain(path, kwargs):
332332
# [[url, protocol, kwargs], ...]
333333
out = []
334334
previous_bit = None
335-
previous_protocol = None
336335
for bit in reversed(bits):
337336
protocol = split_protocol(bit)[0] or "file"
338337
cls = get_filesystem_class(protocol)
339338
extra_kwargs = cls._get_kwargs_from_urls(bit)
340-
kws = kwargs.get(split_protocol(bit)[0] or "file", {})
339+
kws = kwargs.get(protocol, {})
341340
kw = dict(**extra_kwargs, **kws)
341+
bit = cls._strip_protocol(bit)
342342
if (
343343
protocol in {"blockcache", "filecache", "simplecache"}
344344
and "target_protocol" not in kw
345345
):
346-
bit = previous_bit.replace(previous_protocol, protocol, 1)
346+
bit = previous_bit
347347
out.append((bit, protocol, kw))
348348
previous_bit = bit
349-
previous_protocol = protocol
350349
out = list(reversed(out))
351-
# We should only do the url rewrite if the cache is in the middle of the chain
352-
if out[0][1] in {"blockcache", "filecache", "simplecache"}:
353-
out[0] = (f"{out[0][1]}://", out[0][1], out[0][2])
354350
return out
355351

356352

357353
def url_to_fs(url, **kwargs):
358354
"""Turn fully-qualified and potentially chained URL into filesystem instance"""
359355
chain = _un_chain(url, kwargs)
360356
if len(chain) > 1:
361-
kwargs = chain[0][2]
362-
inkwargs = kwargs
363-
for i, ch in enumerate(chain):
357+
inkwargs = {}
358+
# Reverse iterate the chain, creating a nested target_* structure
359+
for i, ch in enumerate(reversed(chain)):
364360
urls, protocol, kw = ch
365-
if i == 0:
361+
if i == len(chain) - 1:
362+
inkwargs = dict(**kw, **inkwargs)
366363
continue
364+
inkwargs["target_options"] = dict(**kw, **inkwargs)
367365
inkwargs["target_protocol"] = protocol
368-
inkwargs["target_options"] = kw.copy()
369366
inkwargs["fo"] = urls
370-
inkwargs = inkwargs["target_options"]
371-
protocol = chain[0][1]
372-
urlpath = chain[-1][1] + "://" + split_protocol(urls)[1]
373-
fs = filesystem(protocol, **kwargs)
367+
urlpath, protocol, _ = chain[0]
368+
fs = filesystem(protocol, **inkwargs)
374369
else:
375-
protocol, urlpath = split_protocol(url)
376-
fs = filesystem(protocol, **kwargs)
370+
protocol = split_protocol(url)[0]
371+
cls = get_filesystem_class(protocol)
372+
373+
options = cls._get_kwargs_from_urls(url)
374+
urlpath = cls._strip_protocol(url)
375+
update_storage_options(options, kwargs)
376+
fs = cls(**options)
377377
urlpath = fs._strip_protocol(url)
378378
return fs, urlpath
379379

@@ -563,72 +563,68 @@ def get_fs_token_paths(
563563
Expand string paths for writing, assuming the path is a directory
564564
"""
565565
if isinstance(urlpath, (list, tuple, set)):
566+
if not urlpath:
567+
raise ValueError("empty urlpath sequence")
566568
urlpath = [stringify_path(u) for u in urlpath]
567569
else:
568570
urlpath = stringify_path(urlpath)
569571
chain = _un_chain(urlpath, storage_options or {})
570572
if len(chain) > 1:
571-
storage_options = chain[0][2]
572-
inkwargs = storage_options
573-
urlpath = False
574-
for i, ch in enumerate(chain):
575-
urls, protocol, kw = ch
576-
if isinstance(urls, str):
577-
if not urlpath and split_protocol(urls)[1]:
578-
urlpath = protocol + "://" + split_protocol(urls)[1]
579-
else:
580-
if not urlpath and any(split_protocol(u)[1] for u in urls):
581-
urlpath = [protocol + "://" + split_protocol(u)[1] for u in urls]
582-
if i == 0:
573+
inkwargs = {}
574+
# Reverse iterate the chain, creating a nested target_* structure
575+
for i, ch in enumerate(reversed(chain)):
576+
urls, nested_protocol, kw = ch
577+
if i == len(chain) - 1:
578+
inkwargs = dict(**kw, **inkwargs)
583579
continue
584-
inkwargs["target_protocol"] = protocol
585-
inkwargs["target_options"] = kw.copy()
580+
inkwargs["target_options"] = dict(**kw, **inkwargs)
581+
inkwargs["target_protocol"] = nested_protocol
586582
inkwargs["fo"] = urls
587-
inkwargs = inkwargs["target_options"]
588-
protocol = chain[0][1]
589-
if isinstance(urlpath, (list, tuple)):
590-
if not urlpath:
591-
raise ValueError("empty urlpath sequence")
592-
protocols, paths = zip(*map(split_protocol, urlpath))
593-
if protocol is None:
594-
protocol = protocols[0]
595-
if not all(p == protocol for p in protocols):
583+
paths, protocol, _ = chain[0]
584+
fs = filesystem(protocol, **inkwargs)
585+
if isinstance(paths, (list, tuple, set)):
586+
paths = [fs._strip_protocol(u) for u in paths]
587+
else:
588+
paths = fs._strip_protocol(paths)
589+
else:
590+
if isinstance(urlpath, (list, tuple, set)):
591+
protocols, paths = zip(*map(split_protocol, urlpath))
592+
if protocol is None:
593+
protocol = protocols[0]
594+
if not all(p == protocol for p in protocols):
595+
raise ValueError(
596+
"When specifying a list of paths, all paths must "
597+
"share the same protocol"
598+
)
599+
cls = get_filesystem_class(protocol)
600+
optionss = list(map(cls._get_kwargs_from_urls, urlpath))
601+
paths = [cls._strip_protocol(u) for u in urlpath]
602+
options = optionss[0]
603+
if not all(o == options for o in optionss):
596604
raise ValueError(
597605
"When specifying a list of paths, all paths must "
598-
"share the same protocol"
606+
"share the same file-system options"
599607
)
600-
cls = get_filesystem_class(protocol)
601-
optionss = list(map(cls._get_kwargs_from_urls, urlpath))
602-
paths = [cls._strip_protocol(u) for u in urlpath]
603-
options = optionss[0]
604-
if not all(o == options for o in optionss):
605-
raise ValueError(
606-
"When specifying a list of paths, all paths must "
607-
"share the same file-system options"
608-
)
609-
update_storage_options(options, storage_options)
610-
fs = cls(**options)
608+
update_storage_options(options, storage_options)
609+
fs = cls(**options)
610+
else:
611+
protocols = split_protocol(urlpath)[0]
612+
protocol = protocol or protocols
613+
cls = get_filesystem_class(protocol)
614+
options = cls._get_kwargs_from_urls(urlpath)
615+
paths = cls._strip_protocol(urlpath)
616+
update_storage_options(options, storage_options)
617+
fs = cls(**options)
618+
619+
if isinstance(paths, (list, tuple, set)):
611620
paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
612-
613-
elif isinstance(urlpath, str) or hasattr(urlpath, "name"):
614-
protocols, path = split_protocol(urlpath)
615-
protocol = protocol or protocols
616-
cls = get_filesystem_class(protocol)
617-
618-
options = cls._get_kwargs_from_urls(urlpath)
619-
path = cls._strip_protocol(urlpath)
620-
update_storage_options(options, storage_options)
621-
fs = cls(**options)
622-
621+
else:
623622
if "w" in mode and expand:
624-
paths = _expand_paths(path, name_function, num)
625-
elif "*" in path:
626-
paths = [f for f in sorted(fs.glob(path)) if not fs.isdir(f)]
623+
paths = _expand_paths(paths, name_function, num)
624+
elif "*" in paths:
625+
paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
627626
else:
628-
paths = [path]
629-
630-
else:
631-
raise TypeError("url type not understood: %s" % urlpath)
627+
paths = [paths]
632628

633629
return fs, fs._fs_token, paths
634630

fsspec/tests/test_core.py

+69-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import os
22
import pickle
33
import tempfile
4+
import zipfile
5+
from contextlib import contextmanager
46

57
import pytest
68

@@ -15,6 +17,21 @@
1517
)
1618

1719

20+
@contextmanager
21+
def tempzip(data={}):
22+
f = tempfile.mkstemp(suffix="zip")[1]
23+
with zipfile.ZipFile(f, mode="w") as z:
24+
for k, v in data.items():
25+
z.writestr(k, v)
26+
try:
27+
yield f
28+
finally:
29+
try:
30+
os.remove(f)
31+
except (IOError, OSError):
32+
pass
33+
34+
1835
@pytest.mark.parametrize(
1936
"path, name_function, num, out",
2037
[
@@ -147,15 +164,15 @@ def test_mismatch():
147164

148165

149166
def test_url_kwargs_chain(ftp_writable):
150-
host, port, username, password = "localhost", 2121, "user", "pass"
167+
host, port, username, password = ftp_writable
151168
data = b"hello"
152169
with fsspec.open(
153170
"ftp:///afile", "wb", host=host, port=port, username=username, password=password
154171
) as f:
155172
f.write(data)
156173

157174
with fsspec.open(
158-
"simplecache::ftp://{}:{}@{}:{}/afile".format(username, password, host, port),
175+
f"simplecache::ftp://{username}:{password}@{host}:{port}//afile",
159176
"rb",
160177
) as f:
161178
assert f.read() == data
@@ -178,3 +195,53 @@ def test_multi_context(tmpdir):
178195
def test_not_local():
179196
with pytest.raises(ValueError, match="attribute local_file=True"):
180197
open_local("memory://afile")
198+
199+
200+
def test_url_to_fs(ftp_writable):
201+
host, port, username, password = ftp_writable
202+
data = b"hello"
203+
with fsspec.open(f"ftp://{username}:{password}@{host}:{port}/afile", "wb") as f:
204+
f.write(data)
205+
fs, url = fsspec.core.url_to_fs(
206+
f"simplecache::ftp://{username}:{password}@{host}:{port}/afile"
207+
)
208+
fs, url = fsspec.core.url_to_fs(f"ftp://{username}:{password}@{host}:{port}/afile")
209+
assert url == "/afile"
210+
211+
212+
def test_target_protocol_options(ftp_writable):
213+
host, port, username, password = ftp_writable
214+
data = {"afile": b"hello"}
215+
options = {"host": host, "port": port, "username": username, "password": password}
216+
with tempzip(data) as lfile, fsspec.open(
217+
"ftp:///archive.zip", "wb", **options
218+
) as f:
219+
f.write(open(lfile, "rb").read())
220+
with fsspec.open(
221+
"zip://afile",
222+
"rb",
223+
target_protocol="ftp",
224+
target_options=options,
225+
fo="archive.zip",
226+
) as f:
227+
assert f.read() == data["afile"]
228+
229+
230+
def test_chained_url(ftp_writable):
231+
host, port, username, password = ftp_writable
232+
data = {"afile": b"hello"}
233+
cls = fsspec.get_filesystem_class("ftp")
234+
fs = cls(host=host, port=port, username=username, password=password)
235+
with tempzip(data) as lfile:
236+
fs.put_file(lfile, "archive.zip")
237+
238+
urls = [
239+
"zip://afile",
240+
"zip://afile::simplecache",
241+
"simplecache::zip://afile",
242+
"simplecache::zip://afile::simplecache",
243+
]
244+
for url in urls:
245+
url += f"::ftp://{username}:{password}@{host}:{port}/archive.zip"
246+
with fsspec.open(url, "rb") as f:
247+
assert f.read() == data["afile"]

0 commit comments

Comments
 (0)