Skip to content

Commit 3e5f154

Browse files
author
Luca Moschella
committed
Add PrefixFileSystem
The PrefixFileSystem is a filesystem wrapper. It assumes every path it is dealing with is relative to the `prefix`. After performing the necessary path operations, it delegates everything to the wrapped filesystem. Resolves #395
1 parent c15b586 commit 3e5f154

File tree

2 files changed

+270
-0
lines changed

2 files changed

+270
-0
lines changed

fsspec/implementations/prefix.py

+135
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import os
2+
from pathlib import Path
3+
from typing import Any, Iterable, Sequence, Union
4+
5+
import fsspec
6+
from fsspec import AbstractFileSystem
7+
from fsspec.core import split_protocol
8+
from fsspec.utils import stringify_path
9+
10+
11+
class PrefixFileSystem(AbstractFileSystem):
    """Filesystem wrapper that treats every path as relative to ``prefix``.

    Each method joins ``prefix`` onto the incoming path(s) and delegates the
    call to the wrapped ``filesystem``. Listing-style results (``ls``,
    ``glob``) are converted back to paths relative to ``prefix``.

    Paths are joined with ``os.path.join``, so an absolute path replaces the
    prefix instead of being nested under it.
    """

    def __init__(
        self,
        prefix: str,
        filesystem: fsspec.AbstractFileSystem,
        *args,
        **storage_options,
    ) -> None:
        """Store the prefix and the filesystem every call is delegated to.

        Parameters
        ----------
        prefix:
            Path that all paths handled by this instance are relative to.
        filesystem:
            The wrapped filesystem that performs the actual operations.
        *args, **storage_options:
            Forwarded unchanged to ``AbstractFileSystem.__init__``.
        """
        super().__init__(*args, **storage_options)
        self.prefix = prefix
        self.filesystem = filesystem

    def _rewrite_paths(self, path, rewrite):
        """Apply ``rewrite`` to the protocol-less part of one or many paths.

        A single ``str``/``os.PathLike`` yields a single string; any other
        iterable yields a list with one rewritten entry per element. A
        protocol (``proto://``) found on the input is re-attached afterwards.

        Raises
        ------
        TypeError
            If ``path`` is neither path-like nor iterable.
        """
        # str must be tested before Iterable: strings are iterable themselves.
        if isinstance(path, (str, os.PathLike)):
            protocol, bare = split_protocol(stringify_path(path))
            bare = rewrite(bare)
            return bare if protocol is None else protocol + "://" + bare
        if isinstance(path, Iterable):
            return [self._rewrite_paths(item, rewrite) for item in path]
        raise TypeError(
            f"expected a path or an iterable of paths, got {type(path).__name__}"
        )

    def _add_fs_prefix(
        self, path: Union[str, Path, Iterable[Union[str, Path]]]
    ) -> Union[str, Sequence[str]]:
        """Return ``path`` (or each path in an iterable) joined onto the prefix."""
        return self._rewrite_paths(path, lambda p: os.path.join(self.prefix, p))

    def _remove_fs_prefix(
        self, path: Union[str, Path, Iterable[Union[str, Path]]]
    ) -> Union[str, Sequence[str]]:
        """Return ``path`` (or each path in an iterable) relative to the prefix."""
        return self._rewrite_paths(
            path, lambda p: os.path.relpath(p, start=self.prefix)
        )

    def mkdir(self, path: str, create_parents: bool = True, **kwargs) -> None:
        """Create the directory ``path`` (relative to the prefix)."""
        path = self._add_fs_prefix(path)
        return self.filesystem.mkdir(path=path, create_parents=create_parents, **kwargs)

    def makedirs(self, path: str, exist_ok: bool = False):
        """Create ``path`` and any missing parents (relative to the prefix)."""
        path = self._add_fs_prefix(path)
        # Delegate to the canonical name rather than the ``mkdirs`` alias.
        return self.filesystem.makedirs(path=path, exist_ok=exist_ok)

    def rmdir(self, path: str):
        """Remove the directory ``path`` (relative to the prefix)."""
        path = self._add_fs_prefix(path)
        return self.filesystem.rmdir(path=path)

    def ls(
        self,
        path: str,
        detail=False,
        **kwargs,
    ) -> Union[Sequence[str], Sequence[dict]]:
        """List ``path``; returned names are relative to the prefix.

        With ``detail`` true the wrapped filesystem's entry dicts are returned
        as copies whose ``"name"`` has the prefix removed; otherwise a list of
        prefix-relative names is returned.
        """
        path = self._add_fs_prefix(path)
        ls_out = self.filesystem.ls(path=path, detail=detail, **kwargs)
        if detail:
            # Build new dicts: the wrapped filesystem may hand out entries it
            # still owns, so they must not be modified in place.
            return [
                {**entry, "name": self._remove_fs_prefix(entry["name"])}
                for entry in ls_out
            ]
        return self._remove_fs_prefix(ls_out)

    def glob(self, path: str, **kwargs):
        """Expand the glob ``path``; matches are relative to the prefix."""
        path = self._add_fs_prefix(path)
        glob_out = self.filesystem.glob(path=path, **kwargs)
        return [self._remove_fs_prefix(x) for x in glob_out]

    def info(self, path: str, **kwargs):
        """Return the wrapped filesystem's info for ``path``, unmodified."""
        path = self._add_fs_prefix(path)
        return self.filesystem.info(path=path, **kwargs)

    def cp_file(self, path1: str, path2: str, **kwargs):
        """Copy ``path1`` to ``path2``; both are relative to the prefix."""
        path1 = self._add_fs_prefix(path1)
        path2 = self._add_fs_prefix(path2)
        return self.filesystem.cp_file(path1, path2, **kwargs)

    def get_file(self, path1: str, path2: str, callback=None, **kwargs):
        """Download ``path1`` (relative to the prefix) to the local ``path2``.

        ``path2`` names a local destination, so it is passed through
        unprefixed. ``callback`` is forwarded only when one is supplied, so
        the wrapped filesystem keeps its own default otherwise.
        """
        path1 = self._add_fs_prefix(path1)
        if callback is not None:
            kwargs["callback"] = callback
        return self.filesystem.get_file(path1, path2, **kwargs)

    def put_file(self, path1: str, path2: str, callback=None, **kwargs):
        """Upload the local ``path1`` to ``path2`` (relative to the prefix).

        ``path1`` names a local source, so it is passed through unprefixed.
        ``callback`` is forwarded only when one is supplied, so the wrapped
        filesystem keeps its own default otherwise.
        """
        path2 = self._add_fs_prefix(path2)
        if callback is not None:
            kwargs["callback"] = callback
        return self.filesystem.put_file(path1, path2, **kwargs)

    def mv_file(self, path1: str, path2: str, **kwargs):
        """Move ``path1`` to ``path2``; both are relative to the prefix."""
        path1 = self._add_fs_prefix(path1)
        path2 = self._add_fs_prefix(path2)
        return self.filesystem.mv_file(path1, path2, **kwargs)

    def rm_file(self, path: str):
        """Delete the single file ``path`` (relative to the prefix)."""
        path = self._add_fs_prefix(path)
        return self.filesystem.rm_file(path)

    def rm(self, path: str, recursive=False, maxdepth=None):
        """Delete ``path`` (relative to the prefix).

        ``recursive`` and ``maxdepth`` are forwarded to the wrapped filesystem.
        """
        path = self._add_fs_prefix(path)
        return self.filesystem.rm(path, recursive=recursive, maxdepth=maxdepth)

    def touch(self, path: str, **kwargs):
        """Touch ``path`` (relative to the prefix) on the wrapped filesystem."""
        path = self._add_fs_prefix(path)
        return self.filesystem.touch(path, **kwargs)

    def created(self, path: str):
        """Return the wrapped filesystem's creation time for ``path``."""
        path = self._add_fs_prefix(path)
        return self.filesystem.created(path)

    def modified(self, path: str):
        """Return the wrapped filesystem's modification time for ``path``."""
        path = self._add_fs_prefix(path)
        return self.filesystem.modified(path)

    def sign(self, path: str, expiration=100, **kwargs):
        """Return the wrapped filesystem's signed URL for ``path``.

        ``expiration`` is forwarded to the wrapped filesystem.
        """
        path = self._add_fs_prefix(path)
        return self.filesystem.sign(path, expiration=expiration, **kwargs)

    def cat(
        self,
        path: str,
        recursive: bool = False,
        on_error: str = "raise",
        **kwargs: Any,
    ):
        """Return the contents of ``path`` (one path or several) via the wrapped filesystem."""
        path = self._add_fs_prefix(path)
        return self.filesystem.cat(
            path, recursive=recursive, on_error=on_error, **kwargs
        )

    def __repr__(self) -> str:
        """Show the class name, the prefix and the wrapped filesystem."""
        return f"{self.__class__.__qualname__}(prefix='{self.prefix}', filesystem={self.filesystem})"
+135
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
from __future__ import absolute_import, division, print_function
2+
3+
import os
4+
import os.path
5+
import tempfile
6+
from contextlib import contextmanager
7+
8+
import pytest
9+
10+
import fsspec
11+
from fsspec.core import OpenFile
12+
from fsspec.implementations.local import make_path_posix
13+
from fsspec.implementations.prefix import PrefixFileSystem
14+
15+
# JSON-lines fixtures: maps a file name to the raw bytes stored under it.
files = {
    ".test.accounts.1.json": b"".join(
        [
            b'{"amount": 100, "name": "Alice"}\n',
            b'{"amount": 200, "name": "Bob"}\n',
            b'{"amount": 300, "name": "Charlie"}\n',
            b'{"amount": 400, "name": "Dennis"}\n',
        ]
    ),
    ".test.accounts.2.json": b"".join(
        [
            b'{"amount": 500, "name": "Alice"}\n',
            b'{"amount": 600, "name": "Bob"}\n',
            b'{"amount": 700, "name": "Charlie"}\n',
            b'{"amount": 800, "name": "Dennis"}\n',
        ]
    ),
}


# Small two-line CSV fixtures: maps a file name to the raw bytes stored under it.
csv_files = {
    ".test.fakedata.1.csv": b"".join([b"a,b\n", b"1,2\n"]),
    ".test.fakedata.2.csv": b"".join([b"a,b\n", b"3,4\n"]),
}
35+
# Working directory at import time; ``filetexts`` always returns here on exit.
odir = os.getcwd()


@contextmanager
def filetexts(d, open=open, mode="t"):
    """Dump a number of files into a fresh temporary directory.

    d - dict
        a mapping from filename to content like {'a.csv': '1,1\n2,2'}
    open - callable
        called as ``open(filename, "w" + mode)`` to create each file
    mode - str
        suffix appended to ``"w"``: ``"t"`` for text, ``"b"`` for bytes

    Yields the list of file names. Since this is meant for use in tests,
    the context manager switches to a temporary current directory while
    it is active, to avoid race conditions when running tests in parallel.
    On exit -- whether or not the body raised -- the files are removed, the
    working directory is restored to ``odir`` and the temporary directory
    is deleted.
    """
    import shutil  # local import: the module-level imports do not include it

    dirname = tempfile.mkdtemp()
    try:
        os.chdir(dirname)
        for filename, text in d.items():
            f = open(filename, "w" + mode)
            try:
                f.write(text)
            finally:
                # A custom ``open`` may return an object without ``close``.
                try:
                    f.close()
                except AttributeError:
                    pass

        yield list(d)
    finally:
        for filename in d:
            # Anchor at the temp dir so cleanup never depends on the current
            # working directory (an absolute file name is left unchanged).
            try:
                os.remove(os.path.join(dirname, filename))
            except OSError:
                pass
        # Leave the directory before deleting it.
        os.chdir(odir)
        shutil.rmtree(dirname, ignore_errors=True)
72+
73+
74+
def test_cats():
    """``cat`` returns whole files, byte ranges, and dicts for several paths."""
    first_name = ".test.fakedata.1.csv"
    second_name = ".test.fakedata.2.csv"
    first_data = b"a,b\n" b"1,2\n"
    second_data = b"a,b\n" b"3,4\n"

    with filetexts(csv_files, mode="b"):
        fs = PrefixFileSystem(prefix=".", filesystem=fsspec.filesystem("file"))

        # Whole-file reads, for a single path and for a list of paths.
        assert fs.cat(first_name) == first_data
        whole = fs.cat([first_name, second_name])
        assert set(whole.values()) == {first_data, second_data}
        assert fs.cat(first_name, None, None) == first_data

        # Byte-range reads must agree with slicing the expected bytes.
        assert fs.cat(first_name, start=1, end=6) == first_data[1:6]
        assert fs.cat(first_name, start=-1) == first_data[-1:]
        assert fs.cat(first_name, start=1, end=-2) == first_data[1:-2]

        sliced = fs.cat([first_name, second_name], start=1, end=-1)
        assert set(sliced.values()) == {first_data[1:-1], second_data[1:-1]}
92+
93+
94+
def test_not_found():
    """Opening a missing file through the wrapper raises an OS-level error."""
    local_fs = fsspec.filesystem("file")
    fs = PrefixFileSystem(prefix=".", filesystem=local_fs)
    missing_name = "not-a-file"

    expected_errors = (FileNotFoundError, OSError)
    with pytest.raises(expected_errors), OpenFile(fs, missing_name, mode="rb"):
        pass
100+
101+
102+
def test_isfile():
    """``isfile`` is true for fixture files, with or without a protocol."""
    local_fs = fsspec.filesystem("file")
    fs = PrefixFileSystem(prefix=".", filesystem=local_fs)

    with filetexts(files, mode="b"):
        for name in files:
            assert fs.isfile(name)
            assert fs.isfile("file://" + name)

        for missing in ("not-a-file", "file://not-a-file"):
            assert not fs.isfile(missing)
110+
111+
112+
def test_isdir():
    """``isdir`` is true for the fixtures' directory and false for the files."""
    local_fs = fsspec.filesystem("file")
    fs = PrefixFileSystem(prefix=".", filesystem=local_fs)

    with filetexts(files, mode="b"):
        for name in files:
            parent = os.path.dirname(os.path.abspath(name))
            assert fs.isdir(parent)
            assert not fs.isdir(name)

        assert not fs.isdir("not-a-dir")
119+
120+
121+
@pytest.mark.parametrize("prefix", ["/", "/tmp"])
def test_directories(tmpdir, prefix):
    """Create, list and remove a directory addressed relative to ``prefix``."""
    tmpdir = make_path_posix(str(tmpdir))
    rel_tmpdir = os.path.relpath(tmpdir, prefix)
    rel_child = rel_tmpdir + "/dir"
    abs_child = tmpdir + "/dir"

    local_fs = fsspec.filesystem("file")
    fs = PrefixFileSystem(prefix=prefix, filesystem=local_fs)

    fs.mkdir(rel_child)

    # The new directory is empty when listed through its absolute path.
    assert not fs.ls(abs_child)

    # It shows up in the parent listing under its prefix-relative name.
    assert rel_child in fs.ls(rel_tmpdir)
    detailed = fs.ls(rel_tmpdir, True)
    assert detailed[0]["type"] == "directory"

    fs.rmdir(rel_child)
    assert not fs.ls(rel_tmpdir)

0 commit comments

Comments
 (0)