Skip to content

Commit 24e16e4

Browse files
jsignell and gadomski authored
Allow links to have file:// prefix - but don't write them that way by default (#1489)
* Allow reading and writing when href startswith file:/// * Add test of reading and writing to hrefs starting with file:/// * Ensure that file:/// are interpreted as absolute urls * Try to fix windows * Try to fix windows * Try to fix windows * Add some print debugging * Add more print debugging * Add more print debugging * Moved is_absolute to os dependent test * Fix os-dependent test * Strip initial slash and see if it passes * Add more print debugging * Just strip off the leading slash * Only for windows * Fix windows handling --------- Co-authored-by: Pete Gadomski <[email protected]>
1 parent 43cfdec commit 24e16e4

File tree

4 files changed

+58
-6
lines changed

4 files changed

+58
-6
lines changed

pystac/stac_io.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ def read_text_from_href(self, href: str) -> str:
303303
except HTTPError as e:
304304
raise Exception(f"Could not read uri {href}") from e
305305
else:
306+
href = safe_urlparse(href).path
306307
with open(href, encoding="utf-8") as f:
307308
href_contents = f.read()
308309
return href_contents
@@ -328,7 +329,7 @@ def write_text_to_href(self, href: str, txt: str) -> None:
328329
"""
329330
if _is_url(href):
330331
raise NotImplementedError("DefaultStacIO cannot write to urls")
331-
href = os.fspath(href)
332+
href = safe_urlparse(href).path
332333
dirname = os.path.dirname(href)
333334
if dirname != "" and not os.path.isdir(dirname):
334335
os.makedirs(dirname)
@@ -391,7 +392,7 @@ def _report_duplicate_object_names(
391392

392393
def _is_url(href: str) -> bool:
393394
parsed = safe_urlparse(href)
394-
return parsed.scheme != ""
395+
return parsed.scheme not in ["", "file"]
395396

396397

397398
if HAS_URLLIB3:

pystac/utils.py

+28-3
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,25 @@ def safe_urlparse(href: str) -> URLParseResult:
7171
query=parsed.query,
7272
fragment=parsed.fragment,
7373
)
74+
75+
# Windows drives sometimes get parsed as the netloc and sometimes
76+
# as part of the parsed.path.
77+
if parsed.scheme == "file" and os.name == "nt":
78+
if parsed.netloc:
79+
path = f"{parsed.netloc}{parsed.path}"
80+
elif parsed.path.startswith("/") and ":" in parsed.path:
81+
path = parsed.path[1:]
82+
else:
83+
path = parsed.path
84+
85+
return URLParseResult(
86+
scheme=parsed.scheme,
87+
netloc="",
88+
path=path,
89+
params=parsed.params,
90+
query=parsed.query,
91+
fragment=parsed.fragment,
92+
)
7493
else:
7594
return parsed
7695

@@ -246,7 +265,7 @@ def make_relative_href(
246265
):
247266
return source_href
248267

249-
if parsed_start.scheme == "":
268+
if parsed_start.scheme in ["", "file"]:
250269
return _make_relative_href_path(parsed_source, parsed_start, start_is_dir)
251270
else:
252271
return _make_relative_href_url(parsed_source, parsed_start, start_is_dir)
@@ -311,6 +330,9 @@ def _make_absolute_href_path(
311330
make_posix_style(os.path.abspath(start_dir)), start_dir
312331
)
313332

333+
if parsed_source.scheme or parsed_start.scheme:
334+
abs_path = f"file://{abs_path}"
335+
314336
return abs_path
315337

316338

@@ -346,7 +368,10 @@ def make_absolute_href(
346368
parsed_start = safe_urlparse(start_href)
347369
parsed_source = safe_urlparse(source_href)
348370

349-
if parsed_source.scheme != "" or parsed_start.scheme != "":
371+
if parsed_source.scheme not in ["", "file"] or parsed_start.scheme not in [
372+
"",
373+
"file",
374+
]:
350375
return _make_absolute_href_url(parsed_source, parsed_start, start_is_dir)
351376
else:
352377
return _make_absolute_href_path(parsed_source, parsed_start, start_is_dir)
@@ -364,7 +389,7 @@ def is_absolute_href(href: str) -> bool:
364389
bool: ``True`` if the given HREF is absolute, ``False`` if it is relative.
365390
"""
366391
parsed = safe_urlparse(href)
367-
return parsed.scheme != "" or os.path.isabs(parsed.path)
392+
return parsed.scheme not in ["", "file"] or os.path.isabs(parsed.path)
368393

369394

370395
def datetime_to_str(dt: datetime, timespec: str = "auto") -> str:

tests/test_stac_io.py

+9
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,15 @@ def test_read_write_collection(self) -> None:
2424
pystac.write_file(collection, dest_href=dest_href)
2525
self.assertTrue(os.path.exists(dest_href), msg="File was not written.")
2626

27+
def test_read_write_collection_with_file_protocol(self) -> None:
28+
collection = pystac.read_file(
29+
"file://" + TestCases.get_path("data-files/collections/multi-extent.json")
30+
)
31+
with tempfile.TemporaryDirectory() as tmp_dir:
32+
dest_href = os.path.join(tmp_dir, "collection.json")
33+
pystac.write_file(collection, dest_href="file://" + dest_href)
34+
self.assertTrue(os.path.exists(dest_href), msg="File was not written.")
35+
2736
def test_read_item(self) -> None:
2837
item = pystac.read_file(TestCases.get_path("data-files/item/sample-item.json"))
2938
with tempfile.TemporaryDirectory() as tmp_dir:

tests/test_utils.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ def test_make_relative_href(self) -> None:
3333
("/a/catalog.json", "/a/b/c/catalog.json", "../../catalog.json"),
3434
("/a/b/c/d/", "/a/b/c/catalog.json", "./d/"),
3535
("/a/b/c/d/.dotfile", "/a/b/c/d/catalog.json", "./.dotfile"),
36+
(
37+
"file:///a/b/c/d/catalog.json",
38+
"file:///a/b/c/catalog.json",
39+
"./d/catalog.json",
40+
),
3641
]
3742

3843
for source_href, start_href, expected in test_cases:
@@ -161,11 +166,22 @@ def test_make_absolute_href(self) -> None:
161166
"https://stacspec.org/a/b/item.json",
162167
),
163168
("http://localhost:8000", None, "http://localhost:8000"),
169+
("item.json", "file:///a/b/c/catalog.json", "file:///a/b/c/item.json"),
170+
(
171+
"./z/item.json",
172+
"file:///a/b/c/catalog.json",
173+
"file:///a/b/c/z/item.json",
174+
),
175+
("file:///a/b/c/item.json", None, "file:///a/b/c/item.json"),
164176
]
165177

166178
for source_href, start_href, expected in test_cases:
167179
actual = make_absolute_href(source_href, start_href)
168-
_, actual = os.path.splitdrive(actual)
180+
if expected.startswith("file://"):
181+
_, actual = os.path.splitdrive(actual.replace("file://", ""))
182+
actual = f"file://{actual}"
183+
else:
184+
_, actual = os.path.splitdrive(actual)
169185
self.assertEqual(actual, expected)
170186

171187
def test_make_absolute_href_on_vsitar(self) -> None:
@@ -234,6 +250,7 @@ def test_is_absolute_href_os_aware(self) -> None:
234250
test_cases = [
235251
("/item.json", not incl_drive_letter),
236252
("/home/someuser/Downloads/item.json", not incl_drive_letter),
253+
("file:///home/someuser/Downloads/item.json", not incl_drive_letter),
237254
("d:/item.json", is_windows),
238255
("c:/files/more_files/item.json", is_windows),
239256
]

0 commit comments

Comments
 (0)