Skip to content

Commit 2af7e74

Browse files
committed
When making self links absolute, also add file:// as the scheme
1 parent b052b2a commit 2af7e74

File tree

4 files changed

+55
-25
lines changed

4 files changed

+55
-25
lines changed

pystac/asset.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,12 @@
88

99
from pystac import MediaType, STACError, common_metadata, utils
1010
from pystac.html.jinja_env import get_jinja_env
11-
from pystac.utils import is_absolute_href, make_absolute_href, make_relative_href
11+
from pystac.utils import (
12+
is_absolute_href,
13+
make_absolute_href,
14+
make_relative_href,
15+
safe_urlparse,
16+
)
1217

1318
if TYPE_CHECKING:
1419
from pystac.common_metadata import CommonMetadata
@@ -380,7 +385,7 @@ def get_self_href(self) -> str | None:
380385

381386
def _absolute_href(href: str, owner: Assets | None, action: str = "access") -> str:
382387
if utils.is_absolute_href(href):
383-
return href
388+
return safe_urlparse(href).path
384389
else:
385390
item_self = owner.get_self_href() if owner else None
386391
if item_self is None:
@@ -389,4 +394,4 @@ def _absolute_href(href: str, owner: Assets | None, action: str = "access") -> s
389394
"and owner item is not set. Hint: try using "
390395
":func:`~pystac.Item.make_asset_hrefs_absolute`"
391396
)
392-
return utils.make_absolute_href(href, item_self)
397+
return safe_urlparse(utils.make_absolute_href(href, item_self)).path

pystac/link.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def __init__(
102102
self.rel = rel
103103
if isinstance(target, str):
104104
if rel == pystac.RelType.SELF:
105-
self._target_href = make_absolute_href(target)
105+
self._target_href = make_absolute_href(target, must_include_scheme=True)
106106
else:
107107
self._target_href = make_posix_style(target)
108108
self._target_object = None

pystac/stac_io.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ def read_text_from_href(self, href: str) -> str:
303303
except HTTPError as e:
304304
raise Exception(f"Could not read uri {href}") from e
305305
else:
306+
href = safe_urlparse(href).path
306307
with open(href, encoding="utf-8") as f:
307308
href_contents = f.read()
308309
return href_contents
@@ -328,7 +329,7 @@ def write_text_to_href(self, href: str, txt: str) -> None:
328329
"""
329330
if _is_url(href):
330331
raise NotImplementedError("DefaultStacIO cannot write to urls")
331-
href = os.fspath(href)
332+
href = safe_urlparse(href).path
332333
dirname = os.path.dirname(href)
333334
if dirname != "" and not os.path.isdir(dirname):
334335
os.makedirs(dirname)
@@ -391,7 +392,7 @@ def _report_duplicate_object_names(
391392

392393
def _is_url(href: str) -> bool:
393394
parsed = safe_urlparse(href)
394-
return parsed.scheme != ""
395+
return parsed.scheme != "" and parsed.scheme != "file"
395396

396397

397398
if HAS_URLLIB3:

pystac/utils.py

+43-19
Original file line numberDiff line numberDiff line change
@@ -288,34 +288,54 @@ def _make_absolute_href_path(
288288
parsed_source: URLParseResult,
289289
parsed_start: URLParseResult,
290290
start_is_dir: bool = False,
291+
must_include_scheme: bool = False,
291292
) -> str:
292-
# If the source is already absolute, just return it
293+
# If the source is already absolute and doesn't need a scheme, just return it
293294
if os.path.isabs(parsed_source.path):
294-
return urlunparse(parsed_source)
295-
296-
# If the start path is not a directory, get the parent directory
297-
start_dir = (
298-
parsed_start.path if start_is_dir else os.path.dirname(parsed_start.path)
299-
)
295+
if must_include_scheme:
296+
abs_path = parsed_source.path
297+
else:
298+
return urlunparse(parsed_source)
299+
else:
300+
# If the start path is not a directory, get the parent directory
301+
start_dir = (
302+
parsed_start.path if start_is_dir else os.path.dirname(parsed_start.path)
303+
)
300304

301-
# Join the start directory to the relative path and find the absolute path
302-
abs_path = make_posix_style(
303-
os.path.abspath(os.path.join(start_dir, parsed_source.path))
304-
)
305+
# Join the start directory to the relative path and find the absolute path
306+
abs_path = make_posix_style(
307+
os.path.abspath(os.path.join(start_dir, parsed_source.path))
308+
)
305309

306-
# Account for the normalization of abspath for
307-
# things like /vsitar// prefixes by replacing the
308-
# original start_dir text when abspath modifies the start_dir.
309-
if not start_dir == make_posix_style(os.path.abspath(start_dir)):
310-
abs_path = abs_path.replace(
311-
make_posix_style(os.path.abspath(start_dir)), start_dir
310+
# Account for the normalization of abspath for
311+
# things like /vsitar// prefixes by replacing the
312+
# original start_dir text when abspath modifies the start_dir.
313+
if not start_dir == make_posix_style(os.path.abspath(start_dir)):
314+
abs_path = abs_path.replace(
315+
make_posix_style(os.path.abspath(start_dir)), start_dir
316+
)
317+
318+
# add a scheme if there isn't one already
319+
if must_include_scheme:
320+
return urlunparse(
321+
(
322+
"file",
323+
parsed_start.netloc,
324+
abs_path,
325+
parsed_source.params,
326+
parsed_source.query,
327+
parsed_source.fragment,
328+
)
312329
)
313330

314331
return abs_path
315332

316333

317334
def make_absolute_href(
318-
source_href: str, start_href: str | None = None, start_is_dir: bool = False
335+
source_href: str,
336+
start_href: str | None = None,
337+
start_is_dir: bool = False,
338+
must_include_scheme: bool = False,
319339
) -> str:
320340
"""Returns a new string that represents ``source_href`` as an absolute path. If
321341
``source_href`` is already absolute it is returned unchanged. If ``source_href``
@@ -332,6 +352,8 @@ def make_absolute_href(
332352
start_is_dir : If ``True``, ``start_href`` is treated as a directory.
333353
Otherwise, ``start_href`` is considered to be a path to a file. Defaults to
334354
``False``.
355+
must_include_scheme : If ``True``, every output will have a scheme. This means
356+
that local filepaths will be prefixed with ``file://``. Defaults to ``False``.
335357
336358
Returns:
337359
str: The absolute HREF.
@@ -349,7 +371,9 @@ def make_absolute_href(
349371
if parsed_source.scheme != "" or parsed_start.scheme != "":
350372
return _make_absolute_href_url(parsed_source, parsed_start, start_is_dir)
351373
else:
352-
return _make_absolute_href_path(parsed_source, parsed_start, start_is_dir)
374+
return _make_absolute_href_path(
375+
parsed_source, parsed_start, start_is_dir, must_include_scheme
376+
)
353377

354378

355379
def is_absolute_href(href: str) -> bool:

0 commit comments

Comments
 (0)