diff --git a/pystac/asset.py b/pystac/asset.py index e42b8237b..141938d01 100644 --- a/pystac/asset.py +++ b/pystac/asset.py @@ -8,7 +8,12 @@ from pystac import MediaType, STACError, common_metadata, utils from pystac.html.jinja_env import get_jinja_env -from pystac.utils import is_absolute_href, make_absolute_href, make_relative_href +from pystac.utils import ( + is_absolute_href, + make_absolute_href, + make_relative_href, + safe_urlparse, +) if TYPE_CHECKING: from pystac.common_metadata import CommonMetadata @@ -380,7 +385,7 @@ def get_self_href(self) -> str | None: def _absolute_href(href: str, owner: Assets | None, action: str = "access") -> str: if utils.is_absolute_href(href): - return href + return safe_urlparse(href).path else: item_self = owner.get_self_href() if owner else None if item_self is None: @@ -389,4 +394,4 @@ def _absolute_href(href: str, owner: Assets | None, action: str = "access") -> s "and owner item is not set. Hint: try using " ":func:`~pystac.Item.make_asset_hrefs_absolute`" ) - return utils.make_absolute_href(href, item_self) + return safe_urlparse(utils.make_absolute_href(href, item_self)).path diff --git a/pystac/link.py b/pystac/link.py index f88061090..1d7d97bad 100644 --- a/pystac/link.py +++ b/pystac/link.py @@ -102,7 +102,7 @@ def __init__( self.rel = rel if isinstance(target, str): if rel == pystac.RelType.SELF: - self._target_href = make_absolute_href(target) + self._target_href = make_absolute_href(target, must_include_scheme=True) else: self._target_href = make_posix_style(target) self._target_object = None diff --git a/pystac/utils.py b/pystac/utils.py index 4d702c187..3496d1ae2 100644 --- a/pystac/utils.py +++ b/pystac/utils.py @@ -307,27 +307,44 @@ def _make_absolute_href_path( parsed_source: URLParseResult, parsed_start: URLParseResult, start_is_dir: bool = False, + must_include_scheme: bool = False, ) -> str: - # If the source is already absolute, just return it + # If the source is already absolute and doesn't need a scheme just return it if os.path.isabs(parsed_source.path): - return urlunparse(parsed_source) - - # If the start path is not a directory, get the parent directory - start_dir = ( - parsed_start.path if start_is_dir else os.path.dirname(parsed_start.path) - ) + if must_include_scheme: + abs_path = parsed_source.path + else: + return urlunparse(parsed_source) + else: + # If the start path is not a directory, get the parent directory + start_dir = ( + parsed_start.path if start_is_dir else os.path.dirname(parsed_start.path) + ) - # Join the start directory to the relative path and find the absolute path - abs_path = make_posix_style( - os.path.abspath(os.path.join(start_dir, parsed_source.path)) - ) + # Join the start directory to the relative path and find the absolute path + abs_path = make_posix_style( + os.path.abspath(os.path.join(start_dir, parsed_source.path)) + ) - # Account for the normalization of abspath for - # things like /vsitar// prefixes by replacing the - # original start_dir text when abspath modifies the start_dir. - if not start_dir == make_posix_style(os.path.abspath(start_dir)): - abs_path = abs_path.replace( - make_posix_style(os.path.abspath(start_dir)), start_dir + # Account for the normalization of abspath for + # things like /vsitar// prefixes by replacing the + # original start_dir text when abspath modifies the start_dir. + if not start_dir == make_posix_style(os.path.abspath(start_dir)): + abs_path = abs_path.replace( + make_posix_style(os.path.abspath(start_dir)), start_dir + ) + + # add a scheme if there isn't one already + if must_include_scheme: + return urlunparse( + ( + "file", + parsed_start.netloc, + abs_path, + parsed_source.params, + parsed_source.query, + parsed_source.fragment, + ) ) if parsed_source.scheme or parsed_start.scheme: @@ -337,7 +354,10 @@ def _make_absolute_href_path( def make_absolute_href( - source_href: str, start_href: str | None = None, start_is_dir: bool = False + source_href: str, + start_href: str | None = None, + start_is_dir: bool = False, + must_include_scheme: bool = False, ) -> str: """Returns a new string that represents ``source_href`` as an absolute path. If ``source_href`` is already absolute it is returned unchanged. If ``source_href`` @@ -354,6 +374,8 @@ def make_absolute_href( start_is_dir : If ``True``, ``start_href`` is treated as a directory. Otherwise, ``start_href`` is considered to be a path to a file. Defaults to ``False``. + must_include_scheme : If ``True``, every output will have a scheme. This means + that local filepaths will be prefixed with `file://`. Defaults to ``False``. Returns: str: The absolute HREF. @@ -374,7 +396,9 @@ def make_absolute_href( ]: return _make_absolute_href_url(parsed_source, parsed_start, start_is_dir) else: - return _make_absolute_href_path(parsed_source, parsed_start, start_is_dir) + return _make_absolute_href_path( + parsed_source, parsed_start, start_is_dir, must_include_scheme + ) def is_absolute_href(href: str) -> bool: diff --git a/tests/test_asset.py b/tests/test_asset.py index b41113403..165fddac7 100644 --- a/tests/test_asset.py +++ b/tests/test_asset.py @@ -21,9 +21,9 @@ def test_alter_asset_absolute_path( assert asset.get_absolute_href() == new_href assert os.path.exists(new_href) if action == "move": - assert not os.path.exists(old_href) + assert not os.path.exists(old_href.replace("file://", "")) elif action == "copy": - assert os.path.exists(old_href) + assert os.path.exists(old_href.replace("file://", "")) @pytest.mark.parametrize("action", ["copy", "move"]) @@ -38,11 +38,11 @@ def test_alter_asset_relative_path(action: str, tmp_asset: pystac.Asset) -> None assert asset.href == new_href href = asset.get_absolute_href() assert href is not None - assert os.path.exists(href) + assert os.path.exists(href.replace("file://", "")) if action == "move": - assert not os.path.exists(old_href) + assert not os.path.exists(old_href.replace("file://", "")) elif action == "copy": - assert os.path.exists(old_href) + assert os.path.exists(old_href.replace("file://", "")) @pytest.mark.parametrize("action", ["copy", "move"]) @@ -82,18 +82,18 @@ def test_delete_asset(tmp_asset: pystac.Asset) -> None: asset = tmp_asset href = asset.get_absolute_href() assert href is not None - assert os.path.exists(href) + assert os.path.exists(href.replace("file://", "")) asset.delete() - assert not os.path.exists(href) + assert not os.path.exists(href.replace("file://", "")) def test_delete_asset_relative_no_owner_fails(tmp_asset: pystac.Asset) -> None: asset = tmp_asset href = asset.get_absolute_href() assert href is not None - assert os.path.exists(href) + assert os.path.exists(href.replace("file://", "")) asset.owner = None @@ -101,4 +101,4 @@ def test_delete_asset_relative_no_owner_fails(tmp_asset: pystac.Asset) -> None: asset.delete() assert asset.href in str(e.value) - assert os.path.exists(href) + assert os.path.exists(href.replace("file://", "")) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 45b7aa536..b28976977 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -541,7 +541,7 @@ def test_save_with_different_stac_io(self) -> None: assert len(hrefs) == stac_io.mock.write_text.call_count for call_args_list in stac_io.mock.write_text.call_args_list: - assert call_args_list[0][0] in hrefs + assert f"file://{call_args_list[0][0]}" in hrefs def test_subcatalogs_saved_to_correct_path(self) -> None: with tempfile.TemporaryDirectory() as tmp_dir: diff --git a/tests/test_collection.py b/tests/test_collection.py index c339a8d10..b5ac60a6b 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -691,7 +691,7 @@ def test_delete_asset_relative_no_self_link_fails( assert asset.href in str(e.value) assert name in collection.assets - assert os.path.exists(href) + assert os.path.exists(href.replace("file://", "")) def test_permissive_temporal_extent_deserialization(collection: Collection) -> None: diff --git a/tests/test_item.py b/tests/test_item.py index a1c39f12e..beed81a2d 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -92,11 +92,11 @@ def test_asset_absolute_href(self) -> None: item.set_self_href(item_path) rel_asset = Asset("./data.geojson") rel_asset.set_owner(item) - expected_href = make_posix_style( + expected_filepath = make_posix_style( os.path.abspath(os.path.join(os.path.dirname(item_path), "./data.geojson")) ) actual_href = rel_asset.get_absolute_href() - self.assertEqual(expected_href, actual_href) + self.assertEqual(f"file://{expected_filepath}", actual_href) def test_asset_absolute_href_no_item_self(self) -> None: item_dict = self.get_example_item_dict() @@ -612,7 +612,7 @@ def test_delete_asset_relative_no_self_link_fails(tmp_asset: pystac.Asset) -> No assert asset.href in str(e.value) assert name in item.assets - assert os.path.exists(href) + assert os.path.exists(href.replace("file://", "")) def test_resolve_collection_with_root( diff --git a/tests/test_layout.py b/tests/test_layout.py index ad3996ed2..7281c2861 100644 --- a/tests/test_layout.py +++ b/tests/test_layout.py @@ -418,7 +418,9 @@ class AsIsLayoutStrategyTest(unittest.TestCase): def setUp(self) -> None: self.strategy = AsIsLayoutStrategy() self.expected_local_href = ( - "/an/href" if not path_includes_drive_letter() else "D:/an/href" + "file:///an/href" + if not path_includes_drive_letter() + else "file://D:/an/href" ) def test_catalog(self) -> None: @@ -429,6 +431,7 @@ def test_catalog(self) -> None: href = self.strategy.get_href( cat, parent_dir="https://example.com", is_root=True ) + self.assertEqual(href, self.expected_local_href) def test_collection(self) -> None: @@ -442,6 +445,7 @@ def test_collection(self) -> None: href = self.strategy.get_href( collection, parent_dir="https://example.com", is_root=True ) + self.assertEqual(href, self.expected_local_href) def test_item(self) -> None: @@ -452,6 +456,7 @@ def test_item(self) -> None: self.strategy.get_href(item, parent_dir="http://example.com") item.set_self_href("/an/href") href = self.strategy.get_href(item, parent_dir="http://example.com") + self.assertEqual(href, self.expected_local_href) diff --git a/tests/test_link.py b/tests/test_link.py index c7ecdb6cc..7d8fe5579 100644 --- a/tests/test_link.py +++ b/tests/test_link.py @@ -107,7 +107,9 @@ def test_resolved_self_href(self) -> None: link = catalog.get_single_link(pystac.RelType.SELF) assert link link.resolve_stac_object() - self.assertEqual(link.get_absolute_href(), make_posix_style(path)) + self.assertEqual( + link.get_absolute_href(), f"file://{make_posix_style(path)}" + ) def test_target_getter_setter(self) -> None: link = pystac.Link("my rel", target="./foo/bar.json") @@ -140,7 +142,10 @@ def test_relative_self_href(self) -> None: item = pystac.read_file("item.json") href = item.get_self_href() assert href - self.assertTrue(os.path.isabs(href), f"Not an absolute path: {href}") + self.assertTrue( + os.path.isabs(href.replace("file://", "")), + f"Not an absolute path: {href}", + ) finally: os.chdir(previous) @@ -237,7 +242,10 @@ def test_from_dict_round_trip(self) -> None: d2 = pystac.Link.from_dict(d).to_dict() self.assertEqual(d, d2) d = {"rel": "self", "href": "t"} - d2 = {"rel": "self", "href": make_posix_style(os.path.join(os.getcwd(), "t"))} + d2 = { + "rel": "self", + "href": f"file://{make_posix_style(os.path.join(os.getcwd(), 't'))}", + } self.assertEqual(pystac.Link.from_dict(d).to_dict(), d2) def test_from_dict_failures(self) -> None: @@ -333,7 +341,7 @@ def test_relative_self_link(tmp_path: Path) -> None: assert read_item asset_href = read_item.assets["data"].get_absolute_href() assert asset_href - assert Path(asset_href).exists() + assert Path(asset_href.replace("file://", "")).exists() @pytest.mark.parametrize("rel", HIERARCHICAL_LINKS) diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index d3f5dd416..1d92ef4ff 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -125,7 +125,9 @@ def test_validate_all_dict(self, test_case: pystac.Catalog) -> None: dst_dir = os.path.join(tmp_dir, "catalog") # Copy test case 7 to the temporary directory catalog_href = get_opt(TestCases.case_7().get_self_href()) - shutil.copytree(os.path.dirname(catalog_href), dst_dir) + shutil.copytree( + os.path.dirname(catalog_href.replace("file://", "")), dst_dir + ) new_cat_href = os.path.join(dst_dir, "catalog.json")