From 2999925facbff456880c8c890f41fa11b6605609 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 7 Feb 2025 13:48:30 -0500 Subject: [PATCH 1/6] Add Collection.from_items --- pystac/collection.py | 70 ++++++++++++++++++++++++++++++++++++++++ tests/conftest.py | 14 +++++++- tests/test_collection.py | 56 ++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+), 1 deletion(-) diff --git a/pystac/collection.py b/pystac/collection.py index 891b3cc33..d68a97b4d 100644 --- a/pystac/collection.py +++ b/pystac/collection.py @@ -710,6 +710,76 @@ def from_dict( return collection + @classmethod + def from_items( + cls: type[C], + items: Iterable[Item] | pystac.ItemCollection, + *, + id: str | None = None, + strategy: HrefLayoutStrategy | None = None, + ) -> C: + """Create a :class:`Collection` from items or an :class:`ItemCollection`. + + Will try to pull collection attributes from :attr:`ItemCollection.extra_fields` + and items when possible. + + Args: + items : Iterable of :class:`~pystac.Item` instances to include in the + :class:`ItemCollection`. This can be an :class:`pystac.ItemCollection`. + id : Identifier for the collection. If not set, must be available on the + items and they must all match. + strategy : The layout strategy to use for setting the + HREFs of the catalog child objections and items. + If not provided, it will default to strategy of the parent and fallback + to :class:`~pystac.layout.BestPracticesLayoutStrategy`. + """ + + def extract(attr: str) -> Any: + """Extract attrs from items or item.properties as long as they all match""" + value = None + values = {getattr(item, attr, None) for item in items} + if len(values) == 1: + value = next(iter(values)) + if value is None: + values = {item.properties.get(attr, None) for item in items} + if len(values) == 1: + value = next(iter(values)) + return value + + if isinstance(items, pystac.ItemCollection): + extra_fields = deepcopy(items.extra_fields) + links = extra_fields.pop("links", {}) + providers = extra_fields.pop("providers", None) + if providers is not None: + providers = [pystac.Provider.from_dict(p) for p in providers] + else: + extra_fields = {} + links = {} + providers = [] + + id = id or extract("collection_id") + if id is None: + raise ValueError( + "Collection id must be defined. Either by specifying collection_id " + "on every item, or as a keyword argument to this function." + ) + + collection = cls( + id=id, + description=extract("description"), + extent=Extent.from_items(items), + title=extract("title"), + providers=providers, + extra_fields=extra_fields, + strategy=strategy, + ) + collection.add_items(items) + + for link in links: + collection.add_link(Link.from_dict(link)) + + return collection + def get_item(self, id: str, recursive: bool = False) -> Item | None: """Returns an item with a given ID. diff --git a/tests/conftest.py b/tests/conftest.py index 10a8b606e..86a3c3528 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,7 +9,7 @@ import pytest -from pystac import Asset, Catalog, Collection, Item, Link +from pystac import Asset, Catalog, Collection, Item, ItemCollection, Link from .utils import ARBITRARY_BBOX, ARBITRARY_EXTENT, ARBITRARY_GEOM, TestCases @@ -76,6 +76,18 @@ def sample_item() -> Item: return Item.from_file(TestCases.get_path("data-files/item/sample-item.json")) +@pytest.fixture +def sample_item_collection() -> ItemCollection: + return ItemCollection.from_file( + TestCases.get_path("data-files/item-collection/sample-item-collection.json") + ) + + +@pytest.fixture +def sample_items(sample_item_collection: ItemCollection) -> list[Item]: + return list(sample_item_collection) + + @pytest.fixture(scope="function") def tmp_asset(tmp_path: Path) -> Asset: """Copy the entirety of test-case-2 to tmp and""" diff --git a/tests/test_collection.py b/tests/test_collection.py index 5aafa1ea0..618689d5f 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -19,6 +19,7 @@ Collection, Extent, Item, + ItemCollection, Provider, SpatialExtent, TemporalExtent, @@ -711,3 +712,58 @@ def test_permissive_temporal_extent_deserialization(collection: Collection) -> N ]["interval"][0] with pytest.warns(UserWarning): Collection.from_dict(collection_dict) + + +@pytest.mark.parametrize("fixture_name", ("sample_item_collection", "sample_items")) +def test_from_items(fixture_name: str, request: pytest.FixtureRequest) -> None: + items = request.getfixturevalue(fixture_name) + collection = Collection.from_items(items) + + for item in items: + assert collection.id == item.collection_id + assert collection.extent.spatial.bboxes[0][0] <= item.bbox[0] + assert collection.extent.spatial.bboxes[0][1] <= item.bbox[1] + assert collection.extent.spatial.bboxes[0][2] >= item.bbox[2] + assert collection.extent.spatial.bboxes[0][3] >= item.bbox[3] + + start = collection.extent.temporal.intervals[0][0] + end = collection.extent.temporal.intervals[0][1] + assert start and start <= datetime.fromisoformat( + item.properties["start_datetime"] + ) + assert end and end >= datetime.fromisoformat(item.properties["end_datetime"]) + + if isinstance(items, ItemCollection): + expected = {(link["rel"], link["href"]) for link in items.extra_fields["links"]} + actual = {(link.rel, link.href) for link in collection.links} + assert expected.issubset(actual) + + +def test_from_items_pulls_from_properties() -> None: + item1 = Item( + id="test-item-1", + geometry=ARBITRARY_GEOM, + bbox=[-10, -20, 0, -10], + datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC), + collection="test-collection-1", + properties={"title": "Test Item", "description": "Extra words describing"}, + ) + collection = Collection.from_items([item1]) + assert collection.id == item1.collection_id + assert collection.title == item1.properties["title"] + assert collection.description == item1.properties["description"] + + +def test_from_items_without_collection_id() -> None: + item1 = Item( + id="test-item-1", + geometry=ARBITRARY_GEOM, + bbox=[-10, -20, 0, -10], + datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC), + properties={}, + ) + with pytest.raises(ValueError, match="Collection id must be defined."): + Collection.from_items([item1]) + + collection = Collection.from_items([item1], id="test-collection") + assert collection.id == "test-collection" From f51b87263d8694ad394c1de8a03eb39fa4a7cf66 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 7 Feb 2025 14:06:30 -0500 Subject: [PATCH 2/6] Fix datetime for python 3.10 --- tests/test_collection.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/test_collection.py b/tests/test_collection.py index 618689d5f..170520772 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -728,10 +728,8 @@ def test_from_items(fixture_name: str, request: pytest.FixtureRequest) -> None: start = collection.extent.temporal.intervals[0][0] end = collection.extent.temporal.intervals[0][1] - assert start and start <= datetime.fromisoformat( - item.properties["start_datetime"] - ) - assert end and end >= datetime.fromisoformat(item.properties["end_datetime"]) + assert start and start <= str_to_datetime(item.properties["start_datetime"]) + assert end and end >= str_to_datetime(item.properties["end_datetime"]) if isinstance(items, ItemCollection): expected = {(link["rel"], link["href"]) for link in items.extra_fields["links"]} From 9ffd92bf44d98efee076138b0afee750c3d0c002 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 7 Feb 2025 15:39:34 -0500 Subject: [PATCH 3/6] More tests, fix docs --- docs/conf.py | 1 + pystac/collection.py | 9 ++++--- tests/test_collection.py | 55 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index d6a3a667d..3c219eb28 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -250,6 +250,7 @@ rst_epilog = f".. |stac_version| replace:: {STACVersion.DEFAULT_STAC_VERSION}" nitpick_ignore = [ + ("py:class", "C"), ("py:class", "Datetime"), ("py:class", "L"), ("py:class", "pystac.summaries.T"), diff --git a/pystac/collection.py b/pystac/collection.py index d68a97b4d..d144e807d 100644 --- a/pystac/collection.py +++ b/pystac/collection.py @@ -718,14 +718,15 @@ def from_items( id: str | None = None, strategy: HrefLayoutStrategy | None = None, ) -> C: - """Create a :class:`Collection` from items or an :class:`ItemCollection`. + """Create a :class:`Collection` from iterable of items or an + :class:`~pystac.ItemCollection`. - Will try to pull collection attributes from :attr:`ItemCollection.extra_fields` - and items when possible. + Will try to pull collection attributes from + :attr:`~pystac.ItemCollection.extra_fields` and items when possible. Args: items : Iterable of :class:`~pystac.Item` instances to include in the - :class:`ItemCollection`. This can be an :class:`pystac.ItemCollection`. + :class:`Collection`. This can be a :class:`~pystac.ItemCollection`. id : Identifier for the collection. If not set, must be available on the items and they must all match. strategy : The layout strategy to use for setting the diff --git a/tests/test_collection.py b/tests/test_collection.py index 170520772..2279e21ce 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -765,3 +765,58 @@ def test_from_items_without_collection_id() -> None: collection = Collection.from_items([item1], id="test-collection") assert collection.id == "test-collection" + + +def test_from_items_with_collection_ids() -> None: + item1 = Item( + id="test-item-1", + geometry=ARBITRARY_GEOM, + bbox=[-10, -20, 0, -10], + datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC), + collection="test-collection-1", + properties={}, + ) + item2 = Item( + id="test-item-2", + geometry=ARBITRARY_GEOM, + bbox=[-15, -20, 0, -10], + datetime=datetime(2000, 2, 1, 13, 0, 0, 0, tzinfo=tz.UTC), + collection="test-collection-2", + properties={}, + ) + + with pytest.raises(ValueError, match="Collection id must be defined."): + Collection.from_items([item1, item2]) + + collection = Collection.from_items([item1, item2], id="test-collection") + assert collection.id == "test-collection" + + +def test_from_items_with_different_values() -> None: + item1 = Item( + id="test-item-1", + geometry=ARBITRARY_GEOM, + bbox=[-10, -20, 0, -10], + datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC), + properties={"title": "Test Item 1"}, + ) + item2 = Item( + id="test-item-2", + geometry=ARBITRARY_GEOM, + bbox=[-15, -20, 0, -10], + datetime=datetime(2000, 2, 1, 13, 0, 0, 0, tzinfo=tz.UTC), + properties={"title": "Test Item 2"}, + ) + + collection = Collection.from_items([item1, item2], id="test_collection") + assert collection.title is None + + +def test_from_items_with_providers(sample_item_collection: ItemCollection) -> None: + sample_item_collection.extra_fields["providers"] = [{"name": "pystac"}] + + collection = Collection.from_items(sample_item_collection) + assert collection.providers and len(collection.providers) == 1 + + provider = collection.providers[0] + assert provider and provider.name == "pystac" From fca309cea6cecaad51995be40893a8bae70ef60f Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 10 Feb 2025 09:14:54 -0500 Subject: [PATCH 4/6] Use `Collection` rather than TypeVar --- docs/api/pystac.rst | 1 + docs/conf.py | 1 - pystac/collection.py | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/api/pystac.rst b/docs/api/pystac.rst index a325710eb..e16c4692d 100644 --- a/docs/api/pystac.rst +++ b/docs/api/pystac.rst @@ -141,6 +141,7 @@ ItemCollection .. autoclass:: pystac.ItemCollection :members: :inherited-members: + :undoc-members: Link ---- diff --git a/docs/conf.py b/docs/conf.py index 3c219eb28..d6a3a667d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -250,7 +250,6 @@ rst_epilog = f".. |stac_version| replace:: {STACVersion.DEFAULT_STAC_VERSION}" nitpick_ignore = [ - ("py:class", "C"), ("py:class", "Datetime"), ("py:class", "L"), ("py:class", "pystac.summaries.T"), diff --git a/pystac/collection.py b/pystac/collection.py index d144e807d..40f301dc3 100644 --- a/pystac/collection.py +++ b/pystac/collection.py @@ -712,12 +712,12 @@ def from_dict( @classmethod def from_items( - cls: type[C], + cls: type[Collection], items: Iterable[Item] | pystac.ItemCollection, *, id: str | None = None, strategy: HrefLayoutStrategy | None = None, - ) -> C: + ) -> Collection: """Create a :class:`Collection` from iterable of items or an :class:`~pystac.ItemCollection`. From f44f4f1ab7ca873f83ce269ec40952daa50a714e Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 10 Feb 2025 09:17:27 -0500 Subject: [PATCH 5/6] Update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c6b825a7..33664c039 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- `Collection.from_items` for creating a `pystac.Collection` from an `ItemCollection` ([#1522](https://github.com/stac-utils/pystac/pull/1522)) + ### Fixed - Make sure that `VersionRange` has `VersionID`s rather than strings ([#1512](https://github.com/stac-utils/pystac/pull/1512)) From 798bcbe7b09e419a044c794d0cf94b4ba954aed3 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 10 Feb 2025 09:37:42 -0500 Subject: [PATCH 6/6] objections -> objects --- pystac/catalog.py | 2 +- pystac/collection.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pystac/catalog.py b/pystac/catalog.py index c1772599c..ae47787c0 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -131,7 +131,7 @@ class Catalog(STACObject): catalog_type : Optional catalog type for this catalog. Must be one of the values in :class:`~pystac.CatalogType`. strategy : The layout strategy to use for setting the - HREFs of the catalog child objections and items. + HREFs of the catalog child objects and items. If not provided, it will default to the strategy of the root and fallback to :class:`~pystac.layout.BestPracticesLayoutStrategy`. """ diff --git a/pystac/collection.py b/pystac/collection.py index 40f301dc3..200b2f10d 100644 --- a/pystac/collection.py +++ b/pystac/collection.py @@ -474,7 +474,7 @@ class Collection(Catalog, Assets): :class:`~pystac.Asset` values in the dictionary will have their :attr:`~pystac.Asset.owner` attribute set to the created Collection. strategy : The layout strategy to use for setting the - HREFs of the catalog child objections and items. + HREFs of the catalog child objects and items. If not provided, it will default to strategy of the parent and fallback to :class:`~pystac.layout.BestPracticesLayoutStrategy`. """ @@ -730,7 +730,7 @@ def from_items( id : Identifier for the collection. If not set, must be available on the items and they must all match. strategy : The layout strategy to use for setting the - HREFs of the catalog child objections and items. + HREFs of the catalog child objects and items. If not provided, it will default to strategy of the parent and fallback to :class:`~pystac.layout.BestPracticesLayoutStrategy`. """