From 3b4d247d204b071e6a6d88362865c30598562516 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Sun, 3 Nov 2024 13:19:30 -0500 Subject: [PATCH] STAC API: implement queryable collections (#1039) --- docs/stac.rst | 9 +++ pycsw/core/repository.py | 9 ++- pycsw/stac/api.py | 59 +++++++++++++++---- .../stac_api/test_stac_api_functional.py | 19 ++++-- 4 files changed, 77 insertions(+), 19 deletions(-) diff --git a/docs/stac.rst b/docs/stac.rst index a57624705..4f4f828cc 100644 --- a/docs/stac.rst +++ b/docs/stac.rst @@ -61,6 +61,15 @@ Request Examples http://localhost:8000/stac/openapi # collections http://localhost:8000/stac/collections + # collections query, full text search + http://localhost:8000/stac/collections?q=sentinel + # collections query, spatial query + http://localhost:8000/stac/collections?bbox=-142,42,-52,84 + # collections query, full text search and spatial query + http://localhost:8000/stac/collections?q=sentinel&bbox=-142,42,-52,84 + # collections query, limiting results + http://localhost:8000/stac/collections?limit=2 + # collections query, spatial query # single collection http://localhost:8000/stac/collections/metadata:main # collection queryables, all records diff --git a/pycsw/core/repository.py b/pycsw/core/repository.py index d074d4a94..544046bda 100644 --- a/pycsw/core/repository.py +++ b/pycsw/core/repository.py @@ -77,6 +77,7 @@ def create_engine(clazz, url): # for sqlite < 0.7, we need to to this on a per-connection basis if engine.name in ['sqlite', 'sqlite3'] and __version__ >= '0.7': from sqlalchemy import event + @event.listens_for(engine, "connect") def connect(dbapi_connection, connection_rec): create_custom_sql_functions(dbapi_connection) @@ -335,7 +336,7 @@ def query_ids(self, ids): query = self.session.query(self.dataset).filter(column.in_(ids)) return self._get_repo_filter(query).all() - def query_collections(self): + def query_collections(self, filters=None, limit=10): ''' Query for parent collections ''' column = 
getattr(self.dataset, @@ -352,7 +353,11 @@ def query_collections(self): query = self.session.query(self.dataset).filter(column.in_(ids)) - return self._get_repo_filter(query).all() + if filters is not None: + LOGGER.debug('Querying repository with additional filters') + return self._get_repo_filter(query).filter(filters).limit(limit).all() + + return self._get_repo_filter(query).limit(limit).all() def query_domain(self, domain, typenames, domainquerytype='list', count=False): diff --git a/pycsw/stac/api.py b/pycsw/stac/api.py index 377a62b2b..566da8f59 100644 --- a/pycsw/stac/api.py +++ b/pycsw/stac/api.py @@ -35,9 +35,9 @@ from pygeofilter.parsers.ecql import parse as parse_ecql from pycsw import __version__ -from pycsw.ogc.api.oapi import gen_oapi -from pycsw.ogc.api.records import API from pycsw.core.pygeofilter_evaluate import to_filter +from pycsw.ogc.api.oapi import gen_oapi +from pycsw.ogc.api.records import API, build_anytext from pycsw.core.util import geojson_geometry2bbox LOGGER = logging.getLogger(__name__) @@ -54,6 +54,7 @@ CONFORMANCE_CLASSES = [ 'http://www.opengis.net/spec/ogcapi-common-1/1.0/conf/core', 'http://www.opengis.net/spec/ogcapi-common-2/1.0/conf/collections', + 'http://www.opengis.net/spec/ogcapi-common-2/1.0/conf/simple-query', 'http://www.opengis.net/spec/ogcapi-features-1/1.0/conf/core', 'http://www.opengis.net/spec/ogcapi-features-3/1.0/conf/queryables', 'http://www.opengis.net/spec/ogcapi-features-3/1.0/conf/queryables-query-parameters', @@ -66,7 +67,9 @@ 'https://api.stacspec.org/v1.0.0/ogcapi-features', 'https://api.stacspec.org/v1.0.0/item-search', 'https://api.stacspec.org/v1.0.0/item-search#filter', - 'https://api.stacspec.org/v1.0.0/item-search#free-text' + 'https://api.stacspec.org/v1.0.0/item-search#free-text', + 'https://api.stacspec.org/v1.0.0-rc.1/collection-search', + 'https://api.stacspec.org/v1.0.0-rc.1/collection-search#free-text' ] @@ -205,13 +208,41 @@ def collections(self, headers_, args): collections = [] - 
LOGGER.debug('Generating default metadata:main collection') - collection_info = self.get_collection_info() - - collections.append(collection_info) + # LOGGER.debug('Generating default metadata:main collection') + # collection_info = self.get_collection_info() + # collections.append(collection_info) LOGGER.debug('Generating virtual collections') - virtual_collections = self.repository.query_collections() + + filters = None + query_args = [] + + LOGGER.debug('Handling collection level search') + for k, v in args.items(): + if k == 'bbox': + query_args.append(f'BBOX(geometry, {v})') + elif k == 'datetime': + if '/' not in v: + query_args.append(f'date = "{v}"') + else: + begin, end = v.split('/') + if begin != '..': + query_args.append(f'time_begin >= "{begin}"') + if end != '..': + query_args.append(f'time_end <= "{end}"') + elif k == 'q': + if v not in [None, '']: + query_args.append(build_anytext('anytext', v)) + + limit = int(args.get('limit', self.config['server']['maxrecords'])) + + if query_args: + ast = parse_ecql(' AND '.join(query_args)) + LOGGER.debug(f'Abstract syntax tree: {ast}') + filters = to_filter(ast, self.repository.dbtype, self.repository.query_mappings) + LOGGER.debug(f'Filter: {filters}') + + virtual_collections = self.repository.query_collections(filters, limit) for virtual_collection in virtual_collections: virtual_collection_info = self.get_collection_info( @@ -226,13 +257,14 @@ def collections(self, headers_, args): } LOGGER.debug('Generating STAC collections') - mapping = {'typename': self.repository.dataset.typename} - ast = parse_ecql("typename = 'stac:Collection'") + + query_args.append("typename = 'stac:Collection'") + ast = parse_ecql(' AND '.join(query_args)) LOGGER.debug(f'Abstract syntax tree: {ast}') - filters = to_filter(ast, self.repository.dbtype, mapping) + filters = to_filter(ast, self.repository.dbtype, self.repository.query_mappings) LOGGER.debug(f'Filter: {filters}') sc_query = self.repository.session.query( - 
self.repository.dataset).filter(filters).all() + self.repository.dataset).filter(filters).limit(limit).all() for sc in sc_query: response['collections'].append(self.get_collection_info( @@ -265,6 +297,7 @@ def collections(self, headers_, args): 'href': self.config['server']['url'] }] + response['collections'] = response['collections'][:limit] response['numberMatched'] = len(response['collections']) response['numberReturned'] = len(response['collections']) @@ -434,7 +467,7 @@ def get_collection_info(self, collection_name: str = 'metadata:main', :param collection_name: name of collection default is 'metadata:main' main collection - :param collection_info: `dict` of collecton info + :param collection_info: `dict` of collection info :returns: `dict` of collection """ diff --git a/tests/functionaltests/suites/stac_api/test_stac_api_functional.py b/tests/functionaltests/suites/stac_api/test_stac_api_functional.py index 102c729fb..3d85a931e 100644 --- a/tests/functionaltests/suites/stac_api/test_stac_api_functional.py +++ b/tests/functionaltests/suites/stac_api/test_stac_api_functional.py @@ -49,7 +49,7 @@ def test_landing_page(config): assert content['stac_version'] == '1.0.0' assert content['type'] == 'Catalog' - assert len(content['conformsTo']) == 15 + assert len(content['conformsTo']) == 18 assert len(content['keywords']) == 3 @@ -70,13 +70,16 @@ def test_conformance(config): assert headers['Content-Type'] == 'application/json' assert status == 200 - assert len(content['conformsTo']) == 15 + assert len(content['conformsTo']) == 18 conformances = [ + 'http://www.opengis.net/spec/ogcapi-common-2/1.0/conf/simple-query', 'https://api.stacspec.org/v1.0.0/core', 'https://api.stacspec.org/v1.0.0/ogcapi-features', 'https://api.stacspec.org/v1.0.0/item-search', - 'https://api.stacspec.org/v1.0.0/item-search#filter' + 'https://api.stacspec.org/v1.0.0/item-search#filter', + 'https://api.stacspec.org/v1.0.0-rc.1/collection-search', + 
'https://api.stacspec.org/v1.0.0-rc.1/collection-search#free-text' ] for conformance in conformances: @@ -92,10 +95,18 @@ def test_collections(config): assert status == 200 assert len(content['links']) == 3 - assert len(content['collections']) == 1 + assert len(content['collections']) == 0 assert len(content['collections']) == content['numberMatched'] assert len(content['collections']) == content['numberReturned'] + headers, status, content = api.collections({}, {'limit': 0, 'f': 'json'}) + content = json.loads(content) + + assert headers['Content-Type'] == 'application/json' + assert status == 200 + assert len(content['collections']) == 0 + + def test_queryables(config): api = STACAPI(config) headers, status, content = api.queryables({}, {})