Skip to content

Commit 3be5e1d

Browse files
author
Phil Varner
authored
Pv/one index per collection (#97)
* refactor to use one index per collection
* restrict queries involving collection to only those indices
1 parent 5f9e145 commit 3be5e1d

File tree

10 files changed

+286
-224
lines changed

10 files changed

+286
-224
lines changed

Diff for: .pre-commit-config.yaml

+11-5
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,17 @@ repos:
1717
# W503 black conflicts with "line break before operator" rule
1818
# E203 black conflicts with "whitespace before ':'" rule
1919
'--ignore=E501,W503,E203,C901' ]
20-
# - repo: https://github.com/pre-commit/mirrors-mypy
21-
# rev: v0.942
22-
# hooks:
23-
# - id: mypy
24-
# args: [--no-strict-optional, --ignore-missing-imports]
20+
- repo: https://github.com/pre-commit/mirrors-mypy
21+
rev: v0.942
22+
hooks:
23+
- id: mypy
24+
exclude: /tests/
25+
# --strict
26+
args: [--no-strict-optional, --ignore-missing-imports, --implicit-reexport]
27+
additional_dependencies: [
28+
"types-attrs",
29+
"types-requests"
30+
]
2531
- repo: https://github.com/PyCQA/pydocstyle
2632
rev: 6.1.1
2733
hooks:

Diff for: CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1919

2020
- Default to Python 3.10
2121
- Default to Elasticsearch 8.x
22+
- Collection objects are now stored in the `collections` index rather than the `stac_collections` index
23+
- Item objects are no longer stored in the `stac_items` index, but in per-collection indices named `items_{collection_id}`
2224

2325
### Removed
2426

Diff for: stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
CoreClient,
88
TransactionsClient,
99
)
10+
from stac_fastapi.elasticsearch.database_logic import create_collection_index
1011
from stac_fastapi.elasticsearch.extensions import QueryExtension
11-
from stac_fastapi.elasticsearch.indexes import IndexesClient
1212
from stac_fastapi.elasticsearch.session import Session
1313
from stac_fastapi.extensions.core import ( # FieldsExtension,
1414
ContextExtension,
@@ -44,11 +44,11 @@
4444

4545

4646
@app.on_event("startup")
47-
async def _startup_event():
48-
await IndexesClient().create_indexes()
47+
async def _startup_event() -> None:
48+
await create_collection_index()
4949

5050

51-
def run():
51+
def run() -> None:
5252
"""Run app from command line using uvicorn if available."""
5353
try:
5454
import uvicorn

Diff for: stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import os
33
from typing import Any, Dict, Set
44

5-
from elasticsearch import AsyncElasticsearch, Elasticsearch
5+
from elasticsearch import AsyncElasticsearch, Elasticsearch # type: ignore
66
from stac_fastapi.types.config import ApiSettings
77

88

Diff for: stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py

+40-28
Original file line numberDiff line numberDiff line change
@@ -52,31 +52,30 @@ class CoreClient(AsyncBaseCoreClient):
5252
async def all_collections(self, **kwargs) -> Collections:
5353
"""Read all collections from the database."""
5454
base_url = str(kwargs["request"].base_url)
55-
collection_list = await self.database.get_all_collections()
56-
collection_list = [
57-
self.collection_serializer.db_to_stac(c, base_url=base_url)
58-
for c in collection_list
59-
]
60-
61-
links = [
62-
{
63-
"rel": Relations.root.value,
64-
"type": MimeTypes.json,
65-
"href": base_url,
66-
},
67-
{
68-
"rel": Relations.parent.value,
69-
"type": MimeTypes.json,
70-
"href": base_url,
71-
},
72-
{
73-
"rel": Relations.self.value,
74-
"type": MimeTypes.json,
75-
"href": urljoin(base_url, "collections"),
76-
},
77-
]
7855

79-
return Collections(collections=collection_list, links=links)
56+
return Collections(
57+
collections=[
58+
self.collection_serializer.db_to_stac(c, base_url=base_url)
59+
for c in await self.database.get_all_collections()
60+
],
61+
links=[
62+
{
63+
"rel": Relations.root.value,
64+
"type": MimeTypes.json,
65+
"href": base_url,
66+
},
67+
{
68+
"rel": Relations.parent.value,
69+
"type": MimeTypes.json,
70+
"href": base_url,
71+
},
72+
{
73+
"rel": Relations.self.value,
74+
"type": MimeTypes.json,
75+
"href": urljoin(base_url, "collections"),
76+
},
77+
],
78+
)
8079

8180
@overrides
8281
async def get_collection(self, collection_id: str, **kwargs) -> Collection:
@@ -100,6 +99,8 @@ async def item_collection(
10099
limit=limit,
101100
token=token,
102101
sort=None,
102+
collection_ids=[collection_id],
103+
ignore_unavailable=False,
103104
)
104105

105106
items = [
@@ -276,6 +277,7 @@ async def post_search(
276277
limit=limit,
277278
token=search_request.token, # type: ignore
278279
sort=sort,
280+
collection_ids=search_request.collections,
279281
)
280282

281283
items = [
@@ -341,8 +343,11 @@ async def create_item(self, item: stac_types.Item, **kwargs) -> stac_types.Item:
341343
processed_items = [
342344
bulk_client.preprocess_item(item, base_url) for item in item["features"] # type: ignore
343345
]
346+
347+
# not a great way to get the collection_id-- should be part of the method signature
348+
collection_id = processed_items[0]["collection"]
344349
await self.database.bulk_async(
345-
processed_items, refresh=kwargs.get("refresh", False)
350+
collection_id, processed_items, refresh=kwargs.get("refresh", False)
346351
)
347352

348353
return None # type: ignore
@@ -355,12 +360,14 @@ async def create_item(self, item: stac_types.Item, **kwargs) -> stac_types.Item:
355360
async def update_item(self, item: stac_types.Item, **kwargs) -> stac_types.Item:
356361
"""Update item."""
357362
base_url = str(kwargs["request"].base_url)
363+
collection_id = item["collection"]
364+
358365
now = datetime_type.now(timezone.utc).isoformat().replace("+00:00", "Z")
359366
item["properties"]["updated"] = str(now)
360367

361-
await self.database.check_collection_exists(collection_id=item["collection"])
368+
await self.database.check_collection_exists(collection_id)
362369
# todo: index instead of delete and create
363-
await self.delete_item(item_id=item["id"], collection_id=item["collection"])
370+
await self.delete_item(item_id=item["id"], collection_id=collection_id)
364371
await self.create_item(item=item, **kwargs)
365372

366373
return ItemSerializer.db_to_stac(item, base_url)
@@ -440,6 +447,11 @@ def bulk_item_insert(
440447
self.preprocess_item(item, base_url) for item in items.items.values()
441448
]
442449

443-
self.database.bulk_sync(processed_items, refresh=kwargs.get("refresh", False))
450+
# not a great way to get the collection_id-- should be part of the method signature
451+
collection_id = processed_items[0]["collection"]
452+
453+
self.database.bulk_sync(
454+
collection_id, processed_items, refresh=kwargs.get("refresh", False)
455+
)
444456

445457
return f"Successfully added {len(processed_items)} Items."

0 commit comments

Comments
 (0)