Skip to content

Commit e22dc2b

Browse files
authored
Merge pull request #1453 from materialsproject/release-v0.87.0
Release v0.87.0
2 parents 0ac0b62 + 5916d3e commit e22dc2b

82 files changed

Lines changed: 2304 additions & 1012 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

emmet-api/emmet/api/core/documentation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@
250250
},
251251
{
252252
"name": "DOIs",
253-
"description": "Route providing DOI and bibtex reference information for a material. \
253+
"description": "Route providing DOI reference information for a material. \
254254
Note that this data may not be available for all materials in the Materials \
255255
Project database. See the `DOIDoc` schema for a full list of fields returned by this route.",
256256
},

emmet-api/emmet/api/query_operator/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from emmet.api.query_operator.core import (
22
BoolQuery,
33
DeprecationQuery,
4+
IdFormatQuery,
45
InQuery,
56
MultiMaterialIDQuery,
67
MultiTaskIDQuery,
@@ -27,5 +28,6 @@
2728
"InQuery",
2829
"BoolQuery",
2930
"DeprecationQuery",
31+
"IdFormatQuery",
3032
"RangeQuery",
3133
]

emmet-api/emmet/api/query_operator/core.py

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from collections.abc import Callable
44
from dataclasses import dataclass, field
55
from functools import partial
6-
from typing import Any
6+
from typing import Any, Protocol
77

88
from fastapi import Query
99

@@ -176,3 +176,112 @@ def query(
176176
),
177177
) -> STORE_PARAMS:
178178
return self._prepare_query(material_ids)
179+
180+
181+
# Allowed values for the optional ``id_format`` query parameter. Anything not
182+
# in this set is treated as if the parameter was absent (no-op reformatting),
183+
# which is the safer default for backwards compatibility.
184+
_ID_FORMAT_VALUES = ("legacy", "alpha")
185+
186+
187+
class IdFormatter(Protocol):
188+
"""Callable signature for the formatters consumed by :class:`IdFormatQuery`.
189+
190+
Each registered formatter is invoked as
191+
``formatter(value, legacy=<bool>)`` against every truthy id-field value
192+
on the response. ``legacy`` is passed by keyword to match the explicit
193+
signatures of the canonical formatters in :mod:`emmet.core.types.typing`
194+
and :mod:`emmet.core.xas`.
195+
"""
196+
197+
def __call__(self, value: Any, *, legacy: bool) -> str: ...
198+
199+
200+
@dataclass
201+
class IdFormatQuery(QueryOperator):
202+
"""Optional response-side reformatting of MP identifier fields.
203+
204+
Adds an ``id_format`` query parameter to an endpoint and, on
205+
``post_process``, rewrites the identifier fields on each returned
206+
document according to the requested shape:
207+
208+
- ``id_format=legacy`` -> ``mp-149`` / ``mp-2658_Al`` / ``mp-779827-XANES-O-K``
209+
- ``id_format=alpha`` -> ``mp-aaaaaaft`` / ``mp-aaaaadyg_Al`` / ``aaabsjpj-XANES-O-K``
210+
- parameter absent (or any other value) -> documents are returned with
211+
identifier fields exactly as the database stores them; no rewriting
212+
is attempted.
213+
214+
This is purely a serialization concern: ``query()`` returns an empty
215+
criteria dict so this operator never affects which documents the
216+
database returns. It only mutates the response payload.
217+
218+
Constructor takes a list of ``(field_name, formatter)`` tuples. Each
219+
formatter must be a callable returning ``str``, invoked as ``formatter(value, legacy=<bool>)``,
220+
and must be fault-tolerant (i.e. return the input unchanged on parse
221+
failure, never raise). The canonical formatters live in
222+
:mod:`emmet.core.types.typing` (``format_identifier``,
223+
``format_compound_identifier``, ``format_task_id``) and
224+
:mod:`emmet.core.xas` (``format_spectrum_id``).
225+
226+
Example registration:
227+
228+
.. code-block:: python
229+
230+
from emmet.core.types.typing import format_identifier, format_task_id
231+
from emmet.core.xas import format_spectrum_id
232+
233+
# /materials/summary/
234+
IdFormatQuery(id_fields=[("material_id", format_identifier)])
235+
236+
# /materials/xas/
237+
IdFormatQuery(id_fields=[
238+
("task_id", format_task_id),
239+
("spectrum_id", format_spectrum_id),
240+
])
241+
242+
Attributes:
243+
id_fields: A list of ``(field_name, formatter)`` tuples describing
244+
which fields on each returned document to rewrite and how.
245+
Fields that are absent from a given document (e.g. due to
246+
sparse-fields projection) are silently skipped.
247+
"""
248+
249+
id_fields: list[tuple[str, IdFormatter]] = field(default_factory=list)
250+
251+
def query(
252+
self,
253+
id_format: str | None = Query(
254+
None,
255+
description=(
256+
"Optional. If set to 'legacy', MP identifier fields in the "
257+
"response are returned in the form 'mp-149'. If set to "
258+
"'alpha', they are returned in the padded AlphaID form "
259+
"'mp-aaaaaaft'. If omitted (or set to any other value), "
260+
"identifiers are returned in their stored form. This is a "
261+
"purely cosmetic transform; query inputs accept either "
262+
"shape regardless."
263+
),
264+
),
265+
) -> STORE_PARAMS:
266+
# The store query is empty — this operator only affects response
267+
# serialization. The ``id_format`` value is threaded through the
268+
# returned ``STORE_PARAMS`` so ``post_process`` can read it back.
269+
return {"criteria": {}, "id_format": id_format}
270+
271+
def post_process(self, docs: list[dict], query: dict) -> list[dict]:
272+
fmt = query.get("id_format")
273+
if fmt not in _ID_FORMAT_VALUES:
274+
# Absent / invalid value -> no-op. We deliberately do not 400
275+
# on a bad value: clients that send an unrecognized ``id_format`` value
276+
# continue to receive a valid response.
277+
return docs
278+
279+
legacy = fmt == "legacy"
280+
for doc in docs:
281+
if not isinstance(doc, dict):
282+
continue
283+
for field_name, formatter in self.id_fields:
284+
value = doc.get(field_name)
285+
if value:
286+
doc[field_name] = formatter(value, legacy=legacy)
287+
return docs

emmet-api/emmet/api/query_operator/identifier.py

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from abc import abstractmethod
22
from dataclasses import dataclass
3+
from fastapi import Query
4+
from inspect import Parameter, Signature
35

46
from emmet.api.query_operator import QueryOperator
5-
from emmet.api.utils import STORE_PARAMS
67
from emmet.core.types.typing import CompoundIDType
78

89

@@ -38,29 +39,49 @@ def validate_identifer(idx: str) -> CompoundIDType:
3839
def num_suffixes(self) -> int:
3940
return len(self.identifier_fields) - 1
4041

41-
def query(self, **kwargs) -> STORE_PARAMS:
42+
def __post_init__(self) -> None:
4243

43-
identifiers = {
44-
v.strip() for v in (kwargs.get(f"{self.field_name}s") or "").split(",") if v
45-
}
46-
if len(identifiers) == 0:
47-
return {"criteria": {}}
48-
49-
identifiers_as_components = [
50-
self.validate_identifer(idx) for idx in identifiers
44+
field = f"{self.field_name}s"
45+
params = [
46+
Parameter(
47+
field,
48+
Parameter.KEYWORD_ONLY,
49+
default=Query(
50+
default=None,
51+
description=f"Comma-separated list of {self.field_name} values to query on.",
52+
),
53+
annotation=str,
54+
)
5155
]
5256

53-
components = {
54-
self.identifier_fields[0]: {
55-
str(component["identifier"]) for component in identifiers_as_components
56-
},
57-
**{
58-
suffix: {
59-
component["suffix"][i].value
57+
def _query(**kwargs):
58+
59+
identifiers = {v.strip() for v in (kwargs.get(field) or "").split(",") if v}
60+
if len(identifiers) == 0:
61+
return {"criteria": {}}
62+
63+
identifiers_as_components = [
64+
self.validate_identifer(idx) for idx in identifiers
65+
]
66+
67+
components = {
68+
self.identifier_fields[0]: {
69+
str(component["identifier"])
6070
for component in identifiers_as_components
61-
}
62-
for i, suffix in enumerate(self.identifier_fields[1:])
63-
},
64-
}
71+
},
72+
**{
73+
suffix: {
74+
component["suffix"][i].value
75+
for component in identifiers_as_components
76+
}
77+
for i, suffix in enumerate(self.identifier_fields[1:])
78+
},
79+
}
80+
81+
return {"criteria": {k: {"$in": sorted(v)} for k, v in components.items()}}
82+
83+
self.query = _query # type: ignore
84+
self.query.__signature__ = Signature(params) # type: ignore
6585

66-
return {"criteria": {k: {"$in": sorted(v)} for k, v in components.items()}}
86+
def query(self):
87+
"""Stub query function for abstract class."""

emmet-api/emmet/api/routes/materials/grain_boundary/resources.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from functools import partial
2-
31
from emmet.api.core.global_header import GlobalHeaderProcessor
42
from emmet.api.core.settings import MAPISettings
53
from emmet.api.query_operator import (
@@ -10,7 +8,6 @@
108
)
119
from emmet.api.resource import ReadOnlyResource
1210
from emmet.api.routes.materials.grain_boundary.query_operators import GBStructureQuery
13-
from emmet.api.utils import process_identifiers
1411
from emmet.core.grain_boundary import GrainBoundaryDoc
1512

1613

@@ -19,9 +16,7 @@ def gb_resource(gb_store):
1916
gb_store,
2017
GrainBoundaryDoc,
2118
query_operators=[
22-
MultiMaterialIDQuery(
23-
pre_processor=partial(process_identifiers, use_prefix=False)
24-
),
19+
MultiMaterialIDQuery(),
2520
NumericQuery(
2621
model=GrainBoundaryDoc, excluded_fields=["rotation_axis", "gb_plane"]
2722
),

emmet-api/emmet/api/routes/materials/phonon/query_operators.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,31 @@
11
from __future__ import annotations
22

3+
from collections.abc import Callable
4+
from dataclasses import dataclass, field
5+
from functools import partial
6+
37
from fastapi import Query
48

5-
from emmet.api.query_operator import QueryOperator
6-
from emmet.api.utils import STORE_PARAMS
9+
from emmet.api.query_operator import InQuery, QueryOperator
10+
from emmet.api.utils import STORE_PARAMS, process_identifiers
11+
12+
13+
@dataclass
14+
class MultiPhononIDQuery(InQuery):
15+
"""Generate a query for different phonon ids."""
16+
17+
field_name: str = "identifier"
18+
pre_processor: Callable[[str], list[str]] = field(
19+
default=partial(process_identifiers, use_prefix=False)
20+
)
21+
22+
def query(
23+
self,
24+
identifiers: str | None = Query(
25+
None, description="Comma-separated list of phonon_ids to query on"
26+
),
27+
) -> STORE_PARAMS:
28+
return self._prepare_query(identifiers)
729

830

931
class PhononMethodQuery(QueryOperator):

emmet-api/emmet/api/routes/materials/phonon/resources.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
11
from emmet.api.core.global_header import GlobalHeaderProcessor
22
from emmet.api.core.settings import MAPISettings
3-
from emmet.api.query_operator import (
4-
MultiMaterialIDQuery,
5-
NumericQuery,
6-
PaginationQuery,
7-
SparseFieldsQuery,
8-
)
3+
from emmet.api.query_operator import NumericQuery, PaginationQuery, SparseFieldsQuery
94
from emmet.api.resource import ReadOnlyResource
105
from emmet.api.routes.materials.materials.query_operators import (
116
ChemsysQuery,
127
ElementsQuery,
138
FormulaQuery,
149
SymmetryQuery,
1510
)
16-
from emmet.api.routes.materials.phonon.query_operators import PhononMethodQuery
11+
from emmet.api.routes.materials.phonon.query_operators import (
12+
MultiPhononIDQuery,
13+
PhononMethodQuery,
14+
)
1715
from emmet.core.phonon import PhononBSDOSDoc
1816

1917

@@ -22,7 +20,7 @@ def phonon_bsdos_resource(phonon_bs_store):
2220
phonon_bs_store,
2321
PhononBSDOSDoc,
2422
query_operators=[
25-
MultiMaterialIDQuery(),
23+
MultiPhononIDQuery(),
2624
FormulaQuery(),
2725
ChemsysQuery(),
2826
ElementsQuery(),
@@ -31,7 +29,7 @@ def phonon_bsdos_resource(phonon_bs_store):
3129
NumericQuery(model=PhononBSDOSDoc, excluded_fields=["composition"]),
3230
PaginationQuery(),
3331
SparseFieldsQuery(
34-
PhononBSDOSDoc, default_fields=["material_id", "last_updated"]
32+
PhononBSDOSDoc, default_fields=["identifier", "last_updated"]
3533
),
3634
],
3735
header_processor=GlobalHeaderProcessor(),

emmet-api/emmet/api/routes/materials/robocrys/query_operators.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def query(
5353
return {"pipeline": pipeline}
5454

5555
def post_process(self, docs, query):
56-
self.total_doc = docs[0]["meta"]["count"]["total"]
56+
self.total_doc = 0 if len(docs) == 0 else docs[0]["meta"]["count"]["total"]
5757
return docs
5858

5959
def meta(self):

emmet-api/emmet/api/routes/materials/similarity/query_operators.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def query(
5353
)
5454
if method is None:
5555
try:
56-
method = next(
56+
sim_method = next(
5757
method
5858
for method, fvlen in SIM_METHOD_TO_FEAT_VEC_LENGTH.items()
5959
if fvlen == len(feature_vector)
@@ -64,13 +64,15 @@ def query(
6464
f"length = {len(feature_vector)} matches no known embedding method."
6565
)
6666
elif isinstance(method, str):
67-
method = (
67+
sim_method = (
6868
SimilarityMethod[method]
6969
if method in SimilarityMethod.__members__
7070
else SimilarityMethod(method)
7171
)
72+
else:
73+
sim_method = method
7274

73-
ref_fv_len = SIM_METHOD_TO_FEAT_VEC_LENGTH[method]
75+
ref_fv_len = SIM_METHOD_TO_FEAT_VEC_LENGTH[sim_method]
7476

7577
if (
7678
not isinstance(feature_vector, list | tuple)
@@ -80,17 +82,12 @@ def query(
8082
raise HTTPException(
8183
status_code=400,
8284
detail=(
83-
f"Invalid feature vector for method {method.value}: " # type: ignore[union-attr]
85+
f"Invalid feature vector for method {sim_method.value}: "
8486
f"should be a list of {ref_fv_len} floats.",
8587
),
8688
)
8789

88-
index_name = "similarity_feature_vector"
89-
# because MongoDB does not permit renaming indexes,
90-
# and I was not forward thinking in naming it.
91-
# TODO: homogenize once we have other data built out
92-
if method != SimilarityMethod.CRYSTALNN:
93-
index_name += f"_{method.value.lower()}" # type: ignore[union-attr]
90+
index_name = f"similarity_feature_vector_{sim_method.value.lower()}"
9491

9592
pipeline = [
9693
{

0 commit comments

Comments
 (0)