materialsproject
diff --git a/‎emmet-api/emmet/api/core/documentation.py‎
Lines changed: 1 addition & 1 deletion b/‎emmet-api/emmet/api/core/documentation.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎emmet-api/emmet/api/query_operator/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎emmet-api/emmet/api/query_operator/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎emmet-api/emmet/api/query_operator/core.py‎
Lines changed: 110 additions & 1 deletion b/‎emmet-api/emmet/api/query_operator/core.py‎
Lines changed: 110 additions & 1 deletion
diff --git a/‎emmet-api/emmet/api/query_operator/identifier.py‎
Lines changed: 43 additions & 22 deletions b/‎emmet-api/emmet/api/query_operator/identifier.py‎
Lines changed: 43 additions & 22 deletions
diff --git a/‎emmet-api/emmet/api/routes/materials/grain_boundary/resources.py‎
Lines changed: 1 addition & 6 deletions b/‎emmet-api/emmet/api/routes/materials/grain_boundary/resources.py‎
Lines changed: 1 addition & 6 deletions
diff --git a/‎emmet-api/emmet/api/routes/materials/phonon/query_operators.py‎
Lines changed: 24 additions & 2 deletions b/‎emmet-api/emmet/api/routes/materials/phonon/query_operators.py‎
Lines changed: 24 additions & 2 deletions
diff --git a/‎emmet-api/emmet/api/routes/materials/phonon/resources.py‎
Lines changed: 7 additions & 9 deletions b/‎emmet-api/emmet/api/routes/materials/phonon/resources.py‎
Lines changed: 7 additions & 9 deletions
diff --git a/‎emmet-api/emmet/api/routes/materials/robocrys/query_operators.py‎
Lines changed: 1 addition & 1 deletion b/‎emmet-api/emmet/api/routes/materials/robocrys/query_operators.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎emmet-api/emmet/api/routes/materials/similarity/query_operators.py‎
Lines changed: 7 additions & 10 deletions b/‎emmet-api/emmet/api/routes/materials/similarity/query_operators.py‎
Lines changed: 7 additions & 10 deletions
@@ -250,7 +250,7 @@
     },
     {
         "name": "DOIs",
-        "description": "Route providing DOI and bibtex reference information for a material. \
+        "description": "Route providing DOI reference information for a material. \
             Note that this data may not be available for all materials in the Materials \
             Project database. See the `DOIDoc` schema for a full list of fields returned by this route.",
     },
 
@@ -1,6 +1,7 @@
 from emmet.api.query_operator.core import (
     BoolQuery,
     DeprecationQuery,
+    IdFormatQuery,
     InQuery,
     MultiMaterialIDQuery,
     MultiTaskIDQuery,
@@ -27,5 +28,6 @@
     "InQuery",
     "BoolQuery",
     "DeprecationQuery",
+    "IdFormatQuery",
     "RangeQuery",
 ]
@@ -3,7 +3,7 @@
 from collections.abc import Callable
 from dataclasses import dataclass, field
 from functools import partial
-from typing import Any
+from typing import Any, Protocol
 
 from fastapi import Query
 
@@ -176,3 +176,112 @@ def query(
         ),
     ) -> STORE_PARAMS:
         return self._prepare_query(material_ids)
+
+
+# Allowed values for the optional ``id_format`` query parameter. Anything not
+# in this set is treated as if the parameter was absent (no-op reformatting),
+# which is the safer default for backwards compatibility.
+_ID_FORMAT_VALUES = ("legacy", "alpha")
+
+
+class IdFormatter(Protocol):
+    """Callable signature for the formatters consumed by :class:`IdFormatQuery`.
+
+    Each registered formatter is invoked as
+    ``formatter(value, legacy=<bool>)`` against every truthy id-field value
+    on the response. ``legacy`` is passed by keyword to match the explicit
+    signatures of the canonical formatters in :mod:`emmet.core.types.typing`
+    and :mod:`emmet.core.xas`.
+    """
+
+    def __call__(self, value: Any, *, legacy: bool) -> str: ...
+
+
+@dataclass
+class IdFormatQuery(QueryOperator):
+    """Optional response-side reformatting of MP identifier fields.
+
+    Adds an ``id_format`` query parameter to an endpoint and, on
+    ``post_process``, rewrites the identifier fields on each returned
+    document according to the requested shape:
+
+    - ``id_format=legacy`` -> ``mp-149`` / ``mp-2658_Al`` / ``mp-779827-XANES-O-K``
+    - ``id_format=alpha``  -> ``mp-aaaaaaft`` / ``mp-aaaaadyg_Al`` / ``aaabsjpj-XANES-O-K``
+    - parameter absent (or any other value) -> documents are returned with
+      identifier fields exactly as the database stores them; no rewriting
+      is attempted.
+
+    This is purely a serialization concern: ``query()`` returns an empty
+    criteria dict so this operator never affects which documents the
+    database returns. It only mutates the response payload.
+
+    Constructor takes a list of ``(field_name, formatter)`` tuples. Each
+    formatter must be a callable with signature ``formatter(value, *, legacy: bool) -> str``
+    and must be fault-tolerant (i.e. return the input unchanged on parse
+    failure, never raise). The canonical formatters live in
+    :mod:`emmet.core.types.typing` (``format_identifier``,
+    ``format_compound_identifier``, ``format_task_id``) and
+    :mod:`emmet.core.xas` (``format_spectrum_id``).
+
+    Example registration:
+
+    .. code-block:: python
+
+        from emmet.core.types.typing import format_identifier, format_task_id
+        from emmet.core.xas import format_spectrum_id
+
+        # /materials/summary/
+        IdFormatQuery(id_fields=[("material_id", format_identifier)])
+
+        # /materials/xas/
+        IdFormatQuery(id_fields=[
+            ("task_id", format_task_id),
+            ("spectrum_id", format_spectrum_id),
+        ])
+
+    Attributes:
+        id_fields: A list of ``(field_name, formatter)`` tuples describing
+            which fields on each returned document to rewrite and how.
+            Fields that are absent from a given document (e.g. due to
+            sparse-fields projection) or hold a falsy value are left untouched.
+    """
+
+    id_fields: list[tuple[str, IdFormatter]] = field(default_factory=list)
+
+    def query(
+        self,
+        id_format: str | None = Query(
+            None,
+            description=(
+                "Optional. If set to 'legacy', MP identifier fields in the "
+                "response are returned in the form 'mp-149'. If set to "
+                "'alpha', they are returned in the padded AlphaID form "
+                "'mp-aaaaaaft'. If omitted (or set to any other value), "
+                "identifiers are returned in their stored form. This is a "
+                "purely cosmetic transform; query inputs accept either "
+                "shape regardless."
+            ),
+        ),
+    ) -> STORE_PARAMS:
+        # The store query is empty — this operator only affects response
+        # serialization. The ``id_format`` value is threaded through the
+        # returned ``STORE_PARAMS`` so ``post_process`` can read it back.
+        return {"criteria": {}, "id_format": id_format}
+
+    def post_process(self, docs: list[dict], query: dict) -> list[dict]:
+        fmt = query.get("id_format")
+        if fmt not in _ID_FORMAT_VALUES:
+            # Absent / invalid value -> no-op. We deliberately do not 400
+            # on a bad value: existing clients that send an unrecognized
+            # value continue to receive a valid response.
+            return docs
+
+        legacy = fmt == "legacy"
+        for doc in docs:
+            if not isinstance(doc, dict):
+                continue
+            for field_name, formatter in self.id_fields:
+                value = doc.get(field_name)
+                if value:
+                    doc[field_name] = formatter(value, legacy=legacy)
+        return docs
@@ -1,8 +1,9 @@
 from abc import abstractmethod
 from dataclasses import dataclass
+from fastapi import Query
+from inspect import Parameter, Signature
 
 from emmet.api.query_operator import QueryOperator
-from emmet.api.utils import STORE_PARAMS
 from emmet.core.types.typing import CompoundIDType
 
 
@@ -38,29 +39,49 @@ def validate_identifer(idx: str) -> CompoundIDType:
     def num_suffixes(self) -> int:
         return len(self.identifier_fields) - 1
 
-    def query(self, **kwargs) -> STORE_PARAMS:
+    def __post_init__(self) -> None:
 
-        identifiers = {
-            v.strip() for v in (kwargs.get(f"{self.field_name}s") or "").split(",") if v
-        }
-        if len(identifiers) == 0:
-            return {"criteria": {}}
-
-        identifiers_as_components = [
-            self.validate_identifer(idx) for idx in identifiers
+        field = f"{self.field_name}s"
+        params = [
+            Parameter(
+                field,
+                Parameter.KEYWORD_ONLY,
+                default=Query(
+                    default=None,
+                    description=f"Comma-separated list of {self.field_name} values to query on.",
+                ),
+                annotation=str,
+            )
         ]
 
-        components = {
-            self.identifier_fields[0]: {
-                str(component["identifier"]) for component in identifiers_as_components
-            },
-            **{
-                suffix: {
-                    component["suffix"][i].value
+        def _query(**kwargs):
+
+            identifiers = {v.strip() for v in (kwargs.get(field) or "").split(",") if v}
+            if len(identifiers) == 0:
+                return {"criteria": {}}
+
+            identifiers_as_components = [
+                self.validate_identifer(idx) for idx in identifiers
+            ]
+
+            components = {
+                self.identifier_fields[0]: {
+                    str(component["identifier"])
                     for component in identifiers_as_components
-                }
-                for i, suffix in enumerate(self.identifier_fields[1:])
-            },
-        }
+                },
+                **{
+                    suffix: {
+                        component["suffix"][i].value
+                        for component in identifiers_as_components
+                    }
+                    for i, suffix in enumerate(self.identifier_fields[1:])
+                },
+            }
+
+            return {"criteria": {k: {"$in": sorted(v)} for k, v in components.items()}}
+
+        self.query = _query  # type: ignore
+        self.query.__signature__ = Signature(params)  # type: ignore
 
-        return {"criteria": {k: {"$in": sorted(v)} for k, v in components.items()}}
+    def query(self):
+        """Stub query function for abstract class."""
@@ -1,5 +1,3 @@
-from functools import partial
-
 from emmet.api.core.global_header import GlobalHeaderProcessor
 from emmet.api.core.settings import MAPISettings
 from emmet.api.query_operator import (
@@ -10,7 +8,6 @@
 )
 from emmet.api.resource import ReadOnlyResource
 from emmet.api.routes.materials.grain_boundary.query_operators import GBStructureQuery
-from emmet.api.utils import process_identifiers
 from emmet.core.grain_boundary import GrainBoundaryDoc
 
 
@@ -19,9 +16,7 @@ def gb_resource(gb_store):
         gb_store,
         GrainBoundaryDoc,
         query_operators=[
-            MultiMaterialIDQuery(
-                pre_processor=partial(process_identifiers, use_prefix=False)
-            ),
+            MultiMaterialIDQuery(),
             NumericQuery(
                 model=GrainBoundaryDoc, excluded_fields=["rotation_axis", "gb_plane"]
             ),
 
@@ -1,9 +1,31 @@
 from __future__ import annotations
 
+from collections.abc import Callable
+from dataclasses import dataclass, field
+from functools import partial
+
 from fastapi import Query
 
-from emmet.api.query_operator import QueryOperator
-from emmet.api.utils import STORE_PARAMS
+from emmet.api.query_operator import InQuery, QueryOperator
+from emmet.api.utils import STORE_PARAMS, process_identifiers
+
+
+@dataclass
+class MultiPhononIDQuery(InQuery):
+    """Generate a query for different phonon ids."""
+
+    field_name: str = "identifier"
+    pre_processor: Callable[[str], list[str]] = field(
+        default=partial(process_identifiers, use_prefix=False)
+    )
+
+    def query(
+        self,
+        identifiers: str | None = Query(
+            None, description="Comma-separated list of phonon_ids to query on"
+        ),
+    ) -> STORE_PARAMS:
+        return self._prepare_query(identifiers)
 
 
 class PhononMethodQuery(QueryOperator):
 
@@ -1,19 +1,17 @@
 from emmet.api.core.global_header import GlobalHeaderProcessor
 from emmet.api.core.settings import MAPISettings
-from emmet.api.query_operator import (
-    MultiMaterialIDQuery,
-    NumericQuery,
-    PaginationQuery,
-    SparseFieldsQuery,
-)
+from emmet.api.query_operator import NumericQuery, PaginationQuery, SparseFieldsQuery
 from emmet.api.resource import ReadOnlyResource
 from emmet.api.routes.materials.materials.query_operators import (
     ChemsysQuery,
     ElementsQuery,
     FormulaQuery,
     SymmetryQuery,
 )
-from emmet.api.routes.materials.phonon.query_operators import PhononMethodQuery
+from emmet.api.routes.materials.phonon.query_operators import (
+    MultiPhononIDQuery,
+    PhononMethodQuery,
+)
 from emmet.core.phonon import PhononBSDOSDoc
 
 
@@ -22,7 +20,7 @@ def phonon_bsdos_resource(phonon_bs_store):
         phonon_bs_store,
         PhononBSDOSDoc,
         query_operators=[
-            MultiMaterialIDQuery(),
+            MultiPhononIDQuery(),
             FormulaQuery(),
             ChemsysQuery(),
             ElementsQuery(),
@@ -31,7 +29,7 @@ def phonon_bsdos_resource(phonon_bs_store):
             NumericQuery(model=PhononBSDOSDoc, excluded_fields=["composition"]),
             PaginationQuery(),
             SparseFieldsQuery(
-                PhononBSDOSDoc, default_fields=["material_id", "last_updated"]
+                PhononBSDOSDoc, default_fields=["identifier", "last_updated"]
             ),
         ],
         header_processor=GlobalHeaderProcessor(),
 
@@ -53,7 +53,7 @@ def query(
         return {"pipeline": pipeline}
 
     def post_process(self, docs, query):
-        self.total_doc = docs[0]["meta"]["count"]["total"]
+        self.total_doc = 0 if len(docs) == 0 else docs[0]["meta"]["count"]["total"]
         return docs
 
     def meta(self):
 
@@ -53,7 +53,7 @@ def query(
         )
         if method is None:
             try:
-                method = next(
+                sim_method = next(
                     method
                     for method, fvlen in SIM_METHOD_TO_FEAT_VEC_LENGTH.items()
                     if fvlen == len(feature_vector)
@@ -64,13 +64,15 @@ def query(
                     f"length = {len(feature_vector)} matches no known embedding method."
                 )
         elif isinstance(method, str):
-            method = (
+            sim_method = (
                 SimilarityMethod[method]
                 if method in SimilarityMethod.__members__
                 else SimilarityMethod(method)
             )
+        else:
+            sim_method = method
 
-        ref_fv_len = SIM_METHOD_TO_FEAT_VEC_LENGTH[method]
+        ref_fv_len = SIM_METHOD_TO_FEAT_VEC_LENGTH[sim_method]
 
         if (
             not isinstance(feature_vector, list | tuple)
@@ -80,17 +82,12 @@ def query(
             raise HTTPException(
                 status_code=400,
                 detail=(
-                    f"Invalid feature vector for method {method.value}: "  # type: ignore[union-attr]
+                    f"Invalid feature vector for method {sim_method.value}: "
                     f"should be a list of {ref_fv_len} floats.",
                 ),
             )
 
-        index_name = "similarity_feature_vector"
-        # because MongoDB does not permit renaming indexes,
-        # and I was not forward thinking in naming it.
-        # TODO: homogenize once we have other data built out
-        if method != SimilarityMethod.CRYSTALNN:
-            index_name += f"_{method.value.lower()}"  # type: ignore[union-attr]
+        index_name = f"similarity_feature_vector_{sim_method.value.lower()}"
 
         pipeline = [
             {
Original file line number	Diff line number	Diff line change
`@@ -250,7 +250,7 @@`
`250`	`250`	`},`
`251`	`251`	`{`
`252`	`252`	`"name": "DOIs",`
`253`		`- "description": "Route providing DOI and bibtex reference information for a material. \`
	`253`	`+ "description": "Route providing DOI reference information for a material. \`
`254`	`254`	`Note that this data may not be available for all materials in the Materials \`
`255`	`255`	``Project database. See the `DOIDoc` schema for a full list of fields returned by this route.",``
`256`	`256`	`},`