Skip to content

Commit 6c66f57

Browse files
authored
DEP: Deprecate content as dict input, split in two arguments (#1016)
1 parent 3344b43 commit 6c66f57

File tree

6 files changed

+156
-80
lines changed

6 files changed

+156
-80
lines changed

Diff for: docs/src/dataio_3_migration.rst

+6-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ but with replacements inplace.
2020
a dictionary form to provide a reference together with the ``vertical_domain`` is deprecated, use
2121
the ``domain_reference`` argument instead.
2222
- ``workflow`` now only supports string input, example ``workflow='Structural modelling'``.
23-
- ``content`` was previously optional, it should now be explicitly provided.
23+
- ``content`` was previously optional, it should now be explicitly provided as a valid content string.
24+
- ``content`` no longer supports using a dictionary form to provide extra information together
25+
with the ``content``, use the ``content_metadata`` argument instead.
2426
- ``content={'seismic': {'offset': '0-15'}}`` no longer works, use the key ``stacking_offset`` instead
2527
of ``offset``.
2628

@@ -32,6 +34,7 @@ Following are an example demonstrating several deprecated patterns:
3234
from fmu.dataio import ExportData
3335
3436
ExportData(
37+
content={'seismic': {'attribute': 'amplitude', 'calculation': 'mean'}}, # ⛔️
3538
fmu_context='preprocessed', # ⛔️
3639
access_ssdl={'access_level': 'asset', 'rep_include': True}, # ⛔️
3740
vertical_domain={'depth': 'msl'}, # ⛔️
@@ -45,7 +48,8 @@ Change to this instead 👇:
4548
from fmu.dataio import ExportData
4649
4750
ExportData(
48-
content='depth', # ✅ content must explicitly be provided
51+
content='seismic', # ✅ content must explicitly be provided as a string
52+
content_metadata={'attribute': 'amplitude', 'calculation': 'mean'}, #
4953
preprocessed=True, #
5054
classification='restricted', # ✅ note the use of 'restricted' instead of 'asset'
5155
rep_include=True, #

Diff for: src/fmu/dataio/dataio.py

+53-2
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,14 @@ class ExportData:
223223
Note also: If missing or empty, export() may still be done, but without a
224224
metadata file (this feature may change in future releases).
225225
226-
content: Optional. Is a string or a dictionary with one key.
227-
Example is "depth" or {"fluid_contact": {"xxx": "yyy", "zzz": "uuu"}}.
226+
content: A required string describing the content of the data e.g. 'volumes'.
228227
Content is checked agains a white-list for validation!
228+
Some contents like 'seismic' requires additional information. This
229+
should be provided through the 'content_metadata' argument.
230+
231+
content_metadata: Optional. Dictionary with additional information about the
232+
provided content. Only required for some contents, e.g. 'seismic'.
233+
Example {"attribute": "amplitude", "calculation": "mean"}.
229234
230235
fmu_context: Optional string with value ``realization`` or ``case``. If not
231236
explicitly given it will be inferred based on the presence of ERT
@@ -361,6 +366,7 @@ class ExportData:
361366
classification: Optional[str] = None
362367
config: dict | GlobalConfiguration = field(default_factory=dict)
363368
content: Optional[Union[dict, str]] = None
369+
content_metadata: Optional[dict] = None
364370
depth_reference: Optional[str] = None # deprecated
365371
domain_reference: str = "msl"
366372
description: Union[str, list] = ""
@@ -521,6 +527,42 @@ def _get_rep_include(self) -> bool:
521527
logger.debug("Using default 'rep_include'=False")
522528
return False
523529

530+
def _get_content_enum(self) -> Optional[enums.Content]:
531+
"""Get the content enum."""
532+
if self.content is None:
533+
logger.debug("content not set from input, returning None'")
534+
return None
535+
536+
if isinstance(self.content, str):
537+
logger.debug("content is set from string input")
538+
return enums.Content(self.content)
539+
540+
if isinstance(self.content, dict):
541+
logger.debug("content is set from dict input")
542+
return enums.Content(next(iter(self.content)))
543+
544+
raise ValueError(
545+
"Incorrect format found for 'content'. It should be a valid "
546+
f"content string: {[m.value for m in enums.Content]}"
547+
)
548+
549+
def _get_content_metadata(self) -> Optional[dict]:
550+
"""
551+
Get the content metadata if provided by as input, else return None.
552+
Validation takes place in the objectdata provider.
553+
"""
554+
if self.content_metadata:
555+
logger.debug("content_metadata is set from content_metadata argument")
556+
return self.content_metadata
557+
558+
if isinstance(self.content, dict):
559+
logger.debug("content_metadata is set from content argument")
560+
content_enum = self._get_content_enum()
561+
return self.content[content_enum]
562+
563+
logger.debug("Found no content_metadata, returning None")
564+
return None
565+
524566
def _show_deprecations_or_notimplemented(self) -> None:
525567
"""Warn on deprecated keys or on stuff not implemented yet."""
526568

@@ -538,6 +580,15 @@ def _show_deprecations_or_notimplemented(self) -> None:
538580
"is not supported."
539581
)
540582

583+
if isinstance(self.content, dict):
584+
warn(
585+
"Using the 'content' argument to set both the content and "
586+
"and the content metadata will be deprecated. Set the 'content' "
587+
"argument to a valid content string, and provide the extra "
588+
"information through the 'content_metadata' argument instead.",
589+
FutureWarning,
590+
)
591+
541592
if self.runpath:
542593
warn(
543594
"The 'runpath' key has currently no function. It will be evaluated for "

Diff for: src/fmu/dataio/providers/objectdata/_base.py

+50-34
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22

33
from abc import abstractmethod
4-
from copy import deepcopy
54
from dataclasses import dataclass, field
65
from datetime import datetime
76
from typing import TYPE_CHECKING, Any, Final
@@ -18,16 +17,23 @@
1817
from fmu.dataio._utils import generate_description
1918
from fmu.dataio.exceptions import ConfigurationError
2019
from fmu.dataio.providers._base import Provider
21-
from fmu.dataio.providers.objectdata._export_models import AllowedContent, UnsetData
20+
from fmu.dataio.providers.objectdata._export_models import (
21+
UnsetData,
22+
content_metadata_factory,
23+
content_requires_metadata,
24+
property_warn,
25+
)
2226

2327
if TYPE_CHECKING:
24-
from fmu.dataio._model.data import (
28+
from pydantic import BaseModel
29+
30+
from fmu.dataio._models.fmu_results.data import (
2531
BoundingBox2D,
2632
BoundingBox3D,
2733
Geometry,
2834
)
29-
from fmu.dataio._models._fmu_results.enums import FMUClass, Layout
30-
from fmu.dataio._models._fmu_results.specification import AnySpecification
35+
from fmu.dataio._models.fmu_results.enums import FMUClass, Layout
36+
from fmu.dataio._models.fmu_results.specification import AnySpecification
3137
from fmu.dataio.dataio import ExportData
3238
from fmu.dataio.types import Inferrable
3339

@@ -65,7 +71,9 @@ def __post_init__(self) -> None:
6571
raise ValueError("Can't use absolute path as 'forcefolder'")
6672
logger.info(f"Using forcefolder {self.dataio.forcefolder}")
6773

68-
content_model = self._get_validated_content(self.dataio.content)
74+
content = self.dataio._get_content_enum() or "unset"
75+
content_metadata = self._get_validated_content_metadata()
76+
6977
strat_element = self._get_stratigraphy_element()
7078
self.name = strat_element.name
7179

@@ -77,11 +85,9 @@ def __post_init__(self) -> None:
7785
metadata["top"] = strat_element.top
7886
metadata["base"] = strat_element.base
7987

80-
metadata["content"] = (usecontent := content_model.content)
81-
if content_model.content_incl_specific:
82-
metadata[usecontent] = getattr(
83-
content_model.content_incl_specific, usecontent, None
84-
)
88+
metadata["content"] = content
89+
if content_metadata:
90+
metadata[content] = content_metadata
8591
metadata["product"] = self.product
8692
metadata["tagname"] = self.dataio.tagname
8793
metadata["format"] = self.fmt
@@ -154,30 +160,40 @@ def get_metadata(self) -> AnyData | UnsetData:
154160
assert self._metadata is not None
155161
return self._metadata
156162

157-
def _get_validated_content(self, content: str | dict | None) -> AllowedContent:
158-
"""Check content and return a validated model."""
159-
logger.info("Evaluate content")
160-
logger.debug("content is %s of type %s", str(content), type(content))
161-
162-
if not content:
163-
return AllowedContent(content="unset")
164-
165-
if isinstance(content, str):
166-
return AllowedContent(content=Content(content))
167-
168-
if len(content) > 1:
169-
raise ValueError(
170-
"Found more than one content item in the 'content' dictionary. Ensure "
171-
"input is formatted as content={'mycontent': {extra_key: extra_value}}."
172-
)
173-
content = deepcopy(content)
174-
usecontent, content_specific = next(iter(content.items()))
175-
logger.debug("usecontent is %s", usecontent)
176-
logger.debug("content_specific is %s", content_specific)
163+
def _get_validated_content_metadata(self) -> BaseModel | None:
164+
"""
165+
If content_metadata have been input, and the content requires metadata,
166+
return the corresponding validated content_metadata model for the content,
167+
else return None.
168+
"""
169+
content = self.dataio._get_content_enum()
170+
if content:
171+
content_metadata = self.dataio._get_content_metadata()
172+
173+
if content_requires_metadata(content):
174+
if not content_metadata:
175+
return self._raise_error_for_missing_content_metadata(content)
176+
177+
content_metadata_model = content_metadata_factory(content)
178+
if not isinstance(content_metadata, dict):
179+
raise ValueError(
180+
"The 'content_metadata' needs to be input as a dictionary. "
181+
f"Possible keys for content '{content.value}' are: "
182+
f"{list(content_metadata_model.model_fields)}. "
183+
)
184+
return content_metadata_model.model_validate(content_metadata)
185+
return None
177186

178-
return AllowedContent.model_validate(
179-
{"content": Content(usecontent), "content_incl_specific": content}
180-
)
187+
@staticmethod
188+
def _raise_error_for_missing_content_metadata(content: Content) -> None:
189+
"""
190+
Raise error for missing content_metadata with special handling of the
191+
'property' content which is required to have content_metadata in the future.
192+
"""
193+
if content == Content.property:
194+
property_warn()
195+
else:
196+
raise ValueError(f"Content {content.value} requires additional input")
181197

182198
def _get_stratigraphy_element(self) -> StratigraphyElement:
183199
"""Derive the name and stratigraphy for the object; may have several sources.

Diff for: src/fmu/dataio/providers/objectdata/_export_models.py

+25-38
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
import warnings
1111
from textwrap import dedent
12-
from typing import Final, Literal, Optional, Union
12+
from typing import Final, Literal, Optional, Type
1313

1414
from pydantic import (
1515
BaseModel,
@@ -60,43 +60,6 @@ class AllowedContentProperty(BaseModel):
6060
is_discrete: Optional[bool] = Field(default=None)
6161

6262

63-
class ContentRequireSpecific(BaseModel):
64-
field_outline: Optional[data.FieldOutline] = Field(default=None)
65-
field_region: Optional[data.FieldRegion] = Field(default=None)
66-
fluid_contact: Optional[data.FluidContact] = Field(default=None)
67-
property: Optional[AllowedContentProperty] = Field(default=None)
68-
seismic: Optional[AllowedContentSeismic] = Field(default=None)
69-
70-
71-
class AllowedContent(BaseModel):
72-
content: Union[enums.Content, Literal["unset"]]
73-
content_incl_specific: Optional[ContentRequireSpecific] = Field(default=None)
74-
75-
@model_validator(mode="before")
76-
@classmethod
77-
def _validate_input(cls, values: dict) -> dict:
78-
content = values.get("content")
79-
content_specific = values.get("content_incl_specific", {}).get(content)
80-
81-
if content in ContentRequireSpecific.model_fields and not content_specific:
82-
# 'property' should be included below after a deprecation period
83-
if content == enums.Content.property:
84-
property_warn()
85-
else:
86-
raise ValueError(f"Content {content} requires additional input")
87-
88-
if content_specific and not isinstance(content_specific, dict):
89-
raise ValueError(
90-
"Content is incorrectly formatted. When giving content as a dict, "
91-
"it must be formatted as: "
92-
"{'mycontent': {extra_key: extra_value}} where mycontent is a string "
93-
"and in the list of valid contents, and extra keys in associated "
94-
"dictionary must be valid keys for this content."
95-
)
96-
97-
return values
98-
99-
10063
class UnsetData(data.Data):
10164
content: Literal["unset"] # type: ignore
10265

@@ -111,3 +74,27 @@ def _deprecation_warning(self) -> UnsetData:
11174
FutureWarning,
11275
)
11376
return self
77+
78+
79+
def content_metadata_factory(content: enums.Content) -> Type[BaseModel]:
80+
"""Return the correct content_metadata model based on provided content."""
81+
if content == enums.Content.field_outline:
82+
return data.FieldOutline
83+
if content == enums.Content.field_region:
84+
return data.FieldRegion
85+
if content == enums.Content.fluid_contact:
86+
return data.FluidContact
87+
if content == enums.Content.property:
88+
return AllowedContentProperty
89+
if content == enums.Content.seismic:
90+
return AllowedContentSeismic
91+
raise ValueError(f"No content_metadata model exist for content {str(content)}")
92+
93+
94+
def content_requires_metadata(content: enums.Content) -> bool:
95+
"""Flag if given content requires content_metadata"""
96+
try:
97+
content_metadata_factory(content)
98+
return True
99+
except ValueError:
100+
return False

Diff for: tests/test_units/test_contents.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import pytest
44
from pydantic import ValidationError
55

6+
from fmu.dataio._models.fmu_results import enums
67
from fmu.dataio.dataio import ExportData
8+
from fmu.dataio.providers.objectdata._export_models import content_requires_metadata
79

810
# generic testing of functionality related to content is done elsewhere,
911
# mainly in test_dataio.py.
@@ -83,7 +85,7 @@ def test_content_fluid_contact_case_insensitive(regsurf, globalconfig2):
8385

8486
def test_content_fluid_contact_raises_on_invalid_contact(regsurf, globalconfig2):
8587
"""Test export of the fluid_contact content."""
86-
with pytest.raises(ValidationError, match="fluid_contact"):
88+
with pytest.raises(ValidationError, match="FluidContact"):
8789
ExportData(
8890
config=globalconfig2,
8991
name="MyName",
@@ -333,3 +335,19 @@ def test_content_wellpicks(dataframe, globalconfig2):
333335
).generate_metadata(dataframe)
334336

335337
assert meta["data"]["content"] == "wellpicks"
338+
339+
340+
def test_content_requires_metadata():
341+
"""Test the content_requires_metadata function"""
342+
343+
# test contents that requires extra
344+
assert content_requires_metadata(enums.Content.field_outline)
345+
assert content_requires_metadata(enums.Content.field_region)
346+
assert content_requires_metadata(enums.Content.fluid_contact)
347+
assert content_requires_metadata(enums.Content.property)
348+
assert content_requires_metadata(enums.Content.seismic)
349+
350+
# test some random contents that does not require extra
351+
assert not content_requires_metadata(enums.Content.depth)
352+
assert not content_requires_metadata(enums.Content.timeseries)
353+
assert not content_requires_metadata(enums.Content.volumes)

Diff for: tests/test_units/test_dataio.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ def test_content_invalid_dict(globalconfig1, regsurf):
470470
eobj = ExportData(
471471
config=globalconfig1, content={"seismic": "some_key", "extra": "some_value"}
472472
)
473-
with pytest.raises(ValueError, match="Found more than one content item"):
473+
with pytest.raises(ValueError):
474474
eobj.generate_metadata(regsurf)
475475

476476

@@ -483,7 +483,7 @@ def test_content_valid_string(regsurf, globalconfig2):
483483

484484
def test_seismic_content_require_seismic_data(globalconfig2, regsurf):
485485
eobj = ExportData(config=globalconfig2, content="seismic")
486-
with pytest.raises(pydantic.ValidationError, match="requires additional input"):
486+
with pytest.raises(ValueError, match="requires additional input"):
487487
eobj.generate_metadata(regsurf)
488488

489489

@@ -522,7 +522,7 @@ def test_content_is_a_wrongly_formatted_dict(globalconfig2, regsurf):
522522
name="TopVolantis",
523523
content={"seismic": "myvalue"},
524524
)
525-
with pytest.raises(ValueError, match="incorrectly formatted"):
525+
with pytest.raises(ValueError):
526526
eobj.generate_metadata(regsurf)
527527

528528

0 commit comments

Comments
 (0)