diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 72b3624..52178f6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -50,14 +50,14 @@ jobs: - name: Build wheel and source distribution run: python -m build - - name: Install the built wheel - run: | - python -c "import glob; import subprocess; wheel_files = glob.glob('dist/*.whl'); subprocess.check_call(['pip', 'install', wheel_files[0]])" + - name: Install the built wheel + run: | + python -c "import glob; import subprocess; wheel_files = glob.glob('dist/*.whl'); subprocess.check_call(['pip', 'install', wheel_files[0]])" - # - name: Lint (ruff) - # run: | - # ruff check . - # ruff format --check + - name: Lint (ruff) + run: | + ruff check . + ruff format --check # - name: Run tests with coverage # run: | diff --git a/README.md b/README.md index be9155f..7a4a344 100644 --- a/README.md +++ b/README.md @@ -6,5 +6,11 @@ Utilities for creating STAC items from HEC models -**hecstac** is an open-source Python library designed to mine metadata from HEC model simulations for use in the development of catalogs documenting probabilistic flood studies. This project automates the generation of STAC Items and Assets from HEC-HMS and HEC-RAS model files, enabling improved data and metadata management. +**hecstac** is an open-source Python library designed to mine metadata from HEC model simulations for use in the development of catalogs documenting probabilistic flood studies. This project automates the generation of STAC Items and Assets from HEC-HMS and HEC-RAS model files, enabling improved data and metadata management. +***Testing HEC-RAS model item creation*** + +- Download the HEC-RAS example project data from USACE and place it in your working directory. The data can be downloaded [here](https://github.com/HydrologicEngineeringCenter/hec-downloads/releases/download/1.0.33/Example_Projects_6_6.zip). 
+- In 'new_ras_item.py', set ras_project_file to the path of the 2D Muncie project file (e.g., ras_project_file = "Example_Projects_6_6/2D Unsteady Flow Hydraulics/Muncie/Muncie.prj"). +- For projects whose geometry .hdf files contain projection information, the CRS is detected automatically. The Muncie data lacks that projection information, so the CRS must be set manually: copy the projection string from Muncie_IA_Clip.prj in the Muncie/GIS_Data folder and assign it to the CRS in 'new_ras_item.py'. +- Once the CRS and project file location have been set, create a new item by running 'python -m new_ras_item' from the command line. The new item will be written to the model directory, at the same level as the project file. diff --git a/docs/source/requirements.txt b/docs/source/requirements.txt index 71051e8..608044e 100644 Binary files a/docs/source/requirements.txt and b/docs/source/requirements.txt differ diff --git a/docs/source/user_guide.rst b/docs/source/user_guide.rst index a8191e6..c920cc3 100644 --- a/docs/source/user_guide.rst +++ b/docs/source/user_guide.rst @@ -17,14 +17,14 @@ have Python already installed and setup: Note that it is highly recommended to create a python `virtual environment -`_ to install, test, and run hecstac. +`_ to install, test, and run hecstac. Workflows --------- -The following snippets provide examples for creating stac items from HEC model data. +The following snippets provide examples for creating stac items from HEC model data. .. code-block:: python @@ -56,7 +56,7 @@ The following snippets provide examples for creating stac items from HEC model d ras_item.save_object(ras_item.pm.item_path(item_id)) -The following snippet provides an example of how to create stac items for an event based simulation. +The following snippet provides an example of how to create stac items for an event based simulation. .. 
code-block:: python @@ -79,7 +79,7 @@ The following snippet provides an example of how to create stac items for an eve "/hms-model.met", "/Precip.dss", ] - + # RAS Info ras_source_model_item_path = "//authoritative-ras-model.json" @@ -105,7 +105,7 @@ The following snippet provides an example of how to create stac items for an eve source_model_items=[ hms_source_model_item, ras_source_model_item - ], + ], hms_simulation_files=hms_simulation_files, ras_simulation_files=ras_simulation_files, ) diff --git a/hecstac/__init__.py b/hecstac/__init__.py index 0466af5..3b9bf65 100644 --- a/hecstac/__init__.py +++ b/hecstac/__init__.py @@ -5,3 +5,6 @@ """ from hecstac.version import __version__ + +from hecstac.ras.item import RASModelItem +from hecstac.hms.item import HMSModelItem diff --git a/hecstac/common/__init__.py b/hecstac/common/__init__.py index e69de29..0f9954d 100644 --- a/hecstac/common/__init__.py +++ b/hecstac/common/__init__.py @@ -0,0 +1 @@ +"""Common scripts.""" diff --git a/hecstac/common/asset_factory.py b/hecstac/common/asset_factory.py index 1cb9128..a08223f 100644 --- a/hecstac/common/asset_factory.py +++ b/hecstac/common/asset_factory.py @@ -1,31 +1,84 @@ +"""Create instances of assets.""" + import logging from pathlib import Path -from typing import Dict, Type +from typing import Dict, Generic, Type, TypeVar +from pyproj import CRS from pystac import Asset from hecstac.hms.s3_utils import check_storage_extension +logger = logging.getLogger(__name__) + +T = TypeVar("T") # Generic for asset file accessor classes + + +class GenericAsset(Asset, Generic[T]): + """Provides a base structure for assets.""" + + regex_parse_str: str = r"" + __roles__: list[str] = [] + __description__: str = "" + __file_class__: T + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if self.description is None: + self.description = self.__description__ + self._roles = [] + self._extra_fields = {} + self.name = Path(self.href).name + + @property + def 
roles(self) -> list[str]: + """Return roles with enforced values.""" + roles = self._roles + for i in self.__roles__: + if i not in roles: + roles.append(i) + return roles -class GenericAsset(Asset): - """Generic Asset.""" + @roles.setter + def roles(self, roles: list): + self._roles = roles - def __init__(self, href: str, roles=None, description=None, *args, **kwargs): - super().__init__(href, *args, **kwargs) - self.href = href - self.name = Path(href).name - self.stem = Path(href).stem - self.roles = roles or [] - self.description = description or "" + @property + def extra_fields(self): + """Return extra fields.""" + # boilerplate here, but overwritten in subclasses + return self._extra_fields + + @extra_fields.setter + def extra_fields(self, extra_fields: dict): + """Set user-defined extra fields.""" + self._extra_fields = extra_fields + + @property + def file(self) -> T: + """Return class to access asset file contents.""" + return self.__file_class__(self.get_absolute_href()) def name_from_suffix(self, suffix: str) -> str: """Generate a name by appending a suffix to the file stem.""" return f"{self.stem}.{suffix}" + @property + def crs(self) -> CRS: + """Get the authority code for the model CRS.""" + if self.ext.has("proj"): + wkt2 = self.ext.proj.wkt2 + if wkt2 is None: + return + else: + return CRS(wkt2) + def __repr__(self): + """Return string representation of the GenericAsset instance.""" return f"<{self.__class__.__name__} name={self.name}>" def __str__(self): + """Return string representation of assets name.""" return f"{self.name}" @@ -33,14 +86,13 @@ class AssetFactory: """Factory for creating HEC asset instances based on file extensions.""" def __init__(self, extension_to_asset: Dict[str, Type[GenericAsset]]): - """ - Initialize the AssetFactory with a mapping of file extensions to asset types and metadata. 
- """ + """Initialize the AssetFactory with a mapping of file extensions to asset types and metadata.""" self.extension_to_asset = extension_to_asset def create_hms_asset(self, fpath: str, item_type: str = "model") -> Asset: """ Create an asset instance based on the file extension. + item_type: str The type of item to create. This is used to determine the asset class. @@ -59,11 +111,11 @@ def create_hms_asset(self, fpath: str, item_type: str = "model") -> Asset: asset.title = Path(fpath).name return check_storage_extension(asset) - def create_ras_asset(self, fpath: str): - logging.debug(f"Creating asset for {fpath}") + def asset_from_dict(self, asset: Asset): + """Create HEC asset given a base Asset and a map of file extensions dict.""" + fpath = asset.href for pattern, asset_class in self.extension_to_asset.items(): if pattern.match(fpath): - logging.debug(f"Matched {pattern} for {Path(fpath).name}: {asset_class}") - return asset_class(href=fpath, title=Path(fpath).name) - - return GenericAsset(href=fpath, title=Path(fpath).name) + logger.debug(f"Matched {pattern} for {Path(fpath).name}: {asset_class}") + return asset_class.from_dict(asset.to_dict()) + return asset diff --git a/hecstac/common/geometry.py b/hecstac/common/geometry.py new file mode 100644 index 0000000..59cf5c9 --- /dev/null +++ b/hecstac/common/geometry.py @@ -0,0 +1,15 @@ +"""Geometry utils.""" + +from pyproj import CRS, Transformer +from shapely import Geometry +from shapely.ops import transform + + +def reproject_to_wgs84(geom: Geometry, crs: str) -> Geometry: + """Convert geometry CRS to EPSG:4326 for stac item geometry.""" + pyproj_crs = CRS.from_user_input(crs) + wgs_crs = CRS.from_authority("EPSG", "4326") + if pyproj_crs != wgs_crs: + transformer = Transformer.from_crs(pyproj_crs, wgs_crs, True) + return transform(transformer.transform, geom) + return geom diff --git a/hecstac/common/logger.py b/hecstac/common/logger.py index b9bed7d..afa99e9 100644 --- a/hecstac/common/logger.py +++ 
b/hecstac/common/logger.py @@ -2,12 +2,15 @@ import logging import sys +from re import L SUPPRESS_LOGS = ["boto3", "botocore", "geopandas", "fiona", "rasterio", "pyogrio", "xarray", "shapely", "matplotlib"] def initialize_logger(json_logging: bool = False, level: int = logging.INFO): - datefmt = "%Y-%m-%dT%H:%M:%SZ" + """Initialize the ras logger.""" + logger = logging.getLogger("hecstac") + logger.setLevel(level) if json_logging: for module in SUPPRESS_LOGS: logging.getLogger(module).setLevel(logging.WARNING) @@ -19,14 +22,14 @@ def emit(self, record): handler = FlushStreamHandler(sys.stdout) - logging.basicConfig( - level=level, - handlers=[handler], - format="""{"time": "%(asctime)s" , "level": "%(levelname)s", "msg": "%(message)s"}""", - datefmt=datefmt, - ) + handler.setLevel(level) + + datefmt = "%Y-%m-%dT%H:%M:%SZ" + fmt = """{"time": "%(asctime)s" , "level": "%(levelname)s", "msg": "%(message)s"}""" + formatter = logging.Formatter(fmt=fmt, datefmt=datefmt) + handler.setFormatter(formatter) + + logger.addHandler(handler) else: for package in SUPPRESS_LOGS: logging.getLogger(package).setLevel(logging.ERROR) - logging.basicConfig(level=level, format="%(asctime)s | %(levelname)s | %(message)s", datefmt=datefmt) - # boto3.set_stream_logger(name="botocore.credentials", level=logging.ERROR) diff --git a/hecstac/common/path_manager.py b/hecstac/common/path_manager.py index fb49792..9f404ca 100644 --- a/hecstac/common/path_manager.py +++ b/hecstac/common/path_manager.py @@ -1,29 +1,33 @@ +"""Path manager.""" + from pathlib import Path class LocalPathManager: - """ - Builds consistent paths for STAC items and collections assuming a top level local catalog - """ + """Builds consistent paths for STAC items and collections assuming a top level local catalog.""" def __init__(self, model_root_dir: str): self._model_root_dir = model_root_dir @property def model_root_dir(self) -> str: + """Model root directory.""" return str(self._model_root_dir) @property def 
model_parent_dir(self) -> str: + """Model parent directory.""" return str(Path(self._model_root_dir).parent) @property def item_dir(self) -> str: - """Duplicate of model_root, added for clarity in the calling code""" + """Duplicate of model_root, added for clarity in the calling code.""" return self.model_root_dir def item_path(self, item_id: str) -> str: - return f"{self._model_root_dir}/{item_id}.json" + """Item path.""" + return str(Path(self._model_root_dir) / f"{item_id}.json") def derived_item_asset(self, filename: str) -> str: - return f"{self._model_root_dir}/{filename}" + """Derive item asset path.""" + return str(Path(self._model_root_dir) / filename) diff --git a/hecstac/common/schemas.py b/hecstac/common/schemas.py index a2c89bf..279afa7 100644 --- a/hecstac/common/schemas.py +++ b/hecstac/common/schemas.py @@ -1,3 +1,5 @@ +"""Schema parsing.""" + # TODO: update these, add imports, etc. # def extract_schema_definition(definition_name: str) -> dict[str, Any]: # """Extract asset specific schema from ras extension schema""" diff --git a/hecstac/events/__init__.py b/hecstac/events/__init__.py index e69de29..2ab1885 100644 --- a/hecstac/events/__init__.py +++ b/hecstac/events/__init__.py @@ -0,0 +1 @@ +"""HEC event stac items.""" diff --git a/hecstac/events/ffrd.py b/hecstac/events/ffrd.py index c67efee..a9aff0d 100644 --- a/hecstac/events/ffrd.py +++ b/hecstac/events/ffrd.py @@ -1,3 +1,5 @@ +"""Class for event items.""" + import json import logging import os @@ -15,8 +17,12 @@ from hecstac.hms.assets import HMS_EXTENSION_MAPPING from hecstac.ras.assets import RAS_EXTENSION_MAPPING +logger = logging.getLogger(__name__) + class FFRDEventItem(Item): + """Class for event items.""" + FFRD_REALIZATION = "FFRD:realization" FFRD_BLOCK_GROUP = "FFRD:block_group" FFRD_EVENT = "FFRD:event" @@ -66,7 +72,7 @@ def _register_extensions(self) -> None: def _add_model_links(self) -> None: """Add links to the model items.""" for item in self.source_model_items: - 
logging.info(f"Adding link from source model item: {item.id}") + logger.info(f"Adding link from source model item: {item.id}") link = Link( rel="derived_from", target=item, @@ -116,7 +122,7 @@ def _bbox(self) -> list[float]: def add_hms_asset(self, fpath: str, item_type: str = "event") -> None: """Add an asset to the FFRD Event STAC item.""" if os.path.exists(fpath): - logging.info(f"Adding asset: {fpath}") + logger.info(f"Adding asset: {fpath}") asset = self.hms_factory.create_hms_asset(fpath, item_type=item_type) if asset is not None: self.add_asset(asset.title, asset) @@ -124,7 +130,7 @@ def add_hms_asset(self, fpath: str, item_type: str = "event") -> None: def add_ras_asset(self, fpath: str) -> None: """Add an asset to the FFRD Event STAC item.""" if os.path.exists(fpath): - logging.info(f"Adding asset: {fpath}") + logger.info(f"Adding asset: {fpath}") asset = self.ras_factory.create_ras_asset(fpath) if asset is not None: self.add_asset(asset.title, asset) diff --git a/hecstac/hms/__init__.py b/hecstac/hms/__init__.py index e69de29..4240fe8 100644 --- a/hecstac/hms/__init__.py +++ b/hecstac/hms/__init__.py @@ -0,0 +1 @@ +"""HEC-HMS STAC Item module.""" diff --git a/hecstac/hms/assets.py b/hecstac/hms/assets.py index b1dbc0d..2249d81 100644 --- a/hecstac/hms/assets.py +++ b/hecstac/hms/assets.py @@ -1,5 +1,7 @@ -from pystac import MediaType +"""HEC-HMS Stac Item asset classes.""" +from pystac import MediaType +import re from hecstac.common.asset_factory import GenericAsset from hecstac.hms.parser import ( BasinFile, @@ -18,275 +20,265 @@ class GeojsonAsset(GenericAsset): """Geojson asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["data"] - media_type = MediaType.GEOJSON - description = "Geojson file." - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) + regex_parse_str = r".*\.geojson$" + __roles__ = ["data", MediaType.GEOJSON] + __description__ = "Geojson file." 
class TiffAsset(GenericAsset): """Tiff Asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["data"] - media_type = MediaType.GEOTIFF - description = "Tiff file." - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) - - -class ProjectAsset(GenericAsset): - """HEC-HMS Project file asset.""" + regex_parse_str = r".*\.tiff$" + __roles__ = ["data", MediaType.GEOTIFF] + __description__ = "Tiff file." - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-project"] - media_type = MediaType.TEXT - description = "The HEC-HMS project file. Summary provied at the item level" +class ProjectAsset(GenericAsset[ProjectFile]): + """Project asset.""" - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) - self.pf = ProjectFile(href, assert_uniform_version=False) + regex_parse_str = r".*\.hms$" + __roles__ = ["hms-project", MediaType.TEXT] + __description__ = "The HEC-HMS project file. Summary provied at the item level" + __file_class__ = ProjectFile class ThumbnailAsset(GenericAsset): """Thumbnail asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["thumbnail"] - media_type = MediaType.PNG - description = "Thumbnail" - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) + regex_parse_str = r".*\.png$" + __roles__ = ["thumbnail", MediaType.PNG] + __description__ = "Thumbnail" -class ModelBasinAsset(GenericAsset): +class ModelBasinAsset(GenericAsset[BasinFile]): """HEC-HMS Basin file asset from authoritative model, containing geometry and other detailed data.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-basin"] - media_type = MediaType.TEXT - description = "Defines the basin geometry and elements for HEC-HMS simulations." 
- super().__init__( - href, - roles=roles, - description=description, - media_type=media_type, - *args, - **kwargs, - ) - self.bf = BasinFile(href, read_geom=True) - self.extra_fields = { - "hms:title": self.bf.name, - "hms:version": self.bf.header.attrs["Version"], - "hms:description": self.bf.header.attrs.get("Description"), - "hms:unit_system": self.bf.header.attrs["Unit System"], - "hms:gages": self.bf.gages, - "hms:drainage_area_miles": self.bf.drainage_area, - "hms:reach_length_miles": self.bf.reach_miles, - "proj:wkt": self.bf.wkt, - "proj:code": self.bf.epsg, - } | {f"hms_basin:{key}".lower(): val for key, val in self.bf.elements.element_counts.items()} - - -class EventBasinAsset(GenericAsset): + regex_parse_str = r".*\.basin$" + __roles__ = ["hms-basin", MediaType.TEXT] + __description__ = "Defines the basin geometry and elements for HEC-HMS simulations." + __file_class__ = BasinFile + + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return { + "hms:title": self.file.name, + "hms:version": self.file.header.attrs["Version"], + "hms:description": self.file.header.attrs.get("Description"), + "hms:unit_system": self.file.header.attrs["Unit System"], + "hms:gages": self.file.gages, + "hms:drainage_area_miles": self.file.drainage_area, + "hms:reach_length_miles": self.file.reach_miles, + "proj:wkt": self.file.wkt, + "proj:code": self.file.epsg, + } | {f"hms_basin:{key}".lower(): val for key, val in self.file.elements.element_counts.items()} + + +class EventBasinAsset(GenericAsset[BasinFile]): """HEC-HMS Basin file asset from event, with limited basin info.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-basin"] - media_type = MediaType.TEXT - description = "Defines the basin geometry and elements for HEC-HMS simulations." 
- super().__init__( - href, - roles=roles, - description=description, - media_type=media_type, - *args, - **kwargs, - ) - self.bf = BasinFile(href) - self.extra_fields = { - "hms:title": self.bf.name, - "hms:version": self.bf.header.attrs["Version"], - "hms:description": self.bf.header.attrs.get("Description"), - "hms:unit_system": self.bf.header.attrs["Unit System"], + regex_parse_str = r".*\.basin$" + __roles__ = ["hms-basin", MediaType.TEXT] + __description__ = "Defines the basin geometry and elements for HEC-HMS simulations." + __file_class__ = BasinFile + + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return { + "hms:title": self.file.name, + "hms:version": self.file.header.attrs["Version"], + "hms:description": self.file.header.attrs.get("Description"), + "hms:unit_system": self.file.header.attrs["Unit System"], } -class RunAsset(GenericAsset): +class RunAsset(GenericAsset[RunFile]): """Run asset.""" - def __init__(self, href: str, *args, **kwargs): - self.rf = RunFile(href) - roles = ["hms-run"] - media_type = MediaType.TEXT - description = "Contains data for HEC-HMS simulations." - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) - self.extra_fields = {"hms:title": self.name} | { - run.name: {f"hms:{key}".lower(): val for key, val in run.attrs.items()} for _, run in self.rf.elements + regex_parse_str = r".*\.run$" + __file_class__ = RunFile + __roles__ = ["hms-run", MediaType.TEXT] + __description__ = "Contains data for HEC-HMS simulations." 
+ + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return {"hms:title": self.name} | { + run.name: {f"hms:{key}".lower(): val for key, val in run.attrs.items()} for _, run in self.file.elements } -class ControlAsset(GenericAsset): +class ControlAsset(GenericAsset[ControlFile]): """HEC-HMS Control file asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-control"] - media_type = MediaType.TEXT - description = "Defines time control information for HEC-HMS simulations." - super().__init__( - href, - roles=roles, - description=description, - media_type=media_type, - *args, - **kwargs, - ) - self.cf = ControlFile(href) - self.extra_fields = { - "hms:title": self.cf.name, - **{f"hms:{key}".lower(): val for key, val in self.cf.attrs.items()}, + regex_parse_str = r".*\.control$" + __roles__ = ["hms-control", MediaType.TEXT] + __description__ = "Defines time control information for HEC-HMS simulations." + __file_class__ = ControlFile + + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return { + "hms:title": self.file.name, + **{f"hms:{key}".lower(): val for key, val in self.file.attrs.items()}, } -class MetAsset(GenericAsset): +class MetAsset(GenericAsset[MetFile]): """HEC-HMS Meteorological file asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-met"] - media_type = MediaType.TEXT - description = "Contains meteorological data such as precipitation and temperature." 
- super().__init__( - href, - roles=roles, - description=description, - media_type=media_type, - *args, - **kwargs, - ) - self.mf = MetFile(href) - self.extra_fields = { - "hms:title": self.mf.name, - **{f"hms:{key}".lower(): val for key, val in self.mf.attrs.items()}, + regex_parse_str = r".*\.met$" + __roles__ = ["hms-met", MediaType.TEXT] + __description__ = "Contains meteorological data such as precipitation and temperature." + __file_class__ = MetFile + + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return { + "hms:title": self.file.name, + **{f"hms:{key}".lower(): val for key, val in self.file.attrs.items()}, } class DSSAsset(GenericAsset): """DSS asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["hec-dss"] - media_type = "application/octet-stream" - description = "HEC-DSS file." - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) + regex_parse_str = r".*\.dss$" + __roles__ = ["hec-dss", "application/octet-stream"] + __description__ = "HEC-DSS file." - self.extra_fields["hms:title"] = self.name + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return {"hms:title": self.name} -class SqliteAsset(GenericAsset): +class SqliteAsset(GenericAsset[SqliteDB]): """HEC-HMS SQLite database asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-sqlite"] - media_type = "application/x-sqlite3" - description = "Stores spatial data for HEC-HMS basin files." - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) - self.sqdb = SqliteDB(href) - self.extra_fields = {"hms:title": self.name, "hms:layers": self.sqdb.layers} + regex_parse_str = r".*\.sqlite$" + __roles__ = ["hms-sqlite", "application/x-sqlite3"] + __description__ = "Stores spatial data for HEC-HMS basin files." 
+ __file_class__ = SqliteDB + + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return {"hms:title": self.name, "hms:layers": self.file.layers} -class GageAsset(GenericAsset): +class GageAsset(GenericAsset[GageFile]): """Gage asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-gage"] - media_type = MediaType.TEXT - description = "Contains data for HEC-HMS gages." - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) - self.gf = GageFile(href) - self.extra_fields = {"hms:title": self.gf.name, "hms:version": self.gf.attrs["Version"]} | { - f"hms:{gage.name}".lower(): {key: val for key, val in gage.attrs.items()} for gage in self.gf.gages + regex_parse_str = r".*\.gage$" + __roles__ = ["hms-gage", MediaType.TEXT] + __description__ = "Contains data for HEC-HMS gages." + __file_class__ = GageFile + + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return {"hms:title": self.file.name, "hms:version": self.file.attrs["Version"]} | { + f"hms:{gage.name}".lower(): {key: val for key, val in gage.attrs.items()} for gage in self.file.gages } -class GridAsset(GenericAsset): - """Grid asset""" +class GridAsset(GenericAsset[GridFile]): + """Grid asset.""" + + regex_parse_str = r".*\.grid$" + __roles__ = ["hms-grid", MediaType.TEXT] + __description__ = "Contains data for HEC-HMS grid files." + __file_class__ = GridFile - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-grid"] - media_type = MediaType.TEXT - description = "Contains data for HEC-HMS grid files." 
- super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) - self.gf = GridFile(href) - self.extra_fields = ( - {"hms:title": self.gf.name} - | {f"hms:{key}".lower(): val for key, val in self.gf.attrs.items()} - | {f"hms:{grid.name}".lower(): {key: val for key, val in grid.attrs.items()} for grid in self.gf.grids} + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return ( + {"hms:title": self.file.name} + | {f"hms:{key}".lower(): val for key, val in self.file.attrs.items()} + | {f"hms:{grid.name}".lower(): {key: val for key, val in grid.attrs.items()} for grid in self.file.grids} ) class LogAsset(GenericAsset): """Log asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-log", "results"] - media_type = MediaType.TEXT - description = "Contains log data for HEC-HMS simulations." - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) - self.extra_fields["hms:title"] = self.name + regex_parse_str = r".*\.log$" + __roles__ = ["hms-log", "results", MediaType.TEXT] + __description__ = "Contains log data for HEC-HMS simulations." + + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return {"hms:title": self.name} class OutAsset(GenericAsset): """Out asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-out", "results"] - media_type = MediaType.TEXT - description = "Contains output data for HEC-HMS simulations." - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) - self.extra_fields["hms:title"] = self.name + regex_parse_str = r".*\.out$" + __roles__ = ["hms-out", "results", MediaType.TEXT] + __description__ = "Contains output data for HEC-HMS simulations." 
+ + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return {"hms:title": self.name} -class PdataAsset(GenericAsset): +class PdataAsset(GenericAsset[PairedDataFile]): """Pdata asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-pdata"] - media_type = MediaType.TEXT - description = "Contains paired data for HEC-HMS simulations." - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) - self.pd = PairedDataFile(href) - self.extra_fields = {"hms:title": self.pd.name, "hms:version": self.pd.attrs["Version"]} + regex_parse_str = r".*\.pdata$" + __roles__ = ["hms-pdata", MediaType.TEXT] + __description__ = "Contains paired data for HEC-HMS simulations." + __file_class__ = PairedDataFile + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return {"hms:title": self.file.name, "hms:version": self.file.attrs["Version"]} -class TerrainAsset(GenericAsset): + +class TerrainAsset(GenericAsset[TerrainFile]): """Terrain asset.""" - def __init__(self, href: str, *args, **kwargs): - roles = ["hms-terrain"] - media_type = MediaType.GEOTIFF - description = "Contains terrain data for HEC-HMS simulations." - super().__init__(href, roles=roles, description=description, media_type=media_type, *args, **kwargs) - self.tf = TerrainFile(href) - self.extra_fields = {"hms:title": self.tf.name, "hms:version": self.tf.attrs["Version"]} | { - f"hms:{layer['name']}".lower(): {key: val for key, val in layer.items()} for layer in self.tf.layers + regex_parse_str = r".*\.terrain$" + __roles__ = ["hms-terrain", MediaType.GEOTIFF] + __description__ = "Contains terrain data for HEC-HMS simulations." 
+ __file_class__ = TerrainFile + + @GenericAsset.extra_fields.getter + def extra_fields(self): + """Return extra fields with added dynamic keys/values.""" + return {"hms:title": self.file.name, "hms:version": self.file.attrs["Version"]} | { + f"hms:{layer['name']}".lower(): {key: val for key, val in layer.items()} for layer in self.file.layers } -HMS_EXTENSION_MAPPING = { - ".hms": ProjectAsset, - ".basin": {"event": EventBasinAsset, "model": ModelBasinAsset}, - ".control": ControlAsset, - ".met": MetAsset, - ".sqlite": SqliteAsset, - ".gage": GageAsset, - ".run": RunAsset, - ".grid": GridAsset, - ".log": LogAsset, - ".out": OutAsset, - ".pdata": PdataAsset, - ".terrain": TerrainAsset, - ".dss": DSSAsset, - ".geojson": GeojsonAsset, - ".tiff": TiffAsset, - ".tif": TiffAsset, - ".png": ThumbnailAsset, -} +HMS_ASSET_CLASSES = [ + ProjectAsset, + EventBasinAsset, + ModelBasinAsset, + ControlAsset, + MetAsset, + SqliteAsset, + GageAsset, + RunAsset, + GridAsset, + LogAsset, + OutAsset, + PdataAsset, + TerrainAsset, + DSSAsset, + GeojsonAsset, + TiffAsset, + TiffAsset, + ThumbnailAsset, +] + +HMS_EXTENSION_MAPPING = {re.compile(cls.regex_parse_str, re.IGNORECASE): cls for cls in HMS_ASSET_CLASSES} diff --git a/hecstac/hms/consts.py b/hecstac/hms/consts.py index 1f4836e..4fd4f48 100644 --- a/hecstac/hms/consts.py +++ b/hecstac/hms/consts.py @@ -1,3 +1,5 @@ +"""HEC-HMS STAC Item constants.""" + GPD_WRITE_ENGINE = "fiona" # Latest default as of 2024-11-11 seems to be "pyogrio" which is causing issues. # 5 spaces, (key), colon, (val), ignoring whitespace before and after key and val, e.g. 
" Version: 4.10" ATTR_KEYVAL_GROUPER = r"^ (\S.*?)\s*:\s*(.*?)\s*$" diff --git a/hecstac/hms/data_model.py b/hecstac/hms/data_model.py index e3c7eae..9bbb942 100644 --- a/hecstac/hms/data_model.py +++ b/hecstac/hms/data_model.py @@ -1,3 +1,5 @@ +"""HEC-HMS STAC Item data classes.""" + from __future__ import annotations from collections import Counter, OrderedDict @@ -10,7 +12,7 @@ @dataclass class Element: - """Parent class of basin elements (Subbasins, Reaches, etc)""" + """Parent class of basin elements (Subbasins, Reaches, etc).""" name: str attrs: OrderedDict @@ -18,76 +20,84 @@ class Element: @dataclass class BasinHeader: - """Header of .basin""" + """Header of .basin.""" attrs: dict @dataclass class BasinLayerProperties: - """Part of footer of .basin, find via 'Basin Layer Properties:'. - Data is stored as a series of layers rather than a set of attributes, so just storing the raw content for now. - """ + """Part of footer of .basin, find via 'Basin Layer Properties:'. Data is stored as a series of layers rather than a set of attributes, so just storing the raw content for now.""" content: str @dataclass class Control(Element): + """Represents a control element.""" + pass @dataclass class Grid(Element): + """Represents a grid element.""" + pass @dataclass class Precipitation(Element): + """Represents a precipitation element.""" + pass @dataclass class Temperature(Element): + """Represents a temperature element.""" + pass @dataclass class ET(Element): + """Represents a ET element.""" + pass @dataclass class Subbasin_ET(Element): + """Represents a Subbasin_ET element.""" + pass @dataclass class Gage(Element): + """Represents a gage element.""" + pass @dataclass class ComputationPoints: - """Part of footer of .basin, find via 'Computation Points:'. - Data has some complex attributes with nested end-flags, so just storing raw content for now. - """ + """Part of footer of .basin, find via 'Computation Points:'. 
Data has some complex attributes with nested end-flags, so just storing raw content for now.""" content: str @dataclass class BasinSpatialProperties: - """Part of footer of .basin, find via 'Basin Spatial Properties:'. - Data has some complex attributes with nested end-flags, so just storing raw content for now. - """ + """Part of footer of .basin, find via 'Basin Spatial Properties:'. Data has some complex attributes with nested end-flags, so just storing raw content for now.""" content: str @dataclass class BasinSchematicProperties: - """Part of footer of .basin, find via 'Basin Schematic Properties:'""" + """Part of footer of .basin, find via 'Basin Schematic Properties:'.""" attrs: dict @@ -102,21 +112,29 @@ class Run: @dataclass class Subbasin(Element): + """Represents a Subbasin element.""" + geom: Polygon = None @dataclass class Table(Element): + """Represents a Table element.""" + pass @dataclass class Pattern(Element): + """Represents a Pattern element.""" + pass @dataclass class Reach(Element): + """Represents a Reach element.""" + geom: LineString = None slope: float = ( None # assumed units of the coordinate system is the same as what is used for the project.. 
need to confirm this assumption @@ -125,26 +143,36 @@ class Reach(Element): @dataclass class Junction(Element): + """Represents a Junction element.""" + geom: Point = None @dataclass class Sink(Element): + """Represents a Sink element.""" + geom: Point = None @dataclass class Reservoir(Element): + """Represents a Reservoir element.""" + geom: Point = None @dataclass class Source(Element): + """Represents a Source element.""" + geom: Point = None @dataclass class Diversion(Element): + """Represents a Diversion element.""" + geom: Point = None @@ -156,18 +184,23 @@ def __init__(self): self.index_ = 0 def __setitem__(self, key, item): + """Add an element to the set.""" utils.add_no_duplicate(self.elements, key, item) def __getitem__(self, key): + """Retrieve an element by name.""" return self.elements[key] def __len__(self): + """Return the number of elements.""" return len(self.elements) def __iter__(self): + """Iterate over elements.""" return iter(self.elements.items()) def subset(self, element_type: Element): + """Retrieve a subset of elements of a given type.""" element_subset = ElementSet() for element in self.elements.values(): if isinstance(element, element_type): @@ -175,6 +208,7 @@ def subset(self, element_type: Element): return element_subset def get_element_type(self, element_type): + """Retrieve elements of a specific type by name.""" element_list = [] for element in self.elements.values(): if type(element).__name__ == element_type: @@ -183,6 +217,7 @@ def get_element_type(self, element_type): @property def element_types(self) -> list: + """Get a list of unique element types.""" types = [] for element in self.elements.values(): types.append(type(element).__name__) @@ -190,6 +225,7 @@ def element_types(self) -> list: @property def element_counts(self) -> dict: + """Get a count of each element type.""" types = [] for element in self.elements.values(): types.append(type(element).__name__) @@ -197,6 +233,7 @@ def element_counts(self) -> dict: @property def 
gages(self): + """Retrieve gage elements with their observed hydrograph gage names.""" gages = {} for name, element in self.elements.items(): if "Observed Hydrograph Gage" in element.attrs.keys(): diff --git a/hecstac/hms/item.py b/hecstac/hms/item.py index 54b4b0d..a852912 100644 --- a/hecstac/hms/item.py +++ b/hecstac/hms/item.py @@ -1,3 +1,5 @@ +"""HEC-HMS STAC Item class.""" + import json import logging import os @@ -8,16 +10,25 @@ import matplotlib.pyplot as plt import numpy as np import requests -from pystac import Item +from pystac import Item, Asset from pystac.extensions.projection import ProjectionExtension from pystac.extensions.storage import StorageExtension -from shapely import to_geojson, union_all +from shapely import to_geojson, unary_union +from functools import lru_cache from hecstac.common.asset_factory import AssetFactory from hecstac.common.path_manager import LocalPathManager -from hecstac.hms.assets import HMS_EXTENSION_MAPPING, ProjectAsset +from hecstac.hms.assets import HMS_EXTENSION_MAPPING from hecstac.hms.parser import BasinFile, ProjectFile +from hecstac.ras.consts import ( + NULL_DATETIME, + NULL_STAC_BBOX, + NULL_STAC_GEOMETRY, +) + +logger = logging.getLogger(__name__) + class HMSModelItem(Item): """An object representation of a HEC-HMS model.""" @@ -29,145 +40,195 @@ class HMSModelItem(Item): PROJECT_VERSION = "hms:version" PROJECT_DESCRIPTION = "hms:description" PROJECT_UNITS = "hms:unit_system" + SUMMARY = "hms:summary" - def __init__(self, hms_project_file, item_id: str, simplify_geometry: bool = True): - - self._project = None - self.assets = {} - self.links = [] - self.thumbnail_paths = [] - self.geojson_paths = [] - self.extra_fields = {} - self.stac_extensions = None - self.pm = LocalPathManager(Path(hms_project_file).parent) - self._href = self.pm.item_path(item_id) - self.hms_project_file = hms_project_file - self._simplify_geometry = simplify_geometry - - self.pf = ProjectFile(self.hms_project_file,
assert_uniform_version=False) - self.factory = AssetFactory(HMS_EXTENSION_MAPPING) - - super().__init__( - Path(self.hms_project_file).stem, - self._geometry, - self._bbox, - self._datetime, - self._properties, - href=self._href, - ) + def __init__(self, *args, **kwargs): + """Add a few default properties to the base class.""" + super().__init__(*args, **kwargs) + self.simplify_geometry = True + + @classmethod + def from_prj(cls, hms_project_file, item_id: str, simplify_geometry: bool = True): + """ + Create an `HMSModelItem` from a HEC-HMS project file. + + Parameters + ---------- + hms_project_file : str + Path to the HEC-HMS project file (.hms). + item_id : str + Unique item ID for the STAC item. + simplify_geometry : bool, optional + Whether to simplify geometry. Defaults to True. + + Returns + ------- + stac : HMSModelItem + An instance of the class representing the STAC item. + """ + pm = LocalPathManager(Path(hms_project_file).parent) + href = pm.item_path(item_id) + pf = ProjectFile(hms_project_file, assert_uniform_version=False) + + # Create GeoJSON and Thumbnails + cls._check_files_exists(cls, pf.files + pf.rasters) + geojson_paths = cls.write_element_geojsons(cls, pf.basins, pm) + thumbnail_paths = cls.make_thumbnails(cls, pf.basins, pm) - self._check_files_exists(self.pf.files + self.pf.rasters) - self.make_thumbnails(self.pf.basins) - self.write_element_geojsons(self.pf.basins[0]) - for fpath in self.thumbnail_paths + self.geojson_paths + self.pf.files + self.pf.rasters: - self.add_hms_asset(fpath) + # Collect all assets + assets = {Path(i).name: Asset(i) for i in pf.files + pf.rasters + geojson_paths + thumbnail_paths} + # Create the STAC Item + stac = cls( + Path(hms_project_file).stem, + NULL_STAC_GEOMETRY, + NULL_STAC_BBOX, + NULL_DATETIME, + {"hms_project_file": hms_project_file}, + href=href, + assets=assets, + ) + stac.pm = pm + stac.simplify_geometry = simplify_geometry - self._register_extensions() + stac._register_extensions() + return stac 
def _register_extensions(self) -> None: ProjectionExtension.add_to(self) StorageExtension.add_to(self) @property - def _properties(self): + def hms_project_file(self) -> str: + """Get the path to the HEC-HMS .hms file.""" + return self._properties.get("hms_project_file") + + @property + @lru_cache + def factory(self) -> AssetFactory: + """Return AssetFactory for this item.""" + return AssetFactory(HMS_EXTENSION_MAPPING) + + @property + @lru_cache + def pf(self) -> ProjectFile: + """Get a ProjectFile instance for the HMS Model .hms file.""" + return ProjectFile(self.hms_project_file) + + @property + def properties(self) -> dict: """Properties for the HMS STAC item.""" - properties = {} + properties = self._properties properties[self.PROJECT] = f"{self.pf.name}.hms" properties[self.PROJECT_TITLE] = self.pf.name - properties[self.PROJECT_VERSION] = (self.pf.attrs["Version"],) - properties[self.PROJECT_DESCRIPTION] = (self.pf.attrs.get("Description"),) + properties[self.PROJECT_VERSION] = self.pf.attrs["Version"] + properties[self.PROJECT_DESCRIPTION] = self.pf.attrs.get("Description") - # TODO probably fine 99% of the time but we grab this info from the first basin file only + # Get data from the first basin properties[self.MODEL_UNITS] = self.pf.basins[0].attrs["Unit System"] properties[self.MODEL_GAGES] = self.pf.basins[0].gages - properties["proj:code"] = self.pf.basins[0].epsg - if self.pf.basins[0].epsg: - logging.warning("No EPSG code found in basin file.") + + if self.pf.basins[0].epsg is None: + logger.warning("No EPSG code found in basin file.") + properties["proj:wkt"] = self.pf.basins[0].wkt - properties["hms:summary"] = self.pf.file_counts + properties[self.SUMMARY] = self.pf.file_counts + return properties + @properties.setter + def properties(self, properties: dict): + """Set properties.""" + self._properties = properties + @property - def _bbox(self) -> tuple[float, float, float, float]: - """Bounding box of the HMS STAC item.""" - if 
len(self.pf.basins) == 0: - return [0, 0, 0, 0] - else: - bboxes = np.array([i.bbox(4326) for i in self.pf.basins]) - bboxes = [bboxes[:, 0].min(), bboxes[:, 1].min(), bboxes[:, 2].max(), bboxes[:, 3].max()] - return [float(i) for i in bboxes] + def geometry_assets(self) -> list[BasinFile]: + """Return list of basin geometry assets.""" + return self.pf.basins + + @property + def geometry(self) -> dict: + """Return footprint of the model as a GeoJSON.""" + if not self.geometry_assets: + return NULL_STAC_GEOMETRY + + geometries = [ + b.basin_geom.simplify(0.001) if self.simplify_geometry else b.basin_geom for b in self.geometry_assets + ] + unioned_geometry = unary_union(geometries) + + return json.loads(to_geojson(unioned_geometry)) @property - def _geometry(self) -> dict | None: - """Geometry of the HMS STAC item. Union of all basins in the HMS model.""" - if self._simplify_geometry: - geometries = [b.basin_geom.simplify(0.001) for b in self.pf.basins] - else: - geometries = [b.basin_geom for b in self.pf.basins] - return json.loads(to_geojson(union_all(geometries))) + def bbox(self) -> list[float]: + """Bounding box of the HMS model.""" + if not self.geometry_assets: + return NULL_STAC_BBOX + + bboxes = np.array([b.bbox(4326) for b in self.geometry_assets]) + return [float(i) for i in [bboxes[:, 0].min(), bboxes[:, 1].min(), bboxes[:, 2].max(), bboxes[:, 3].max()]] @property - def _datetime(self) -> datetime: + def datetime(self) -> datetime: """The datetime for the HMS STAC item.""" date = datetime.strptime(self.pf.basins[0].header.attrs["Last Modified Date"], "%d %B %Y") time = datetime.strptime(self.pf.basins[0].header.attrs["Last Modified Time"], "%H:%M:%S").time() return datetime.combine(date, time) def _check_files_exists(self, files: list[str]): - """Ensure the files exists. If they don't rasie an error.""" + """Ensure the files exists. 
If they don't raise an error.""" from pathlib import Path for file in files: if not os.path.exists(file): - logging.warning(f"File not found {file}") + logger.warning(f"File not found {file}") + + def make_thumbnails(self, basins: list[BasinFile], pm: LocalPathManager, overwrite: bool = False) -> list[str]: + """Create a PNG thumbnail for each basin.""" + thumbnail_paths = [] - def make_thumbnails(self, basins: list[BasinFile], overwrite: bool = False): - """Create a png for each basin. Optionally overwrite existing files.""" for bf in basins: - thumbnail_path = self.pm.derived_item_asset(f"{bf.name}.png".replace(" ", "_").replace("-", "_")) + thumbnail_path = pm.derived_item_asset(f"{bf.name}.png".replace(" ", "_").replace("-", "_")) if not overwrite and os.path.exists(thumbnail_path): - logging.info(f"Thumbnail for basin `{bf.name}` already exists. Skipping creation.") + logger.info(f"Thumbnail for basin `{bf.name}` already exists. Skipping creation.") else: - logging.info(f"{'Overwriting' if overwrite else 'Creating'} thumbnail for basin `{bf.name}`") - fig = self.make_thumbnail(bf.hms_schematic_2_gdfs) + logger.info(f"{'Overwriting' if overwrite else 'Creating'} thumbnail for basin `{bf.name}`") + fig = self.make_thumbnail(self, gdfs=bf.hms_schematic_2_gdfs) fig.savefig(thumbnail_path) fig.clf() - self.thumbnail_paths.append(thumbnail_path) + thumbnail_paths.append(thumbnail_path) - def write_element_geojsons(self, basins: list[BasinFile], overwrite: bool = False): + return thumbnail_paths + + def write_element_geojsons(self, basins: list[BasinFile], pm: LocalPathManager, overwrite: bool = False): """Write the HMS elements (Subbasins, Juctions, Reaches, etc.) 
to geojson.""" - for element_type in basins.elements.element_types: - logging.debug(f"Checking if geojson for {element_type} exists") - path = self.pm.derived_item_asset(f"{element_type}.geojson") + geojson_paths = [] + for element_type in basins[0].elements.element_types: + logger.debug(f"Checking if geojson for {element_type} exists") + path = pm.derived_item_asset(f"{element_type}.geojson") if not overwrite and os.path.exists(path): - logging.info(f"Geojson for {element_type} already exists. Skipping creation.") + logger.info(f"Geojson for {element_type} already exists. Skipping creation.") else: - logging.info(f"Creating geojson for {element_type}") + logger.info(f"Creating geojson for {element_type}") gdf = self.pf.basins[0].feature_2_gdf(element_type).to_crs(4326) - logging.debug(gdf.columns) + logger.debug(gdf.columns) keep_columns = ["name", "geometry", "Last Modified Date", "Last Modified Time", "Number Subreaches"] gdf = gdf[[col for col in keep_columns if col in gdf.columns]] gdf.to_file(path) - self.geojson_paths.append(path) - - def add_hms_asset(self, fpath: str) -> None: - """Add an asset to the HMS STAC item.""" - if os.path.exists(fpath): - asset = self.factory.create_hms_asset(fpath) - if asset is not None: - self.add_asset(asset.title, asset) - if isinstance(asset, ProjectAsset): - if self._project is not None: - logging.error( - f"Only one project asset is allowed. Found {str(asset)} when {str(self._project)} was already set." - ) - self._project = asset + geojson_paths.append(path) + + return geojson_paths + + def add_asset(self, key, asset): + """Subclass asset then add.""" + subclass = self.factory.asset_from_dict(asset) + if subclass is None: + return + return super().add_asset(key, subclass) def make_thumbnail(self, gdfs: dict): - """Create a png from the geodataframes (values of the dictionary). - The dictionary keys are used to label the layers in the legend.""" + """Create a png from the geodataframes (values of the dictionary). 
The dictionary keys are used to label the layers in the legend.""" cdict = { "Subbasin": "black", "Reach": "blue", @@ -202,3 +263,16 @@ def make_thumbnail(self, gdfs: dict): ax.set_yticks([]) fig.tight_layout() return fig + + ### Prevent external modification of dynamically generated properties ### + @geometry.setter + def geometry(self, *args, **kwargs): + pass + + @bbox.setter + def bbox(self, *args, **kwargs): + pass + + @datetime.setter + def datetime(self, *args, **kwargs): + pass diff --git a/hecstac/hms/parser.py b/hecstac/hms/parser.py index 2762be3..3380993 100644 --- a/hecstac/hms/parser.py +++ b/hecstac/hms/parser.py @@ -1,3 +1,5 @@ +"""HEC-HMS file parsing classes.""" + from __future__ import annotations import logging @@ -5,7 +7,6 @@ import os from abc import ABC from collections import OrderedDict -from datetime import datetime from functools import lru_cache from pathlib import Path @@ -13,7 +14,6 @@ import geopandas as gpd import pandas as pd from pyproj import CRS -from shapely import get_point from shapely.geometry import LineString, MultiLineString, Point import hecstac.hms.utils as utils @@ -43,8 +43,12 @@ Temperature, ) +logger = logging.getLogger(__name__) + class BaseTextFile(ABC): + """Base class for text files.""" + def __init__(self, path: str, client=None, bucket=None): self.path: str = path self.directory: str = os.path.dirname(self.path) @@ -57,19 +61,24 @@ def __init__(self, path: str, client=None, bucket=None): self.parse_header() def read_content(self): + """Read contents of text file.""" if os.path.exists(self.path): - with open(self.path) as f: - self.content = f.read() + try: + with open(self.path, encoding="utf-8") as f: + self.content = f.read() + except UnicodeDecodeError: + with open(self.path, encoding="cp1252") as f: + self.content = f.read() else: try: response = self.client.get_object(Bucket=self.bucket, Key=self.path) self.content = response["Body"].read().decode() except Exception as e: - logging.error(e) + 
logger.error(e) raise FileNotFoundError(f"could not find {self.path} locally nor on s3") def parse_header(self): - """Scan the file down to the first instance of 'End:' and save each colon-separated keyval pair as attrs dict""" + """Scan the file down to the first instance of 'End:' and save each colon-separated keyval pair as attrs dict.""" lines = self.content.splitlines() if not lines[0].startswith( ( @@ -89,6 +98,8 @@ def parse_header(self): class ProjectFile(BaseTextFile): + """Class for parsing HEC-HMS project files.""" + def __init__( self, path: str, @@ -123,15 +134,18 @@ def __repr__(self): @property @lru_cache def name(self): + """Extract name from project file.""" lines = self.content.splitlines() if not lines[0].startswith("Project: "): raise ValueError(f"unexpected first line: {lines[0]}") return lines[0][len("Project: ") :] def combine_stem_ext(self, ext: str) -> str: + """Combine stem and extension.""" return f"{self.stem}.{ext}" def scan_for_terrain_run_grid_gage_pdata(self): + """Scan for terrain, run, grid, gage, and pdata files.""" for ext in ["terrain", "run", "grid", "gage", "pdata"]: path = self.combine_stem_ext(ext) if os.path.exists(path): @@ -147,6 +161,7 @@ def scan_for_terrain_run_grid_gage_pdata(self): self.pdata = PairedDataFile(path) def scan_for_basins_mets_controls(self): + """Scan for basin, meteorology, and control files.""" lines = self.content.splitlines() i = -1 while True: @@ -181,6 +196,7 @@ def scan_for_basins_mets_controls(self): @property def file_counts(self): + """Return file counts.""" return { "Basins": len(self.basins), "Controls": len(self.controls), @@ -194,6 +210,7 @@ def file_counts(self): } def assert_uniform_version(self): + """Assert uniform version.""" errors = [] version = self.attrs["Version"] for basin in self.basins: @@ -217,9 +234,8 @@ def assert_uniform_version(self): @property def files(self): - + """Return associated files.""" # logging.info(f"other paths {[i.path for i in [self.terrain, self.run, 
self.grid, self.gage, self.pdata] if i]}") - return ( [self.path] + [basin.path for basin in self.basins] @@ -233,21 +249,28 @@ def files(self): @property def dss_files(self): + """Return dss files.""" files = set( [gage.attrs["Variant"]["Variant-1"]["DSS File Name"] for gage in self.gage.elements.elements.values()] - + [ - grid.attrs["Variant"]["Variant-1"]["DSS File Name"] - for grid in self.grid.elements.elements.values() - if "Variant" in grid.attrs - ] + [pdata.attrs["DSS File"] for pdata in self.pdata.elements.elements.values()] ) + if self.grid: + files.update( + [ + grid.attrs["Variant"]["Variant-1"]["DSS File Name"] + for grid in self.grid.elements.elements.values() + if "Variant" in grid.attrs + ] + ) + else: + logging.warning("No grid file to extract dss files from.") files = [str(Path(f.replace("\\", "/"))) for f in files] return self.absolute_paths(files) @property def result_files(self): + """Return result files.""" files = set( [i[1].attrs["Log File"] for i in self.run.elements] + [i[1].attrs["DSS File"] for i in self.run.elements] @@ -258,25 +281,41 @@ def result_files(self): return self.absolute_paths(set(files)) def absolute_paths(self, paths): + """Return absolute path.""" return [os.path.join(self.directory, path) for path in paths] @property def rasters(self): + """Return raster files.""" files = [] + if self.terrain: for terrain in self.terrain.layers: - files += [os.path.join(terrain["raster_dir"], f) for f in os.listdir(terrain["raster_dir"])] - files += [grid.attrs["Filename"] for grid in self.grid.elements.elements.values() if "Filename" in grid.attrs] + raster_dir = terrain.get("raster_dir", "").strip() + if raster_dir and os.path.exists(raster_dir): + files += [os.path.join(raster_dir, f) for f in os.listdir(raster_dir)] + else: + logging.warning(f"Skipping missing raster directory: {raster_dir}") + + if self.grid is None: + logging.warning("No grid file, skipping grid rasters.") + else: + files += [ + grid.attrs["Filename"] for grid in 
self.grid.elements.elements.values() if "Filename" in grid.attrs + ] files = [str(Path(f.replace("\\", "/"))) for f in files] return self.absolute_paths(set(files)) @property @lru_cache def sqlitedbs(self): + """Return SQLite database.""" return [SqliteDB(basin.sqlite_path) for basin in self.basins] class BasinFile(BaseTextFile): + """Class for parsing HEC-HMS basin files.""" + def __init__( self, path: str, @@ -313,25 +352,30 @@ def __repr__(self): @property def wkt(self): + """Return wkt representation of the CRS.""" for line in self.spatial_properties.content.splitlines(): if "Coordinate System: " in line: return line.split(": ")[1] @property def crs(self): + """Return the CRS.""" return CRS(self.wkt) @property def epsg(self): + """Return the EPSG code.""" return self.crs.to_epsg() def parse_name(self): + """Parse basin name.""" lines = self.content.splitlines() if not lines[0].startswith("Basin: "): raise ValueError(f"unexpected first line: {lines[0]}") self.name = lines[0][len("Basin: ") :] def scan_for_headers_and_footers(self): + """Scan for basin headers and footers.""" lines = self.content.splitlines() for i, line in enumerate(lines): if line.startswith("Basin: "): @@ -351,6 +395,7 @@ def scan_for_headers_and_footers(self): self.computation_points = ComputationPoints(content) def identify_sqlite(self): + """Identify SQLite.""" for line in self.content.splitlines(): if ".sqlite" in line: return line.split("File: ")[1] @@ -358,6 +403,7 @@ def identify_sqlite(self): @property @lru_cache def elements(self): + """Return basin elements.""" elements = ElementSet() if self.read_geom: sqlite = SqliteDB( @@ -433,62 +479,75 @@ def elements(self): @property @lru_cache def subbasins(self): + """Return subbasin elements.""" return self.elements.get_element_type("Subbasin") @property @lru_cache def reaches(self): + """Return reach elements.""" return self.elements.get_element_type("Reach") @property @lru_cache def junctions(self): + """Return junction elements.""" return 
self.elements.get_element_type("Junction") @property @lru_cache def reservoirs(self): + """Return reservoir elements.""" return self.elements.get_element_type("Reservoir") @property @lru_cache def diversions(self): + """Return diversion elements.""" return self.elements.get_element_type("Diversion") @property @lru_cache def sinks(self): + """Return sink elements.""" return self.elements.get_element_type("Sink") @property @lru_cache def sources(self): + """Return source elements.""" return self.elements.get_element_type("Source") @property @lru_cache def gages(self): + """Return gages.""" return self.elements.gages @property @lru_cache def drainage_area(self): + """Return drainage areas.""" return sum([subbasin.geom.area for subbasin in self.subbasins]) @property @lru_cache def reach_miles(self): + """Return reach lengths in miles.""" return sum([reach.geom.length for reach in self.reaches]) @property @lru_cache def basin_geom(self): + """Return basin geometry.""" return utils.remove_holes(self.feature_2_gdf("Subbasin").make_valid().to_crs(4326).union_all()) def bbox(self, crs): + """Return basin bounding box.""" return self.feature_2_gdf("Subbasin").to_crs(crs).total_bounds def feature_2_gdf(self, element_type: str) -> gpd.GeoDataFrame: + """Convert feature to GeoDataFrame.""" gdf_list = [] for e in self.elements.get_element_type(element_type): gdf_list.append( @@ -504,6 +563,7 @@ def feature_2_gdf(self, element_type: str) -> gpd.GeoDataFrame: @property @lru_cache def observation_points_gdf(self): + """Return GeoDataFrame of observation points.""" gdf_list = [] for name, element in self.elements: if "Observed Hydrograph Gage" in element.attrs.keys(): @@ -556,6 +616,7 @@ def observation_points_gdf(self): return gdf def subbasin_connection_lines(self) -> gpd.GeoDataFrame: + """Return GeoDataframe of subbasin connection lines.""" df_list = [] for subbasin in self.subbasins: us_point = subbasin.geom.centroid @@ -577,11 +638,12 @@ def subbasin_connection_lines(self)
-> gpd.GeoDataFrame: return gdf def junction_connection_lines(self) -> gpd.GeoDataFrame: + """Return GeoDataframe of junction connection lines.""" df_list = [] for junction in self.junctions: us_point = junction.geom if "Downstream" not in junction.attrs: - logging.warning(f"Warning no downstream element for junction {junction.name}") + logger.warning(f"Warning no downstream element for junction {junction.name}") continue ds_element = self.elements[junction.attrs["Downstream"]] if ds_element in self.reaches: @@ -615,6 +677,7 @@ def junction_connection_lines(self) -> gpd.GeoDataFrame: @property @lru_cache def hms_schematic_2_gdfs(self) -> dict[gpd.GeoDataFrame]: + """Convert HMS schematics to GeoDataframe.""" element_gdfs = {} for element_type in [ "Reach", @@ -633,6 +696,7 @@ def hms_schematic_2_gdfs(self) -> dict[gpd.GeoDataFrame]: return element_gdfs def subbasin_bc_lines(self): + """Return subbasin boundary condition lines.""" df_list = [] for _, row in self.subbasin_connection_lines().iterrows(): geom = row.geometry @@ -654,6 +718,8 @@ def subbasin_bc_lines(self): class MetFile(BaseTextFile): + """Class for parsing HEC-HMS meteorology files.""" + def __init__(self, path: str, client=None, bucket=None): if not path.endswith(".met"): raise ValueError(f"invalid extension for Meteorology file: {path}") @@ -668,12 +734,14 @@ def __repr__(self): @property @lru_cache def name(self): + """Return meteorology name.""" lines = self.content.splitlines() if not lines[0].startswith("Meteorology: "): raise ValueError(f"unexpected first line: {lines[0]}") return lines[0][len("Meteorology: ") :] def scan_for_elements(self): + """Scan for meteorology elements.""" elements = ElementSet() lines = self.content.splitlines() for i, line in enumerate(lines): @@ -700,6 +768,8 @@ def scan_for_elements(self): class ControlFile(BaseTextFile): + """Class for parsing HEC-HMS control files.""" + def __init__(self, path: str, client=None, bucket=None): if not path.endswith(".control"): raise 
ValueError(f"invalid extension for Control file: {path}") @@ -712,6 +782,7 @@ def __repr__(self): @property @lru_cache def name(self): + """Return control name.""" lines = self.content.splitlines() if not lines[0].startswith("Control: "): raise ValueError(f"unexpected first line: {lines[0]}") @@ -719,36 +790,43 @@ def name(self): class TerrainFile(BaseTextFile): + """Class for parsing HEC-HMS terrain files.""" + def __init__(self, path: str, client=None, bucket=None): if not path.endswith(".terrain"): - raise ValueError(f"invalid extension for Terrain file: {path}") + raise ValueError(f"Invalid extension for Terrain file: {path}") super().__init__(path, client=client, bucket=bucket) self.layers = [] found_first = False - name, raster_path, vert_units = "", "", "" + name, raster_path, raster_dir, vert_units = "", "", "", "" + for line in self.content.splitlines(): if not found_first: if line.startswith("Terrain Data: "): found_first = True else: continue + if line == "End:": self.layers.append( { "name": name, "raster_path": raster_path, - "raster_dir": os.path.dirname(raster_path), + "raster_dir": raster_dir, "vert_units": vert_units, } ) - name, raster_path, vert_units = "", "", "" + name, raster_path, raster_dir, vert_units = "", "", "", "" elif line.startswith("Terrain Data: "): name = line[len("Terrain Data: ") :] elif line.startswith(" Elevation File Name: "): raster_path_raw = line[len(" Elevation File Name: ") :] raster_path = os.path.join(os.path.dirname(self.path), raster_path_raw.replace("\\", os.sep)) + elif line.startswith(" Terrain Directory: "): + raster_dir_raw = line[len(" Terrain Directory: ") :] + raster_dir = os.path.join(os.path.dirname(self.path), raster_dir_raw.replace("\\", os.sep)) elif line.startswith(" Vertical Units: "): vert_units = line[len(" Vertical Units: ") :] @@ -759,10 +837,13 @@ def __repr__(self): @property @lru_cache def name(self): + """Return name.""" return None class RunFile(BaseTextFile): + """Class for parsing HEC-HMS run 
files.""" + def __init__(self, path: str, client=None, bucket=None): if not path.endswith(".run"): raise ValueError(f"invalid extension for Run file: {path}") @@ -773,6 +854,7 @@ def __repr__(self): return f"HMSRunFile({self.path})" def runs(self): + """Retrieve all runs.""" runs = ElementSet() lines = self.content.splitlines() i = -1 @@ -788,10 +870,13 @@ def runs(self): @property def elements(self): + """Return run elements.""" return self.runs() class PairedDataFile(BaseTextFile): + """Class for parsing HEC-HMS paired data files.""" + def __init__(self, path: str, client=None, bucket=None): if not path.endswith(".pdata"): raise ValueError(f"invalid extension for Paired Data file: {path}") @@ -800,7 +885,7 @@ def __init__(self, path: str, client=None, bucket=None): response = client.get_object(Bucket=bucket, Key=path) self.content = response["Body"].read().decode() except Exception as e: - logging.info(f" {e}: No Paired Data File found: creating empty Paired Data File") + logger.info(f" {e}: No Paired Data File found: creating empty Paired Data File") self.create_pdata(path) super().__init__(path, client=client, bucket=bucket) self.elements = ElementSet() @@ -813,12 +898,14 @@ def __repr__(self): @property @lru_cache def name(self): + """Return paired data manager.""" lines = self.content.splitlines() if not lines[0].startswith("Paired Data Manager: "): raise ValueError(f"unexpected first line: {lines[0]}") return lines[0][len("Paired Data Manager: ") :] def scan_for_tables(self): + """Scan for tables.""" lines = self.content.splitlines() for i, line in enumerate(lines): if line.startswith("Table: "): @@ -828,6 +915,7 @@ def scan_for_tables(self): self.elements[f"{name}+{table_type}"] = Table(name, attrs) def scan_for_patterns(self): + """Scan for patterns.""" lines = self.content.splitlines() for i, line in enumerate(lines): if line.startswith("Pattern: "): @@ -838,6 +926,8 @@ def scan_for_patterns(self): class SqliteDB: + """SQLite database class.""" + def 
__init__(self, path: str, client=None, bucket=None, fiona_aws_session=None): sqlite_file, _ = os.path.splitext(path) path = f"{sqlite_file}.sqlite" @@ -869,6 +959,8 @@ def __init__(self, path: str, client=None, bucket=None, fiona_aws_session=None): class GridFile(BaseTextFile): + """Class for parsing HEC-HMS grid files.""" + def __init__(self, path: str, client=None, bucket=None): if not path.endswith(".grid"): raise ValueError(f"invalid extension for Grid file: {path}") @@ -883,12 +975,14 @@ def __repr__(self): @property @lru_cache def name(self): + """Return grid manager name.""" lines = self.content.splitlines() if not lines[0].startswith("Grid Manager: "): raise ValueError(f"unexpected first line: {lines[0]}") return lines[0][len("Grid Manager: ") :] def scan_for_grids(self): + """Scan for all grids.""" lines = self.content.splitlines() for i, line in enumerate(lines): if line.startswith("Grid: "): @@ -898,6 +992,7 @@ def scan_for_grids(self): self.elements[f"{name}+{grid_type}"] = Grid(f"{name}+{grid_type}", attrs) def remove_grid_type(self, grid_types: list[str]): + """Remove given grid types.""" new_elements = ElementSet() for name, g in self.elements.elements.items(): if g.attrs["Grid Type"] not in grid_types: @@ -907,10 +1002,13 @@ def remove_grid_type(self, grid_types: list[str]): @property @lru_cache def grids(self): + """Return grid elements.""" return self.elements.get_element_type("Grid") class GageFile(BaseTextFile): + """Class for parsing HEC-HMS gage files.""" + def __init__(self, path: str, client=None, bucket=None): if not path.endswith(".gage"): raise ValueError(f"invalid extension for Gage file: {path}") @@ -925,12 +1023,14 @@ def __repr__(self): @property @lru_cache def name(self): + """Return gage manager name.""" lines = self.content.splitlines() if not lines[0].startswith("Gage Manager: "): raise ValueError(f"unexpected first line: {lines[0]}") return lines[0][len("Gage Manager: ") :] def scan_for_gages(self): + """Search for all gages.""" 
lines = self.content.splitlines() for i, line in enumerate(lines): if line.startswith("Gage: "): @@ -941,4 +1041,5 @@ def scan_for_gages(self): @property @lru_cache def gages(self): + """Return gage elements.""" return self.elements.get_element_type("Gage") diff --git a/hecstac/hms/s3_utils.py b/hecstac/hms/s3_utils.py index 023c98f..1b41943 100644 --- a/hecstac/hms/s3_utils.py +++ b/hecstac/hms/s3_utils.py @@ -1,3 +1,5 @@ +"""AWS S3 utility functions.""" + import os from pathlib import Path @@ -29,6 +31,7 @@ def file_location(file: str | Path) -> str: def list_keys(s3_client, bucket, prefix, suffix=""): + """List s3 keys in a given bucket and prefix.""" keys = [] kwargs = {"Bucket": bucket, "Prefix": prefix} while True: @@ -61,12 +64,14 @@ def get_metadata(key: str) -> str: def split_s3_key(s3_path: str) -> tuple[str, str]: """ - This function splits an S3 path into the bucket name and the key. + Split an S3 path into the bucket name and the key. - Parameters: + Parameters + ---------- s3_path (str): The S3 path to split. It should be in the format 's3://bucket/key'. - Returns: + Returns + ------- tuple: A tuple containing the bucket name and the key. If the S3 path does not contain a key, the second element of the tuple will be None. @@ -85,6 +90,7 @@ def split_s3_key(s3_path: str) -> tuple[str, str]: def init_s3_resources(minio_mode: bool = False): + """Initialize s3 resources.""" if minio_mode: session = boto3.Session( aws_access_key_id=os.environ.get("MINIO_ACCESS_KEY_ID"), @@ -110,12 +116,14 @@ def init_s3_resources(minio_mode: bool = False): def get_basic_object_metadata(obj: ObjectSummary) -> dict: """ - This function retrieves basic metadata of an AWS S3 object. + Retrieve basic metadata of an AWS S3 object. - Parameters: + Parameters + ---------- obj (ObjectSummary): The AWS S3 object. - Returns: + Returns + ------- dict: A dictionary with the size, ETag, last modified date, storage platform, region, and storage tier of the object.
""" diff --git a/hecstac/hms/utils.py b/hecstac/hms/utils.py index 89b81c5..9892b35 100644 --- a/hecstac/hms/utils.py +++ b/hecstac/hms/utils.py @@ -1,3 +1,5 @@ +"""HEC-HMS STAC Item utility functions.""" + from __future__ import annotations import re @@ -24,6 +26,7 @@ def add_no_duplicate(d: dict, key, val): def get_lines_until_end_sentinel(lines: list[str]) -> list[str]: + """Retrieve all lines until the End point.""" lines_found = [] for line in lines: if line in ["End:", "End Computation Point: "]: @@ -35,6 +38,7 @@ def get_lines_until_end_sentinel(lines: list[str]) -> list[str]: def handle_special_cases(key, val): + """Handle special cases.""" if key == "Groundwater Layer": key = key + val elif "Groundwater Layer" in key: @@ -47,7 +51,7 @@ def handle_special_cases(key, val): def parse_attrs(lines: list[str]) -> OrderedDict: - """Scan the lines down to the first instance of 'End:' and return dict containing all of the colon-separated keyval pair""" + """Scan the lines down to the first instance of 'End:' and return dict containing all of the colon-separated keyval pairs.""" attrs = {} for line in lines: if line == "End:": @@ -89,6 +93,7 @@ def parse_attrs(lines: list[str]) -> OrderedDict: def remove_holes(geom): + """Remove holes in the geometry.""" if isinstance(geom, Polygon): return Polygon(geom.exterior) elif isinstance(geom, MultiPolygon): @@ -105,6 +110,7 @@ def remove_holes(geom): def attrs2list(attrs: OrderedDict) -> list[str]: + """Convert dictionary of attributes to a list.""" content = [] for key, val in attrs.items(): if not isinstance(val, str): @@ -130,7 +136,7 @@ def attrs2list(attrs: OrderedDict) -> list[str]: def insert_after_key(dic: dict, insert_key: str, new_key: str, new_val: str) -> OrderedDict: - # recreate the dictionary to insert key-val after the occurance of the insert_key if key-val doesn't exist yet in the dictionary + """Recreate the dictionary to insert key-val after the occurrence of the insert_key if key-val doesn't exist yet in
the dictionary.""" new_dic = {} for key, val in dic.items(): if key == new_key: @@ -159,41 +165,49 @@ def search_contents(lines: list, search_string: str, token: str = "=", expect_on class StacPathManager: - """ - Builds consistent paths for STAC items and collections assuming a top level local catalog - """ + """Build consistent paths for STAC items and collections assuming a top level local catalog.""" def __init__(self, local_catalog_dir: str): self._catalog_dir = local_catalog_dir @property def catalog_dir(self): + """Return the catalog directory.""" return self._catalog_dir @property def catalog_file(self): + """Return the catalog file path.""" return f"{self._catalog_dir}/catalog.json" def catalog_item(self, item_id: str) -> str: + """Return the catalog item file path.""" return f"{self.catalog_dir}/{item_id}/{item_id}.json" def catalog_asset(self, item_id: str, asset_dir: str = "hydro_domains") -> str: + """Return the catalog asset file path.""" return f"{self.catalog_dir}/{asset_dir}/{item_id}.json" def collection_file(self, collection_id: str) -> str: + """Return the collection file path.""" return f"{self.catalog_dir}/{collection_id}/collection.json" def collection_dir(self, collection_id: str) -> str: + """Return the collection directory.""" return f"{self.catalog_dir}/{collection_id}" def collection_asset(self, collection_id: str, filename: str) -> str: + """Return the collection asset filepath.""" return f"{self.catalog_dir}/{collection_id}/{filename}" def collection_item_dir(self, collection_id: str, item_id: str) -> str: + """Return the collection item directory.""" return f"{self.catalog_dir}/{collection_id}/{item_id}" def collection_item(self, collection_id: str, item_id: str) -> str: + """Return the collection item filepath.""" return f"{self.catalog_dir}/{collection_id}/{item_id}/{item_id}.json" def collection_item_asset(self, collection_id: str, item_id: str, filename: str) -> str: + """Return the collection item asset filepath.""" return 
f"{self.catalog_dir}/{collection_id}/{item_id}/{filename}" diff --git a/hecstac/ras/__init__.py b/hecstac/ras/__init__.py index e69de29..19b0aea 100644 --- a/hecstac/ras/__init__.py +++ b/hecstac/ras/__init__.py @@ -0,0 +1 @@ +"""HEC-RAS STAC Item module.""" diff --git a/hecstac/ras/assets.py b/hecstac/ras/assets.py index dca5732..8e43702 100644 --- a/hecstac/ras/assets.py +++ b/hecstac/ras/assets.py @@ -1,14 +1,20 @@ +"""Asset instances of HEC-RAS model files.""" + import logging import os import re +from functools import lru_cache import contextily as ctx import geopandas as gpd import matplotlib.pyplot as plt from matplotlib.lines import Line2D from pystac import MediaType +from shapely import MultiPolygon, Polygon from hecstac.common.asset_factory import GenericAsset +from hecstac.common.geometry import reproject_to_wgs84 +from hecstac.ras.consts import NULL_GEOMETRY from hecstac.ras.parser import ( GeometryFile, GeometryHDFFile, @@ -19,12 +25,16 @@ SteadyFlowFile, UnsteadyFlowFile, ) +from hecstac.ras.utils import is_ras_prj + +logger = logging.getLogger(__name__) CURRENT_PLAN = "ras:current_plan" PLAN_SHORT_ID = "ras:short_plan_id" TITLE = "ras:title" UNITS = "ras:units" VERSION = "ras:version" +PROJECTION = "proj:wkt" PLAN_FILE = "ras:plan_file" GEOMETRY_FILE = "ras:geometry_file" @@ -104,263 +114,311 @@ METEOROLOGY_UNITS = "ras:meteorology_units" -class ProjectAsset(GenericAsset): - """HEC-RAS Project file asset.""" +# class PrjAsset(GenericAsset): +# """A helper class to delegate .prj files into RAS project or Projection file classes.""" - regex_parse_str = r".+\.prj$" +# regex_parse_str = r".+\.prj$" - def __init__(self, href: str, *args, **kwargs): - roles = ["project-file", "ras-file"] - description = kwargs.get("description", "The HEC-RAS project file.") - - super().__init__(href, roles=roles, description=description, *args, **kwargs) - - self.href = href - self.pf = ProjectFile(self.href) - self.extra_fields = { - key: value - for key, value in { - 
CURRENT_PLAN: self.pf.plan_current, - PLAN_FILES: self.pf.plan_files, - GEOMETRY_FILES: self.pf.geometry_files, - STEADY_FLOW_FILES: self.pf.steady_flow_files, - QUASI_UNSTEADY_FLOW_FILES: self.pf.quasi_unsteady_flow_files, - UNSTEADY_FLOW_FILES: self.pf.unsteady_flow_files, - }.items() - if value - } - - -class PlanAsset(GenericAsset): - """HEC-RAS Plan file asset.""" +# def __new__(cls, *args, **kwargs): +# """Delegate to Project or Projection asset.""" +# if cls is PrjAsset: # Ensuring we don't instantiate Parent directly +# href = kwargs.get("href") or args[0] +# is_ras = is_ras_prj(href) +# if is_ras: +# return ProjectAsset(*args, **kwargs) +# else: +# return ProjectionAsset(*args, **kwargs) +# return super().__new__(cls) - regex_parse_str = r".+\.p\d{2}$" - def __init__(self, href: str, **kwargs): - roles = kwargs.get("roles", []) + ["plan-file", "ras-file"] - description = kwargs.get( - "description", - "The plan file which contains a list of associated input files and all simulation options.", - ) +# class ProjectionAsset(GenericAsset): +# """A geospatial projection file.""" + +# __roles__ = ["projection-file", MediaType.TEXT] +# __description__ = "A geospatial projection file." +# __file_class__ = None - super().__init__(href, roles=roles, description=description, **kwargs) - self.href = href - self.planf = PlanFile(self.href) - self.extra_fields = { - key: value - for key, value in { - TITLE: self.planf.plan_title, - VERSION: self.planf.plan_version, - GEOMETRY_FILE: self.planf.geometry_file, - FLOW_FILE: self.planf.flow_file, - BREACH_LOCATIONS: self.planf.breach_locations, - }.items() - if value - } +class ProjectAsset(GenericAsset[ProjectFile]): + """HEC-RAS Project file asset.""" + regex_parse_str = r".+\.prj$" + __roles__ = ["project-file", "ras-file"] + __description__ = "The HEC-RAS project file." 
+ __file_class__ = ProjectFile + + @GenericAsset.extra_fields.getter + def extra_fields(self) -> dict: + """Return extra fields with added dynamic keys/values.""" + self._extra_fields[CURRENT_PLAN] = self.file.plan_current + self._extra_fields[PLAN_FILES] = self.file.plan_files + self._extra_fields[GEOMETRY_FILES] = self.file.geometry_files + self._extra_fields[STEADY_FLOW_FILES] = self.file.steady_flow_files + self._extra_fields[QUASI_UNSTEADY_FLOW_FILES] = self.file.quasi_unsteady_flow_files + self._extra_fields[UNSTEADY_FLOW_FILES] = self.file.unsteady_flow_files + return self._extra_fields + + +class PlanAsset(GenericAsset[PlanFile]): + """HEC-RAS Plan file asset.""" -class GeometryAsset(GenericAsset): + regex_parse_str = r".+\.p\d{2}$" + __roles__ = ["plan-file", "ras-file"] + __description__ = "The plan file which contains a list of associated input files and all simulation options." + __file_class__ = PlanFile + + @GenericAsset.extra_fields.getter + def extra_fields(self) -> dict: + """Return extra fields with added dynamic keys/values.""" + self._extra_fields[TITLE] = self.file.plan_title + self._extra_fields[VERSION] = self.file.plan_version + self._extra_fields[GEOMETRY_FILE] = self.file.geometry_file + self._extra_fields[FLOW_FILE] = self.file.flow_file + self._extra_fields[BREACH_LOCATIONS] = self.file.breach_locations + return self._extra_fields + + +class GeometryAsset(GenericAsset[GeometryFile]): """HEC-RAS Geometry file asset.""" regex_parse_str = r".+\.g\d{2}$" + __roles__ = ["geometry-file", "ras-file"] + __description__ = ( + "The geometry file which contains cross-sectional, 2D, hydraulic structures, and other geometric data." 
+ ) + __file_class__ = GeometryFile PROPERTIES_WITH_GDF = ["reaches", "junctions", "cross_sections", "structures"] - def __init__(self, href: str, crs: str = None, **kwargs): - # self.pyproj_crs = self.validate_crs(crs) - roles = kwargs.get("roles", []) + ["geometry-file", "ras-file"] - description = kwargs.get( - "description", - "The geometry file which contains cross-sectional, 2D, hydraulic structures, and other geometric data", - ) + @GenericAsset.extra_fields.getter + def extra_fields(self) -> dict: + """Return extra fields with added dynamic keys/values.""" + self._extra_fields[TITLE] = self.file.geom_title + self._extra_fields[VERSION] = self.file.geom_version + self._extra_fields[HAS_1D] = self.file.has_1d + self._extra_fields[HAS_2D] = self.file.has_2d + self._extra_fields[RIVERS] = list(self.file.rivers.keys()) + self._extra_fields[REACHES] = list(self.file.reaches.keys()) + self._extra_fields[JUNCTIONS] = list(self.file.junctions.keys()) + self._extra_fields[CROSS_SECTIONS] = list(self.file.cross_sections.keys()) + self._extra_fields[STRUCTURES] = list(self.file.structures.keys()) + self._extra_fields[STORAGE_AREAS] = list(self.file.storage_areas.keys()) + self._extra_fields[CONNECTIONS] = list(self.file.connections.keys()) + return self._extra_fields + + @property + @lru_cache + def geometry(self) -> Polygon | MultiPolygon: + """Retrieves concave hull of cross-sections.""" + return self.file.concave_hull + + @property + @lru_cache + def has_1d(self) -> bool: + """Check if geometry has any river centerlines.""" + return self.file.has_1d + + @property + @lru_cache + def has_2d(self) -> bool: + """Check if geometry has any 2D areas.""" + return self.file.has_2d + + @property + @lru_cache + def geometry_wgs84(self) -> Polygon | MultiPolygon: + """Reproject geometry to wgs84.""" + # TODO: this could be generalized to be a function that takes argument for CRS. 
+ if self.crs is None: + return NULL_GEOMETRY + else: + return reproject_to_wgs84(self.geometry, self.crs) - super().__init__(href, roles=roles, description=description, **kwargs) - - self.href = href - self.geomf = GeometryFile(self.href) - self.extra_fields = { - key: value - for key, value in { - TITLE: self.geomf.geom_title, - VERSION: self.geomf.geom_version, - HAS_1D: self.geomf.has_1d, - HAS_2D: self.geomf.has_2d, - RIVERS: self.geomf.rivers, - REACHES: self.geomf.reaches, - JUNCTIONS: self.geomf.junctions, - CROSS_SECTIONS: self.geomf.cross_sections, - STRUCTURES: self.geomf.structures, - # STORAGE_AREAS: self.geomf.storage_areas, #TODO: fix this - # CONNECTIONS: self.geomf.connections,#TODO: fix this - # BREACH_LOCATIONS: self.planf.breach_locations, - }.items() - if value - } - - -class SteadyFlowAsset(GenericAsset): + +class SteadyFlowAsset(GenericAsset[SteadyFlowFile]): """HEC-RAS Steady Flow file asset.""" regex_parse_str = r".+\.f\d{2}$" + __roles__ = ["steady-flow-file", "ras-file"] + __description__ = "Steady Flow file which contains profile information, flow data, and boundary conditions." 
+ __file_class__ = SteadyFlowFile - def __init__(self, href: str, **kwargs): - roles = kwargs.get("roles", []) + ["steady-flow-file", "ras-file"] - description = kwargs.get( - "description", - "Steady Flow file which contains profile information, flow data, and boundary conditions.", - ) - - super().__init__(href, roles=roles, description=description, **kwargs) + @GenericAsset.extra_fields.getter + def extra_fields(self) -> dict: + """Return extra fields with added dynamic keys/values.""" + self._extra_fields[TITLE] = self.file.flow_title + self._extra_fields[N_PROFILES] = self.file.n_profiles + return self._extra_fields - self.href = href - self.flowf = SteadyFlowFile(self.href) - self.extra_fields = { - key: value - for key, value in { - TITLE: self.flowf.geom_title, - N_PROFILES: self.flowf.n_profiles, - }.items() - if value - } - -class QuasiUnsteadyFlowAsset(GenericAsset): +class QuasiUnsteadyFlowAsset(GenericAsset[QuasiUnsteadyFlowFile]): """HEC-RAS Quasi-Unsteady Flow file asset.""" # TODO: implement this class regex_parse_str = r".+\.q\d{2}$" + __roles__ = ["quasi-unsteady-flow-file", "ras-file"] + __description__ = "Quasi-Unsteady Flow file." 
+ __file_class__ = QuasiUnsteadyFlowFile - def __init__(self, href: str, **kwargs): - roles = kwargs.get("roles", []) + ["quasi-unsteady-flow-file", "ras-file"] - description = kwargs.get("description", "Quasi-Unsteady Flow file.") - - super().__init__(href, roles=roles, description=description, **kwargs) - - self.href = href - self.flowf = QuasiUnsteadyFlowFile(self.href) - self.extra_fields = { - key: value - for key, value in { - TITLE: self.flowf.flow_title, - }.items() - if value - } + @GenericAsset.extra_fields.getter + def extra_fields(self) -> dict: + """Return extra fields with added dynamic keys/values.""" + self._extra_fields[TITLE] = self.file.flow_title + return self._extra_fields -class UnsteadyFlowAsset(GenericAsset): +class UnsteadyFlowAsset(GenericAsset[UnsteadyFlowFile]): """HEC-RAS Unsteady Flow file asset.""" regex_parse_str = r".+\.u\d{2}$" + __roles__ = ["unsteady-flow-file", "ras-file"] + __description__ = "The unsteady file contains hydrographs, initial conditions, and any flow options." 
+ __file_class__ = UnsteadyFlowFile - def __init__(self, href: str, **kwargs): - roles = kwargs.get("roles", []) + ["unsteady-flow-file", "ras-file"] - description = kwargs.get( - "description", - "The unsteady file contains hydrographs, initial conditions, and any flow options.", - ) - - super().__init__(href, roles=roles, description=description, **kwargs) + @GenericAsset.extra_fields.getter + def extra_fields(self) -> dict: + """Return extra fields with added dynamic keys/values.""" + self._extra_fields[TITLE] = self.file.flow_title + self._extra_fields[BOUNDARY_LOCATIONS] = self.file.boundary_locations + self._extra_fields[REFERENCE_LINES] = self.file.reference_lines + return self._extra_fields - self.href = href - self.flowf = UnsteadyFlowFile(self.href) - self.extra_fields = { - key: value - for key, value in { - TITLE: self.flowf.flow_title, - BOUNDARY_LOCATIONS: self.flowf.boundary_locations, - REFERENCE_LINES: self.flowf.reference_lines, - }.items() - if value - } - -class PlanHdfAsset(GenericAsset): +class PlanHdfAsset(GenericAsset[PlanHDFFile]): """HEC-RAS Plan HDF file asset.""" regex_parse_str = r".+\.p\d{2}\.hdf$" + __roles__ = ["ras-file"] + __description__ = "The HEC-RAS plan HDF file." 
+ __file_class__ = PlanHDFFile + + @GenericAsset.extra_fields.getter + def extra_fields(self) -> dict: + """Return extra fields with added dynamic keys/values.""" + self._extra_fields[VERSION] = self.file.file_version + self._extra_fields[UNITS] = self.file.units_system + self._extra_fields[PLAN_INFORMATION_BASE_OUTPUT_INTERVAL] = self.file.plan_information_base_output_interval + self._extra_fields[PLAN_INFORMATION_COMPUTATION_TIME_STEP_BASE] = ( + self.file.plan_information_computation_time_step_base + ) + self._extra_fields[PLAN_INFORMATION_FLOW_FILENAME] = self.file.plan_information_flow_filename + self._extra_fields[PLAN_INFORMATION_GEOMETRY_FILENAME] = self.file.plan_information_geometry_filename + self._extra_fields[PLAN_INFORMATION_PLAN_FILENAME] = self.file.plan_information_plan_filename + self._extra_fields[PLAN_INFORMATION_PLAN_NAME] = self.file.plan_information_plan_name + self._extra_fields[PLAN_INFORMATION_PROJECT_FILENAME] = self.file.plan_information_project_filename + self._extra_fields[PLAN_INFORMATION_PROJECT_TITLE] = self.file.plan_information_project_title + self._extra_fields[PLAN_INFORMATION_SIMULATION_END_TIME] = self.file.plan_information_simulation_end_time + self._extra_fields[PLAN_INFORMATION_SIMULATION_START_TIME] = self.file.plan_information_simulation_start_time + self._extra_fields[PLAN_PARAMETERS_1D_FLOW_TOLERANCE] = self.file.plan_parameters_1d_flow_tolerance + self._extra_fields[PLAN_PARAMETERS_1D_MAXIMUM_ITERATIONS] = self.file.plan_parameters_1d_maximum_iterations + self._extra_fields[PLAN_PARAMETERS_1D_MAXIMUM_ITERATIONS_WITHOUT_IMPROVEMENT] = ( + self.file.plan_parameters_1d_maximum_iterations_without_improvement + ) + self._extra_fields[PLAN_PARAMETERS_1D_MAXIMUM_WATER_SURFACE_ERROR_TO_ABORT] = ( + self.file.plan_parameters_1d_maximum_water_surface_error_to_abort + ) + self._extra_fields[PLAN_PARAMETERS_1D_STORAGE_AREA_ELEVATION_TOLERANCE] = ( + self.file.plan_parameters_1d_storage_area_elevation_tolerance + ) + 
self._extra_fields[PLAN_PARAMETERS_1D_THETA] = self.file.plan_parameters_1d_theta + self._extra_fields[PLAN_PARAMETERS_1D_THETA_WARMUP] = self.file.plan_parameters_1d_theta_warmup + self._extra_fields[PLAN_PARAMETERS_1D_WATER_SURFACE_ELEVATION_TOLERANCE] = ( + self.file.plan_parameters_1d_water_surface_elevation_tolerance + ) + self._extra_fields[PLAN_PARAMETERS_1D2D_GATE_FLOW_SUBMERGENCE_DECAY_EXPONENT] = ( + self.file.plan_parameters_1d2d_gate_flow_submergence_decay_exponent + ) + self._extra_fields[PLAN_PARAMETERS_1D2D_IS_STABLITY_FACTOR] = self.file.plan_parameters_1d2d_is_stablity_factor + self._extra_fields[PLAN_PARAMETERS_1D2D_LS_STABLITY_FACTOR] = self.file.plan_parameters_1d2d_ls_stablity_factor + self._extra_fields[PLAN_PARAMETERS_1D2D_MAXIMUM_NUMBER_OF_TIME_SLICES] = ( + self.file.plan_parameters_1d2d_maximum_number_of_time_slices + ) + self._extra_fields[PLAN_PARAMETERS_1D2D_MINIMUM_TIME_STEP_FOR_SLICINGHOURS] = ( + self.file.plan_parameters_1d2d_minimum_time_step_for_slicinghours + ) + self._extra_fields[PLAN_PARAMETERS_1D2D_NUMBER_OF_WARMUP_STEPS] = ( + self.file.plan_parameters_1d2d_number_of_warmup_steps + ) + self._extra_fields[PLAN_PARAMETERS_1D2D_WARMUP_TIME_STEP_HOURS] = ( + self.file.plan_parameters_1d2d_warmup_time_step_hours + ) + self._extra_fields[PLAN_PARAMETERS_1D2D_WEIR_FLOW_SUBMERGENCE_DECAY_EXPONENT] = ( + self.file.plan_parameters_1d2d_weir_flow_submergence_decay_exponent + ) + self._extra_fields[PLAN_PARAMETERS_1D2D_MAXITER] = self.file.plan_parameters_1d2d_maxiter + self._extra_fields[PLAN_PARAMETERS_2D_EQUATION_SET] = self.file.plan_parameters_2d_equation_set + self._extra_fields[PLAN_PARAMETERS_2D_NAMES] = self.file.plan_parameters_2d_names + self._extra_fields[PLAN_PARAMETERS_2D_VOLUME_TOLERANCE] = self.file.plan_parameters_2d_volume_tolerance + self._extra_fields[PLAN_PARAMETERS_2D_WATER_SURFACE_TOLERANCE] = ( + self.file.plan_parameters_2d_water_surface_tolerance + ) + self._extra_fields[METEOROLOGY_DSS_FILENAME] = 
self.file.meteorology_dss_filename + self._extra_fields[METEOROLOGY_DSS_PATHNAME] = self.file.meteorology_dss_pathname + self._extra_fields[METEOROLOGY_DATA_TYPE] = self.file.meteorology_data_type + self._extra_fields[METEOROLOGY_MODE] = self.file.meteorology_mode + self._extra_fields[METEOROLOGY_RASTER_CELLSIZE] = self.file.meteorology_raster_cellsize + self._extra_fields[METEOROLOGY_SOURCE] = self.file.meteorology_source + self._extra_fields[METEOROLOGY_UNITS] = self.file.meteorology_units + return self._extra_fields + - def __init__(self, href: str, **kwargs): - roles = kwargs.get("roles", []) + ["ras-file"] - description = kwargs.get("description", "The HEC-RAS plan HDF file.") - - super().__init__(href, roles=roles, description=description, **kwargs) - - self.hdf_object = PlanHDFFile(self.href) - self.extra_fields = { - key: value - for key, value in { - VERSION: self.hdf_object.file_version, - UNITS: self.hdf_object.units_system, - PLAN_INFORMATION_BASE_OUTPUT_INTERVAL: self.hdf_object.plan_information_base_output_interval, - PLAN_INFORMATION_COMPUTATION_TIME_STEP_BASE: self.hdf_object.plan_information_computation_time_step_base, - PLAN_INFORMATION_FLOW_FILENAME: self.hdf_object.plan_information_flow_filename, - PLAN_INFORMATION_GEOMETRY_FILENAME: self.hdf_object.plan_information_geometry_filename, - PLAN_INFORMATION_PLAN_FILENAME: self.hdf_object.plan_information_plan_filename, - PLAN_INFORMATION_PLAN_NAME: self.hdf_object.plan_information_plan_name, - PLAN_INFORMATION_PROJECT_FILENAME: self.hdf_object.plan_information_project_filename, - PLAN_INFORMATION_PROJECT_TITLE: self.hdf_object.plan_information_project_title, - PLAN_INFORMATION_SIMULATION_END_TIME: self.hdf_object.plan_information_simulation_end_time, - PLAN_INFORMATION_SIMULATION_START_TIME: self.hdf_object.plan_information_simulation_start_time, - PLAN_PARAMETERS_1D_FLOW_TOLERANCE: self.hdf_object.plan_parameters_1d_flow_tolerance, - PLAN_PARAMETERS_1D_MAXIMUM_ITERATIONS: 
self.hdf_object.plan_parameters_1d_maximum_iterations, - PLAN_PARAMETERS_1D_MAXIMUM_ITERATIONS_WITHOUT_IMPROVEMENT: self.hdf_object.plan_parameters_1d_maximum_iterations_without_improvement, - PLAN_PARAMETERS_1D_MAXIMUM_WATER_SURFACE_ERROR_TO_ABORT: self.hdf_object.plan_parameters_1d_maximum_water_surface_error_to_abort, - PLAN_PARAMETERS_1D_STORAGE_AREA_ELEVATION_TOLERANCE: self.hdf_object.plan_parameters_1d_storage_area_elevation_tolerance, - PLAN_PARAMETERS_1D_THETA: self.hdf_object.plan_parameters_1d_theta, - PLAN_PARAMETERS_1D_THETA_WARMUP: self.hdf_object.plan_parameters_1d_theta_warmup, - PLAN_PARAMETERS_1D_WATER_SURFACE_ELEVATION_TOLERANCE: self.hdf_object.plan_parameters_1d_water_surface_elevation_tolerance, - PLAN_PARAMETERS_1D2D_GATE_FLOW_SUBMERGENCE_DECAY_EXPONENT: self.hdf_object.plan_parameters_1d2d_gate_flow_submergence_decay_exponent, - PLAN_PARAMETERS_1D2D_IS_STABLITY_FACTOR: self.hdf_object.plan_parameters_1d2d_is_stablity_factor, - PLAN_PARAMETERS_1D2D_LS_STABLITY_FACTOR: self.hdf_object.plan_parameters_1d2d_ls_stablity_factor, - PLAN_PARAMETERS_1D2D_MAXIMUM_NUMBER_OF_TIME_SLICES: self.hdf_object.plan_parameters_1d2d_maximum_number_of_time_slices, - PLAN_PARAMETERS_1D2D_MINIMUM_TIME_STEP_FOR_SLICINGHOURS: self.hdf_object.plan_parameters_1d2d_minimum_time_step_for_slicinghours, - PLAN_PARAMETERS_1D2D_NUMBER_OF_WARMUP_STEPS: self.hdf_object.plan_parameters_1d2d_number_of_warmup_steps, - PLAN_PARAMETERS_1D2D_WARMUP_TIME_STEP_HOURS: self.hdf_object.plan_parameters_1d2d_warmup_time_step_hours, - PLAN_PARAMETERS_1D2D_WEIR_FLOW_SUBMERGENCE_DECAY_EXPONENT: self.hdf_object.plan_parameters_1d2d_weir_flow_submergence_decay_exponent, - PLAN_PARAMETERS_1D2D_MAXITER: self.hdf_object.plan_parameters_1d2d_maxiter, - PLAN_PARAMETERS_2D_EQUATION_SET: self.hdf_object.plan_parameters_2d_equation_set, - PLAN_PARAMETERS_2D_NAMES: self.hdf_object.plan_parameters_2d_names, - PLAN_PARAMETERS_2D_VOLUME_TOLERANCE: self.hdf_object.plan_parameters_2d_volume_tolerance, - 
PLAN_PARAMETERS_2D_WATER_SURFACE_TOLERANCE: self.hdf_object.plan_parameters_2d_water_surface_tolerance, - METEOROLOGY_DSS_FILENAME: self.hdf_object.meteorology_dss_filename, - METEOROLOGY_DSS_PATHNAME: self.hdf_object.meteorology_dss_pathname, - METEOROLOGY_DATA_TYPE: self.hdf_object.meteorology_data_type, - METEOROLOGY_MODE: self.hdf_object.meteorology_mode, - METEOROLOGY_RASTER_CELLSIZE: self.hdf_object.meteorology_raster_cellsize, - METEOROLOGY_SOURCE: self.hdf_object.meteorology_source, - METEOROLOGY_UNITS: self.hdf_object.meteorology_units, - }.items() - if value - } - - -class GeometryHdfAsset(GenericAsset): +class GeometryHdfAsset(GenericAsset[GeometryHDFFile]): """HEC-RAS Geometry HDF file asset.""" regex_parse_str = r".+\.g\d{2}\.hdf$" - - def __init__(self, href: str, **kwargs): - roles = kwargs.get("roles", []) + ["geometry-hdf-file"] - description = kwargs.get("description", "The HEC-RAS geometry HDF file.") - - super().__init__(href, roles=roles, description=description, **kwargs) - - self.hdf_object = GeometryHDFFile(self.href) - self.extra_fields = { - key: value - for key, value in { - VERSION: self.hdf_object.file_version, - UNITS: self.hdf_object.units_system, - # REFERENCE_LINES: self.hdf_object.reference_lines,#TODO: fix this - }.items() - if value - } + __roles__ = ["geometry-hdf-file"] + __description__ = "The HEC-RAS geometry HDF file." 
+ __file_class__ = GeometryHDFFile + + @GenericAsset.extra_fields.getter + def extra_fields(self) -> dict: + """Return extra fields with added dynamic keys/values.""" + self._extra_fields[VERSION] = self.file.file_version + self._extra_fields[UNITS] = self.file.units_system + self._extra_fields[REFERENCE_LINES] = self.reference_lines + return self._extra_fields + + @property + @lru_cache + def reference_lines(self) -> list[gpd.GeoDataFrame] | None: + """Docstring.""" # TODO: fill out + if self.file.reference_lines is not None and not self.file.reference_lines.empty: + return list(self.file.reference_lines["refln_name"]) + + @property + @lru_cache + def has_2d(self) -> bool: + """Check if the geometry asset has 2d geometry.""" + try: + if self.file.mesh_areas(): + return True + except ValueError: + return False + + @property + @lru_cache + def has_1d(self) -> bool: + """Check if the geometry asset has 1d geometry.""" + return False # TODO: implement + + @property + @lru_cache + def geometry(self) -> Polygon | MultiPolygon: + """Retrieve the mesh areas of the geometry HDF file.""" + return self.file.mesh_areas(self.crs) + + @property + @lru_cache + def geometry_wgs84(self) -> Polygon | MultiPolygon: + """Reproject geometry to wgs84.""" + # TODO: this could be generalized to be a function that takes argument for CRS. + if self.crs is None: + return NULL_GEOMETRY + else: + return reproject_to_wgs84(self.geometry, self.crs) def _plot_mesh_areas(self, ax, mesh_polygons: gpd.GeoDataFrame) -> list[Line2D]: - """ - Plots mesh areas on the given axes. - """ + """Plot mesh areas on the given axes.""" mesh_polygons.plot( ax=ax, edgecolor="silver", @@ -382,9 +440,7 @@ def _plot_mesh_areas(self, ax, mesh_polygons: gpd.GeoDataFrame) -> list[Line2D]: return legend_handle def _plot_breaklines(self, ax, breaklines: gpd.GeoDataFrame) -> list[Line2D]: - """ - Plots breaklines on the given axes.
- """ + """Plot breaklines on the given axes.""" breaklines.plot(ax=ax, edgecolor="red", linestyle="-", alpha=0.3, label="Breaklines") legend_handle = [ Line2D( @@ -400,9 +456,7 @@ def _plot_breaklines(self, ax, breaklines: gpd.GeoDataFrame) -> list[Line2D]: return legend_handle def _plot_bc_lines(self, ax, bc_lines: gpd.GeoDataFrame) -> list[Line2D]: - """ - Plots boundary condition lines on the given axes. - """ + """Plot boundary condition lines on the given axes.""" legend_handles = [ Line2D([0], [0], color="none", linestyle="None", label="BC Lines"), ] @@ -432,99 +486,60 @@ def _plot_bc_lines(self, ax, bc_lines: gpd.GeoDataFrame) -> list[Line2D]: def _add_thumbnail_asset(self, filepath: str) -> None: """Add the thumbnail image as an asset with a relative href.""" + if not filepath.startswith("s3://") and not os.path.exists(filepath): + raise FileNotFoundError(f"Thumbnail file not found: {filepath}") - filename = os.path.basename(filepath) - - if filepath.startswith("s3://"): - media_type = "image/png" - else: - if not os.path.exists(filepath): - raise FileNotFoundError(f"Thumbnail file not found: {filepath}") - media_type = "image/png" - - return GenericAsset( - href=filename, - title="Model Thumbnail", + asset = GenericAsset( + href=filepath, + title=filepath.split("/")[-1], description="Thumbnail image for the model", - media_type=media_type, - roles=["thumbnail"], - extra_fields=None, ) + asset.roles = ["thumbnail", "image/png"] + return asset def thumbnail( self, - add_asset: bool, - write: bool, layers: list, title: str = "Model_Thumbnail", - add_usgs_properties: bool = False, - crs="EPSG:4326", thumbnail_dest: str = None, ): - """Create a thumbnail figure for each geometry hdf file, including - various geospatial layers such as USGS gages, mesh areas, - breaklines, and boundary condition (BC) lines. If `add_asset` or `write` - is `True`, the function saves the thumbnail to a file and optionally - adds it as an asset. 
+ """ + Create a thumbnail figure for a geometry hdf file, including various geospatial layers such as USGS gages, mesh areas, breaklines, and boundary condition (BC) lines. Parameters ---------- - add_asset : bool - Whether to add the thumbnail as an asset in the asset dictionary. If true then it also writes the thumbnail to a file. - write : bool - Whether to save the thumbnail image to a file. layers : list A list of model layers to include in the thumbnail plot. Options include "usgs_gages", "mesh_areas", "breaklines", and "bc_lines". title : str, optional Title of the figure, by default "Model Thumbnail". - add_usgs_properties : bool, optional - If usgs_gages is included in layers, adds USGS metadata to the STAC item properties. Defaults to false. + thumbnail_dest : str, optional + Directory for created thumbnails. If None then thumbnails will be exported to same level as the item. """ - fig, ax = plt.subplots(figsize=(12, 12)) legend_handles = [] for layer in layers: try: - # if layer == "usgs_gages": - # if add_usgs_properties: - # gages_gdf = self.get_usgs_data(True, geom_asset=geom_asset) - # else: - # gages_gdf = self.get_usgs_data(False, geom_asset=geom_asset) - # gages_gdf_geo = gages_gdf.to_crs(self.crs) - # legend_handles += self._plot_usgs_gages(ax, gages_gdf_geo) - # else: - # if not hasattr(geom_asset, layer): - # raise AttributeError(f"Layer {layer} not found in {geom_asset.hdf_file}") - - # if layer == "mesh_areas": - # layer_data = geom_asset.mesh_areas(self.crs, return_gdf=True) - # else: - # layer_data = getattr(geom_asset, layer) - - # if layer_data.crs is None: - # layer_data.set_crs(self.crs, inplace=True) - # layer_data_geo = layer_data.to_crs(self.crs) - if layer == "mesh_areas": - mesh_areas_data = self.mesh_areas(crs, return_gdf=True) - legend_handles += self._plot_mesh_areas(ax, mesh_areas_data) + mesh_areas_data = self.file.mesh_cells + mesh_areas_geo = mesh_areas_data.set_crs(self.crs) + legend_handles += self._plot_mesh_areas(ax,
mesh_areas_geo) elif layer == "breaklines": - breaklines_data = self.breaklines - breaklines_data_geo = breaklines_data.to_crs(crs) + breaklines_data = self.file.breaklines + breaklines_data_geo = breaklines_data.set_crs(self.crs) legend_handles += self._plot_breaklines(ax, breaklines_data_geo) elif layer == "bc_lines": - bc_lines_data = self.bc_lines - bc_lines_data_geo = bc_lines_data.to_crs(crs) + bc_lines_data = self.file.bc_lines + bc_lines_data_geo = bc_lines_data.set_crs(self.crs) legend_handles += self._plot_bc_lines(ax, bc_lines_data_geo) except Exception as e: - logging.warning(f"Warning: Failed to process layer '{layer}' for {self.href}: {e}") + logger.warning(f"Warning: Failed to process layer '{layer}' for {self.href}: {e}") # Add OpenStreetMap basemap ctx.add_basemap( ax, - crs=crs, + crs=self.crs, source=ctx.providers.OpenStreetMap.Mapnik, alpha=0.4, ) @@ -533,366 +548,301 @@ def thumbnail( ax.set_ylabel("Latitude") ax.legend(handles=legend_handles, loc="center left", bbox_to_anchor=(1, 0.5)) - if add_asset or write: - hdf_ext = os.path.basename(self.href).split(".")[-2] - filename = f"thumbnail_{hdf_ext}.png" - base_dir = os.path.dirname(thumbnail_dest) - filepath = os.path.join(base_dir, filename) - - # if filepath.startswith("s3://"): - # img_data = io.BytesIO() - # fig.savefig(img_data, format="png", bbox_inches="tight") - # img_data.seek(0) - # save_bytes_s3(img_data, filepath) - # else: + hdf_ext = os.path.basename(self.href).split(".")[-2] + filename = f"thumbnail_{hdf_ext}.png" + base_dir = os.path.dirname(thumbnail_dest) + filepath = os.path.join(base_dir, filename) + + if filepath.startswith("s3://"): + pass + # TODO add thumbnail s3 functionality + # img_data = io.BytesIO() + # fig.savefig(img_data, format="png", bbox_inches="tight") + # img_data.seek(0) + # save_bytes_s3(img_data, filepath) + else: os.makedirs(base_dir, exist_ok=True) fig.savefig(filepath, dpi=80, bbox_inches="tight") - - if add_asset: - return 
self._add_thumbnail_asset(filepath) + return self._add_thumbnail_asset(filepath) class RunFileAsset(GenericAsset): """Run file asset for steady flow analysis.""" regex_parse_str = r".+\.r\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["run-file", "ras-file", MediaType.TEXT] - description = "Run file for steady flow analysis which contains all the necessary input data required for the RAS computational engine." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["run-file", "ras-file", MediaType.TEXT] + __description__ = "Run file for steady flow analysis which contains all the necessary input data required for the RAS computational engine." + __file_class__ = None class ComputationalLevelOutputAsset(GenericAsset): """Computational Level Output asset.""" regex_parse_str = r".+\.hyd\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["computational-level-output-file", "ras-file", MediaType.TEXT] - description = "Detailed Computational Level output file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["computational-level-output-file", "ras-file", MediaType.TEXT] + __description__ = "Detailed Computational Level output file." + __file_class__ = None class GeometricPreprocessorAsset(GenericAsset): """Geometric Pre-Processor asset.""" regex_parse_str = r".+\.c\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["geometric-preprocessor", "ras-file", MediaType.TEXT] - description = "Geometric Pre-Processor output file containing hydraulic properties, rating curves, and more." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["geometric-preprocessor", "ras-file", MediaType.TEXT] + __description__ = "Geometric Pre-Processor output file containing hydraulic properties, rating curves, and more." + __file_class__ = None # TODO: make a generic parent for these. 
class BoundaryConditionAsset(GenericAsset): """Boundary Condition asset.""" regex_parse_str = r".+\.b\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["boundary-condition-file", "ras-file", MediaType.TEXT] - description = "Boundary Condition file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["boundary-condition-file", "ras-file", MediaType.TEXT] + __description__ = "Boundary Condition file." + __file_class__ = None class UnsteadyFlowLogAsset(GenericAsset): """Unsteady Flow Log asset.""" regex_parse_str = r".+\.bco\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["unsteady-flow-log-file", "ras-file", MediaType.TEXT] - description = "Unsteady Flow Log output file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["unsteady-flow-log-file", "ras-file", MediaType.TEXT] + __description__ = "Unsteady Flow Log output file." + __file_class__ = None class SedimentDataAsset(GenericAsset): """Sediment Data asset.""" regex_parse_str = r".+\.s\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["sediment-data-file", "ras-file", MediaType.TEXT] - description = "Sediment data file containing flow data, boundary conditions, and sediment data." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["sediment-data-file", "ras-file", MediaType.TEXT] + __description__ = "Sediment data file containing flow data, boundary conditions, and sediment data." + __file_class__ = None class HydraulicDesignAsset(GenericAsset): """Hydraulic Design asset.""" regex_parse_str = r".+\.h\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["hydraulic-design-file", "ras-file", MediaType.TEXT] - description = "Hydraulic Design data file." 
- super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["hydraulic-design-file", "ras-file", MediaType.TEXT] + __description__ = "Hydraulic Design data file." + __file_class__ = None class WaterQualityAsset(GenericAsset): """Water Quality asset.""" regex_parse_str = r".+\.w\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["water-quality-file", "ras-file", MediaType.TEXT] - description = "Water Quality file containing temperature boundary conditions and meteorological data." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["water-quality-file", "ras-file", MediaType.TEXT] + __description__ = "Water Quality file containing temperature boundary conditions and meteorological data." + __file_class__ = None class SedimentTransportCapacityAsset(GenericAsset): """Sediment Transport Capacity asset.""" regex_parse_str = r".+\.SedCap\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["sediment-transport-capacity-file", "ras-file", MediaType.TEXT] - description = "Sediment Transport Capacity data." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["sediment-transport-capacity-file", "ras-file", MediaType.TEXT] + __description__ = "Sediment Transport Capacity data." + __file_class__ = None class XSOutputAsset(GenericAsset): """Cross Section Output asset.""" regex_parse_str = r".+\.SedXS\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["xs-output-file", "ras-file", MediaType.TEXT] - description = "Cross section output file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["xs-output-file", "ras-file", MediaType.TEXT] + __description__ = "Cross section output file." 
+ __file_class__ = None class XSOutputHeaderAsset(GenericAsset): """Cross Section Output Header asset.""" regex_parse_str = r".+\.SedHeadXS\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["xs-output-header-file", "ras-file", MediaType.TEXT] - description = "Header file for the cross section output." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["xs-output-header-file", "ras-file", MediaType.TEXT] + __description__ = "Header file for the cross section output." + __file_class__ = None class WaterQualityRestartAsset(GenericAsset): """Water Quality Restart asset.""" regex_parse_str = r".+\.wqrst\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["water-quality-restart-file", "ras-file", MediaType.TEXT] - description = "The water quality restart file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["water-quality-restart-file", "ras-file", MediaType.TEXT] + __description__ = "The water quality restart file." + __file_class__ = None class SedimentOutputAsset(GenericAsset): """Sediment Output asset.""" regex_parse_str = r".+\.sed$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["sediment-output-file", "ras-file", MediaType.TEXT] - description = "Detailed sediment output file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["sediment-output-file", "ras-file", MediaType.TEXT] + __description__ = "Detailed sediment output file." + __file_class__ = None class BinaryLogAsset(GenericAsset): """Binary Log asset.""" regex_parse_str = r".+\.blf$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["binary-log-file", "ras-file", MediaType.TEXT] - description = "Binary Log file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["binary-log-file", "ras-file", MediaType.TEXT] + __description__ = "Binary Log file." 
+ __file_class__ = None class DSSAsset(GenericAsset): """DSS asset.""" regex_parse_str = r".+\.dss$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["ras-dss", "ras-file", MediaType.TEXT] - description = "The DSS file contains results and other simulation information." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["ras-dss", "ras-file", MediaType.TEXT] + __description__ = "The DSS file contains results and other simulation information." + __file_class__ = None class LogAsset(GenericAsset): """Log asset.""" regex_parse_str = r".+\.log$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["ras-log", "ras-file", MediaType.TEXT] - description = "The log file contains information related to simulation processes." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["ras-log", "ras-file", MediaType.TEXT] + __description__ = "The log file contains information related to simulation processes." + __file_class__ = None class RestartAsset(GenericAsset): """Restart file asset.""" regex_parse_str = r".+\.rst$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["restart-file", "ras-file", MediaType.TEXT] - description = "Restart file for resuming simulation runs." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["restart-file", "ras-file", MediaType.TEXT] + __description__ = "Restart file for resuming simulation runs." + __file_class__ = None class SiamInputAsset(GenericAsset): """SIAM Input Data file asset.""" regex_parse_str = r".+\.SiamInput$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["siam-input-file", "ras-file", MediaType.TEXT] - description = "SIAM Input Data file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["siam-input-file", "ras-file", MediaType.TEXT] + __description__ = "SIAM Input Data file." 
+ __file_class__ = None class SiamOutputAsset(GenericAsset): """SIAM Output Data file asset.""" regex_parse_str = r".+\.SiamOutput$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["siam-output-file", "ras-file", MediaType.TEXT] - description = "SIAM Output Data file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["siam-output-file", "ras-file", MediaType.TEXT] + __description__ = "SIAM Output Data file." + __file_class__ = None class WaterQualityLogAsset(GenericAsset): """Water Quality Log file asset.""" regex_parse_str = r".+\.bco$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["water-quality-log", "ras-file", MediaType.TEXT] - description = "Water quality log file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["water-quality-log", "ras-file", MediaType.TEXT] + __description__ = "Water quality log file." + __file_class__ = None class ColorScalesAsset(GenericAsset): """Color Scales file asset.""" regex_parse_str = r".+\.color-scales$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["color-scales", "ras-file", MediaType.TEXT] - description = "File that contains the water quality color scale." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["color-scales", "ras-file", MediaType.TEXT] + __description__ = "File that contains the water quality color scale." + __file_class__ = None class ComputationalMessageAsset(GenericAsset): """Computational Message file asset.""" regex_parse_str = r".+\.comp-msgs.txt$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["computational-message-file", "ras-file", MediaType.TEXT] - description = "Computational Message text file which contains messages from the computation process." 
- super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["computational-message-file", "ras-file", MediaType.TEXT] + __description__ = "Computational Message text file which contains messages from the computation process." + __file_class__ = None class UnsteadyRunFileAsset(GenericAsset): """Run file for Unsteady Flow asset.""" regex_parse_str = r".+\.x\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["run-file", "ras-file", MediaType.TEXT] - description = "Run file for Unsteady Flow simulations." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["run-file", "ras-file", MediaType.TEXT] + __description__ = "Run file for Unsteady Flow simulations." + __file_class__ = None class OutputFileAsset(GenericAsset): """Output RAS file asset.""" regex_parse_str = r".+\.o\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["output-file", "ras-file", MediaType.TEXT] - description = "Output RAS file which contains all computed results." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["output-file", "ras-file", MediaType.TEXT] + __description__ = "Output RAS file which contains all computed results." + __file_class__ = None class InitialConditionsFileAsset(GenericAsset): """Initial Conditions file asset.""" regex_parse_str = r".+\.IC\.O\d{2}$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["initial-conditions-file", "ras-file", MediaType.TEXT] - description = "Initial conditions file for unsteady flow plan." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["initial-conditions-file", "ras-file", MediaType.TEXT] + __description__ = "Initial conditions file for unsteady flow plan." 
+ __file_class__ = None class PlanRestartFileAsset(GenericAsset): """Restart file for Unsteady Flow Plan asset.""" regex_parse_str = r".+\.p\d{2}\.rst$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["restart-file", "ras-file", MediaType.TEXT] - description = "Restart file for unsteady flow plan." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["restart-file", "ras-file", MediaType.TEXT] + __description__ = "Restart file for unsteady flow plan." + __file_class__ = None class RasMapperFileAsset(GenericAsset): """RAS Mapper file asset.""" regex_parse_str = r".+\.rasmap$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["ras-mapper-file", "ras-file", MediaType.TEXT] - description = "RAS Mapper file." - media_type = MediaType.TEXT - extra_fields = kwargs.get("extra_fields", {}) - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["ras-mapper-file", "ras-file", MediaType.TEXT] + __description__ = "RAS Mapper file." + __file_class__ = None class RasMapperBackupFileAsset(GenericAsset): """Backup RAS Mapper file asset.""" regex_parse_str = r".+\.rasmap\.backup$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["ras-mapper-file", "ras-file", MediaType.TEXT] - description = "Backup RAS Mapper file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["ras-mapper-file", "ras-file", MediaType.TEXT] + __description__ = "Backup RAS Mapper file." + __file_class__ = None class RasMapperOriginalFileAsset(GenericAsset): """Original RAS Mapper file asset.""" regex_parse_str = r".+\.rasmap\.original$" - - def __init__(self, href: str, *args, **kwargs): - roles = ["ras-mapper-file", "ras-file", MediaType.TEXT] - description = "Original RAS Mapper file." 
- super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = ["ras-mapper-file", "ras-file", MediaType.TEXT] + __description__ = "Original RAS Mapper file." + __file_class__ = None class MiscTextFileAsset(GenericAsset): """Miscellaneous Text file asset.""" regex_parse_str = r".+\.txt$" - - def __init__(self, href: str, *args, **kwargs): - roles = [MediaType.TEXT] - description = "Miscellaneous text file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = [MediaType.TEXT] + __description__ = "Miscellaneous text file." + __file_class__ = None class MiscXMLFileAsset(GenericAsset): """Miscellaneous XML file asset.""" regex_parse_str = r".+\.xml$" - - def __init__(self, href: str, *args, **kwargs): - roles = [MediaType.XML] - description = "Miscellaneous XML file." - super().__init__(href, roles=roles, description=description, *args, **kwargs) + __roles__ = [MediaType.XML] + __description__ = "Miscellaneous XML file." 
+ __file_class__ = None RAS_ASSET_CLASSES = [ diff --git a/hecstac/ras/consts.py b/hecstac/ras/consts.py index 4021bd7..a4610bd 100644 --- a/hecstac/ras/consts.py +++ b/hecstac/ras/consts.py @@ -1,3 +1,17 @@ +"""Constants.""" + +import datetime +import json + +from shapely import Polygon, box, to_geojson + SCHEMA_URI = ( "https://raw.githubusercontent.com/fema-ffrd/hecstac/refs/heads/port-ras-stac/hecstac/ras/extension/schema.json" ) + +NULL_DATETIME = datetime.datetime(9999, 9, 9) +NULL_GEOMETRY = Polygon([(0, 0), (0, 1), (1, 1), (1, 0)]) +NULL_STAC_GEOMETRY = json.loads(to_geojson(NULL_GEOMETRY)) +NULL_BBOX = box(0, 0, 1, 1) +NULL_STAC_BBOX = NULL_BBOX.bounds +PLACEHOLDER_ID = "id" diff --git a/hecstac/ras/errors.py b/hecstac/ras/errors.py index 6e28793..fc70548 100644 --- a/hecstac/ras/errors.py +++ b/hecstac/ras/errors.py @@ -1,10 +1,13 @@ +"""Errors for the ras module.""" + + class GeometryAssetInvalidCRSError(Exception): - "Invalid crs provided to geometry asset" + """Invalid crs provided to geometry asset.""" class GeometryAssetMissingCRSError(Exception): - "Required crs is missing from geometry asset definition" + """Required crs is missing from geometry asset definition.""" class GeometryAssetNoXSError(Exception): - "1D geometry asset has no cross sections; cross sections are required to calculate the goemetry of the asset" + """1D geometry asset has no cross sections; cross sections are required to calculate the geometry of the asset.""" diff --git a/hecstac/ras/item.py b/hecstac/ras/item.py index 8bc7669..323fed9 100644 --- a/hecstac/ras/item.py +++ b/hecstac/ras/item.py @@ -1,31 +1,36 @@ +"""HEC-RAS STAC Item class.""" + import datetime import json import logging -import os +from functools import lru_cache from pathlib import Path -from pystac import Item +import pystac +import pystac.errors +from pyproj import CRS +from pystac import Asset, Item from pystac.extensions.projection import ProjectionExtension -from pystac.extensions.storage import
StorageExtension -from shapely import Polygon, box, simplify, to_geojson, union_all - -from hecstac.common.path_manager import LocalPathManager -from hecstac.ras.parser import ProjectFile - -NULL_DATETIME = datetime.datetime(9999, 9, 9) -NULL_GEOMETRY = Polygon() -NULL_STAC_GEOMETRY = json.loads(to_geojson(NULL_GEOMETRY)) -NULL_BBOX = box(0, 0, 0, 0) -NULL_STAC_BBOX = NULL_BBOX.bounds -PLACEHOLDER_ID = "id" +from pystac.utils import datetime_to_str +from shapely import Polygon, simplify, to_geojson, union_all +from shapely.geometry import shape from hecstac.common.asset_factory import AssetFactory +from hecstac.common.path_manager import LocalPathManager from hecstac.ras.assets import ( RAS_EXTENSION_MAPPING, GeometryAsset, GeometryHdfAsset, - ProjectAsset, ) +from hecstac.ras.consts import ( + NULL_DATETIME, + NULL_STAC_BBOX, + NULL_STAC_GEOMETRY, +) +from hecstac.ras.parser import ProjectFile +from hecstac.ras.utils import find_model_files + +logger = logging.getLogger(__name__) class RASModelItem(Item): @@ -44,180 +49,231 @@ class RASModelItem(Item): RAS_HAS_2D = "ras:has_2d" RAS_DATETIME_SOURCE = "ras:datetime_source" - def __init__(self, ras_project_file, item_id: str, simplify_geometry: bool = True): - - self._project = None - self.assets = {} - self.links = [] - self.thumbnail_paths = [] - self.geojson_paths = [] - self.extra_fields = {} - self.stac_extensions = None - self.pm = LocalPathManager(Path(ras_project_file).parent) - self._href = self.pm.item_path(item_id) - self.ras_project_file = ras_project_file - self._simplify_geometry = simplify_geometry - - self.pf = ProjectFile(self.ras_project_file) - self.factory = AssetFactory(RAS_EXTENSION_MAPPING) - - super().__init__( - Path(self.ras_project_file).stem, - self._geometry, - self._bbox, - self._datetime, - self._properties, - href=self._href, - ) + def __init__(self, *args, **kwargs): + """Add a few default properties to the base class.""" + super().__init__(*args, **kwargs) + self.simplify_geometry = 
True - # derived_assets = self.add_model_thumbnail() TODO: implement this method - ras_asset_files = self.scan_model_dir() + @classmethod + def from_prj(cls, ras_project_file, item_id: str, crs: str = None, simplify_geometry: bool = True): + """ + Create a STAC item from a HEC-RAS .prj file. + + Parameters + ---------- + ras_project_file : str + Path to the HEC-RAS project file (.prj). + item_id : str + Unique item id for the STAC item. + crs : str, optional + Coordinate reference system (CRS) to apply to the item. If None, the CRS will be extracted from the geometry .hdf file. + simplify_geometry : bool, optional + Whether to simplify geometry. Defaults to True. + + Returns + ------- + stac : RASModelItem + An instance of the class representing the STAC item. + """ + pm = LocalPathManager(Path(ras_project_file).parent) + + href = pm.item_path(item_id) + # TODO: Add option to recursively iterate through all subdirectories in a model folder. + assets = {Path(i).name: Asset(i, Path(i).name) for i in find_model_files(ras_project_file)} + + stac = cls( + Path(ras_project_file).stem, + NULL_STAC_GEOMETRY, + NULL_STAC_BBOX, + NULL_DATETIME, + {"ras_project_file": ras_project_file}, + href=href, + assets=assets, + ) + if crs: + stac.crs = crs + stac.simplify_geometry = simplify_geometry + stac.pm = pm - for fpath in ras_asset_files: - if fpath and fpath != self._href: - self.add_ras_asset(fpath) + return stac - def _register_extensions(self) -> None: - ProjectionExtension.add_to(self) - StorageExtension.add_to(self) + @property + def ras_project_file(self) -> str: + """Get the path to the HEC-RAS .prj file.""" + return self._properties.get("ras_project_file") @property - def _properties(self) -> None: - """Properties for the RAS STAC item.""" - properties = {} + @lru_cache + def factory(self) -> AssetFactory: + """Return AssetFactory for this item.""" + return AssetFactory(RAS_EXTENSION_MAPPING) - properties = {} - properties[self.PROJECT_TITLE] = self.pf.project_title - 
properties[self.PROJECT_VERSION] = self.pf.ras_version - properties[self.PROJECT_DESCRIPTION] = self.pf.project_description - properties[self.PROJECT_STATUS] = self.pf.project_status - properties[self.MODEL_UNITS] = self.pf.project_units + @property + @lru_cache + def pf(self) -> ProjectFile: + """Get a ProjectFile instance for the RAS Model .prj file.""" + return ProjectFile(self.ras_project_file) - # self.properties[RAS_DATETIME_SOURCE] = self.datetime_source - # self.properties[RAS_HAS_1D] = self.has_1d - # self.properties[RAS_HAS_2D] = self.has_2d - # once all assets are created, populate associations between assets - return properties + @property + def has_2d(self) -> bool: + """Whether any geometry file has 2D elements.""" + return any([a.has_2d for a in self.geometry_assets]) @property - def _bbox(self) -> tuple[float, float, float, float]: - if self._geometry == NULL_STAC_GEOMETRY: - return NULL_STAC_BBOX - return self._geometry.bounds + def has_1d(self) -> bool: + """Whether any geometry file has 1D elements.""" + return any([a.has_1d for a in self.geometry_assets]) @property - def _geometry(self) -> dict | None: - """ - gets geometry using either list of geom assets or list of hdf assets, perhaps simplified to - a given tolerance to reduce replication of data - (ie item would record simplified geometry used when searching collection, - asset would have more exact geometry representing contents of geom or hdf files) - """ - # if geometry is equal to null placeholder, continue, else return current value - geometries = [] - if 2 == 3: - # if self.has_2d: - geometries.append(self.parse_2d_geom()) + def geometry_assets(self) -> list[GeometryHdfAsset | GeometryAsset]: + """Return any RasGeomHdf in assets.""" + return [a for a in self.assets.values() if isinstance(a, (GeometryHdfAsset, GeometryAsset))] - # if hdf file is not present, get concave hull of cross sections and use as geometry - # if self.has_1d: - if 1 == 2: - geometries.append(self.parse_1d_geom())
+ @property + def crs(self) -> CRS: + """Get the authority code for the model CRS.""" + try: + return CRS(self.ext.proj.wkt2) + except pystac.errors.ExtensionNotImplemented: + return None + + @crs.setter + def crs(self, crs): + """Apply the projection extension to this item given a CRS.""" + prj_ext = ProjectionExtension.ext(self, add_if_missing=True) + crs = CRS(crs) + prj_ext.apply(epsg=crs.to_epsg(), wkt2=crs.to_wkt()) - if len(geometries) == 0: - logging.error("No geometry found for RAS item.") + @property + def geometry(self) -> dict: + """Return footprint of model as a geojson.""" + if self.crs is None: + logger.warning("Geometry requested for model with no spatial reference.") + return NULL_STAC_GEOMETRY + if len(self.geometry_assets) == 0: + logger.error("No geometry found for RAS item.") return NULL_STAC_GEOMETRY - unioned_geometry = union_all(geometries) - if self._simplify_geometry: - unioned_geometry = simplify(unioned_geometry, self.simplify_tolerance) + geometries = [] + for i in self.geometry_assets: + try: + geometries.append(i.geometry_wgs84) + except Exception as e: + logger.warning(f"Could not process geometry from {i.href}") + continue + unioned_geometry = union_all(geometries) + if self.simplify_geometry: + unioned_geometry = simplify(unioned_geometry, 0.001) + if unioned_geometry.interiors: + unioned_geometry = Polygon(list(unioned_geometry.exterior.coords)) return json.loads(to_geojson(unioned_geometry)) @property - def _datetime(self) -> datetime: - """The datetime for the HMS STAC item.""" - # date = datetime.strptime(self.pf.basins[0].header.attrs["Last Modified Date"], "%d %B %Y") - # time = datetime.strptime(self.pf.basins[0].header.attrs["Last Modified Time"], "%H:%M:%S").time() - return datetime.datetime.now() + def bbox(self) -> list[float]: + """Get the bounding box of the model geometry.""" + return shape(self.geometry).bounds @property - def datetime_source(self) -> str: - if self._datetime_source == None: - if self._dts == None: - 
self.populate() - if len(self._dts) == 0: - self._datetime_source = "processing_time" - else: - self._datetime_source = "model_geometry" - return self._datetime_source - - def add_model_thumbnail(self, add_asset: bool, write: bool, layers: list, title: str = "Model_Thumbnail"): - - for geom in self._geom_files: - if isinstance(geom, GeometryHdfAsset): - if add_asset: - self.assets["thumbnail"] = geom.thumbnail( - add_asset=add_asset, write=write, layers=layers, title=title, thumbnail_dest=self.href - ) - - def add_ras_asset(self, fpath: str = "") -> None: - """Add an asset to the HMS STAC item.""" - if os.path.exists(fpath): - try: - asset = self.factory.create_ras_asset(fpath) - logging.debug(f"Adding asset {str(asset)}") - except TypeError as e: - logging.error(f"Error creating asset for {fpath}: {e}") - if asset is not None: - self.add_asset(asset.title, asset) - if isinstance(asset, ProjectAsset): - if self._project is not None: - logging.error( - f"Only one project asset is allowed. Found {str(asset)} when {str(self._project)} was already set." 
- ) - self._project = asset - - def parse_1d_geom(self): - logging.info("Creating geometry using 1d text file cross sections") - concave_hull_polygons: list[Polygon] = [] - for geom_asset in self.geometry_files: - if isinstance(geom_asset, GeometryAsset): - if self.simplify_tolerance: - concave_hull = simplify( - self._geometry_to_wgs84(geom_asset.concave_hull), - self.simplify_tolerance, - ) - else: - concave_hull = self._geometry_to_wgs84(geom_asset.concave_hull) - concave_hull_polygons.append(concave_hull) - return self._geometry - - def parse_2d_geom(self): - logging.info("Creating 2D geometry elements using hdf file mesh areas") - mesh_area_polygons: list[Polygon] = [] - for geom_asset in self.geometry_files: - if isinstance(geom_asset, GeometryHdfAsset): - if self.simplify_tolerance: - mesh_areas = simplify( - self._geometry_to_wgs84(geom_asset.mesh_areas(self.crs)), - self.simplify_tolerance, - ) - else: - mesh_areas = self._geometry_to_wgs84(geom_asset.mesh_areas(self.crs)) - mesh_area_polygons.append(mesh_areas) - return union_all(mesh_area_polygons) - - def ensure_projection_schema(self) -> None: - ProjectionExtension.ensure_has_extension(self, True) - - def scan_model_dir(self): - base_dir = os.path.dirname(self.ras_project_file) - files = [] - for root, _, filenames in os.walk(base_dir): - depth = root[len(base_dir) :].count(os.sep) - if depth > 1: - break - for filename in filenames: - files.append(os.path.join(root, filename)) - return files + def properties(self) -> None: + """Properties for the RAS STAC item.""" + if self.ras_project_file is None: + return self._properties + properties = self._properties + properties[self.RAS_HAS_1D] = self.has_1d + properties[self.RAS_HAS_2D] = self.has_2d + properties[self.PROJECT_TITLE] = self.pf.project_title + properties[self.PROJECT_VERSION] = self.pf.ras_version + properties[self.PROJECT_DESCRIPTION] = self.pf.project_description + properties[self.PROJECT_STATUS] = self.pf.project_status + 
properties[self.MODEL_UNITS] = self.pf.project_units + if self.datetime is not None: + properties["datetime"] = datetime_to_str(self.datetime) + else: + properties["datetime"] = None + # TODO: once all assets are created, populate associations between assets + return properties + + @properties.setter + def properties(self, properties: dict): + """Set properties.""" + self._properties = properties + + @property + def datetime(self) -> datetime.datetime | None: + """Parse datetime from model geometry and return result.""" + datetimes = [] + for i in self.geometry_assets: + i = i.file.geometry_time + if i is None: + continue + elif isinstance(i, list): + datetimes.extend([j for j in i if j is not None]) + elif isinstance(i, datetime.datetime): + datetimes.append(i) + + datetimes = list(set(datetimes)) + if len(datetimes) > 1: + self._properties["start_datetime"] = datetime_to_str(min(datetimes)) + self._properties["end_datetime"] = datetime_to_str(max(datetimes)) + self._properties[self.RAS_DATETIME_SOURCE] = "model_geometry" + item_time = None + elif len(datetimes) == 1: + item_time = datetimes[0] + self._properties[self.RAS_DATETIME_SOURCE] = "model_geometry" + else: + logger.warning("Could not extract item datetime from geometry, using item processing time.") + item_time = datetime.datetime.now() + self._properties[self.RAS_DATETIME_SOURCE] = "processing_time" + return item_time + + def add_model_thumbnails(self, layers: list, title_prefix: str = "Model_Thumbnail", thumbnail_dir=None): + """Generate model thumbnail asset for each geometry file. + + Parameters + ---------- + layers : list + List of geometry layers to be included in the plot. Options include 'mesh_areas', 'breaklines', 'bc_lines' + title_prefix : str, optional + Thumbnail title prefix, by default "Model_Thumbnail". + thumbnail_dir : str, optional + Directory for created thumbnails. If None then thumbnails will be exported to same level as the item. 
+ """ + if thumbnail_dir: + thumbnail_dest = thumbnail_dir + else: + thumbnail_dest = self.self_href + + for geom in self.geometry_assets: + if isinstance(geom, GeometryHdfAsset) and geom.has_2d: + self.assets[f"{geom.href.rsplit('/')[-1]}_thumbnail"] = geom.thumbnail( + layers=layers, title=title_prefix, thumbnail_dest=thumbnail_dest + ) + + # TODO: Add 1d model thumbnails + + def add_asset(self, key, asset): + """Subclass asset then add.""" + subclass = self.factory.asset_from_dict(asset) + if subclass is None: + return + if self.crs is None and isinstance(subclass, GeometryHdfAsset) and subclass.file.projection is not None: + self.crs = subclass.file.projection + return super().add_asset(key, subclass) + + ### Some properties are dynamically generated. Ignore external updates ### + + @geometry.setter + def geometry(self, *args, **kwargs): + """Ignore.""" + pass + + @bbox.setter + def bbox(self, *args, **kwargs): + """Ignore.""" + pass + + @datetime.setter + def datetime(self, *args, **kwargs): + """Ignore.""" + pass diff --git a/hecstac/ras/parser.py b/hecstac/ras/parser.py index 67773a8..9068955 100644 --- a/hecstac/ras/parser.py +++ b/hecstac/ras/parser.py @@ -1,28 +1,34 @@ +"""Contains classes and methods to parse HEC-RAS files.""" + import datetime import logging import math from collections import defaultdict from enum import Enum +from functools import lru_cache from pathlib import Path from typing import Iterator import geopandas as gpd import numpy as np import pandas as pd -from pystac import Asset -from rashdf import RasHdf, RasPlanHdf -from shapely import LineString, MultiPolygon, Point, Polygon, make_valid, union_all +from rashdf import RasGeomHdf, RasPlanHdf +from shapely import GeometryCollection, LineString, MultiPolygon, Point, Polygon, make_valid, union_all from shapely.ops import unary_union from hecstac.ras.utils import ( + check_xs_direction, data_pairs_from_text_block, delimited_pairs_to_lists, + reverse, search_contents, 
text_block_from_start_end_str, text_block_from_start_str_length, text_block_from_start_str_to_empty_line, ) +logger = logging.getLogger(__name__) + def name_from_suffix(fpath: str, suffix: str) -> str: """Generate a name by appending a suffix to the file stem.""" @@ -30,6 +36,7 @@ def name_from_suffix(fpath: str, suffix: str) -> str: class River: + """HEC-RAS River.""" def __init__(self, river: str, reaches: list[str] = []): self.river = river @@ -39,9 +46,8 @@ def __init__(self, river: str, reaches: list[str] = []): class XS: """HEC-RAS Cross Section.""" - def __init__(self, ras_data: list[str], river_reach: str, river: str, reach: str, crs: str): + def __init__(self, ras_data: list[str], river_reach: str, river: str, reach: str): self.ras_data = ras_data - self.crs = crs self.river = river self.reach = reach self.river_reach = river_reach @@ -158,7 +164,6 @@ def gdf(self) -> gpd.GeoDataFrame: "number_of_coords": [self.number_of_coords], # "coords": [self.coords], }, - crs=self.crs, geometry="geometry", ) @@ -192,6 +197,7 @@ def subdivisions(self) -> tuple[list[float], list[float]]: @property def is_interpolated(self) -> bool: + """Check if xs is interpolated.""" if self._is_interpolated == None: self._is_interpolated = "*" in self.split_xs_header(1) return self._is_interpolated @@ -283,6 +289,8 @@ def get_mannings_discharge(self, wse: float, slope: float, units: str) -> float: class StructureType(Enum): + """Structure types.""" + XS = 1 CULVERT = 2 BRIDGE = 3 @@ -292,7 +300,7 @@ class StructureType(Enum): class Structure: - """Structure.""" + """HEC-RAS Structures.""" def __init__( self, @@ -300,11 +308,9 @@ def __init__( river_reach: str, river: str, reach: str, - crs: str, us_xs: XS, ): self.ras_data = ras_data - self.crs = crs self.river = river self.reach = reach self.river_reach = river_reach @@ -382,7 +388,6 @@ def gdf(self) -> gpd.GeoDataFrame: "width": [self.width], "ras_data": ["\n".join(self.ras_data)], }, - crs=self.crs, geometry="geometry", ) @@ 
-390,10 +395,9 @@ def gdf(self) -> gpd.GeoDataFrame: class Reach: """HEC-RAS River Reach.""" - def __init__(self, ras_data: list[str], river_reach: str, crs: str): + def __init__(self, ras_data: list[str], river_reach: str): reach_lines = text_block_from_start_end_str(f"River Reach={river_reach}", ["River Reach"], ras_data, -1) self.ras_data = reach_lines - self.crs = crs self.river_reach = river_reach self.river = river_reach.split(",")[0].rstrip() self.reach = river_reach.split(",")[1].rstrip() @@ -459,7 +463,7 @@ def cross_sections(self) -> dict[str, "XS"]: ["Type RM Length L Ch R", "River Reach"], self.ras_data, ) - cross_section = XS(xs_lines, self.river_reach, self.river, self.reach, self.crs) + cross_section = XS(xs_lines, self.river_reach, self.river, self.reach) cross_sections[cross_section.river_reach_rs] = cross_section return cross_sections @@ -476,7 +480,7 @@ def structures(self) -> dict[str, "Structure"]: ["Type RM Length L Ch R", "River Reach"], self.ras_data, ) - cross_section = XS(xs_lines, self.river_reach, self.river, self.reach, self.crs) + cross_section = XS(xs_lines, self.river_reach, self.river, self.reach) continue elif int(type) in [2, 3, 4, 5, 6]: # culvert or bridge or multiple openeing structure_lines = text_block_from_start_end_str( @@ -495,7 +499,6 @@ def structures(self) -> dict[str, "Structure"]: self.river_reach, self.river, self.reach, - self.crs, cross_section, ) structures[structure.river_reach_rs] = structure @@ -515,7 +518,6 @@ def gdf(self) -> gpd.GeoDataFrame: # "coords": [self.coords], "ras_data": ["\n".join(self.ras_data)], }, - crs=self.crs, geometry="geometry", ) @@ -533,8 +535,7 @@ def structures_gdf(self) -> gpd.GeoDataFrame: class Junction: """HEC-RAS Junction.""" - def __init__(self, ras_data: list[str], junct: str, crs: str): - self.crs = crs + def __init__(self, ras_data: list[str], junct: str): self.name = junct self.ras_data = text_block_from_start_str_to_empty_line(f"Junct Name={junct}", ras_data) @@ -620,22 
+621,21 @@ def gdf(self): "ras_data": ["\n".join(self.ras_data)], }, geometry="geometry", - crs=self.crs, ) class StorageArea: + """HEC-RAS StorageArea.""" - def __init__(self, ras_data: list[str], crs: str): - self.crs = crs + def __init__(self, ras_data: list[str]): self.ras_data = ras_data # TODO: Implement this class Connection: + """HEC-RAS Connection.""" - def __init__(self, ras_data: list[str], crs: str): - self.crs = crs + def __init__(self, ras_data: list[str]): self.ras_data = ras_data # TODO: Implement this @@ -650,66 +650,90 @@ def __init__(self, fpath): self.file_lines = f.readlines() @property + @lru_cache def project_title(self) -> str: + """Return the project title.""" return search_contents(self.file_lines, "Proj Title") @property + @lru_cache def project_description(self) -> str: - return search_contents(self.file_lines, "Model Description", token=":") + """Return the model description.""" + return search_contents(self.file_lines, "Model Description", token=":", require_one=False) @property + @lru_cache def project_status(self) -> str: - return search_contents(self.file_lines, "Status of Model", token=":") + """Return the model status.""" + return search_contents(self.file_lines, "Status of Model", token=":", require_one=False) @property + @lru_cache def project_units(self) -> str | None: + """Return the project units.""" for line in self.file_lines: if "Units" in line: return " ".join(line.split(" ")[:-1]) @property + @lru_cache def plan_current(self) -> str | None: + """Return the current plan.""" try: - suffix = search_contents(self.file_lines, "Current Plan", expect_one=True) - return self.name_from_suffix(suffix) + suffix = search_contents(self.file_lines, "Current Plan", expect_one=True, require_one=False).strip() + return name_from_suffix(self.fpath, suffix) except Exception: - logging.warning("Ras model has no current plan") + logger.warning("Ras model has no current plan") return None @property + @lru_cache def ras_version(self) -> str | 
None: - version = search_contents(self.file_lines, "Program Version", token="=", expect_one=False) + """Return the ras version.""" + version = search_contents(self.file_lines, "Program Version", token="=", expect_one=False, require_one=False) if version == []: - version = search_contents(self.file_lines, "Program and Version", token=":", expect_one=False) + version = search_contents( + self.file_lines, "Program and Version", token=":", expect_one=False, require_one=False + ) if version == []: - logging.warning("Unable to parse project version") + logger.warning("Unable to parse project version") return "N/A" else: return version[0] @property + @lru_cache def plan_files(self) -> list[str]: - suffixes = search_contents(self.file_lines, "Plan File", expect_one=False) + """Return the plan files.""" + suffixes = search_contents(self.file_lines, "Plan File", expect_one=False, require_one=False) return [name_from_suffix(self.fpath, i) for i in suffixes] @property + @lru_cache def geometry_files(self) -> list[str]: - suffixes = search_contents(self.file_lines, "Geom File", expect_one=False) + """Return the geometry files.""" + suffixes = search_contents(self.file_lines, "Geom File", expect_one=False, require_one=False) return [name_from_suffix(self.fpath, i) for i in suffixes] @property + @lru_cache def steady_flow_files(self) -> list[str]: - suffixes = search_contents(self.file_lines, "Flow File", expect_one=False) + """Return the flow files.""" + suffixes = search_contents(self.file_lines, "Flow File", expect_one=False, require_one=False) return [name_from_suffix(self.fpath, i) for i in suffixes] @property + @lru_cache def quasi_unsteady_flow_files(self) -> list[str]: - suffixes = search_contents(self.file_lines, "QuasiSteady File", expect_one=False) + """Return the quasisteady flow files.""" + suffixes = search_contents(self.file_lines, "QuasiSteady File", expect_one=False, require_one=False) return [name_from_suffix(self.fpath, i) for i in suffixes] @property + 
@lru_cache def unsteady_flow_files(self) -> list[str]: - suffixes = search_contents(self.file_lines, "Unsteady File", expect_one=False) + """Return the unsteady flow files.""" + suffixes = search_contents(self.file_lines, "Unsteady File", expect_one=False, require_one=False) return [name_from_suffix(self.fpath, i) for i in suffixes] @@ -724,40 +748,47 @@ def __init__(self, fpath): @property def plan_title(self) -> str: + """Return plan title.""" return search_contents(self.file_lines, "Plan Title") @property def plan_version(self) -> str: + """Return program version.""" return search_contents(self.file_lines, "Program Version") @property def geometry_file(self) -> str: + """Return geometry file.""" suffix = search_contents(self.file_lines, "Geom File", expect_one=True) return name_from_suffix(self.fpath, suffix) @property def flow_file(self) -> str: + """Return flow file.""" suffix = search_contents(self.file_lines, "Flow File", expect_one=True) return name_from_suffix(self.fpath, suffix) @property def short_identifier(self) -> str: + """Return short identifier.""" return search_contents(self.file_lines, "Short Identifier", expect_one=True) @property def breach_locations(self) -> dict: """ - example file line: + Return breach locations. 
+ + Example file line: Breach Loc= , , ,True,HH_DamEmbankment """ breach_dict = {} - matches = search_contents(self.file_lines, "Breach Loc", expect_one=False) + matches = search_contents(self.file_lines, "Breach Loc", expect_one=False, require_one=False) for line in matches: parts = line.split(",") if len(parts) >= 4: key = parts[4].strip() breach_dict[key] = eval(parts[3].strip()) - logging.debug(f"breach_dict {breach_dict}") + logger.debug(f"breach_dict {breach_dict}") return breach_dict @@ -768,18 +799,46 @@ def __init__(self, fpath): # TODO: Compare with HMS implementation self.fpath = fpath with open(fpath, "r") as f: - self.file_lines = f.readlines() + self.file_lines = f.read().splitlines() @property def geom_title(self) -> str: + """Return geometry title.""" return search_contents(self.file_lines, "Geom Title") @property def geom_version(self) -> str: + """Return program version.""" return search_contents(self.file_lines, "Program Version") @property - def rivers(self) -> dict[str, "River"]: + def geometry_time(self) -> list[datetime.datetime]: + """Get the latest node last updated entry for this geometry.""" + dts = search_contents(self.file_lines, "Node Last Edited Time", expect_one=False, require_one=False) + if len(dts) >= 1: + try: + return [datetime.datetime.strptime(d, "%b/%d/%Y %H:%M:%S") for d in dts] + except ValueError: + return [] + else: + return [] + + @property + def has_2d(self) -> bool: + """Check if RAS geometry has any 2D areas.""" + for line in self.file_lines: + if line.startswith("Storage Area Is2D=") and int(line[len("Storage Area Is2D=") :].strip()) in (1, -1): + # RAS mostly uses "-1" to indicate True and "0" to indicate False. Checking for "1" also here. 
+ return True + return False + + @property + def has_1d(self) -> bool: + """Check if RAS geometry has any 1D components.""" + return len(self.cross_sections) > 0 + + @property + def rivers(self) -> dict[str, River]: """A dictionary of river_name: River (class) for the rivers contained in the HEC-RAS geometry file.""" tmp_rivers = defaultdict(list) for reach in self.reaches.values(): # First, group all reaches into their respective rivers @@ -792,19 +851,19 @@ def rivers(self) -> dict[str, "River"]: return tmp_rivers @property - def reaches(self) -> dict[str, "Reach"]: + def reaches(self) -> dict[str, Reach]: """A dictionary of the reaches contained in the HEC-RAS geometry file.""" - river_reaches = search_contents(self.file_lines, "River Reach", expect_one=False) - return {river_reach: Reach(self.file_lines, river_reach, self.crs) for river_reach in river_reaches} + river_reaches = search_contents(self.file_lines, "River Reach", expect_one=False, require_one=False) + return {river_reach: Reach(self.file_lines, river_reach) for river_reach in river_reaches} @property - def junctions(self) -> dict[str, "Junction"]: + def junctions(self) -> dict[str, Junction]: """A dictionary of the junctions contained in the HEC-RAS geometry file.""" - juncts = search_contents(self.file_lines, "Junct Name", expect_one=False) - return {junction: Junction(self.file_lines, junction, self.crs) for junction in juncts} + juncts = search_contents(self.file_lines, "Junct Name", expect_one=False, require_one=False) + return {junction: Junction(self.file_lines, junction) for junction in juncts} @property - def cross_sections(self) -> dict[str, "XS"]: + def cross_sections(self) -> dict[str, XS]: """A dictionary of all the cross sections contained in the HEC-RAS geometry file.""" cross_sections = {} for reach in self.reaches.values(): @@ -812,7 +871,7 @@ def cross_sections(self) -> dict[str, "XS"]: return cross_sections @property - def structures(self) -> dict[str, "Structure"]: + def 
structures(self) -> dict[str, Structure]: """A dictionary of the structures contained in the HEC-RAS geometry file.""" structures = {} for reach in self.reaches.values(): @@ -820,50 +879,70 @@ def structures(self) -> dict[str, "Structure"]: return structures @property - def storage_areas(self) -> dict[str, "StorageArea"]: + def storage_areas(self) -> dict[str, StorageArea]: """A dictionary of the storage areas contained in the HEC-RAS geometry file.""" - areas = search_contents(self.file_lines, "Storage Area", expect_one=False) - return {a: StorageArea(a, self.crs) for a in areas} + areas = search_contents(self.file_lines, "Storage Area", expect_one=False, require_one=False) + return {a: StorageArea(a) for a in areas} @property - def connections(self) -> dict[str, "Connection"]: + def connections(self) -> dict[str, Connection]: """A dictionary of the SA/2D connections contained in the HEC-RAS geometry file.""" - connections = search_contents(self.file_lines, "Connection", expect_one=False) - return {c: Connection(c, self.crs) for c in connections} + connections = search_contents(self.file_lines, "Connection", expect_one=False, require_one=False) + return {c: Connection(c) for c in connections} @property - def datetimes(self) -> list[datetime.datetime]: - """Get the latest node last updated entry for this geometry""" - dts = search_contents(self.file_lines, "Node Last Edited Time", expect_one=False) - if len(dts) >= 1: - try: - return [datetime.datetime.strptime(d, "%b/%d/%Y %H:%M:%S") for d in dts] - except ValueError: - return [] - else: - return [] + def reach_gdf(self): + """A GeodataFrame of the reaches contained in the HEC-RAS geometry file.""" + return gpd.GeoDataFrame(pd.concat([reach.gdf for reach in self.reaches.values()], ignore_index=True)) @property - def has_2d(self) -> bool: - """Check if RAS geometry has any 2D areas""" - for line in self.file_lines: - if line.startswith("Storage Area Is2D=") and int(line[len("Storage Area Is2D=") :].strip()) in (1, 
-1): - # RAS mostly uses "-1" to indicate True and "0" to indicate False. Checking for "1" also here. - return True - return False + def junction_gdf(self): + """A GeodataFrame of the junctions contained in the HEC-RAS geometry file.""" + if self.junctions: + return gpd.GeoDataFrame( + pd.concat( + [junction.gdf for junction in self.junctions.values()], + ignore_index=True, + ) + ) @property - def has_1d(self) -> bool: - """Check if RAS geometry has any 1D components""" - return len(self.cross_sections) > 0 + def xs_gdf(self) -> gpd.GeoDataFrame: + """Geodataframe of all cross sections in the geometry text file.""" + xs_gdf = pd.concat([xs.gdf for xs in self.cross_sections.values()], ignore_index=True) + + subsets = [] + for _, reach in self.reach_gdf.iterrows(): + subset_xs = xs_gdf.loc[xs_gdf["river_reach"] == reach["river_reach"]].copy() + not_reversed_xs = check_xs_direction(subset_xs, reach.geometry) + subset_xs["geometry"] = subset_xs.apply( + lambda row: ( + row.geometry + if row["river_reach_rs"] in list(not_reversed_xs["river_reach_rs"]) + else reverse(row.geometry) + ), + axis=1, + ) + subsets.append(subset_xs) + return gpd.GeoDataFrame(pd.concat(subsets)) + + @property + def structures_gdf(self) -> gpd.GeoDataFrame: + """Geodataframe of all structures in the geometry text file.""" + return gpd.GeoDataFrame(pd.concat([structure.gdf for structure in self.structures.values()], ignore_index=True)) @property - def concave_hull(self) -> Polygon: + @lru_cache + def concave_hull(self): """Compute and return the concave hull (polygon) for cross sections.""" polygons = [] - xs_gdf = pd.concat([xs.gdf for xs in self.cross_sections.values()], ignore_index=True) - for river_reach in xs_gdf["river_reach"].unique(): - xs_subset: gpd.GeoSeries = xs_gdf[xs_gdf["river_reach"] == river_reach] + xs_df = self.xs_gdf # shorthand + assert not all( + [i.is_empty for i in xs_df.geometry] + ), "No valid cross-sections found. 
Possibly non-georeferenced model" + assert len(xs_df) > 1, "Only one valid cross-section found." + for river_reach in xs_df["river_reach"].unique(): + xs_subset = xs_df[xs_df["river_reach"] == river_reach] points = xs_subset.boundary.explode(index_parts=True).unstack() points_last_xs = [Point(coord) for coord in xs_subset["geometry"].iloc[-1].coords] points_first_xs = [Point(coord) for coord in xs_subset["geometry"].iloc[0].coords[::-1]] @@ -872,13 +951,29 @@ def concave_hull(self) -> Polygon: polygons += list(polygon.geoms) else: polygons.append(polygon) - if len(self.junctions) > 0: - for junction in self.junctions.values(): - for _, j in junction.gdf.iterrows(): - polygons.append(self.junction_hull(xs_gdf, j)) - out_hull = union_all([make_valid(p) for p in polygons]) + if self.junction_gdf is not None: + for _, j in self.junction_gdf.iterrows(): + polygons.append(self.junction_hull(j)) + out_hull = self.clean_polygons(polygons) return out_hull + def clean_polygons(self, polygons: list) -> list: + """Make polygons valid and remove geometry collections.""" + all_valid = [] + for p in polygons: + valid = make_valid(p) + if isinstance(valid, GeometryCollection): + polys = [] + for i in valid.geoms: + if isinstance(i, MultiPolygon): + polys.extend([j for j in i.geoms]) + elif isinstance(i, Polygon): + polys.append(i) + all_valid.extend(polys) + else: + all_valid.append(valid) + return union_all(all_valid) + def junction_hull(self, xs_gdf: gpd.GeoDataFrame, junction: gpd.GeoSeries) -> Polygon: """Compute and return the concave hull (polygon) for a juction.""" junction_xs = self.determine_junction_xs(xs_gdf, junction) @@ -931,6 +1026,7 @@ def get_subtype_gdf(self, subtype: str) -> gpd.GeoDataFrame: ) # TODO: may need to add some logic here for empty dicts def iter_labeled_gdfs(self) -> Iterator[tuple[str, gpd.GeoDataFrame]]: + """Return gdf and associated property.""" for property in self.PROPERTIES_WITH_GDF: gdf = self.get_subtype_gdf(property) yield property, gdf @@ 
-951,10 +1047,12 @@ def __init__(self, fpath): @property def flow_title(self) -> str: + """Return flow title.""" return search_contents(self.file_lines, "Flow Title") @property def n_profiles(self) -> int: + """Return number of profiles.""" return int(search_contents(self.file_lines, "Number of Profiles")) @@ -968,28 +1066,34 @@ def __init__(self, fpath): @property def flow_title(self) -> str: + """Return flow title.""" return search_contents(self.file_lines, "Flow Title") @property def boundary_locations(self) -> list: """ - example file line: + Return boundary locations. + + Example file line: Boundary Location= , , , , ,Perimeter 1 , ,PugetSound_Ocean_Boundary , """ boundary_dict = [] - matches = search_contents(self.file_lines, "Boundary Location", expect_one=False) + matches = search_contents(self.file_lines, "Boundary Location", expect_one=False, require_one=False) for line in matches: parts = line.split(",") if len(parts) >= 7: flow_area = parts[5].strip() bc_line = parts[7].strip() boundary_dict.append({flow_area: bc_line}) - logging.debug(f"boundary_dict:{boundary_dict}") + logger.debug(f"boundary_dict:{boundary_dict}") return boundary_dict @property def reference_lines(self): - return search_contents(self.file_lines, "Observed Rating Curve=Name=Ref Line", token=":", expect_one=False) + """Return reference lines.""" + return search_contents( + self.file_lines, "Observed Rating Curve=Name=Ref Line", token=":", expect_one=False, require_one=False + ) class QuasiUnsteadyFlowFile: @@ -1005,162 +1109,173 @@ class QuasiUnsteadyFlowFile: class RASHDFFile: - """Base class for HDF assets (Plan and Geometry HDF files).""" + """Base class for parsing HDF assets (Plan and Geometry HDF files).""" - def __init__(self, fpath): + def __init__(self, fpath, hdf_constructor): self.fpath = fpath - self.hdf_object = RasHdf(fpath) + self.hdf_object = hdf_constructor(fpath) self._root_attrs: dict | None = None self._geom_attrs: dict | None = None self._structures_attrs: dict | 
None = None self._2d_flow_attrs: dict | None = None - # def populate( - # self, - # optional_property_dict: dict[str, str], - # required_property_dict: dict[str, str], - # ) -> dict: - # extra_fields = {} - # # go through dictionary of stac property names and class property names, only adding property to extra fields if the value is not None - # for stac_property_name, class_property_name in optional_property_dict.items(): - # property_value = getattr(self, class_property_name) - # if property_value != None: - # extra_fields[stac_property_name] = property_value - # # go through dictionary of stac property names and class property names, adding all properties to extra fields regardless of value - # for stac_property_name, class_property_name in required_property_dict.items(): - # property_value = getattr(self, class_property_name) - # extra_fields[stac_property_name] = property_value - # return extra_fields - @property def file_version(self) -> str | None: + """Return File Version.""" if self._root_attrs == None: self._root_attrs = self.hdf_object.get_root_attrs() return self._root_attrs.get("File Version") @property def units_system(self) -> str | None: + """Return Units System.""" if self._root_attrs == None: self._root_attrs = self.hdf_object.get_root_attrs() return self._root_attrs.get("Units System") @property def geometry_time(self) -> datetime.datetime | None: + """Return Geometry Time.""" if self._geom_attrs == None: self._geom_attrs = self.hdf_object.get_geom_attrs() - return self._geom_attrs.get("Geometry Time").isoformat() + return self._geom_attrs.get("Geometry Time") @property def landcover_date_last_modified(self) -> datetime.datetime | None: + """Return Land Cover Date Last Modified.""" if self._geom_attrs == None: self._geom_attrs = self.hdf_object.get_geom_attrs() return self._geom_attrs.get("Land Cover Date Last Modified") @property def landcover_filename(self) -> str | None: + """Return Land Cover Filename.""" if self._geom_attrs == None: 
self._geom_attrs = self.hdf_object.get_geom_attrs() return self._geom_attrs.get("Land Cover Filename") @property def landcover_layername(self) -> str | None: + """Return Land Cover Layername.""" if self._geom_attrs == None: self._geom_attrs = self.hdf_object.get_geom_attrs() return self._geom_attrs.get("Land Cover Layername") @property def rasmapperlibdll_date(self) -> datetime.datetime | None: + """Return RasMapperLib.dll Date.""" if self._geom_attrs == None: self._geom_attrs = self.hdf_object.get_geom_attrs() return self._geom_attrs.get("RasMapperLib.dll Date").isoformat() @property def si_units(self) -> bool | None: + """Return SI Units.""" if self._geom_attrs == None: self._geom_attrs = self.hdf_object.get_geom_attrs() return self._geom_attrs.get("SI Units") @property def terrain_file_date(self) -> datetime.datetime | None: + """Return Terrain File Date.""" if self._geom_attrs == None: self._geom_attrs = self.hdf_object.get_geom_attrs() return self._geom_attrs.get("Terrain File Date").isoformat() @property def terrain_filename(self) -> str | None: + """Return Terrain Filename.""" if self._geom_attrs == None: self._geom_attrs = self.hdf_object.get_geom_attrs() return self._geom_attrs.get("Terrain Filename") @property def terrain_layername(self) -> str | None: + """Return Terrain Layername.""" if self._geom_attrs == None: self._geom_attrs = self.hdf_object.get_geom_attrs() return self._geom_attrs.get("Terrain Layername") @property def geometry_version(self) -> str | None: + """Return Version.""" if self._geom_attrs == None: self._geom_attrs = self.hdf_object.get_geom_attrs() return self._geom_attrs.get("Version") @property def bridges_culverts(self) -> int | None: + """Return Bridge/Culvert Count.""" if self._structures_attrs == None: self._structures_attrs = self.hdf_object.get_geom_structures_attrs() return self._structures_attrs.get("Bridge/Culvert Count") @property def connections(self) -> int | None: + """Return Connection Count.""" if self._structures_attrs 
== None: self._structures_attrs = self.hdf_object.get_geom_structures_attrs() return self._structures_attrs.get("Connection Count") @property def inline_structures(self) -> int | None: + """Return Inline Structure Count.""" if self._structures_attrs == None: self._structures_attrs = self.hdf_object.get_geom_structures_attrs() return self._structures_attrs.get("Inline Structure Count") @property def lateral_structures(self) -> int | None: + """Return Lateral Structure Count.""" if self._structures_attrs == None: self._structures_attrs = self.hdf_object.get_geom_structures_attrs() return self._structures_attrs.get("Lateral Structure Count") @property def two_d_flow_cell_average_size(self) -> float | None: + """Return Cell Average Size.""" if self._2d_flow_attrs == None: self._2d_flow_attrs = self.hdf_object.get_geom_2d_flow_area_attrs() return int(np.sqrt(self._2d_flow_attrs.get("Cell Average Size"))) @property def two_d_flow_cell_maximum_index(self) -> int | None: + """Return Cell Maximum Index.""" if self._2d_flow_attrs == None: self._2d_flow_attrs = self.hdf_object.get_geom_2d_flow_area_attrs() return self._2d_flow_attrs.get("Cell Maximum Index") @property def two_d_flow_cell_maximum_size(self) -> int | None: + """Return Cell Maximum Size.""" if self._2d_flow_attrs == None: self._2d_flow_attrs = self.hdf_object.get_geom_2d_flow_area_attrs() return int(np.sqrt(self._2d_flow_attrs.get("Cell Maximum Size"))) @property def two_d_flow_cell_minimum_size(self) -> int | None: + """Return Cell Minimum Size.""" if self._2d_flow_attrs == None: self._2d_flow_attrs = self.hdf_object.get_geom_2d_flow_area_attrs() return int(np.sqrt(self._2d_flow_attrs.get("Cell Minimum Size"))) - def mesh_areas(self, crs, return_gdf=False) -> gpd.GeoDataFrame | Polygon | MultiPolygon: + def mesh_areas(self, crs: str = None, return_gdf: bool = False) -> gpd.GeoDataFrame | Polygon | MultiPolygon: + """Retrieve and process mesh area geometries. 
- mesh_areas = self.hdf_object.mesh_cell_polygons() + Parameters + ---------- + crs : str, optional + The coordinate reference system (CRS) to set if the mesh areas do not have one. Defaults to None + return_gdf : bool, optional + If True, returns a GeoDataFrame of the mesh areas. If False, returns a unified Polygon or Multipolygon geometry. Defaults to False. + """ + mesh_areas = self.hdf_object.mesh_areas() if mesh_areas is None or mesh_areas.empty: - raise ValueError("No mesh areas found.") + return Polygon() - if mesh_areas.crs and mesh_areas.crs != crs: - mesh_areas = mesh_areas.to_crs(crs) + if mesh_areas.crs is None and crs is not None: + mesh_areas = mesh_areas.set_crs(crs) if return_gdf: return mesh_areas @@ -1170,39 +1285,40 @@ def mesh_areas(self, crs, return_gdf=False) -> gpd.GeoDataFrame | Polygon | Mult @property def breaklines(self) -> gpd.GeoDataFrame | None: + """Return breaklines.""" breaklines = self.hdf_object.breaklines() if breaklines is None or breaklines.empty: raise ValueError("No breaklines found.") - else: - return breaklines + + return breaklines + + @property + def mesh_cells(self) -> gpd.GeoDataFrame | None: + """Return mesh cell polygons.""" + mesh_cells = self.hdf_object.mesh_cell_polygons() + + if mesh_cells is None or mesh_cells.empty: + raise ValueError("No mesh cells found.") + + return mesh_cells @property def bc_lines(self) -> gpd.GeoDataFrame | None: + """Return boundary condition lines.""" bc_lines = self.hdf_object.bc_lines() if bc_lines is None or bc_lines.empty: raise ValueError("No boundary condition lines found.") - else: - return bc_lines - - @property - def landcover_filename(self) -> str | None: - # broken example property which would give a filename to use when linking assets together - if self._geom_attrs == None: - self._geom_attrs = self.hdf_object.get_attrs("geom_or_something") - return self._geom_attrs.get("land_cover_filename") - def associate_related_assets(self, asset_dict: dict[str, Asset]) -> None: - if 
self.landcover_filename: - landcover_asset = asset_dict[self.parent.joinpath(self.landcover_filename).resolve()] - self.extra_fields["ras:landcover_file"] = landcover_asset.href + return bc_lines class PlanHDFFile(RASHDFFile): + """Class to parse data from Plan HDF files.""" def __init__(self, fpath: str, **kwargs): - super().__init__(fpath, **kwargs) + super().__init__(fpath, RasPlanHdf, **kwargs) self.hdf_object = RasPlanHdf(fpath) self._plan_info_attrs = None @@ -1211,255 +1327,298 @@ def __init__(self, fpath: str, **kwargs): @property def plan_information_base_output_interval(self) -> str | None: - # example property to show pattern: if attributes in which property is found is not loaded, load them - # then use key for the property in the dictionary of attributes to retrieve the property + """Return Base Output Interval.""" if self._plan_info_attrs == None: self._plan_info_attrs = self.hdf_object.get_plan_info_attrs() return self._plan_info_attrs.get("Base Output Interval") @property def plan_information_computation_time_step_base(self): + """Return Computation Time Step Base.""" if self._plan_info_attrs == None: self._plan_info_attrs = self.hdf_object.get_plan_info_attrs() return self._plan_info_attrs.get("Computation Time Step Base") @property def plan_information_flow_filename(self): + """Return Flow Filename.""" if self._plan_info_attrs == None: self._plan_info_attrs = self.hdf_object.get_plan_info_attrs() return self._plan_info_attrs.get("Flow Filename") @property def plan_information_geometry_filename(self): + """Return Geometry Filename.""" if self._plan_info_attrs == None: self._plan_info_attrs = self.hdf_object.get_plan_info_attrs() return self._plan_info_attrs.get("Geometry Filename") @property def plan_information_plan_filename(self): + """Return Plan Filename.""" if self._plan_info_attrs == None: self._plan_info_attrs = self.hdf_object.get_plan_info_attrs() return self._plan_info_attrs.get("Plan Filename") @property def 
plan_information_plan_name(self): + """Return Plan Name.""" if self._plan_info_attrs == None: self._plan_info_attrs = self.hdf_object.get_plan_info_attrs() return self._plan_info_attrs.get("Plan Name") @property def plan_information_project_filename(self): + """Return Project Filename.""" if self._plan_info_attrs == None: self._plan_info_attrs = self.hdf_object.get_plan_info_attrs() return self._plan_info_attrs.get("Project Filename") @property def plan_information_project_title(self): + """Return Project Title.""" if self._plan_info_attrs == None: self._plan_info_attrs = self.hdf_object.get_plan_info_attrs() return self._plan_info_attrs.get("Project Title") @property def plan_information_simulation_end_time(self): + """Return Simulation End Time.""" if self._plan_info_attrs == None: self._plan_info_attrs = self.hdf_object.get_plan_info_attrs() return self._plan_info_attrs.get("Simulation End Time").isoformat() @property def plan_information_simulation_start_time(self): + """Return Simulation Start Time.""" if self._plan_info_attrs == None: self._plan_info_attrs = self.hdf_object.get_plan_info_attrs() return self._plan_info_attrs.get("Simulation Start Time").isoformat() @property def plan_parameters_1d_flow_tolerance(self): + """Return 1D Flow Tolerance.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D Flow Tolerance") @property def plan_parameters_1d_maximum_iterations(self): + """Return 1D Maximum Iterations.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D Maximum Iterations") @property def plan_parameters_1d_maximum_iterations_without_improvement(self): + """Return 1D Maximum Iterations Without Improvement.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return 
self._plan_parameters_attrs.get("1D Maximum Iterations Without Improvement") @property def plan_parameters_1d_maximum_water_surface_error_to_abort(self): + """Return 1D Maximum Water Surface Error To Abort.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D Maximum Water Surface Error To Abort") @property def plan_parameters_1d_storage_area_elevation_tolerance(self): + """Return 1D Storage Area Elevation Tolerance.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D Storage Area Elevation Tolerance") @property def plan_parameters_1d_theta(self): + """Return 1D Theta.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D Theta") @property def plan_parameters_1d_theta_warmup(self): + """Return 1D Theta Warmup.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D Theta Warmup") @property def plan_parameters_1d_water_surface_elevation_tolerance(self): + """Return 1D Water Surface Elevation Tolerance.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D Water Surface Elevation Tolerance") @property def plan_parameters_1d2d_gate_flow_submergence_decay_exponent(self): + """Return 1D-2D Gate Flow Submergence Decay Exponent.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D-2D Gate Flow Submergence Decay Exponent") @property def plan_parameters_1d2d_is_stablity_factor(self): + """Return 1D-2D IS Stablity Factor.""" if self._plan_parameters_attrs == None: 
self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D-2D IS Stablity Factor") @property def plan_parameters_1d2d_ls_stablity_factor(self): + """Return 1D-2D LS Stablity Factor.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D-2D LS Stablity Factor") @property def plan_parameters_1d2d_maximum_number_of_time_slices(self): + """Return 1D-2D Maximum Number of Time Slices.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D-2D Maximum Number of Time Slices") @property def plan_parameters_1d2d_minimum_time_step_for_slicinghours(self): + """Return 1D-2D Minimum Time Step for Slicing(hours).""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D-2D Minimum Time Step for Slicing(hours)") @property def plan_parameters_1d2d_number_of_warmup_steps(self): + """Return 1D-2D Number of Warmup Steps.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D-2D Number of Warmup Steps") @property def plan_parameters_1d2d_warmup_time_step_hours(self): + """Return 1D-2D Warmup Time Step (hours).""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D-2D Warmup Time Step (hours)") @property def plan_parameters_1d2d_weir_flow_submergence_decay_exponent(self): + """Return 1D-2D Weir Flow Submergence Decay Exponent.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D-2D Weir Flow Submergence Decay Exponent") @property def 
plan_parameters_1d2d_maxiter(self): + """Return 1D2D MaxIter.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("1D2D MaxIter") @property def plan_parameters_2d_equation_set(self): + """Return 2D Equation Set.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("2D Equation Set") @property def plan_parameters_2d_names(self): + """Return 2D Names.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("2D Names") @property def plan_parameters_2d_volume_tolerance(self): + """Return 2D Volume Tolerance.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("2D Volume Tolerance") @property def plan_parameters_2d_water_surface_tolerance(self): + """Return 2D Water Surface Tolerance.""" if self._plan_parameters_attrs == None: self._plan_parameters_attrs = self.hdf_object.get_plan_param_attrs() return self._plan_parameters_attrs.get("2D Water Surface Tolerance") @property def meteorology_dss_filename(self): + """Return meteorology precip DSS Filename.""" if self._meteorology_attrs == None: self._meteorology_attrs = self.hdf_object.get_meteorology_precip_attrs() return self._meteorology_attrs.get("DSS Filename") @property def meteorology_dss_pathname(self): + """Return meteorology precip DSS Pathname.""" if self._meteorology_attrs == None: self._meteorology_attrs = self.hdf_object.get_meteorology_precip_attrs() return self._meteorology_attrs.get("DSS Pathname") @property def meteorology_data_type(self): + """Return meteorology precip Data Type.""" if self._meteorology_attrs == None: self._meteorology_attrs = self.hdf_object.get_meteorology_precip_attrs() return 
self._meteorology_attrs.get("Data Type") @property def meteorology_mode(self): + """Return meteorology precip Mode.""" if self._meteorology_attrs == None: self._meteorology_attrs = self.hdf_object.get_meteorology_precip_attrs() return self._meteorology_attrs.get("Mode") @property def meteorology_raster_cellsize(self): + """Return meteorology precip Raster Cellsize.""" if self._meteorology_attrs == None: self._meteorology_attrs = self.hdf_object.get_meteorology_precip_attrs() return self._meteorology_attrs.get("Raster Cellsize") @property def meteorology_source(self): + """Return meteorology precip Source.""" if self._meteorology_attrs == None: self._meteorology_attrs = self.hdf_object.get_meteorology_precip_attrs() return self._meteorology_attrs.get("Source") @property def meteorology_units(self): + """Return meteorology precip units.""" if self._meteorology_attrs == None: self._meteorology_attrs = self.hdf_object.get_meteorology_precip_attrs() return self._meteorology_attrs.get("Units") class GeometryHDFFile(RASHDFFile): + """Class to parse data from Geometry HDF files.""" def __init__(self, fpath: str, **kwargs): - super().__init__(fpath, **kwargs) + super().__init__(fpath, RasGeomHdf, **kwargs) - self.hdf_object = RasPlanHdf(fpath) + self.hdf_object = RasGeomHdf(fpath) self._plan_info_attrs = None self._plan_parameters_attrs = None self._meteorology_attrs = None + @property + def projection(self): + """Return geometry projection.""" + return self.hdf_object.projection() + @property def cross_sections(self) -> int | None: - pass + """Return geometry cross sections.""" + return self.hdf_object.cross_sections() @property def reference_lines(self) -> gpd.GeoDataFrame | None: - + """Return geometry reference lines.""" ref_lines = self.hdf_object.reference_lines() if ref_lines is None or ref_lines.empty: - raise ValueError("No reference lines found.") + logger.warning("No reference lines found.") else: return ref_lines diff --git a/hecstac/ras/utils.py 
b/hecstac/ras/utils.py index 03273ac..08c2a85 100644 --- a/hecstac/ras/utils.py +++ b/hecstac/ras/utils.py @@ -1,6 +1,9 @@ +"""Utility functions for the hecstac ras module.""" + import logging import os from functools import wraps +from pathlib import Path import geopandas as gpd import numpy as np @@ -8,6 +11,16 @@ from shapely.errors import UnsupportedGEOSVersionError from shapely.geometry import LineString, MultiPoint, Point +logger = logging.getLogger(__name__) + + +def find_model_files(ras_prj: str) -> list[str]: + """Find all files with the same base name and return absolute paths.""" + ras_prj = Path(ras_prj).resolve() + parent = ras_prj.parent + stem = ras_prj.stem + return [str(i.resolve()) for i in parent.glob(f"{stem}*")] + def is_ras_prj(url: str) -> bool: """Check if a file is a HEC-RAS project file.""" @@ -19,7 +32,9 @@ def is_ras_prj(url: str) -> bool: return False -def search_contents(lines: list[str], search_string: str, token: str = "=", expect_one: bool = True) -> list[str] | str: +def search_contents( + lines: list[str], search_string: str, token: str = "=", expect_one: bool = True, require_one: bool = True +) -> list[str] | str: """Split a line by a token and returns the second half of the line if the search_string is found in the first half.""" results = [] for line in lines: @@ -27,9 +42,9 @@ def search_contents(lines: list[str], search_string: str, token: str = "=", expe results.append(line.split(token)[1]) if expect_one and len(results) > 1: - raise ValueError(f"expected 1 result, got {len(results)}") - elif expect_one and len(results) == 0: - raise ValueError("expected 1 result, no results found") + raise ValueError(f"expected 1 result for {search_string}, got {len(results)} results") + elif require_one and len(results) == 0: + raise ValueError(f"1 result for {search_string} is required, no results found") elif expect_one and len(results) == 1: return results[0] else: @@ -167,7 +182,7 @@ def check_xs_direction(cross_sections: 
gpd.GeoDataFrame, reach: LineString): river_reach_rs.append(xs["river_reach_rs"]) except IndexError as e: - logging.debug( + logger.debug( f"cross section does not intersect river-reach: {xs['river']} {xs['reach']} {xs['river_station']}: error: {e}" ) continue @@ -189,12 +204,15 @@ def validate_point(geom): class requires_geos: + """Unsure.""" + def __init__(self, version): if version.count(".") != 2: raise ValueError("Version must be .. format") self.version = tuple(int(x) for x in version.split(".")) def __call__(self, func): + """Call.""" is_compatible = lib.geos_version >= self.version is_doc_build = os.environ.get("SPHINX_DOC_BUILD") == "1" # set in docs/conf.py if is_compatible and not is_doc_build: @@ -230,11 +248,12 @@ def wrapped(*args, **kwargs): def multithreading_enabled(func): - """Prepare multithreading by setting the writable flags of object type - ndarrays to False. + """ + Prepare multithreading by setting the writable flags of object type ndarrays to False. NB: multithreading also requires the GIL to be released, which is done in - the C extension (ufuncs.c).""" + the C extension (ufuncs.c). + """ @wraps(func) def wrapped(*args, **kwargs): @@ -258,7 +277,7 @@ def wrapped(*args, **kwargs): @requires_geos("3.7.0") @multithreading_enabled def reverse(geometry, **kwargs): - """Returns a copy of a Geometry with the order of coordinates reversed. + """Return a copy of a Geometry with the order of coordinates reversed. If a Geometry is a polygon with interior rings, the interior rings are also reversed. @@ -271,7 +290,7 @@ def reverse(geometry, **kwargs): **kwargs See :ref:`NumPy ufunc docs ` for other keyword arguments. - See also + See Also -------- is_ccw : Checks if a Geometry is clockwise. 
@@ -285,5 +304,4 @@ def reverse(geometry, **kwargs): >>> reverse(None) is None True """ - return lib.reverse(geometry, **kwargs) diff --git a/hecstac/utils/__init__.py b/hecstac/utils/__init__.py index e69de29..ddf8d81 100644 --- a/hecstac/utils/__init__.py +++ b/hecstac/utils/__init__.py @@ -0,0 +1 @@ +"""Utlity scripts for hecstac.""" diff --git a/hecstac/utils/generate_schema_markdown.py b/hecstac/utils/generate_schema_markdown.py deleted file mode 100644 index 93ed5e4..0000000 --- a/hecstac/utils/generate_schema_markdown.py +++ /dev/null @@ -1,385 +0,0 @@ -import json -import re -from dataclasses import dataclass, field -from typing import Any, Iterator - -import jsonschema -import requests - -# Define schema which defines expected structure for extensions schemas -META_SCHEMA = { - "$schema": "http://json-schema.org/draft-07/schema#", - "required": ["$schema", "$id", "title", "description", "oneOf", "definitions"], - "properties": { - "$schema": {"type": "string"}, - "$id": {"type": "string"}, - "title": {"type": "string"}, - "description": {"type": "string"}, - "oneOf": {"type": "array", "items": {"type": "object"}}, - "definitions": {"type": "object", "required": ["stac_extensions", "require_any_field", "fields"]}, - }, -} - -ASSET_SPECIFIC_META_SCHEMA = { - "$schema": "http://json-schema.org/draft-07/schema#", - "required": ["$schema", "$id", "title", "description", "oneOf", "definitions"], - "properties": { - "$schema": {"type": "string"}, - "$id": {"type": "string"}, - "title": {"type": "string"}, - "description": {"type": "string"}, - "oneOf": {"type": "array", "items": {"type": "object"}}, - "definitions": {"type": "object", "required": ["stac_extensions", "require_any_field", "fields", "assets"]}, - }, -} - - -@dataclass -class Field: - field_name: str - type: str | list[str] - description: str - required: bool | None - table_description: str = field(init=False) - type_str: str = field(init=False) - - def __post_init__(self) -> None: - if 
self.required: - self.table_description = f"**REQUIRED** {self.description}" - else: - self.table_description = self.description - if isinstance(self.type, list): - modified_type_list = [self.modify_link(t) for t in self.type] - self.type_str = " \| ".join(modified_type_list) - else: - self.type_str = self.modify_link(self.type) - - @staticmethod - def modify_link(potential_link: str) -> str: - # modify definitions link to internal markdown link, else returns input without modification - if "#/definitions/" in potential_link: - internal_link = potential_link.replace("#/definitions/", "#") - link_name = potential_link.replace("#/definitions/", "") - link_name = link_name.replace("_", " ") - linked = f"[{link_name}]({internal_link})" - return linked - else: - return potential_link - - -@dataclass -class FieldUsability: - catalog: bool - collection_properties: bool - collection_item_assets: bool - item_properties: bool - item_assets: bool - links: bool - catalog_str: str = field(init=False) - collection_properties_str: str = field(init=False) - collection_item_assets_str: str = field(init=False) - item_properties_str: str = field(init=False) - item_assets_str: str = field(init=False) - links_str: str = field(init=False) - - def __post_init__(self): - self.catalog_str = " " - if self.catalog: - self.catalog_str = "x" - self.collection_properties_str = " " - if self.collection_properties: - self.collection_properties_str = "x" - self.collection_item_assets_str = " " - if self.collection_item_assets: - self.collection_item_assets_str = "x" - self.item_properties_str = " " - if self.item_properties: - self.item_properties_str = "x" - self.item_assets_str = " " - if self.item_assets: - self.item_assets_str = "x" - self.links_str = " " - if self.links: - self.links_str = "x" - - -# Class for generating Markdown documents documenting extensions in such a way that they fall in line with template markdown file (https://github.com/stac-extensions/template/blob/main/README.md) 
with alterations where necessary or to reduce required manual input -# intention is to have properties which pull out headings and subheadings from schema -class ExtensionSchema: - def __init__(self, schema_url: str, field_usability: FieldUsability, prefix: str | None = None) -> None: - # reads schema url - self.identifier = schema_url - self.field_usability = field_usability - self._schema_str = read_schema_text_from_url(schema_url) - self.schema = json.loads(self._schema_str) - self.validate_schema() - self._prefix = prefix - self._not_common_definition_names: list[str] = ["fields", "stac_extensions", "require_any_field"] - self._required_item_properties: list[str] | None = None - - def validate_schema(self) -> None: - jsonschema.validate(self.schema, META_SCHEMA) - - @property - def title(self) -> str: - return self.schema["title"] - - @property - def prefix(self) -> str: - if self._prefix == None: - prefix_pattern = re.compile(r"^\^\(\?\!(.+):\)$") - prefix_exclusivity_pattern = list(self.schema["definitions"]["fields"]["patternProperties"].keys())[0] - match = prefix_pattern.match(prefix_exclusivity_pattern) - self._prefix = match.group(1) - return self._prefix - - @property - def item_property_definitions(self) -> list[Field]: - field_list: list[Field] = [] - definition_schema = {"type": "object", "required": ["type", "description"]} - for definition_key, definition_value in self.schema["definitions"]["fields"]["properties"].items(): - jsonschema.validate(definition_value, definition_schema, jsonschema.Draft7Validator) - field = Field( - definition_key, - definition_value["type"], - definition_value["description"], - definition_key in self.required_item_properties, - ) - field_list.append(field) - return field_list - - @property - def common_definitions(self) -> list[Field]: - field_list: list[Field] = [] - definition_schema = {"type": "object", "required": ["type", "description"]} - for definition_key, definition_value in self.schema["definitions"].items(): 
- if definition_key not in self._not_common_definition_names: - jsonschema.validate(definition_value, definition_schema, jsonschema.Draft7Validator) - field = Field(definition_key, definition_value["type"], definition_value["description"], None) - field_list.append(field) - return field_list - - def get_item_meta_schema(self) -> dict[str, Any]: - for stac_type_subschema in self.schema["oneOf"]: - if "type" not in stac_type_subschema: - for subschema in stac_type_subschema["allOf"]: - if "properties" in subschema: - meta_properties = subschema["properties"] - type_definition = meta_properties["type"] - if type_definition == {"const": "Feature"}: - return meta_properties - raise ValueError("Item meta schema was not found in expected location") - - @property - def required_item_properties(self) -> list[str]: - if self._required_item_properties == None: - self.validate_item_metadata_schema() - return self._required_item_properties - - def to_markdown(self, path: str | None) -> str: - # parses schema to markdown and saves to path - markdown_str = "" - # write title - markdown_str += f"# {self.title} Extension\n\n" - # write overall metadata (title, identifier, prefix) - markdown_str += f"- **Title:** {self.title}\n" - markdown_str += f"- **Identifier:** {self.identifier}\n" - markdown_str += f"- **Field Name Prefix:** {self.prefix}\n\n" - # write short description for extension - markdown_str += f"The {self.title} Extension is an extension to the [SpatioTemporal Asset Catalog](https://github.com/radiantearth/stac-spec) (STAC) specification. 
The purpose of the extension is to introduce vocabulary useful in describing RAS models as STAC items and assets.\n\n" - # summarize field usability - markdown_str += f"## Fields\n\nThe fields in the table below can be used in these parts of STAC documents:\n\n" - markdown_str += f"- [{self.field_usability.catalog_str}] Catalogs\n" - markdown_str += f"- [{self.field_usability.collection_properties_str}] Collection Properties\n" - markdown_str += f"- [{self.field_usability.collection_item_assets_str}] Collection Item Assets\n" - markdown_str += f"- [{self.field_usability.item_properties_str}] Item Properties\n" - markdown_str += f"- [{self.field_usability.item_assets_str}] Item Assets\n" - markdown_str += f"- [{self.field_usability.links_str}] Links\n\n" - # overview of fields - fields_table = self._fields_to_table(self.item_property_definitions) - markdown_str += fields_table - markdown_str += "\n\n### Additional Field Information\n\n" - # common definitions - for field in self.common_definitions: - subsection = self._field_to_subsection(field) - markdown_str += subsection - if path: - with open(path, "w") as f: - f.write(markdown_str) - return markdown_str - - @staticmethod - def _field_to_subsection(field: Field) -> str: - subsection = f"\n#### {field.field_name}\n\n" - if field.type_str == "object": - raise ValueError - subsection += f"- Type: {field.type_str}\n" - subsection += f"- Description: {field.description}\n" - return subsection - - @staticmethod - def _fields_to_table(fields: list[Field]) -> str: - max_field_name_length = len("Field Name") - max_type_length = len("Type") - max_description_length = len("Description") - for field in fields: - if len(field.field_name) > max_field_name_length: - max_field_name_length = len(field.field_name) - if len(field.type_str) > max_type_length: - max_type_length = len(field.type_str) - if len(field.table_description) > max_description_length: - max_description_length = len(field.table_description) - field_name_header 
= "Field Name".ljust(max_field_name_length) - type_header = "Type".ljust(max_type_length) - description_header = "Description".ljust(max_description_length) - table = f"| {field_name_header} | {type_header} | {description_header} |" - table += f"\n| {'-' * len(field_name_header)} | {'-' * len(type_header)} | {'-' * len(description_header)} |" - for field in fields: - row = f"\n| {field.field_name.ljust(max_field_name_length)} | {field.type_str.ljust(max_type_length)} | {field.table_description.ljust(max_description_length)} |" - table += row - return table - - def validate_item_metadata_schema(self) -> None: - # validates that the inner schema used to validate the structure of a stac item is structured as expected (has required properties and a ref to definitions/fields or alternatively just a ref to definitions/fields if no properties are required) - # also populates _required_item_properties - definitions_fields_referenced = False - required_property_names = None - meta_properties = self.get_item_meta_schema() - item_properties_schema = meta_properties["properties"] - if "allOf" in item_properties_schema: - subschemas = item_properties_schema["allOf"] - if len(subschemas) != 2: - raise ValueError(f"Expected 2 subschemas in item properties meta schema, got {len(subschemas)}") - for subschema in item_properties_schema["allOf"]: - if "$ref" in subschema: - reference = subschema["$ref"] - if reference != "#/definitions/fields": - raise ValueError( - f"Expected definitions/fields to hold all definitions for properties in item, got {reference}" - ) - definitions_fields_referenced = True - elif "required" in subschema: - required_property_names = subschema["required"] - else: - raise ValueError(f"Subschema found with neither $ref nor required: {subschema}") - else: - required_property_names = [] - if "$ref" not in item_properties_schema: - raise ValueError( - "Expected definitions/fields to hold all definitions for properties in item, instead $ref was not used" - ) - 
reference = item_properties_schema["$ref"] - if reference != "#/definitions/fields": - raise ValueError( - f"Expected definitions/fields to hold all definitions for properties in item, got {reference}" - ) - definitions_fields_referenced = True - if definitions_fields_referenced == False: - raise ValueError("Reference to definitions/fields never found") - if required_property_names == None: - raise TypeError("Required property names was neither set to an empty list nor pulled from schema") - self._required_item_properties = required_property_names - - -class ExtensionSchemaAssetSpecific(ExtensionSchema): - def __init__(self, schema_url, field_usability: FieldUsability, prefix=None): - super().__init__(schema_url, field_usability, prefix) - self.pattern_definition_dict = self.get_pattern_definition_dict() - self._not_common_definition_names.append("assets") - self._not_common_definition_names.extend(self.pattern_definition_dict.values()) - - def get_pattern_definition_dict(self) -> dict[str, str]: - pattern_to_definition_name_dict = {} - self.validate_asset_metadata_schema() - asset_definitions = self.schema["definitions"]["assets"] - for pattern, ref in asset_definitions["patternProperties"].items(): - pattern_to_definition_name_dict[pattern] = ref["$ref"].replace("#/definitions/", "", 1) - return pattern_to_definition_name_dict - - def validate_asset_metadata_schema(self) -> None: - meta_properties = self.get_item_meta_schema() - asset_properties_schema = meta_properties["assets"] - if "$ref" not in asset_properties_schema: - raise ValueError( - "Expected definitions/assets to hold all definitions for properties expected in assets, instead $ref was not used" - ) - reference = asset_properties_schema["$ref"] - if reference != "#/definitions/assets": - raise ValueError( - f"Expected definitions/assets to hold all definitions for properties expected in assets, got {reference}" - ) - - @property - def asset_definitions(self) -> Iterator[tuple[str, list[Field]]]: - 
no_ref_definition_schema = {"type": "object", "required": ["type", "description"]} - with_ref_definition_schema = { - "type": "object", - "required": ["allOf"], - "properties": { - "allOf": { - "type": "array", - "items": { - "oneOf": [ - {"type": "object", "required": ["description"]}, - {"type": "object", "required": ["$ref"]}, - ] - }, - } - }, - } - for pattern, definition_name in self.pattern_definition_dict.items(): - field_list: list[Field] = [] - meta_definition = self.schema["definitions"][definition_name] - required_properties: list[str] = meta_definition.get("required", []) - for asset_property_definition_name, asset_property_definition_value in meta_definition[ - "properties" - ].items(): - try: - jsonschema.validate( - asset_property_definition_value, no_ref_definition_schema, jsonschema.Draft7Validator - ) - field = Field( - asset_property_definition_name, - asset_property_definition_value["type"], - asset_property_definition_value["description"], - asset_property_definition_name in required_properties, - ) - field_list.append(field) - except jsonschema.ValidationError: - jsonschema.validate( - asset_property_definition_value, with_ref_definition_schema, jsonschema.Draft7Validator - ) - asset_property_definition_description = None - asset_property_definition_type = None - for all_of_property in asset_property_definition_value["allOf"]: - if "description" in all_of_property: - asset_property_definition_description = all_of_property["description"] - elif "$ref" in all_of_property: - asset_property_definition_type = all_of_property["$ref"] - field = Field( - asset_property_definition_name, - asset_property_definition_type, - asset_property_definition_description, - asset_property_definition_name in required_properties, - ) - field_list.append(field) - yield pattern, field_list - - def to_markdown(self, path: str | None) -> str: - markdown_str = super().to_markdown(None) - markdown_str += "\n\n## Asset Properties with Pattern Matching\n\n" - markdown_str 
+= "This section describes the pattern used to match against the asset title along with the expected structure of that asset\n" - for pattern, field_list in self.asset_definitions: - markdown_str += f"\n### {pattern}\n\n" - properties_table = self._fields_to_table(field_list) - markdown_str += f"{properties_table}\n" - if path: - with open(path, "w") as f: - f.write(markdown_str) - return markdown_str - - -def read_schema_text_from_url(url: str) -> str: - with requests.get(url) as resp: - schema = resp.text - return schema diff --git a/hecstac/utils/placeholders.py b/hecstac/utils/placeholders.py deleted file mode 100644 index 9a333bf..0000000 --- a/hecstac/utils/placeholders.py +++ /dev/null @@ -1,11 +0,0 @@ -import datetime -import json - -from shapely import Polygon, box, to_geojson - -NULL_DATETIME = datetime.datetime(9999, 9, 9) -NULL_GEOMETRY = Polygon() -NULL_STAC_GEOMETRY = json.loads(to_geojson(NULL_GEOMETRY)) -NULL_BBOX = box(0, 0, 0, 0) -NULL_STAC_BBOX = NULL_BBOX.bounds -PLACEHOLDER_ID = "id" diff --git a/hecstac/utils/s3_utils.py b/hecstac/utils/s3_utils.py deleted file mode 100644 index 1adcd74..0000000 --- a/hecstac/utils/s3_utils.py +++ /dev/null @@ -1,73 +0,0 @@ -from urllib.parse import urlparse -import os -import boto3 -from mypy_boto3_s3 import S3ServiceResource -from pystac.stac_io import DefaultStacIO - - -class S3StacIO(DefaultStacIO): - def __init__(self, headers=None): - super().__init__(headers) - self.session = boto3.Session() - self.s3: S3ServiceResource = self.session.resource("s3") - - def read_text(self, source: str, *_, **__) -> str: - parsed = urlparse(url=source) - if parsed.scheme == "s3": - bucket = parsed.netloc - key = parsed.path[1:] - obj = self.s3.Object(bucket, key) - data_encoded: bytes = obj.get()["Body"].read() - data_decoded = data_encoded.decode() - return data_decoded - else: - return super().read_text(source) - - def write_text(self, dest: str, txt, *_, **__) -> None: - parsed = urlparse(url=dest) - if parsed.scheme 
== "s3": - bucket = parsed.netloc - key = parsed.path[1:] - obj = self.s3.Object(bucket, key) - obj.put(Body=txt, ContentEncoding="utf-8") - else: - return super().write_text(dest, txt, *_, **__) - - -def split_s3_path(s3_path: str) -> tuple[str, str]: - """Split an S3 path into the bucket name and the key. - - Parameters - ---------- - s3_path (str): The S3 path to split. It should be in the format 's3://bucket/key'. - - Returns - ------- - tuple: A tuple containing the bucket name and the key. If the S3 path does not contain a key, the second element - of the tuple will be None. - """ - if not s3_path.startswith("s3://"): - raise ValueError(f"s3_path does not start with s3://: {s3_path}") - bucket, _, key = s3_path[5:].partition("/") - if not key: - raise ValueError(f"s3_path contains bucket only, no key: {s3_path}") - return bucket, key - - -def init_s3_resources(): - """Create a Boto3 session using AWS credentials from environment variables and creates both an S3 client and S3 resource for interacting with AWS S3.""" - session = boto3.Session( - aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"], - aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"], - ) - - s3_client = session.client("s3") - s3_resource = session.resource("s3") - return session, s3_client, s3_resource - - -def save_bytes_s3(byte_obj: bytes, s3_key: str) -> None: - """Save bytes to S3.""" - _, s3_client, _ = init_s3_resources() - bucket, key = split_s3_path(s3_key) - s3_client.put_object(Body=byte_obj, Bucket=bucket, Key=key) diff --git a/new_ffrd_event_item.py b/new_ffrd_event_item.py index 94e6967..7e22397 100644 --- a/new_ffrd_event_item.py +++ b/new_ffrd_event_item.py @@ -1,7 +1,9 @@ +"""Creates a STAC Item from an event.""" + from pystac import Item -from hecstac.common.logger import initialize_logger -from hecstac.events.ffrd import FFRDEventItem +from hecstac.events.logger import initialize_logger +from hecstac.events.ffrd import EventItem if __name__ == "__main__": 
initialize_logger() @@ -38,7 +40,7 @@ ffrd_event_item_id = f"{realization}-{block_group}-{event_id}" dest_href = f"/Users/slawler/Downloads/duwamish/{ffrd_event_item_id}.json" - ffrd_event_item = FFRDEventItem( + ffrd_event_item = EventItem( realization=realization, block_group=block_group, event_id=event_id, diff --git a/new_hms_item.py b/new_hms_item.py index 6be539f..54dfedf 100644 --- a/new_hms_item.py +++ b/new_hms_item.py @@ -1,24 +1,35 @@ +"""Creates a STAC Item from a HEC-HMS model .prj file.""" + from pathlib import Path from hecstac.common.logger import initialize_logger -from hecstac.hms.item import HMSModelItem +from hecstac import HMSModelItem def sanitize_catalog_assets(item: HMSModelItem) -> HMSModelItem: - """ - Forces the asset paths in the catalog relative to item root. - """ - for asset in item.assets.values(): - if item.pm.model_root_dir in asset.href: - asset.href = asset.href.replace(item.pm.item_dir, ".") + """Force the asset paths in the catalog to be relative to the item root.""" + item_dir = Path(item.pm.item_dir).resolve() + + for _, asset in item.assets.items(): + asset_path = Path(asset.href).resolve() + + if asset_path.is_relative_to(item_dir): + asset.href = str(asset_path.relative_to(item_dir)) + else: + asset.href = ( + str(asset_path.relative_to(item_dir.parent)) + if item_dir.parent in asset_path.parents + else str(asset_path) + ) + return item if __name__ == "__main__": initialize_logger() - hms_project_file = "/Users/slawler/Downloads/duwamish/Duwamish_SST.hms" + hms_project_file = "duwamish/Duwamish_SST.hms" item_id = Path(hms_project_file).stem - hms_item = HMSModelItem(hms_project_file, item_id) + hms_item = HMSModelItem.from_prj(hms_project_file, item_id) hms_item = sanitize_catalog_assets(hms_item) hms_item.save_object(hms_item.pm.item_path(item_id)) diff --git a/new_ras_item.py b/new_ras_item.py index 234cff7..8b8e75a 100644 --- a/new_ras_item.py +++ b/new_ras_item.py @@ -1,29 +1,42 @@ +"""Creates a STAC Item from a 
HEC-RAS model .prj file.""" + import logging from pathlib import Path +from hecstac import RASModelItem from hecstac.common.logger import initialize_logger -from hecstac.ras.item import RASModelItem + +logger = logging.getLogger(__name__) def sanitize_catalog_assets(item: RASModelItem) -> RASModelItem: - """ - Forces the asset paths in the catalog relative to item root. - """ - for asset in item.assets.values(): - if item.pm.model_root_dir in asset.href: - asset.href = asset.href.replace(item.pm.item_dir, ".") + """Force the asset paths in the catalog to be relative to the item root.""" + item_dir = Path(item.pm.item_dir).resolve() + + for _, asset in item.assets.items(): + asset_path = Path(asset.href).resolve() + + if asset_path.is_relative_to(item_dir): + asset.href = str(asset_path.relative_to(item_dir)) + else: + asset.href = ( + str(asset_path.relative_to(item_dir.parent)) + if item_dir.parent in asset_path.parents + else str(asset_path) + ) + return item if __name__ == "__main__": initialize_logger() - ras_project_file = "/Users/slawler/Downloads/model-library-2/ffrd-duwamish/checkpoint-validation/hydraulics/duwamish-20250106/Duwamish_17110013.prj" + ras_project_file = "Muncie/Muncie.prj" item_id = Path(ras_project_file).stem + crs = None - ras_item = RASModelItem(ras_project_file, item_id) + ras_item = RASModelItem.from_prj(ras_project_file, item_id, crs=crs) + # ras_item.add_model_thumbnails(["mesh_areas", "breaklines", "bc_lines"]) ras_item = sanitize_catalog_assets(ras_item) - fs = ras_item.scan_model_dir() - ras_item.add_ras_asset() - ras_item.save_object(ras_item.pm.item_path(item_id)) - logging.info(f"Saved {ras_item.pm.item_path(item_id)}") + ras_item.save_object(ras_project_file) + logger.info(f"Saved {ras_project_file}") diff --git a/pyproject.toml b/pyproject.toml index 8674fc8..1bef4ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,13 +23,17 @@ classifiers = [ dependencies = [ "fiona==1.9.6", "geopandas==1.0.1", - "matplotlib==3.9.0", + 
"matplotlib==3.7.3", "pystac==1.10.0", "rasterio==1.3.10", "requests==2.32.3", - "shapely==2.0.5", + "shapely==2.0.7", "xarray==2024.11.0", - "rioxarray==0.18.1" + "rioxarray==0.18.1", + "mypy-boto3-s3==1.35.93", + "contextily==1.6.2", + "rashdf==0.7.1", + "boto3==1.35.98" ] [project.optional-dependencies] @@ -55,6 +59,8 @@ line-length = 120 [tool.ruff.lint.per-file-ignores] "tests/**" = ["D"] "docs/**" = ["D"] +"server.py" = ["D"] + [tool.setuptools.packages.find] where = ["."]