diff --git a/README.md b/README.md index 4c15d748..0f8fddd2 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![pypi badge](https://img.shields.io/pypi/v/nwp-consumer?&color=086788)](https://pypi.org/project/nwp-consumer) [![documentation badge](https://img.shields.io/badge/docs-latest-333333)](https://openclimatefix.github.io/nwp-consumer/) [![contributors badge](https://img.shields.io/github/contributors/openclimatefix/nwp-consumer?color=FFFFFF)](https://github.com/openclimatefix/nwp-consumer/graphs/contributors) -[![workflows badge](https://img.shields.io/github/actions/workflow/status/openclimatefix/nwp-consumer/branch_ci.yml?branch=main&color=FFD053)](https://github.com/openclimatefix/nwp-consumer/actions/workflows/ci.yml) +[![workflows badge](https://img.shields.io/github/actions/workflow/status/openclimatefix/nwp-consumer/branch_ci.yml?branch=main&color=FFD053)](https://github.com/openclimatefix/nwp-consumer/actions/workflows/branch_ci.yml) [![ease of contribution: easy](https://img.shields.io/badge/ease%20of%20contribution:%20easy-32bd50)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories) Some renewables, such as solar and wind, generate power according to the weather conditions. @@ -102,6 +102,8 @@ parameter modifications to the model's expected coordinates in it's metadata for repository. ## Development + +### Linting and static type checking This project uses [MyPy](https://mypy.readthedocs.io/en/stable/) for static type checking and [Ruff](https://docs.astral.sh/ruff/) for linting. @@ -151,7 +153,7 @@ src and flat layouts. ## Contributing and community -[![issues badge](https://img.shields.io/github/issues/openclimatefix/ocf-template?color=FFAC5F)](https://github.com/openclimatefix/ocf-template/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc) +[![issues badge](https://img.shields.io/github/issues/openclimatefix/nwp-consumer?color=FFAC5F)](https://github.com/openclimatefix/nwp-consumer/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc) - PR's are welcome! See the [Organisation Profile](https://github.com/openclimatefix) for details on contributing - Find out about our other projects in the [OCF Meta Repo](https://github.com/openclimatefix/ocf-meta-repo) diff --git a/src/nwp_consumer/internal/entities/coordinates.py b/src/nwp_consumer/internal/entities/coordinates.py index 0732b9d8..12ab6a6b 100644 --- a/src/nwp_consumer/internal/entities/coordinates.py +++ b/src/nwp_consumer/internal/entities/coordinates.py @@ -98,6 +98,11 @@ class NWPDimensionCoordinateMap: longitude: list[float] | None = None """The longitude coordinates of the forecast grid in degrees. """ + x: list[float] | None = None + """X coordinates of an OSGB (or other alternative projection) grid.""" + y: list[float] | None = None + """Y coordinates of an OSGB (or other alternative projection) grid.""" + def __post_init__(self) -> None: """Rigidly set input value ordering and precision.""" self.variable = sorted(self.variable) @@ -204,6 +209,20 @@ def from_pandas( "Longitude coordinates should run from -180 -> 180. " "Modify the coordinate in the source data to be in ascending order.", )) + if "y" in pd_indexes \ + and pd_indexes["y"].values[0] > pd_indexes["y"].values[-1]: + return Failure(ValueError( + "Cannot create NWPDimensionCoordinateMap instance from pandas indexes " + "as the y values are not in ascending order. " + "Modify the coordinate in the source data to be in ascending order.", + )) + if "x" in pd_indexes \ + and pd_indexes["x"].values[0] > pd_indexes["x"].values[-1]: + return Failure(ValueError( + "Cannot create NWPDimensionCoordinateMap instance from pandas indexes " + "as the x values are not in ascending order. " + "Modify the coordinate in the source data to be in ascending order.", + )) # Convert the pandas Index objects to lists of the appropriate types return Success( @@ -231,6 +250,10 @@ def from_pandas( if "latitude" in pd_indexes else None, longitude=pd_indexes["longitude"].to_list() \ if "longitude" in pd_indexes else None, + y=pd_indexes["y"].to_list() \ + if "y" in pd_indexes else None, + x=pd_indexes["x"].to_list() \ + if "x" in pd_indexes else None, ), ) diff --git a/src/nwp_consumer/internal/entities/modelmetadata.py b/src/nwp_consumer/internal/entities/modelmetadata.py index f76d047f..a1cf9e36 100644 --- a/src/nwp_consumer/internal/entities/modelmetadata.py +++ b/src/nwp_consumer/internal/entities/modelmetadata.py @@ -12,9 +12,11 @@ """ import dataclasses +import datetime as dt import logging import numpy as np +import pandas as pd from .coordinates import NWPDimensionCoordinateMap from .parameters import Parameter @@ -55,6 +57,14 @@ class ModelMetadata: Which prints grid data from the grib file. """ + running_hours: list[int] + """The hours of the day that the model runs. + + Raw Repositories that provide data for the model may not have every running time. + In this instance, use `with_running_hours` to specify the running hours specific + to the repository. + """ + chunk_count_overrides: dict[str, int] = dataclasses.field(default_factory=dict) """Mapping of dimension names to the desired number of chunks in that dimension. @@ -117,6 +127,19 @@ def with_chunk_count_overrides(self, overrides: dict[str, int]) -> "ModelMetadat ) return dataclasses.replace(self, chunk_count_overrides=overrides) + def with_running_hours(self, hours: list[int]) -> "ModelMetadata": + """Returns metadata for the given model with the given running hours.""" + return dataclasses.replace(self, running_hours=hours) + + def month_its(self, year: int, month: int) -> list[dt.datetime]: + """Generate all init times for a given month.""" + days = pd.Period(f"{year}-{month}").days_in_month + its: list[dt.datetime] = [] + for day in range(1, days + 1): + for hour in self.running_hours: + its.append(dt.datetime(year, month, day, hour, tzinfo=dt.UTC)) + return its + class Models: """Namespace containing known models.""" @@ -149,6 +172,7 @@ class Models: latitude=[float(f"{lat / 10:.2f}") for lat in range(900, -900 - 1, -1)], longitude=[float(f"{lon / 10:.2f}") for lon in range(-1800, 1800 + 1, 1)], ), + running_hours=[0, 6, 12, 18], ) """ECMWF's High Resolution Integrated Forecast System.""" @@ -168,6 +192,7 @@ class Models: latitude=[v/10 for v in range(900, -900, -1)], longitude=[v/10 for v in range(-1800, 1800, 1)], ), + running_hours=[0, 12], ) """Summary statistics from ECMWF's Ensemble Forecast System.""" @@ -196,6 +221,7 @@ class Models: latitude=[v/10 for v in range(900, -900, -1)], longitude=[v/10 for v in range(-1800, 1800, 1)], ), + running_hours=[0, 6, 12, 18], ) """Full ensemble data from ECMWF's Ensemble Forecast System.""" @@ -227,6 +253,7 @@ class Models: latitude=[float(lat) for lat in range(90, -90 - 1, -1)], longitude=[float(lon) for lon in range(-180, 180 + 1, 1)], ), + running_hours=[0, 6, 12, 18], ) """NCEP's Global Forecast System.""" @@ -262,6 +289,7 @@ class Models: ], # TODO: Change to -180 -> 180 ), + running_hours=[0, 6, 12, 18], ) """MetOffice's Unified Model, in the Global configuration, at a resolution of 17km.""" @@ -295,6 +323,39 @@ class Models: for lon in np.arange(-179.929687, 179.929688 + 0.140625, 0.140625) ], ), + running_hours=[0, 6, 12, 18], ) """MetOffice's Unified Model, in the Global configuration, at a resolution of 10km.""" + MO_UM_UKV_2KM: ModelMetadata = ModelMetadata( + name="um-ukv", + resolution="2km", + expected_coordinates=NWPDimensionCoordinateMap( + init_time=[], + step=list(range(0, 55)), + variable=sorted( + [ + Parameter.CLOUD_COVER_TOTAL, + Parameter.CLOUD_COVER_HIGH, + Parameter.CLOUD_COVER_MEDIUM, + Parameter.CLOUD_COVER_LOW, + Parameter.VISIBILITY_SL, + Parameter.RELATIVE_HUMIDITY_SL, + Parameter.SNOW_DEPTH_GL, + Parameter.DOWNWARD_LONGWAVE_RADIATION_FLUX_GL, + Parameter.DOWNWARD_SHORTWAVE_RADIATION_FLUX_GL, + Parameter.TEMPERATURE_SL, + Parameter.WIND_U_COMPONENT_10m, + Parameter.WIND_V_COMPONENT_10m, + Parameter.WIND_DIRECTION_10m, + Parameter.WIND_SPEED_10m, + Parameter.TOTAL_PRECIPITATION_RATE_GL, + ], + ), + x=list(range(0, 455)), + y=list(range(0, 639)), + ), + running_hours=list(range(0, 24)), + ) + """MetOffice's Unified Model in the UKV configuration, at a resolution of 2km""" + diff --git a/src/nwp_consumer/internal/entities/parameters.py b/src/nwp_consumer/internal/entities/parameters.py index caa256c6..f02a845b 100644 --- a/src/nwp_consumer/internal/entities/parameters.py +++ b/src/nwp_consumer/internal/entities/parameters.py @@ -93,7 +93,7 @@ class ParameterData: - https://codes.ecmwf.int/grib/param-db/?filter=All """ - alternate_shortnames: list[str] = dataclasses.field(default_factory=list) + alternative_shortnames: list[str] = dataclasses.field(default_factory=list) """Alternate names for the parameter found in the wild.""" @@ -133,6 +133,7 @@ class Parameter(StrEnum): DIRECT_SHORTWAVE_RADIATION_FLUX_GL = auto() WIND_SPEED_10m = auto() WIND_SPEED_100m = auto() + WIND_DIRECTION_10m = auto() PRESSURE_MSL = auto() def metadata(self) -> ParameterData: @@ -145,7 +146,7 @@ def metadata(self) -> ParameterData: description="Temperature at screen level", units="C", limits=ParameterLimits(upper=60, lower=-90), - alternate_shortnames=["t", "t2m", "tas"], + alternative_shortnames=["t", "t2m", "tas"], grib2_code="167", ) @@ -158,7 +159,7 @@ def metadata(self) -> ParameterData: "This is made up of both direct and diffuse radiation.", units="W/m^2", limits=ParameterLimits(upper=1500, lower=0), - alternate_shortnames=["swavr", "ssrd", "dswrf", "sdswrf"], + alternative_shortnames=["swavr", "ssrd", "dswrf", "sdswrf"], grib2_code="169", ) @@ -170,7 +171,7 @@ def metadata(self) -> ParameterData: "incident on the surface expected over the next hour.", units="W/m^2", limits=ParameterLimits(upper=500, lower=0), - alternate_shortnames=["strd", "dlwrf", "sdlwrf"], + alternative_shortnames=["strd", "dlwrf", "sdlwrf"], grib2_code="175", ) @@ -182,7 +183,7 @@ def metadata(self) -> ParameterData: "to the equilibrium vapour pressure of water", units="%", limits=ParameterLimits(upper=100, lower=0), - alternate_shortnames=["r", "r2"], + alternative_shortnames=["r", "r2"], grib2_code="157", ) @@ -194,7 +195,7 @@ def metadata(self) -> ParameterData: "horizontally in daylight conditions.", units="m", limits=ParameterLimits(upper=4500, lower=0), - alternate_shortnames=["vis"], + alternative_shortnames=["vis"], grib2_code="20", ) @@ -206,7 +207,7 @@ def metadata(self) -> ParameterData: "the wind in the eastward direction.", units="m/s", limits=ParameterLimits(upper=100, lower=-100), - alternate_shortnames=["u10", "u", "uas"], + alternative_shortnames=["u10", "u", "uas"], grib2_code="165", ) @@ -219,7 +220,7 @@ def metadata(self) -> ParameterData: units="m/s", # Non-tornadic winds are usually < 100m/s limits=ParameterLimits(upper=100, lower=-100), - alternate_shortnames=["v10", "v", "vas"], + alternative_shortnames=["v10", "v", "vas"], grib2_code="166", ) @@ -231,7 +232,7 @@ def metadata(self) -> ParameterData: "the wind in the eastward direction.", units="m/s", limits=ParameterLimits(upper=100, lower=-100), - alternate_shortnames=["u100"], + alternative_shortnames=["u100"], grib2_code="246", ) @@ -243,7 +244,7 @@ def metadata(self) -> ParameterData: "the wind in the northward direction.", units="m/s", limits=ParameterLimits(upper=100, lower=-100), - alternate_shortnames=["v100"], + alternative_shortnames=["v100"], grib2_code="247", ) @@ -255,7 +256,7 @@ def metadata(self) -> ParameterData: "the wind in the eastward direction.", units="m/s", limits=ParameterLimits(upper=150, lower=-150), - alternate_shortnames=["u200"], + alternative_shortnames=["u200"], grib2_code="239", ) @@ -267,17 +268,29 @@ def metadata(self) -> ParameterData: "the wind in the northward direction.", units="m/s", limits=ParameterLimits(upper=150, lower=-150), - alternate_shortnames=["v200"], + alternative_shortnames=["v200"], grib2_code="240", ) + case self.WIND_DIRECTION_10m.name: + return ParameterData( + name=str(self), + description="The wind direction from 0 to 360. 0 represents a Northerly " + "wind and 90 is Easterly wind. This is confirmed by the UK mean " + "wind direction being Westerly and = ~200. ", + units="degrees", + limits=ParameterLimits(upper=0, lower=360), + alternative_shortnames=["wdir", "wdir10"], + grib2_code="194", + ) + case self.SNOW_DEPTH_GL.name: return ParameterData( name=str(self), description="Depth of snow on the ground.", units="m", limits=ParameterLimits(upper=12, lower=0), - alternate_shortnames=["sd", "sdwe"], + alternative_shortnames=["sd", "sdwe"], grib2_code="141", ) @@ -290,7 +303,7 @@ def metadata(self) -> ParameterData: "to the square's total area.", units="UI", limits=ParameterLimits(upper=1, lower=0), - alternate_shortnames=["hcc"], + alternative_shortnames=["hcc"], grib2_code="188", ) @@ -303,7 +316,7 @@ def metadata(self) -> ParameterData: "to the square's total area.", units="UI", limits=ParameterLimits(upper=1, lower=0), - alternate_shortnames=["mcc"], + alternative_shortnames=["mcc"], grib2_code="187", ) @@ -316,7 +329,7 @@ def metadata(self) -> ParameterData: "to the square's total area.", units="UI", limits=ParameterLimits(upper=1, lower=0), - alternate_shortnames=["lcc"], + alternative_shortnames=["lcc"], grib2_code="186", ) @@ -329,7 +342,7 @@ def metadata(self) -> ParameterData: "to the square's total area.", units="UI", limits=ParameterLimits(upper=1, lower=0), - alternate_shortnames=["tcc", "clt"], + alternative_shortnames=["tcc", "clt"], grib2_code="164", ) @@ -341,7 +354,7 @@ def metadata(self) -> ParameterData: "including rain, snow, and hail.", units="kg/m^2/s", limits=ParameterLimits(upper=0.2, lower=0), - alternate_shortnames=["prate", "tprate"], + alternative_shortnames=["prate", "tprate", "rprate"], grib2_code="260048", ) @@ -354,7 +367,7 @@ def metadata(self) -> ParameterData: "expected over the next hour.", units="W/m^2", limits=ParameterLimits(upper=1000, lower=0), - alternate_shortnames=["uvb"], + alternative_shortnames=["uvb"], grib2_code="57", ) @@ -368,7 +381,7 @@ def metadata(self) -> ParameterData: "expected over the next hour.", units="W/m^2", limits=ParameterLimits(upper=1000, lower=0), - alternate_shortnames=["dsrp"], + alternative_shortnames=["dsrp"], grib2_code="47", ) @@ -379,7 +392,7 @@ def metadata(self) -> ParameterData: "Defined as the magnitude of the wind vector.", units="m/s", limits=ParameterLimits(upper=150, lower=0), - alternate_shortnames=["10si", "si10"], + alternative_shortnames=["10si", "si10"], grib2_code="207", ) @@ -390,7 +403,7 @@ def metadata(self) -> ParameterData: "Defined as the magnitude of the wind vector.", units="m/s", limits=ParameterLimits(upper=200, lower=0), - alternate_shortnames=["100si", "si100"], + alternative_shortnames=["100si", "si100"], grib2_code="249", ) @@ -404,7 +417,7 @@ def metadata(self) -> ParameterData: "on the Earth's surface. 100 Pa = 1 hPa = 1 mbar.", units="Pa", limits=ParameterLimits(upper=105000, lower=95000), - alternate_shortnames=["mslp", "msl"], + alternative_shortnames=["mslp", "msl"], grib2_code="151", ) case _: @@ -414,7 +427,7 @@ def metadata(self) -> ParameterData: def try_from_alternate(name: str) -> ResultE["Parameter"]: """Map an alternate name to a parameter.""" for p in Parameter: - if name in p.metadata().alternate_shortnames: + if name in p.metadata().alternative_shortnames: return Success(p) return Failure(ValueError(f"Unknown shortname: {name}")) diff --git a/src/nwp_consumer/internal/entities/repometadata.py b/src/nwp_consumer/internal/entities/repometadata.py index ba169eba..aa2be668 100644 --- a/src/nwp_consumer/internal/entities/repometadata.py +++ b/src/nwp_consumer/internal/entities/repometadata.py @@ -15,8 +15,6 @@ import datetime as dt import os -import pandas as pd - from .modelmetadata import ModelMetadata from .postprocess import PostProcessOptions @@ -42,11 +40,6 @@ class RawRepositoryMetadata: but rather are defined by pre-selected agreements with the provider. """ - running_hours: list[int] - """The running hours of the model. - - Most NWP models are run at fixed intervals throughout the day.""" - delay_minutes: int """The approximate model delay in minutes. @@ -72,31 +65,23 @@ class RawRepositoryMetadata: available_models: dict[str, ModelMetadata] """A dictionary of available models and their metadata.""" - def determine_latest_it_from(self, t: dt.datetime) -> dt.datetime: + def determine_latest_it_from(self, t: dt.datetime, running_hours: list[int]) -> dt.datetime: """Determine the latest available initialization time from a given time. Args: t: The time from which to determine the latest initialization time. + running_hours: A list of hours at which the model runs each day. Returns: The latest available initialization time prior to the given time. """ it = t.replace(minute=0, second=0, microsecond=0) \ - dt.timedelta(minutes=self.delay_minutes) - while it.hour not in self.running_hours: + while it.hour not in running_hours: it -= dt.timedelta(hours=1) return it - def month_its(self, year: int, month: int) -> list[dt.datetime]: - """Generate all init times for a given month.""" - days = pd.Period(f"{year}-{month}").days_in_month - its: list[dt.datetime] = [] - for day in range(1, days + 1): - for hour in self.running_hours: - its.append(dt.datetime(year, month, day, hour, tzinfo=dt.UTC)) - return its - def missing_required_envs(self) -> list[str]: """Get a list of unset required environment variables. @@ -110,8 +95,7 @@ def __str__(self) -> str: pretty: str = "".join(( "Model Repository: ", f"\n\t{self.name} ({'archive' if self.is_archive else 'live/rolling'} dataset.)", - f"\n\truns at: {self.running_hours} hours ", - "(available after {self.delay_minutes} minute delay)", + f"\n\t\t(available after {self.delay_minutes} minute delay)", "\nEnvironment variables:", "\n\tRequired:", "\n".join(f"\t\t{var}" for var in self.required_env), diff --git a/src/nwp_consumer/internal/entities/tensorstore.py b/src/nwp_consumer/internal/entities/tensorstore.py index 4e866f1b..d580b9fa 100644 --- a/src/nwp_consumer/internal/entities/tensorstore.py +++ b/src/nwp_consumer/internal/entities/tensorstore.py @@ -305,7 +305,6 @@ def write_to_region( "Ensure the chunking is granular enough to cover the raw data region.", ) - # Perform the regional write try: da.to_zarr(store=self.path, region=region, consolidated=True) diff --git a/src/nwp_consumer/internal/entities/test_modelmetadata.py b/src/nwp_consumer/internal/entities/test_modelmetadata.py index 4954731a..5171ceea 100644 --- a/src/nwp_consumer/internal/entities/test_modelmetadata.py +++ b/src/nwp_consumer/internal/entities/test_modelmetadata.py @@ -26,6 +26,7 @@ def test_with_region(self) -> None: latitude=[float(f"{lat / 10:.2f}") for lat in range(900, -900 - 1, -1)], longitude=[float(f"{lon / 10:.2f}") for lon in range(-1800, 1800 + 1, 1)], ), + running_hours=[0, 6, 12, 18], ) @dataclasses.dataclass diff --git a/src/nwp_consumer/internal/entities/test_parameters.py b/src/nwp_consumer/internal/entities/test_parameters.py index 54b1bf97..e7e2a6e0 100644 --- a/src/nwp_consumer/internal/entities/test_parameters.py +++ b/src/nwp_consumer/internal/entities/test_parameters.py @@ -18,7 +18,7 @@ def test_metadata(self, p: Parameter) -> None: metadata = p.metadata() self.assertEqual(metadata.name, p.value) - @given(st.sampled_from([s for p in Parameter for s in p.metadata().alternate_shortnames])) + @given(st.sampled_from([s for p in Parameter for s in p.metadata().alternative_shortnames])) def test_try_from_shortname(self, shortname: str) -> None: """Test the try_from_shortname method.""" p = Parameter.try_from_alternate(shortname) @@ -28,7 +28,7 @@ def test_try_from_shortname(self, shortname: str) -> None: self.assertFalse(is_successful(p)) @given( - st.sampled_from([s for p in Parameter for s in p.metadata().alternate_shortnames]), + st.sampled_from([s for p in Parameter for s in p.metadata().alternative_shortnames]), st.sampled_from(Parameter), ) def test_rename_else_drop_ds_vars(self, shortname: str, parameter: Parameter) -> None: @@ -57,7 +57,7 @@ def test_rename_else_drop_ds_vars(self, shortname: str, parameter: Parameter) -> allowed_parameters=allowed_parameters, ) - if shortname in parameter.metadata().alternate_shortnames: + if shortname in parameter.metadata().alternative_shortnames: self.assertTrue(len(list(ds.data_vars)) == 1) self.assertEqual(next(iter(ds.data_vars)), str(parameter)) else: diff --git a/src/nwp_consumer/internal/entities/test_repometadata.py b/src/nwp_consumer/internal/entities/test_repometadata.py index 55bf3e0c..cab69cd5 100644 --- a/src/nwp_consumer/internal/entities/test_repometadata.py +++ b/src/nwp_consumer/internal/entities/test_repometadata.py @@ -13,7 +13,6 @@ class TestRawRepositoryMetadata(unittest.TestCase): name="test", is_archive=False, is_order_based=False, - running_hours=[0, 6, 12, 18], delay_minutes=60, required_env=["TEST"], optional_env={"TEST": "test"}, @@ -46,7 +45,7 @@ class TestCase: for test in tests: with self.subTest(name=test.name): - result = self.metadata.determine_latest_it_from(test.t) + result = self.metadata.determine_latest_it_from(test.t, [0, 6, 12, 18]) self.assertEqual(result, test.expected) diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/ceda_ftp.py b/src/nwp_consumer/internal/repositories/raw_repositories/ceda_ftp.py index 30cb001e..0e569a06 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/ceda_ftp.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/ceda_ftp.py @@ -114,17 +114,17 @@ def repository() -> entities.RawRepositoryMetadata: name="CEDA", is_archive=True, is_order_based=False, - running_hours=[0, 12], # 6 and 18 exist, but are lacking variables delay_minutes=(60 * 24 * 7) + (60 * 12), # 7.5 days max_connections=20, required_env=["CEDA_FTP_USER", "CEDA_FTP_PASS"], optional_env={}, postprocess_options=entities.PostProcessOptions(), available_models={ - "default": entities.Models.MO_UM_GLOBAL_17KM.with_chunk_count_overrides({ + "default": entities.Models.MO_UM_GLOBAL_17KM\ + .with_chunk_count_overrides({ "latitude": 8, "longitude": 8, - }), + }).with_running_hours([0, 12]), # 6 and 18 exist, but are lacking variables }, ) diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_mars.py b/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_mars.py index bccf9776..e1e3a382 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_mars.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_mars.py @@ -208,7 +208,6 @@ def repository() -> entities.RawRepositoryMetadata: name="ECMWF-MARS", is_archive=True, is_order_based=False, - running_hours=[0, 12], delay_minutes=(60 * 26), # 1 day, plus leeway max_connections=20, required_env=[ diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_realtime.py b/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_realtime.py index 7a6bb1df..334b9cb8 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_realtime.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_realtime.py @@ -68,7 +68,6 @@ def repository() -> entities.RawRepositoryMetadata: name="ECMWF-Realtime-S3", is_archive=False, is_order_based=True, - running_hours=[0, 6, 12, 18], delay_minutes=(60 * 7), # 7 hours max_connections=100, required_env=[ diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/mo_datahub.py b/src/nwp_consumer/internal/repositories/raw_repositories/mo_datahub.py index badc723b..19f5042b 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/mo_datahub.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/mo_datahub.py @@ -73,21 +73,21 @@ def __init__(self, order_id: str, api_key: str) -> None: @staticmethod @override def repository() -> entities.RawRepositoryMetadata: + return entities.RawRepositoryMetadata( name="MetOffice-Weather-Datahub", is_archive=False, is_order_based=True, - running_hours=[0, 12], delay_minutes=60, max_connections=10, required_env=["METOFFICE_API_KEY", "METOFFICE_ORDER_ID"], optional_env={}, postprocess_options=entities.PostProcessOptions(), available_models={ - "default": entities.Models.MO_UM_GLOBAL_10KM\ - .with_region("india"), - "um-global-india": entities.Models.MO_UM_GLOBAL_10KM\ - .with_region("india"), + "default": entities.Models.MO_UM_GLOBAL_10KM.with_region("india"), + "um-global-10km-india": entities.Models.MO_UM_GLOBAL_10KM.with_region("india"), + "um-global-10km-uk": entities.Models.MO_UM_GLOBAL_10KM.with_region("uk"), + "um-ukv-2km": entities.Models.MO_UM_UKV_2KM, }, ) @@ -282,18 +282,26 @@ def _convert(path: pathlib.Path) -> ResultE[list[xr.DataArray]]: ) try: - da: xr.DataArray = ( + ds = ( ds.pipe( entities.Parameter.rename_else_drop_ds_vars, allowed_parameters=MetOfficeDatahubRawRepository.model().expected_coordinates.variable, ) .rename(name_dict={"time": "init_time"}) - .expand_dims(dim="init_time") - .expand_dims(dim="step") - .to_dataarray(name=MetOfficeDatahubRawRepository.model().name) - ) + .expand_dims(dim="init_time")) + + if "step" not in ds.dims: + ds = ds.expand_dims(dim="step") + # Ensure x and y coordinates are present if dimensions are + if all(v in ds.dims for v in ["x", "y"]): + ds = ds.assign_coords(coords={ + "x": list(range(ds.sizes["x"])), + "y": list(range(ds.sizes["y"])), + }) + + da: xr.DataArray = ds.to_dataarray(name=MetOfficeDatahubRawRepository.model().name) da = ( - da.drop_vars( + da.drop_vars( names=[ c for c in ds.coords if c not in @@ -301,10 +309,14 @@ def _convert(path: pathlib.Path) -> ResultE[list[xr.DataArray]]: ], errors="ignore", ) - .transpose(*MetOfficeDatahubRawRepository.model().expected_coordinates.dims) - .sortby(variables=["step", "variable", "longitude"]) - .sortby(variables="latitude", ascending=False) - ) + .transpose(*MetOfficeDatahubRawRepository.model().expected_coordinates.dims)) + + if "latitude" in MetOfficeDatahubRawRepository.model().expected_coordinates.dims: + da = da.sortby(variables=["step", "variable", "longitude"]) + da = da.sortby(variables="latitude", ascending=False) + else: + da = da.sortby(variables=["step", "variable", "y", "x"]) + except Exception as e: return Failure( ValueError( @@ -312,5 +324,4 @@ def _convert(path: pathlib.Path) -> ResultE[list[xr.DataArray]]: ), ) - return Success([da]) diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/noaa_s3.py b/src/nwp_consumer/internal/repositories/raw_repositories/noaa_s3.py index a57e792a..4f6f3bfa 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/noaa_s3.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/noaa_s3.py @@ -55,7 +55,6 @@ def repository() -> entities.RawRepositoryMetadata: name="NOAA-GFS-S3", is_archive=False, is_order_based=False, - running_hours=[0, 6, 12, 18], delay_minutes=(60 * 5), # 5 hours max_connections=100, required_env=[], diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/test_gribs/test_MO_UKV_agl_relative-humidity_1.5_2025012112.grib b/src/nwp_consumer/internal/repositories/raw_repositories/test_gribs/test_MO_UKV_agl_relative-humidity_1.5_2025012112.grib new file mode 100644 index 00000000..574de0d7 Binary files /dev/null and b/src/nwp_consumer/internal/repositories/raw_repositories/test_gribs/test_MO_UKV_agl_relative-humidity_1.5_2025012112.grib differ diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/test_mo_datahub.py b/src/nwp_consumer/internal/repositories/raw_repositories/test_mo_datahub.py index cfdb2081..c7c90687 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/test_mo_datahub.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/test_mo_datahub.py @@ -3,6 +3,7 @@ import os import pathlib import unittest +from unittest.mock import patch from returns.result import Failure, ResultE, Success @@ -24,7 +25,10 @@ def test__download(self) -> None: self.assertIsInstance(auth_result, Success, msg=f"{auth_result!s}") c = auth_result.unwrap() - test_it = c.repository().determine_latest_it_from(dt.datetime.now(tz=dt.UTC)) + test_it = c.repository().determine_latest_it_from( + dt.datetime.now(tz=dt.UTC), + c.model().running_hours, + ) dl_result = c._download( f"{c.request_url}/agl_u-component-of-wind-surface-adjusted_10.0_{test_it:%Y%m%d%H}_1/data", @@ -32,8 +36,7 @@ def test__download(self) -> None: self.assertIsInstance(dl_result, Success, msg=f"{dl_result!s}") - - def test__convert(self) -> None: + def test_convert(self) -> None: """Test the _convert method.""" @dataclasses.dataclass @@ -89,3 +92,42 @@ class TestCase: else: self.assertIsInstance(region_result, Success, msg=f"{region_result}") + @patch.dict(os.environ, {"MODEL": "um-ukv-2km"}, clear=True) + def test_convert_ukv(self) -> None: + + + @dataclasses.dataclass + class TestCase: + filename: str + expected_coords: NWPDimensionCoordinateMap + should_error: bool + + tests: list[TestCase] = [ + TestCase( + filename="test_MO_UKV_agl_relative-humidity_1.5_2025012112.grib", + expected_coords=dataclasses.replace( + MetOfficeDatahubRawRepository.model().expected_coordinates, + init_time=[dt.datetime(2025, 1, 21, 12, tzinfo=dt.UTC)], + variable=[Parameter.TEMPERATURE_SL], + step=[0], + ), + should_error=False, + ), + ] + + for t in tests: + with self.subTest(name=t.filename): + # Attempt to convert the file + result = MetOfficeDatahubRawRepository._convert( + path=pathlib.Path(__file__).parent.absolute() / "test_gribs" / t.filename, + ) + region_result: ResultE[NWPDimensionCoordinateMap] = result.do( + region + for das in result + for da in das + for region in NWPDimensionCoordinateMap.from_xarray(da) + ) + if t.should_error: + self.assertIsInstance(region_result, Failure, msg=f"{region_result}") + else: + self.assertIsInstance(region_result, Success, msg=f"{region_result}") diff --git a/src/nwp_consumer/internal/services/_dummy_adaptors.py b/src/nwp_consumer/internal/services/_dummy_adaptors.py index b3107d52..275d29d4 100644 --- a/src/nwp_consumer/internal/services/_dummy_adaptors.py +++ b/src/nwp_consumer/internal/services/_dummy_adaptors.py @@ -24,7 +24,6 @@ def repository() -> entities.RawRepositoryMetadata: name="ACME-Test-Models", is_archive=False, is_order_based=False, - running_hours=[0, 6, 12, 18], delay_minutes=60, max_connections=4, required_env=[], @@ -50,6 +49,7 @@ def model() -> entities.ModelMetadata: latitude=np.linspace(90, -90, 721).tolist(), longitude=np.linspace(-180, 179.8, 1440).tolist(), ), + running_hours=[0, 6, 12, 18], ) diff --git a/src/nwp_consumer/internal/services/consumer_service.py b/src/nwp_consumer/internal/services/consumer_service.py index ab02e664..1669796a 100644 --- a/src/nwp_consumer/internal/services/consumer_service.py +++ b/src/nwp_consumer/internal/services/consumer_service.py @@ -138,11 +138,16 @@ def _create_suitable_store( its: list[dt.datetime] = [] match period: case _ if period is None: - its = [repository_metadata.determine_latest_it_from(dt.datetime.now(tz=dt.UTC))] + its = [ + repository_metadata.determine_latest_it_from( + t=dt.datetime.now(tz=dt.UTC), + running_hours=model_metadata.running_hours, + ), + ] case single_it if isinstance(period, dt.datetime): its = [single_it] # type: ignore case multiple_its if isinstance(period, dt.date): - its = repository_metadata.month_its( + its = model_metadata.month_its( year=multiple_its.year, month=multiple_its.month, )