Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add UKV #226

Draft
wants to merge 21 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
[![pypi badge](https://img.shields.io/pypi/v/nwp-consumer?&color=086788)](https://pypi.org/project/nwp-consumer)
[![documentation badge](https://img.shields.io/badge/docs-latest-333333)](https://openclimatefix.github.io/nwp-consumer/)
[![contributors badge](https://img.shields.io/github/contributors/openclimatefix/nwp-consumer?color=FFFFFF)](https://github.com/openclimatefix/nwp-consumer/graphs/contributors)
[![workflows badge](https://img.shields.io/github/actions/workflow/status/openclimatefix/nwp-consumer/branch_ci.yml?branch=main&color=FFD053)](https://github.com/openclimatefix/nwp-consumer/actions/workflows/ci.yml)
[![workflows badge](https://img.shields.io/github/actions/workflow/status/openclimatefix/nwp-consumer/branch_ci.yml?branch=main&color=FFD053)](https://github.com/openclimatefix/nwp-consumer/actions/workflows/branch_ci.yml)
[![ease of contribution: easy](https://img.shields.io/badge/ease%20of%20contribution:%20easy-32bd50)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)

Some renewables, such as solar and wind, generate power according to the weather conditions.
Expand Down Expand Up @@ -102,6 +102,8 @@ parameter modifications to the model's expected coordinates in its metadata for
repository.

## Development

### Linting and static type checking

This project uses [MyPy](https://mypy.readthedocs.io/en/stable/) for static type checking
and [Ruff](https://docs.astral.sh/ruff/) for linting.
Expand Down Expand Up @@ -151,7 +153,7 @@ src and flat layouts.

## Contributing and community

[![issues badge](https://img.shields.io/github/issues/openclimatefix/ocf-template?color=FFAC5F)](https://github.com/openclimatefix/ocf-template/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc)
[![issues badge](https://img.shields.io/github/issues/openclimatefix/nwp-consumer?color=FFAC5F)](https://github.com/openclimatefix/nwp-consumer/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc)

- PRs are welcome! See the [Organisation Profile](https://github.com/openclimatefix) for details on contributing
- Find out about our other projects in the [OCF Meta Repo](https://github.com/openclimatefix/ocf-meta-repo)
Expand Down
23 changes: 23 additions & 0 deletions src/nwp_consumer/internal/entities/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ class NWPDimensionCoordinateMap:
longitude: list[float] | None = None
"""The longitude coordinates of the forecast grid in degrees. """

x: list[float] | None = None
"""X coordinates of an OSGB (or other alternative projection) grid."""
y: list[float] | None = None
"""Y coordinates of an OSGB (or other alternative projection) grid."""

def __post_init__(self) -> None:
"""Rigidly set input value ordering and precision."""
self.variable = sorted(self.variable)
Expand Down Expand Up @@ -204,6 +209,20 @@ def from_pandas(
"Longitude coordinates should run from -180 -> 180. "
"Modify the coordinate in the source data to be in ascending order.",
))
if "x" in pd_indexes \
and pd_indexes["x"].values[0] > pd_indexes["x"].values[-1]:
return Failure(ValueError(
"Cannot create NWPDimensionCoordinateMap instance from pandas indexes "
"as the x values are not in ascending order. "
"Modify the coordinate in the source data to be in ascending order.",
))
if "y" in pd_indexes \
and pd_indexes["y"].values[0] > pd_indexes["y"].values[-1]:
return Failure(ValueError(
"Cannot create NWPDimensionCoordinateMap instance from pandas indexes "
"as the y values are not in ascending order. "
"Modify the coordinate in the source data to be in ascending order.",
))

# Convert the pandas Index objects to lists of the appropriate types
return Success(
Expand Down Expand Up @@ -231,6 +250,10 @@ def from_pandas(
if "latitude" in pd_indexes else None,
longitude=pd_indexes["longitude"].to_list() \
if "longitude" in pd_indexes else None,
x=pd_indexes["x"].to_list() \
if "x" in pd_indexes else None,
y=pd_indexes["y"].to_list() \
if "y" in pd_indexes else None,
),
)

Expand Down
57 changes: 57 additions & 0 deletions src/nwp_consumer/internal/entities/modelmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
"""

import dataclasses
import datetime as dt
import logging

import numpy as np
import pandas as pd

from .coordinates import NWPDimensionCoordinateMap
from .parameters import Parameter
Expand Down Expand Up @@ -55,6 +57,14 @@ class ModelMetadata:
Which prints grid data from the grib file.
"""

running_hours: list[int]
"""The hours of the day that the model runs.

Raw Repositories that provide data for the model may not have every running time.
In this instance, use `with_running_hours` to specify the running hours specific
to the repository.
"""

chunk_count_overrides: dict[str, int] = dataclasses.field(default_factory=dict)
"""Mapping of dimension names to the desired number of chunks in that dimension.

Expand Down Expand Up @@ -117,6 +127,19 @@ def with_chunk_count_overrides(self, overrides: dict[str, int]) -> "ModelMetadat
)
return dataclasses.replace(self, chunk_count_overrides=overrides)

def with_running_hours(self, hours: list[int]) -> "ModelMetadata":
    """Return a copy of this metadata with its running hours replaced.

    Use this when a repository only provides a subset of the model's runs.

    Args:
        hours: The hours of the day at which the model is run.

    Returns:
        A new metadata object whose ``running_hours`` equals ``hours``;
        ``self`` is left unmodified.
    """
    # Store a copy so that later mutation of the caller's list cannot
    # silently change the running hours held by the returned metadata.
    return dataclasses.replace(self, running_hours=list(hours))

def month_its(self, year: int, month: int) -> list[dt.datetime]:
"""Generate all init times for a given month."""
days = pd.Period(f"{year}-{month}").days_in_month
its: list[dt.datetime] = []
for day in range(1, days + 1):
for hour in self.running_hours:
its.append(dt.datetime(year, month, day, hour, tzinfo=dt.UTC))
return its

class Models:
"""Namespace containing known models."""

Expand Down Expand Up @@ -149,6 +172,7 @@ class Models:
latitude=[float(f"{lat / 10:.2f}") for lat in range(900, -900 - 1, -1)],
longitude=[float(f"{lon / 10:.2f}") for lon in range(-1800, 1800 + 1, 1)],
),
running_hours=[0, 6, 12, 18],
)
"""ECMWF's High Resolution Integrated Forecast System."""

Expand All @@ -168,6 +192,7 @@ class Models:
latitude=[v/10 for v in range(900, -900, -1)],
longitude=[v/10 for v in range(-1800, 1800, 1)],
),
running_hours=[0, 12],
)
"""Summary statistics from ECMWF's Ensemble Forecast System."""

Expand Down Expand Up @@ -195,6 +220,7 @@ class Models:
latitude=[v/10 for v in range(900, -900, -1)],
longitude=[v/10 for v in range(-1800, 1800, 1)],
),
running_hours=[0, 6, 12, 18],
)
"""Full ensemble data from ECMWF's Ensemble Forecast System."""

Expand Down Expand Up @@ -226,6 +252,7 @@ class Models:
latitude=[float(lat) for lat in range(90, -90 - 1, -1)],
longitude=[float(lon) for lon in range(-180, 180 + 1, 1)],
),
running_hours=[0, 6, 12, 18],
)
"""NCEP's Global Forecast System."""

Expand Down Expand Up @@ -261,6 +288,7 @@ class Models:
],
# TODO: Change to -180 -> 180
),
running_hours=[0, 6, 12, 18],
)
"""MetOffice's Unified Model, in the Global configuration, at a resolution of 17km."""

Expand Down Expand Up @@ -294,6 +322,35 @@ class Models:
for lon in np.arange(-179.929687, 179.929688 + 0.140625, 0.140625)
],
),
running_hours=[0, 6, 12, 18],
)
"""MetOffice's Unified Model, in the Global configuration, at a resolution of 10km."""

MO_UKV_2KM: ModelMetadata = ModelMetadata(
    name="um-ukv",
    resolution="2km",
    expected_coordinates=NWPDimensionCoordinateMap(
        init_time=[],
        # Steps 0 through 54 inclusive.
        step=list(range(55)),
        variable=sorted(
            [
                Parameter.CLOUD_COVER_TOTAL,
                Parameter.CLOUD_COVER_HIGH,
                Parameter.CLOUD_COVER_MEDIUM,
                Parameter.CLOUD_COVER_LOW,
                Parameter.VISIBILITY_SL,
                Parameter.RELATIVE_HUMIDITY_SL,
                Parameter.SNOW_DEPTH_GL,
                Parameter.DOWNWARD_SHORTWAVE_RADIATION_FLUX_GL,
                Parameter.TEMPERATURE_SL,
                Parameter.WIND_U_COMPONENT_10m,
                Parameter.WIND_V_COMPONENT_10m,
            ],
        ),
        # NOTE(review): x and y are plain integer sequences (0..454 and 0..638),
        # which look like grid indices rather than projected coordinate values
        # - confirm against the source data.
        x=list(range(455)),
        y=list(range(639)),
    ),
    # Every hour of the day, 0 through 23.
    running_hours=list(range(24)),
)
"""MetOffice's Unified Model, in the UKV configuration, at a resolution of 2km."""

24 changes: 4 additions & 20 deletions src/nwp_consumer/internal/entities/repometadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
import datetime as dt
import os

import pandas as pd

from .modelmetadata import ModelMetadata
from .postprocess import PostProcessOptions

Expand All @@ -42,11 +40,6 @@ class RawRepositoryMetadata:
but rather are defined by pre-selected agreements with the provider.
"""

running_hours: list[int]
"""The running hours of the model.

Most NWP models are run at fixed intervals throughout the day."""

delay_minutes: int
"""The approximate model delay in minutes.

Expand All @@ -72,31 +65,23 @@ class RawRepositoryMetadata:
available_models: dict[str, ModelMetadata]
"""A dictionary of available models and their metadata."""

def determine_latest_it_from(self, t: dt.datetime) -> dt.datetime:
def determine_latest_it_from(self, t: dt.datetime, running_hours: list[int]) -> dt.datetime:
"""Determine the latest available initialization time from a given time.

Args:
t: The time from which to determine the latest initialization time.
running_hours: A list of hours at which the model runs each day.

Returns:
The latest available initialization time prior to the given time.
"""
it = t.replace(minute=0, second=0, microsecond=0) \
- dt.timedelta(minutes=self.delay_minutes)
while it.hour not in self.running_hours:
while it.hour not in running_hours:
it -= dt.timedelta(hours=1)

return it

def month_its(self, year: int, month: int) -> list[dt.datetime]:
"""Generate all init times for a given month."""
days = pd.Period(f"{year}-{month}").days_in_month
its: list[dt.datetime] = []
for day in range(1, days + 1):
for hour in self.running_hours:
its.append(dt.datetime(year, month, day, hour, tzinfo=dt.UTC))
return its

def missing_required_envs(self) -> list[str]:
"""Get a list of unset required environment variables.

Expand All @@ -110,8 +95,7 @@ def __str__(self) -> str:
pretty: str = "".join((
"Model Repository: ",
f"\n\t{self.name} ({'archive' if self.is_archive else 'live/rolling'} dataset.)",
f"\n\truns at: {self.running_hours} hours ",
"(available after {self.delay_minutes} minute delay)",
f"\n\t\t(available after {self.delay_minutes} minute delay)",
"\nEnvironment variables:",
"\n\tRequired:",
"\n".join(f"\t\t{var}" for var in self.required_env),
Expand Down
1 change: 1 addition & 0 deletions src/nwp_consumer/internal/entities/test_modelmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def test_with_region(self) -> None:
latitude=[float(f"{lat / 10:.2f}") for lat in range(900, -900 - 1, -1)],
longitude=[float(f"{lon / 10:.2f}") for lon in range(-1800, 1800 + 1, 1)],
),
running_hours=[0, 6, 12, 18],
)

@dataclasses.dataclass
Expand Down
3 changes: 1 addition & 2 deletions src/nwp_consumer/internal/entities/test_repometadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ class TestRawRepositoryMetadata(unittest.TestCase):
name="test",
is_archive=False,
is_order_based=False,
running_hours=[0, 6, 12, 18],
delay_minutes=60,
required_env=["TEST"],
optional_env={"TEST": "test"},
Expand Down Expand Up @@ -46,7 +45,7 @@ class TestCase:

for test in tests:
with self.subTest(name=test.name):
result = self.metadata.determine_latest_it_from(test.t)
result = self.metadata.determine_latest_it_from(test.t, [0, 6, 12, 18])
self.assertEqual(result, test.expected)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,17 +114,17 @@ def repository() -> entities.RawRepositoryMetadata:
name="CEDA",
is_archive=True,
is_order_based=False,
running_hours=[0, 12], # 6 and 18 exist, but are lacking variables
delay_minutes=(60 * 24 * 7) + (60 * 12), # 7.5 days
max_connections=20,
required_env=["CEDA_FTP_USER", "CEDA_FTP_PASS"],
optional_env={},
postprocess_options=entities.PostProcessOptions(),
available_models={
"default": entities.Models.MO_UM_GLOBAL_17KM.with_chunk_count_overrides({
"default": entities.Models.MO_UM_GLOBAL_17KM\
.with_chunk_count_overrides({
"latitude": 8,
"longitude": 8,
}),
}).with_running_hours([0, 12]), # 6 and 18 exist, but are lacking variables
},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,6 @@ def repository() -> entities.RawRepositoryMetadata:
name="ECMWF-MARS",
is_archive=True,
is_order_based=False,
running_hours=[0, 12],
delay_minutes=(60 * 26), # 1 day, plus leeway
max_connections=20,
required_env=[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ def repository() -> entities.RawRepositoryMetadata:
name="ECMWF-Realtime-S3",
is_archive=False,
is_order_based=True,
running_hours=[0, 6, 12, 18],
delay_minutes=(60 * 6), # 6 hours
max_connections=100,
required_env=[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ def __init__(self, order_id: str, api_key: str) -> None:
@staticmethod
@override
def repository() -> entities.RawRepositoryMetadata:

return entities.RawRepositoryMetadata(
name="MetOffice-Weather-Datahub",
is_archive=False,
is_order_based=True,
running_hours=[0, 12],
delay_minutes=60,
max_connections=10,
required_env=["METOFFICE_API_KEY", "METOFFICE_ORDER_ID"],
Expand All @@ -86,6 +86,7 @@ def repository() -> entities.RawRepositoryMetadata:
available_models={
"default": entities.Models.MO_UM_GLOBAL_10KM,
"um-global-10km": entities.Models.MO_UM_GLOBAL_10KM,
"um-ukv-2km": entities.Models.MO_UKV_2KM,
},
)

Expand Down Expand Up @@ -280,16 +281,19 @@ def _convert(path: pathlib.Path) -> ResultE[list[xr.DataArray]]:
)

try:
da: xr.DataArray = (
ds = (
ds.pipe(
entities.Parameter.rename_else_drop_ds_vars,
allowed_parameters=MetOfficeDatahubRawRepository.model().expected_coordinates.variable,
)
.rename(name_dict={"time": "init_time"})
.expand_dims(dim="init_time")
.expand_dims(dim="step")
.to_dataarray(name=MetOfficeDatahubRawRepository.model().name)
)
.expand_dims(dim="init_time"))

if "step" not in ds.dims:
ds = ds.expand_dims(dim="step")

da: xr.DataArray = ds.to_dataarray(name=MetOfficeDatahubRawRepository.model().name)

da = (
da.drop_vars(
names=[
Expand All @@ -299,10 +303,14 @@ def _convert(path: pathlib.Path) -> ResultE[list[xr.DataArray]]:
],
errors="ignore",
)
.transpose(*MetOfficeDatahubRawRepository.model().expected_coordinates.dims)
.sortby(variables=["step", "variable", "longitude"])
.sortby(variables="latitude", ascending=False)
)
.transpose(*MetOfficeDatahubRawRepository.model().expected_coordinates.dims))

if "latitude" in MetOfficeDatahubRawRepository.model().expected_coordinates.dims:
da = da.sortby(variables=["step", "variable", "longitude"])
da = da.sortby(variables="latitude", ascending=False)
else:
da = da.sortby(variables=["step", "variable"])

except Exception as e:
return Failure(
ValueError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ def repository() -> entities.RawRepositoryMetadata:
name="NOAA-GFS-S3",
is_archive=False,
is_order_based=False,
running_hours=[0, 6, 12, 18],
delay_minutes=(60 * 5), # 5 hours
max_connections=100,
required_env=[],
Expand Down
Binary file not shown.
Loading
Loading