Update constants and when they are used for GFS (#372)

peterdudfield · web-flow · commit b40475eb761d · 2025-01-06T16:30:43.000Z
diff --git a/ocf_datapipes/training/pvnet_site.py b/ocf_datapipes/training/pvnet_site.py
@@ -2,6 +2,7 @@
 
 import logging
 from datetime import datetime, timedelta
+from functools import partial
 from typing import List, Optional
 
 import xarray as xr
@@ -187,6 +188,7 @@ def construct_sliced_data_pipeline(
     location_pipe: IterDataPipe,
     t0_datapipe: IterDataPipe,
     production: bool = False,
+    new_normalisation_constants: bool = False,
 ) -> dict:
     """Constructs data pipeline for the input data config file.
 
@@ -197,6 +199,7 @@ def construct_sliced_data_pipeline(
         location_pipe: Datapipe yielding locations.
         t0_datapipe: Datapipe yielding times.
         production: Whether constucting pipeline for production inference.
+        new_normalisation_constants: Use the newer India mean/std for mo_global and gfs NWP.
     """
 
     datapipes_dict = _get_datapipes_dict(
@@ -237,12 +240,20 @@ def construct_sliced_data_pipeline(
                 roi_width_pixels=conf_nwp[nwp_key].nwp_image_size_pixels_width,
             )
             # Coarsen the data, if it is separated by 0.05 degrees each
-            nwp_datapipe = nwp_datapipe.map(potentially_coarsen)
+            potentially_coarsen_partial = partial(
+                potentially_coarsen, coarsen_to_deg=conf_nwp[nwp_key].coarsen_to_degrees
+            )
+            nwp_datapipe = nwp_datapipe.map(potentially_coarsen_partial)
             # Somewhat hacky way for India specifically, need different mean/std for ECMWF data
             if conf_nwp[nwp_key].nwp_provider in ["ecmwf"]:
                 normalize_provider = "ecmwf_india"
+            elif new_normalisation_constants and conf_nwp[nwp_key].nwp_provider in ["mo_global"]:
+                normalize_provider = "mo_global_new_india"
+            elif new_normalisation_constants and conf_nwp[nwp_key].nwp_provider in ["gfs"]:
+                normalize_provider = "gfs_india"
             else:
                 normalize_provider = conf_nwp[nwp_key].nwp_provider
+
             nwp_datapipes_dict[nwp_key] = nwp_datapipe.normalize(
                 mean=NWP_MEANS[normalize_provider],
                 std=NWP_STDS[normalize_provider],
diff --git a/ocf_datapipes/utils/consts.py b/ocf_datapipes/utils/consts.py
@@ -39,6 +39,7 @@ def __getitem__(self, key):
 NWP_PROVIDERS = [
     "ukv",
     "gfs",
+    "gfs_india",
     "icon-eu",
     "icon-global",
     "ecmwf",
@@ -47,6 +48,7 @@ def __getitem__(self, key):
     "merra2",
     "merra2_uk",
     "mo_global",
+    "mo_global_new_india",
 ]
 
 # ------ UKV
@@ -132,7 +134,8 @@ def __getitem__(self, key):
 UKV_STD = _to_data_array(UKV_STD)
 UKV_MEAN = _to_data_array(UKV_MEAN)
 
-# These were calculated from 200 random init times (step 0s) from the MO global data
+# --- MO Global (partial initial constants)
+
 MO_GLOBAL_INDIA_MEAN = {
     "temperature_sl": 298.2,
     "wind_u_component_10m": 0.5732,
@@ -151,6 +154,40 @@ def __getitem__(self, key):
 MO_GLOBAL_INDIA_MEAN = _to_data_array(MO_GLOBAL_INDIA_MEAN)
 
 
+# ------ MO Global India (new constants)
+
+MO_GLOBAL_INDIA_NEW_MEAN = {
+    "temperature_sl": 295.34392488,
+    "wind_u_component_10m": 0.83223102,
+    "wind_v_component_10m": 0.0802083,
+    "downward_shortwave_radiation_flux_gl": 225.54222068,
+    "cloud_cover_high": 0.34935897,
+    "cloud_cover_low": 0.096081,
+    "cloud_cover_medium": 0.13878676,
+    "relative_humidity_sl": 69.59633137,
+    "snow_depth_gl": 3.45158744,
+    "visibility_sl": 23181.81547681,
+}
+
+MO_GLOBAL_INDIA_NEW_STD = {
+    "temperature_sl": 12.26983825,
+    "wind_u_component_10m": 3.45169835,
+    "wind_v_component_10m": 2.9825603,
+    "downward_shortwave_radiation_flux_gl": 303.85182864,
+    "cloud_cover_high": 0.40563507,
+    "cloud_cover_low": 0.18374192,
+    "cloud_cover_medium": 0.25972151,
+    "relative_humidity_sl": 21.00264399,
+    "snow_depth_gl": 30.19116501,
+    "visibility_sl": 5385.35839715,
+}
+
+
+MO_GLOBAL_NEW_VARIABLE_NAMES = tuple(MO_GLOBAL_INDIA_NEW_MEAN.keys())
+MO_GLOBAL_INDIA_NEW_STD = _to_data_array(MO_GLOBAL_INDIA_NEW_STD)
+MO_GLOBAL_INDIA_NEW_MEAN = _to_data_array(MO_GLOBAL_INDIA_NEW_MEAN)
+
+
 # ------ GFS
 GFS_STD = {
     "dlwrf": 96.305916,
@@ -197,6 +234,48 @@ def __getitem__(self, key):
 GFS_MEAN = _to_data_array(GFS_MEAN)
 
 
+# ------ GFS India
+GFS_INDIA_STD_DICT = {
+    "t": 14.93798,
+    "prate": 5.965701e-05,
+    "u10": 3.4826114,
+    "v10": 3.167296,
+    "u100": 4.140226,
+    "v100": 3.984121,
+    "dlwrf": 79.30329,
+    "dswrf": 325.58582,
+    "hcc": 39.91955,
+    "lcc": 23.208075,
+    "mcc": 33.283035,
+    "r": 25.545837,
+    "sde": 0.10192183,
+    "tcc": 42.583195,
+    "vis": 3491.437,
+}
+GFS_INDIA_MEAN_DICT = {
+    "t": 298.27713,
+    "prate": 1.7736e-05,
+    "u10": 1.5782778,
+    "v10": 0.09856875,
+    "u100": 1.4558668,
+    "v100": -0.28256148,
+    "dlwrf": 356.57776,
+    "dswrf": 284.358,
+    "hcc": 26.965801,
+    "lcc": 9.2288,
+    "mcc": 17.2132,
+    "r": 38.2474,
+    "sde": 0.02070413,
+    "tcc": 36.962795,
+    "vis": 23386.936,
+}
+
+
+GFS_INDIA_VARIABLE_NAMES = tuple(GFS_INDIA_MEAN_DICT.keys())
+GFS_INDIA_STD = _to_data_array(GFS_INDIA_STD_DICT)
+GFS_INDIA_MEAN = _to_data_array(GFS_INDIA_MEAN_DICT)
+
+
 # ------ ECMWF
 # These were calculated from 100 random init times of UK data from 2020-2023
 ECMWF_STD = {
@@ -369,32 +448,38 @@ def __getitem__(self, key):
 NWP_VARIABLE_NAMES = NWPStatDict(
     ukv=UKV_VARIABLE_NAMES,
     gfs=GFS_VARIABLE_NAMES,
+    gfs_india=GFS_INDIA_VARIABLE_NAMES,
     ecmwf=ECMWF_VARIABLE_NAMES,
     ecmwf_india=INDIA_ECMWF_VARIABLE_NAMES,
     excarta=EXCARTA_VARIABLE_NAMES,
     merra2=MERRA2_VARIABLE_NAMES,
     merra2_uk=UK_MERRA2_VARIABLE_NAMES,
     mo_global=MO_GLOBAL_VARIABLE_NAMES,
+    mo_global_new_india=MO_GLOBAL_NEW_VARIABLE_NAMES,
 )
 NWP_STDS = NWPStatDict(
     ukv=UKV_STD,
     gfs=GFS_STD,
+    gfs_india=GFS_INDIA_STD,
     ecmwf=ECMWF_STD,
     ecmwf_india=INDIA_ECMWF_STD,
     excarta=EXCARTA_STD,
     merra2=MERRA2_STD,
     merra2_uk=UK_MERRA2_STD,
     mo_global=MO_GLOBAL_INDIA_STD,
+    mo_global_new_india=MO_GLOBAL_INDIA_NEW_STD,
 )
 NWP_MEANS = NWPStatDict(
     ukv=UKV_MEAN,
     gfs=GFS_MEAN,
+    gfs_india=GFS_INDIA_MEAN,
     ecmwf=ECMWF_MEAN,
     ecmwf_india=INDIA_ECMWF_MEAN,
     excarta=EXCARTA_MEAN,
     merra2=MERRA2_MEAN,
     merra2_uk=UK_MERRA2_MEAN,
     mo_global=MO_GLOBAL_INDIA_MEAN,
+    mo_global_new_india=MO_GLOBAL_INDIA_NEW_MEAN,
 )
 
 # --------------------------- SATELLITE ------------------------------
diff --git a/tests/transform/xarray/test_normalize.py b/tests/transform/xarray/test_normalize.py
@@ -29,8 +29,8 @@ def test_normalize_topo(topo_datapipe):
         calculate_mean_std_from_example=True
     )
     data = next(iter(normed_topo_datapipe))
-    assert data.mean().compute() == pytest.approx(0, abs=0.001)
-    assert data.std().compute() == pytest.approx(1, abs=0.001)
+    assert data.mean().compute() == pytest.approx(0, abs=0.01)
+    assert data.std().compute() == pytest.approx(1, abs=0.01)
 
 
 def test_normalize_gsp(gsp_datapipe):

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -29,8 +29,8 @@ def test_normalize_topo(topo_datapipe):` |
| `29` | `29` | `calculate_mean_std_from_example=True` |
| `30` | `30` | `)` |
| `31` | `31` | `data = next(iter(normed_topo_datapipe))` |
| `32` |  | `- assert data.mean().compute() == pytest.approx(0, abs=0.001)` |
| `33` |  | `- assert data.std().compute() == pytest.approx(1, abs=0.001)` |
|  | `32` | `+ assert data.mean().compute() == pytest.approx(0, abs=0.01)` |
|  | `33` | `+ assert data.std().compute() == pytest.approx(1, abs=0.01)` |
| `34` | `34` |  |
| `35` | `35` |  |
| `36` | `36` | `def test_normalize_gsp(gsp_datapipe):` |