Skip to content
This repository was archived by the owner on Jun 2, 2025. It is now read-only.

Commit b40475e

Browse files
Update constants and when they are used for GFS (#372)
1 parent 497d180 commit b40475e

File tree

3 files changed

+100
-4
lines changed

3 files changed

+100
-4
lines changed

ocf_datapipes/training/pvnet_site.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import logging
44
from datetime import datetime, timedelta
5+
from functools import partial
56
from typing import List, Optional
67

78
import xarray as xr
@@ -187,6 +188,7 @@ def construct_sliced_data_pipeline(
187188
location_pipe: IterDataPipe,
188189
t0_datapipe: IterDataPipe,
189190
production: bool = False,
191+
new_normalisation_constants: bool = False,
190192
) -> dict:
191193
"""Constructs data pipeline for the input data config file.
192194
@@ -197,6 +199,7 @@ def construct_sliced_data_pipeline(
197199
location_pipe: Datapipe yielding locations.
198200
t0_datapipe: Datapipe yielding times.
199201
production: Whether constructing pipeline for production inference.
202+
new_normalisation_constants: whether new normalisation constants are used.
200203
"""
201204

202205
datapipes_dict = _get_datapipes_dict(
@@ -237,12 +240,20 @@ def construct_sliced_data_pipeline(
237240
roi_width_pixels=conf_nwp[nwp_key].nwp_image_size_pixels_width,
238241
)
239242
# Coarsen the data if its grid points are separated by 0.05 degrees
240-
nwp_datapipe = nwp_datapipe.map(potentially_coarsen)
243+
potentially_coarsen_partial = partial(
244+
potentially_coarsen, coarsen_to_deg=conf_nwp[nwp_key].coarsen_to_degrees
245+
)
246+
nwp_datapipe = nwp_datapipe.map(potentially_coarsen_partial)
241247
# Somewhat hacky workaround for India specifically: ECMWF data needs a different mean/std
242248
if conf_nwp[nwp_key].nwp_provider in ["ecmwf"]:
243249
normalize_provider = "ecmwf_india"
250+
elif new_normalisation_constants and conf_nwp[nwp_key].nwp_provider in ["mo_global"]:
251+
normalize_provider = "mo_global_new_india"
252+
elif new_normalisation_constants and conf_nwp[nwp_key].nwp_provider in ["gfs"]:
253+
normalize_provider = "gfs_india"
244254
else:
245255
normalize_provider = conf_nwp[nwp_key].nwp_provider
256+
246257
nwp_datapipes_dict[nwp_key] = nwp_datapipe.normalize(
247258
mean=NWP_MEANS[normalize_provider],
248259
std=NWP_STDS[normalize_provider],

ocf_datapipes/utils/consts.py

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def __getitem__(self, key):
3939
NWP_PROVIDERS = [
4040
"ukv",
4141
"gfs",
42+
"gfs_india",
4243
"icon-eu",
4344
"icon-global",
4445
"ecmwf",
@@ -47,6 +48,7 @@ def __getitem__(self, key):
4748
"merra2",
4849
"merra2_uk",
4950
"mo_global",
51+
"mo_global_new_india",
5052
]
5153

5254
# ------ UKV
@@ -132,7 +134,8 @@ def __getitem__(self, key):
132134
UKV_STD = _to_data_array(UKV_STD)
133135
UKV_MEAN = _to_data_array(UKV_MEAN)
134136

135-
# These were calculated from 200 random init times (step 0s) from the MO global data
137+
# --- MO Global (partial initial constants)
138+
136139
MO_GLOBAL_INDIA_MEAN = {
137140
"temperature_sl": 298.2,
138141
"wind_u_component_10m": 0.5732,
@@ -151,6 +154,40 @@ def __getitem__(self, key):
151154
MO_GLOBAL_INDIA_MEAN = _to_data_array(MO_GLOBAL_INDIA_MEAN)
152155

153156

157+
# --- MO Global New
158+
159+
MO_GLOBAL_INDIA_NEW_MEAN = {
160+
"temperature_sl": 295.34392488,
161+
"wind_u_component_10m": 0.83223102,
162+
"wind_v_component_10m": 0.0802083,
163+
"downward_shortwave_radiation_flux_gl": 225.54222068,
164+
"cloud_cover_high": 0.34935897,
165+
"cloud_cover_low": 0.096081,
166+
"cloud_cover_medium": 0.13878676,
167+
"relative_humidity_sl": 69.59633137,
168+
"snow_depth_gl": 3.45158744,
169+
"visibility_sl": 23181.81547681,
170+
}
171+
172+
MO_GLOBAL_INDIA_NEW_STD = {
173+
"temperature_sl": 12.26983825,
174+
"wind_u_component_10m": 3.45169835,
175+
"wind_v_component_10m": 2.9825603,
176+
"downward_shortwave_radiation_flux_gl": 303.85182864,
177+
"cloud_cover_high": 0.40563507,
178+
"cloud_cover_low": 0.18374192,
179+
"cloud_cover_medium": 0.25972151,
180+
"relative_humidity_sl": 21.00264399,
181+
"snow_depth_gl": 30.19116501,
182+
"visibility_sl": 5385.35839715,
183+
}
184+
185+
186+
MO_GLOBAL_NEW_VARIABLE_NAMES = tuple(MO_GLOBAL_INDIA_NEW_MEAN.keys())
187+
MO_GLOBAL_INDIA_NEW_STD = _to_data_array(MO_GLOBAL_INDIA_NEW_STD)
188+
MO_GLOBAL_INDIA_NEW_MEAN = _to_data_array(MO_GLOBAL_INDIA_NEW_MEAN)
189+
190+
154191
# ------ GFS
155192
GFS_STD = {
156193
"dlwrf": 96.305916,
@@ -197,6 +234,48 @@ def __getitem__(self, key):
197234
GFS_MEAN = _to_data_array(GFS_MEAN)
198235

199236

237+
# ------ GFS India
238+
GFS_INDIA_STD_DICT = {
239+
"t": 14.93798,
240+
"prate": 5.965701e-05,
241+
"u10": 3.4826114,
242+
"v10": 3.167296,
243+
"u100": 4.140226,
244+
"v100": 3.984121,
245+
"dlwrf": 79.30329,
246+
"dswrf": 325.58582,
247+
"hcc": 39.91955,
248+
"lcc": 23.208075,
249+
"mcc": 33.283035,
250+
"r": 25.545837,
251+
"sde": 0.10192183,
252+
"tcc": 42.583195,
253+
"vis": 3491.437,
254+
}
255+
GFS_INDIA_MEAN_DICT = {
256+
"t": 298.27713,
257+
"prate": 1.7736e-05,
258+
"u10": 1.5782778,
259+
"v10": 0.09856875,
260+
"u100": 1.4558668,
261+
"v100": -0.28256148,
262+
"dlwrf": 356.57776,
263+
"dswrf": 284.358,
264+
"hcc": 26.965801,
265+
"lcc": 9.2288,
266+
"mcc": 17.2132,
267+
"r": 38.2474,
268+
"sde": 0.02070413,
269+
"tcc": 36.962795,
270+
"vis": 23386.936,
271+
}
272+
273+
274+
GFS_INDIA_VARIABLE_NAMES = tuple(GFS_INDIA_MEAN_DICT.keys())
275+
GFS_INDIA_STD = _to_data_array(GFS_INDIA_STD_DICT)
276+
GFS_INDIA_MEAN = _to_data_array(GFS_INDIA_MEAN_DICT)
277+
278+
200279
# ------ ECMWF
201280
# These were calculated from 100 random init times of UK data from 2020-2023
202281
ECMWF_STD = {
@@ -369,32 +448,38 @@ def __getitem__(self, key):
369448
NWP_VARIABLE_NAMES = NWPStatDict(
370449
ukv=UKV_VARIABLE_NAMES,
371450
gfs=GFS_VARIABLE_NAMES,
451+
gfs_india=GFS_INDIA_VARIABLE_NAMES,
372452
ecmwf=ECMWF_VARIABLE_NAMES,
373453
ecmwf_india=INDIA_ECMWF_VARIABLE_NAMES,
374454
excarta=EXCARTA_VARIABLE_NAMES,
375455
merra2=MERRA2_VARIABLE_NAMES,
376456
merra2_uk=UK_MERRA2_VARIABLE_NAMES,
377457
mo_global=MO_GLOBAL_VARIABLE_NAMES,
458+
mo_global_new_india=MO_GLOBAL_NEW_VARIABLE_NAMES,
378459
)
379460
NWP_STDS = NWPStatDict(
380461
ukv=UKV_STD,
381462
gfs=GFS_STD,
463+
gfs_india=GFS_INDIA_STD,
382464
ecmwf=ECMWF_STD,
383465
ecmwf_india=INDIA_ECMWF_STD,
384466
excarta=EXCARTA_STD,
385467
merra2=MERRA2_STD,
386468
merra2_uk=UK_MERRA2_STD,
387469
mo_global=MO_GLOBAL_INDIA_STD,
470+
mo_global_new_india=MO_GLOBAL_INDIA_NEW_STD,
388471
)
389472
NWP_MEANS = NWPStatDict(
390473
ukv=UKV_MEAN,
391474
gfs=GFS_MEAN,
475+
gfs_india=GFS_INDIA_MEAN,
392476
ecmwf=ECMWF_MEAN,
393477
ecmwf_india=INDIA_ECMWF_MEAN,
394478
excarta=EXCARTA_MEAN,
395479
merra2=MERRA2_MEAN,
396480
merra2_uk=UK_MERRA2_MEAN,
397481
mo_global=MO_GLOBAL_INDIA_MEAN,
482+
mo_global_new_india=MO_GLOBAL_INDIA_NEW_MEAN,
398483
)
399484

400485
# --------------------------- SATELLITE ------------------------------

tests/transform/xarray/test_normalize.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ def test_normalize_topo(topo_datapipe):
2929
calculate_mean_std_from_example=True
3030
)
3131
data = next(iter(normed_topo_datapipe))
32-
assert data.mean().compute() == pytest.approx(0, abs=0.001)
33-
assert data.std().compute() == pytest.approx(1, abs=0.001)
32+
assert data.mean().compute() == pytest.approx(0, abs=0.01)
33+
assert data.std().compute() == pytest.approx(1, abs=0.01)
3434

3535

3636
def test_normalize_gsp(gsp_datapipe):

0 commit comments

Comments
 (0)