11general :
2- description : Example data config for creating PVNet batches
3- name : example_pvnet
2+ description : Example config for producing PVNet samples
3+ name : example_config
44
55input_data :
6- default_history_minutes : 120
7- default_forecast_minutes : 480
6+
7+ # Either use Site OR GSP configuration
8+ site :
9+ # Path to Site data in NetCDF format
10+ file_path : PLACEHOLDER.nc
11+ # Path to metadata in CSV format
12+ metadata_file_path : PLACEHOLDER.csv
13+ time_resolution_minutes : 15
14+ interval_start_minutes : -60
15+ # Specified for intraday currently
16+ interval_end_minutes : 480
17+ dropout_timedeltas_minutes : null
18+ dropout_fraction : 0 # Fraction of samples with dropout
819
920 gsp :
10- # Path to the GSP data. This should be a zarr file
21+ # Path to GSP data in zarr format
1122 # e.g. gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr
12- gsp_zarr_path : PLACEHOLDER.zarr
13- history_minutes : 120
14- forecast_minutes : 480
23+ zarr_path : PLACEHOLDER.zarr
24+ interval_start_minutes : -60
25+ # Specified for intraday currently
26+ interval_end_minutes : 480
1527 time_resolution_minutes : 30
16- # A random value from the list below will be chosen as the delay when dropout is used
28+ # Random value from the list below will be chosen as the delay when dropout is used
1729 # If set to null no dropout is applied. Only values before t0 are dropped out for GSP.
1830 # Values after t0 are assumed as targets and cannot be dropped.
1931 dropout_timedeltas_minutes : null
2032 dropout_fraction : 0 # Fraction of samples with dropout
2133
22- pv :
23- pv_files_groups :
24- - label : solar_sheffield_passiv
25- # Path to the site-level PV data. This should be a netcdf
26- # e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf
27- pv_filename : PLACEHOLDER.netcdf
28- # Path to the site-level PV metadata. This should be a csv
29- # e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata.csv
30- pv_metadata_filename : PLACEHOLDER.csv
31- # This is the list of pv_ml_ids to be sliced from the PV site level data
32- # The IDs below are 349 of the PV systems which have very little NaN data in the historic data
33- # and which are still reporting live (as of Oct 2023)
34- pv_ml_ids :
35- [
36- 154, 155, 156, 158, 159, 160, 162, 164, 165, 166, 167, 168, 169, 171, 173, 177, 178, 179,
37- 181, 182, 185, 186, 187, 188, 189, 190, 191, 192, 193, 197, 198, 199, 200, 202, 204, 205,
38- 206, 208, 209, 211, 214, 215, 216, 217, 218, 219, 220, 221, 225, 229, 230, 232, 233, 234,
39- 236, 242, 243, 245, 252, 254, 255, 256, 257, 258, 260, 261, 262, 265, 267, 268, 272, 273,
40- 275, 276, 277, 280, 281, 282, 283, 287, 289, 291, 292, 293, 294, 295, 296, 297, 298, 301,
41- 302, 303, 304, 306, 307, 309, 310, 311, 317, 318, 319, 320, 321, 322, 323, 325, 326, 329,
42- 332, 333, 335, 336, 338, 340, 342, 344, 345, 346, 348, 349, 352, 354, 355, 356, 357, 360,
43- 362, 363, 368, 369, 370, 371, 372, 374, 375, 376, 378, 380, 382, 384, 385, 388, 390, 391,
44- 393, 396, 397, 398, 399, 400, 401, 403, 404, 405, 406, 407, 409, 411, 412, 413, 414, 415,
45- 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 429, 431, 435, 437, 438, 440,
46- 441, 444, 447, 450, 451, 453, 456, 457, 458, 459, 464, 465, 466, 467, 468, 470, 471, 473,
47- 474, 476, 477, 479, 480, 481, 482, 485, 486, 488, 490, 491, 492, 493, 496, 498, 501, 503,
48- 506, 507, 508, 509, 510, 511, 512, 513, 515, 516, 517, 519, 520, 521, 522, 524, 526, 527,
49- 528, 531, 532, 536, 537, 538, 540, 541, 542, 543, 544, 545, 549, 550, 551, 552, 553, 554,
50- 556, 557, 560, 561, 563, 566, 568, 571, 572, 575, 576, 577, 579, 580, 581, 582, 584, 585,
51- 588, 590, 594, 595, 597, 600, 602, 603, 604, 606, 611, 613, 614, 616, 618, 620, 622, 623,
52- 624, 625, 626, 628, 629, 630, 631, 636, 637, 638, 640, 641, 642, 644, 645, 646, 650, 651,
53- 652, 653, 654, 655, 657, 660, 661, 662, 663, 666, 667, 668, 670, 675, 676, 679, 681, 683,
54- 684, 685, 687, 696, 698, 701, 702, 703, 704, 706, 710, 722, 723, 724, 725, 727, 728, 729,
55- 730, 732, 733, 734, 735, 736, 737
56- ]
57- history_minutes : 180
58- forecast_minutes : 0
59- time_resolution_minutes : 5
60- # A random value from the list below will be chosen as the delay when dropout is used.
61- # If set to null no dropout is applied. All PV systems are dropped together with this setting.
62- dropout_timedeltas_minutes : null
63- dropout_fraction : 0 # Fraction of samples with dropout
64- # A random value from the list below will be chosen as the delay when system dropout is used.
65- # If set to null no dropout is applied. All PV systems are dropped out independently with this setting.
66- system_dropout_timedeltas_minutes : null
67- # For each sample a different dropout probability is used which is uniformly sampled from the min
68- # and max below
69- system_dropout_fraction_min : 0
70- system_dropout_fraction_max : 0
71-
7234 nwp :
73- ukv :
74- nwp_provider : ukv
75- nwp_zarr_path :
76- # Path(s) to UKV NWP data in zarr format
77- # e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr
78- - PLACEHOLDER.zarr
79- history_minutes : 120
80- forecast_minutes : 480
81- time_resolution_minutes : 60
82- nwp_channels :
83- # These variables exist in the CEDA training set and in the live MetOffice live service
84- - t # 2-metre temperature
85- - dswrf # downwards short-wave radiation flux
86- - dlwrf # downwards long-wave radiation flux
87- - hcc # high cloud cover
88- - mcc # medium cloud cover
89- - lcc # low cloud cover
90- - sde # snow depth water equivalent
91- - r # relative humidity
92- - vis # visibility
93- - si10 # 10-metre wind speed
94- - wdir10 # 10-metre wind direction
95- - prate # precipitation rate
96- # These variables exist in CEDA training data but not in the live MetOffice live service
97- - hcct # height of convective cloud top, meters above surface. NaN if no clouds
98- - cdcb # height of lowest cloud base > 3 oktas
99- - dpt # dew point temperature
100- - prmsl # mean sea level pressure
101- - h # geometrical? (maybe geopotential?) height
102- nwp_image_size_pixels_height : 24
103- nwp_image_size_pixels_width : 24
104- # A random value from the list below will be chosen as the delay when dropout is used
105- # If set to null no dropout is applied. Values must be negative.
106- dropout_timedeltas_minutes : [-180]
107- # Dropout applied with this probability
108- dropout_fraction : 1.0
109- # How long after the NWP init-time are we still willing to use this forecast
110- # If null we use each init-time for all steps it covers
111- max_staleness_minutes : null
11235
11336 ecmwf :
114- nwp_provider : ecmwf
37+ provider : ecmwf
11538 # Path to ECMWF NWP data in zarr format
11639 # n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
117- nwp_zarr_path : PLACEHOLDER.zarr
118- history_minutes : 120
119- forecast_minutes : 480
40+ zarr_path : PLACEHOLDER.zarr
41+ interval_start_minutes : -60
42+ # Specified for intraday currently
43+ interval_end_minutes : 480
12044 time_resolution_minutes : 60
121- nwp_channels :
45+ channels :
12246 - t2m # 2-metre temperature
12347 - dswrf # downwards short-wave radiation flux
12448 - dlwrf # downwards long-wave radiation flux
@@ -136,23 +60,61 @@ input_data:
13660 - v10 # 10-metre V component of wind speed
13761 - v100 # 100-metre V component of wind speed
13862 - v200 # 200-metre V component of wind speed
139- nwp_image_size_pixels_height : 12 # roughly equivalent to UKV 24 pixels
140- nwp_image_size_pixels_width : 12
141- dropout_timedeltas_minutes : [-180]
142- dropout_fraction : 1.0
63+ # The following channels are accumulated and need to be diffed
64+ accum_channels :
65+ - dswrf # downwards short-wave radiation flux
66+ - dlwrf # downwards long-wave radiation flux
67+ - sr # direct solar radiation
68+ - duvrs # downwards UV radiation at surface
69+ image_size_pixels_height : 24
70+ image_size_pixels_width : 24
71+ dropout_timedeltas_minutes : [-360]
72+ dropout_fraction : 1.0 # Fraction of samples with dropout
73+ max_staleness_minutes : null
74+
75+ ukv :
76+ provider : ukv
77+ # Path to UKV NWP data in zarr format
78+ # e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr
79+ # n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
80+ zarr_path : PLACEHOLDER.zarr
81+ interval_start_minutes : -60
82+ # Specified for intraday currently
83+ interval_end_minutes : 480
84+ time_resolution_minutes : 60
85+ channels :
86+ - t # 2-metre temperature
87+ - dswrf # downwards short-wave radiation flux
88+ - dlwrf # downwards long-wave radiation flux
89+ - hcc # high cloud cover
90+ - mcc # medium cloud cover
91+ - lcc # low cloud cover
92+ - sde # snow depth water equivalent
93+ - r # relative humidity
94+ - vis # visibility
95+ - si10 # 10-metre wind speed
96+ - wdir10 # 10-metre wind direction
97+ - prate # precipitation rate
98+ # These variables exist in CEDA training data but not in the live MetOffice service
99+ - hcct # height of convective cloud top, meters above surface. NaN if no clouds
100+ - cdcb # height of lowest cloud base > 3 oktas
101+ - dpt # dew point temperature
102+ - prmsl # mean sea level pressure
103+ - h # geometrical? (maybe geopotential?) height
104+ image_size_pixels_height : 24
105+ image_size_pixels_width : 24
106+ dropout_timedeltas_minutes : [-360]
107+ dropout_fraction : 1.0 # Fraction of samples with dropout
143108 max_staleness_minutes : null
144109
145110 satellite :
146- satellite_zarr_path :
147- # Path(s) to non-HRV satellite data in zarr format
148- # e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
149- - PLACEHOLDER.zarr
150- history_minutes : 90
151- forecast_minutes : 0 # Deprecated for most use cases
152- live_delay_minutes : 60 # Only data up to time t0-60 minutes is included in slice
111+ # Path to Satellite data (non-HRV) in zarr format
112+ # e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
113+ zarr_path : PLACEHOLDER.zarr
114+ interval_start_minutes : -30
115+ interval_end_minutes : 0
153116 time_resolution_minutes : 5
154- satellite_channels :
155- # Uses for each channel taken from https://resources.eumetrain.org/data/3/311/bsc_s4.pdf
117+ channels :
156118 - IR_016 # Surface, cloud phase
157119 - IR_039 # Surface, clouds, wind fields
158120 - IR_087 # Surface, clouds, atmospheric instability
@@ -164,9 +126,7 @@ input_data:
164126 - VIS008 # Surface, clouds, wind fields
165127 - WV_062 # Water vapor, high level clouds, upper air analysis
166128 - WV_073 # Water vapor, atmospheric instability, upper-level dynamics
167- satellite_image_size_pixels_height : 24
168- satellite_image_size_pixels_width : 24
169- # A random value from the list below will be chosen as the delay when dropout is used
170- # If set to null no dropout is applied. Values must be negative.
129+ image_size_pixels_height : 24
130+ image_size_pixels_width : 24
171131 dropout_timedeltas_minutes : null
172132 dropout_fraction : 0 # Fraction of samples with dropout
0 commit comments