1
1
general :
2
- description : Example data config for creating PVNet batches
3
- name : example_pvnet
2
+ description : Example config for producing PVNet samples
3
+ name : example_config
4
4
5
5
input_data :
6
- default_history_minutes : 120
7
- default_forecast_minutes : 480
6
+
7
+ # Either use Site OR GSP configuration
8
+ site :
9
+ # Path to Site data in NetCDF format
10
+ file_path : PLACEHOLDER.nc
11
+ # Path to metadata in CSV format
12
+ metadata_file_path : PLACEHOLDER.csv
13
+ time_resolution_minutes : 15
14
+ interval_start_minutes : -60
15
+ # Specified for intraday currently
16
+ interval_end_minutes : 480
17
+ dropout_timedeltas_minutes : null
18
+ dropout_fraction : 0 # Fraction of samples with dropout
8
19
9
20
gsp :
10
- # Path to the GSP data. This should be a zarr file
21
+ # Path to GSP data in zarr format
11
22
# e.g. gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr
12
- gsp_zarr_path : PLACEHOLDER.zarr
13
- history_minutes : 120
14
- forecast_minutes : 480
23
+ zarr_path : PLACEHOLDER.zarr
24
+ interval_start_minutes : -60
25
+ # Specified for intraday currently
26
+ interval_end_minutes : 480
15
27
time_resolution_minutes : 30
16
- # A random value from the list below will be chosen as the delay when dropout is used
28
+ # Random value from the list below will be chosen as the delay when dropout is used
17
29
# If set to null no dropout is applied. Only values before t0 are dropped out for GSP.
18
30
# Values after t0 are assumed as targets and cannot be dropped.
19
31
dropout_timedeltas_minutes : null
20
32
dropout_fraction : 0 # Fraction of samples with dropout
21
33
22
- pv :
23
- pv_files_groups :
24
- - label : solar_sheffield_passiv
25
- # Path to the site-level PV data. This should be a netcdf
26
- # e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf
27
- pv_filename : PLACEHOLDER.netcdf
28
- # Path to the site-level PV metadata. This should be a csv
29
- # e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata.csv
30
- pv_metadata_filename : PLACEHOLDER.csv
31
- # This is the list of pv_ml_ids to be sliced from the PV site level data
32
- # The IDs below are 349 of the PV systems which have very little NaN data in the historic data
33
- # and which are still reporting live (as of Oct 2023)
34
- pv_ml_ids :
35
- [
36
- 154, 155, 156, 158, 159, 160, 162, 164, 165, 166, 167, 168, 169, 171, 173, 177, 178, 179,
37
- 181, 182, 185, 186, 187, 188, 189, 190, 191, 192, 193, 197, 198, 199, 200, 202, 204, 205,
38
- 206, 208, 209, 211, 214, 215, 216, 217, 218, 219, 220, 221, 225, 229, 230, 232, 233, 234,
39
- 236, 242, 243, 245, 252, 254, 255, 256, 257, 258, 260, 261, 262, 265, 267, 268, 272, 273,
40
- 275, 276, 277, 280, 281, 282, 283, 287, 289, 291, 292, 293, 294, 295, 296, 297, 298, 301,
41
- 302, 303, 304, 306, 307, 309, 310, 311, 317, 318, 319, 320, 321, 322, 323, 325, 326, 329,
42
- 332, 333, 335, 336, 338, 340, 342, 344, 345, 346, 348, 349, 352, 354, 355, 356, 357, 360,
43
- 362, 363, 368, 369, 370, 371, 372, 374, 375, 376, 378, 380, 382, 384, 385, 388, 390, 391,
44
- 393, 396, 397, 398, 399, 400, 401, 403, 404, 405, 406, 407, 409, 411, 412, 413, 414, 415,
45
- 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 429, 431, 435, 437, 438, 440,
46
- 441, 444, 447, 450, 451, 453, 456, 457, 458, 459, 464, 465, 466, 467, 468, 470, 471, 473,
47
- 474, 476, 477, 479, 480, 481, 482, 485, 486, 488, 490, 491, 492, 493, 496, 498, 501, 503,
48
- 506, 507, 508, 509, 510, 511, 512, 513, 515, 516, 517, 519, 520, 521, 522, 524, 526, 527,
49
- 528, 531, 532, 536, 537, 538, 540, 541, 542, 543, 544, 545, 549, 550, 551, 552, 553, 554,
50
- 556, 557, 560, 561, 563, 566, 568, 571, 572, 575, 576, 577, 579, 580, 581, 582, 584, 585,
51
- 588, 590, 594, 595, 597, 600, 602, 603, 604, 606, 611, 613, 614, 616, 618, 620, 622, 623,
52
- 624, 625, 626, 628, 629, 630, 631, 636, 637, 638, 640, 641, 642, 644, 645, 646, 650, 651,
53
- 652, 653, 654, 655, 657, 660, 661, 662, 663, 666, 667, 668, 670, 675, 676, 679, 681, 683,
54
- 684, 685, 687, 696, 698, 701, 702, 703, 704, 706, 710, 722, 723, 724, 725, 727, 728, 729,
55
- 730, 732, 733, 734, 735, 736, 737
56
- ]
57
- history_minutes : 180
58
- forecast_minutes : 0
59
- time_resolution_minutes : 5
60
- # A random value from the list below will be chosen as the delay when dropout is used.
61
- # If set to null no dropout is applied. All PV systems are dropped together with this setting.
62
- dropout_timedeltas_minutes : null
63
- dropout_fraction : 0 # Fraction of samples with dropout
64
- # A random value from the list below will be chosen as the delay when system dropout is used.
65
- # If set to null no dropout is applied. All PV systems are dropped independently with this setting.
66
- system_dropout_timedeltas_minutes : null
67
- # For each sample a different dropout probability is used which is uniformly sampled from the min
68
- # and max below
69
- system_dropout_fraction_min : 0
70
- system_dropout_fraction_max : 0
71
-
72
34
nwp :
73
- ukv :
74
- nwp_provider : ukv
75
- nwp_zarr_path :
76
- # Path(s) to UKV NWP data in zarr format
77
- # e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr
78
- - PLACEHOLDER.zarr
79
- history_minutes : 120
80
- forecast_minutes : 480
81
- time_resolution_minutes : 60
82
- nwp_channels :
83
- # These variables exist in the CEDA training set and in the live MetOffice live service
84
- - t # 2-metre temperature
85
- - dswrf # downwards short-wave radiation flux
86
- - dlwrf # downwards long-wave radiation flux
87
- - hcc # high cloud cover
88
- - mcc # medium cloud cover
89
- - lcc # low cloud cover
90
- - sde # snow depth water equivalent
91
- - r # relative humidity
92
- - vis # visibility
93
- - si10 # 10-metre wind speed
94
- - wdir10 # 10-metre wind direction
95
- - prate # precipitation rate
96
- # These variables exist in CEDA training data but not in the live MetOffice live service
97
- - hcct # height of convective cloud top, meters above surface. NaN if no clouds
98
- - cdcb # height of lowest cloud base > 3 oktas
99
- - dpt # dew point temperature
100
- - prmsl # mean sea level pressure
101
- - h # geometrical? (maybe geopotential?) height
102
- nwp_image_size_pixels_height : 24
103
- nwp_image_size_pixels_width : 24
104
- # A random value from the list below will be chosen as the delay when dropout is used
105
- # If set to null no dropout is applied. Values must be negative.
106
- dropout_timedeltas_minutes : [-180]
107
- # Dropout applied with this probability
108
- dropout_fraction : 1.0
109
- # How long after the NWP init-time are we still willing to use this forecast
110
- # If null we use each init-time for all steps it covers
111
- max_staleness_minutes : null
112
35
113
36
ecmwf :
114
- nwp_provider : ecmwf
37
+ provider : ecmwf
115
38
# Path to ECMWF NWP data in zarr format
116
39
# n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
117
- nwp_zarr_path : PLACEHOLDER.zarr
118
- history_minutes : 120
119
- forecast_minutes : 480
40
+ zarr_path : PLACEHOLDER.zarr
41
+ interval_start_minutes : -60
42
+ # Specified for intraday currently
43
+ interval_end_minutes : 480
120
44
time_resolution_minutes : 60
121
- nwp_channels :
45
+ channels :
122
46
- t2m # 2-metre temperature
123
47
- dswrf # downwards short-wave radiation flux
124
48
- dlwrf # downwards long-wave radiation flux
@@ -136,23 +60,61 @@ input_data:
136
60
- v10 # 10-metre V component of wind speed
137
61
- v100 # 100-metre V component of wind speed
138
62
- v200 # 200-metre V component of wind speed
139
- nwp_image_size_pixels_height : 12 # roughly equivalent to UKV 24 pixels
140
- nwp_image_size_pixels_width : 12
141
- dropout_timedeltas_minutes : [-180]
142
- dropout_fraction : 1.0
63
+ # The following channels are accumulated and need to be diffed
64
+ accum_channels :
65
+ - dswrf # downwards short-wave radiation flux
66
+ - dlwrf # downwards long-wave radiation flux
67
+ - sr # direct solar radiation
68
+ - duvrs # downwards UV radiation at surface
69
+ image_size_pixels_height : 24
70
+ image_size_pixels_width : 24
71
+ dropout_timedeltas_minutes : [-360]
72
+ dropout_fraction : 1.0 # Fraction of samples with dropout
73
+ max_staleness_minutes : null
74
+
75
+ ukv :
76
+ provider : ukv
77
+ # Path to UKV NWP data in zarr format
78
+ # e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr
79
+ # n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
80
+ zarr_path : PLACEHOLDER.zarr
81
+ interval_start_minutes : -60
82
+ # Specified for intraday currently
83
+ interval_end_minutes : 480
84
+ time_resolution_minutes : 60
85
+ channels :
86
+ - t # 2-metre temperature
87
+ - dswrf # downwards short-wave radiation flux
88
+ - dlwrf # downwards long-wave radiation flux
89
+ - hcc # high cloud cover
90
+ - mcc # medium cloud cover
91
+ - lcc # low cloud cover
92
+ - sde # snow depth water equivalent
93
+ - r # relative humidity
94
+ - vis # visibility
95
+ - si10 # 10-metre wind speed
96
+ - wdir10 # 10-metre wind direction
97
+ - prate # precipitation rate
98
+ # These variables exist in CEDA training data but not in the live MetOffice service
99
+ - hcct # height of convective cloud top, meters above surface. NaN if no clouds
100
+ - cdcb # height of lowest cloud base > 3 oktas
101
+ - dpt # dew point temperature
102
+ - prmsl # mean sea level pressure
103
+ - h # geometrical? (maybe geopotential?) height
104
+ image_size_pixels_height : 24
105
+ image_size_pixels_width : 24
106
+ dropout_timedeltas_minutes : [-360]
107
+ dropout_fraction : 1.0 # Fraction of samples with dropout
143
108
max_staleness_minutes : null
144
109
145
110
satellite :
146
- satellite_zarr_path :
147
- # Path(s) to non-HRV satellite data in zarr format
148
- # e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
149
- - PLACEHOLDER.zarr
150
- history_minutes : 90
151
- forecast_minutes : 0 # Deprecated for most use cases
152
- live_delay_minutes : 60 # Only data up to time t0-60minutes is included in slice
111
+ # Path to Satellite data (non-HRV) in zarr format
112
+ # e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
113
+ zarr_path : PLACEHOLDER.zarr
114
+ interval_start_minutes : -30
115
+ interval_end_minutes : 0
153
116
time_resolution_minutes : 5
154
- satellite_channels :
155
- # Uses for each channel taken from https://resources.eumetrain.org/data/3/311/bsc_s4.pdf
117
+ channels :
156
118
- IR_016 # Surface, cloud phase
157
119
- IR_039 # Surface, clouds, wind fields
158
120
- IR_087 # Surface, clouds, atmospheric instability
@@ -164,9 +126,7 @@ input_data:
164
126
- VIS008 # Surface, clouds, wind fields
165
127
- WV_062 # Water vapor, high level clouds, upper air analysis
166
128
- WV_073 # Water vapor, atmospheric instability, upper-level dynamics
167
- satellite_image_size_pixels_height : 24
168
- satellite_image_size_pixels_width : 24
169
- # A random value from the list below will be chosen as the delay when dropout is used
170
- # If set to null no dropout is applied. Values must be negative.
129
+ image_size_pixels_height : 24
130
+ image_size_pixels_width : 24
171
131
dropout_timedeltas_minutes : null
172
132
dropout_fraction : 0 # Fraction of samples with dropout
0 commit comments