From 5180b32199913952c8bd80f658e2996f34ccc3d4 Mon Sep 17 00:00:00 2001 From: megawattz Date: Wed, 10 Jan 2024 13:23:53 +0000 Subject: [PATCH 1/6] update readme to inform of large commit history --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index e50dd56c..6f9a3805 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,12 @@ cd PVNet pip install -r requirements.txt ``` +The commit history is extensive. To save download time, use a depth of 1: +```bash +git clone --depth 1 https://github.com/openclimatefix/PVNet.git +``` +This means only the latest commit and its associated files will be downloaded. + ### Additional development dependencies ```bash From 6f196a236a382fd51c4f10fd0c0e78bc0022febf Mon Sep 17 00:00:00 2001 From: megawattz Date: Wed, 10 Jan 2024 18:45:09 +0000 Subject: [PATCH 2/6] update readme, change configs to reflect datapipes changes --- README.md | 188 ++++----- .../configuration/template_configuration.yaml | 398 +++++++++++++++++- .../datamodule/streamed_batches.yaml | 2 + configs.example/readme.md | 7 +- requirements.txt | 1 + 5 files changed, 474 insertions(+), 122 deletions(-) diff --git a/README.md b/README.md index 6f9a3805..9f97655b 100644 --- a/README.md +++ b/README.md @@ -34,12 +34,20 @@ git clone --depth 1 https://github.com/openclimatefix/PVNet.git ``` This means only the latest commit and its associated files will be downloaded. +Next, in the PVNet repo, install PVNet as an editable package: + +```bash +pip install -e . 
+``` + ### Additional development dependencies ```bash pip install -r requirements-dev.txt ``` + + ## Getting started with running PVNet Before running any code in within PVNet, copy the example configuration to a @@ -75,115 +83,103 @@ https://huggingface.co/datasets/openclimatefix/dwd-icon-eu which includes the UK OCF maintains a dataset of PV generation from 1311 private PV installations here: https://huggingface.co/datasets/openclimatefix/uk_pv -### Generating pre-made batches of data for training/validation of PVNet -PVNet contains a script for generating batches of data suitable for training the -PVNet models. +### Connecting with ocf_datapipes for batch creation -To run the script you will need to make some modifications to the datamodule -configuration. +Outside the PVNet repo, clone the ocf-datapipes repo and exit the conda env created for PVNet: https://github.com/openclimatefix/ocf_datapipes +```bash +git clone --depth 1 https://github.com/openclimatefix/ocf_datapipes.git +conda create -n ocf_datapipes python=3.10 +``` -1. First, create your new configuration file in - `./configs/datamodule/configiration/local_configuration.yaml` and paste the - sample config (shown below) -2. Duplicate the `./configs/datamodule/ocf_datapipes.yaml` to - `./configs/datamodule/_local_ocf_datapipes.yaml` and ensure the - `configuration` key points to your newly created configuration file in - step 1. -3. Also in this file, update the train, val & test periods to cover the data you - have access to. -4. To get you started with your own configuration file, see the sample config - below. 
Update the data paths to the location of your local GSP, NWP and PV - datasets: +Then go inside the ocf_datapipes repo to add packages -```yaml -general: - description: Demo config - name: demo_datamodule_config - -input_data: - default_history_minutes: 60 - default_forecast_minutes: 120 - - gsp: - gsp_zarr_path: /path/to/gsp-data.zarr - history_minutes: 60 - forecast_minutes: 120 - time_resolution_minutes: 30 - start_datetime: "2019-01-01T00:00:00" - end_datetime: "2019-01-08T00:00:00" - metadata_only: false - - nwp: - ukv: - nwp_zarr_path: /path/to/nwp-data.zarr - history_minutes: 60 - forecast_minutes: 120 - time_resolution_minutes: 60 - nwp_channels: # comment out channels as appropriate - - t # 2-metre temperature - - dswrf # downwards short-wave radiation flux - - dlwrf # downwards long-wave radiation flux - - hcc # high cloud cover - - mcc # medium cloud cover - - lcc # low cloud cover - - vis # visability - - r # relative humidity - - prate # precipitation rate - - si10 # 10-metre wind speed | live = unknown - nwp_image_size_pixels_height: 24 - nwp_image_size_pixels_width: 24 - nwp_provider: ukv - - pv: - pv_files_groups: - - label: pvoutput.org - pv_filename: /path/to/pv-data/pv.netcdf - pv_metadata_filename: /path/to/pv-data/metadata.csv - history_minutes: 60 - forecast_minutes: 0 # PVNet assumes no future PV generation - time_resolution_minutes: 5 - start_datetime: "2019-01-01T00:00:00" - end_datetime: "2019-01-08T00:00:00" - pv_image_size_meters_height: 24 - pv_image_size_meters_width: 24 - pv_ml_ids: 
[154,155,156,158,159,160,162,164,165,166,167,168,169,171,173,177,178,179,181,182,185,186,187,188,189,190,191,192,193,197,198,199,200,202,204,205,206,208,209,211,214,215,216,217,218,219,220,221,225,229,230,232,233,234,236,242,243,245,252,254,255,256,257,258,260,261,262,265,267,268,272,273,275,276,277,280,281,282,283,287,289,291,292,293,294,295,296,297,298,301,302,303,304,306,307,309,310,311,317,318,319,320,321,322,323,325,326,329,332,333,335,336,338,340,342,344,345,346,348,349,352,354,355,356,357,360,362,363,368,369,370,371,372,374,375,376,378,380,382,384,385,388,390,391,393,396,397,398,399,400,401,403,404,405,406,407,409,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,429,431,435,437,438,440,441,444,447,450,451,453,456,457,458,459,464,465,466,467,468,470,471,473,474,476,477,479,480,481,482,485,486,488,490,491,492,493,496,498,501,503,506,507,508,509,510,511,512,513,515,516,517,519,520,521,522,524,526,527,528,531,532,536,537,538,540,541,542,543,544,545,549,550,551,552,553,554,556,557,560,561,563,566,568,571,572,575,576,577,579,580,581,582,584,585,588,590,594,595,597,600,602,603,604,606,611,613,614,616,618,620,622,623,624,625,626,628,629,630,631,636,637,638,640,641,642,644,645,646,650,651,652,653,654,655,657,660,661,662,663,666,667,668,670,675,676,679,681,683,684,685,687,696,698,701,702,703,704,706,710,722,723,724,725,727,728,729,730,732,733,734,735,736,737,] - n_pv_systems_per_example: 128 - get_center: false - is_live: false - - satellite: - satellite_zarr_path: "" # Left empty to avoid using satellite data - history_minutes: 60 - forecast_minutes: 0 - live_delay_minutes: 30 - time_resolution_minutes: 5 - satellite_channels: - - IR_016 - - IR_039 - - IR_087 - - IR_097 - - IR_108 - - IR_120 - - IR_134 - - VIS006 - - VIS008 - - WV_062 - - WV_073 - satellite_image_size_pixels_height: 24 - satellite_image_size_pixels_width: 24 +```bash +pip install -r requirements.txt requirements-dev.txt +``` + +Then exit this environment, and enter back into the 
pvnet conda environment and install ocf_datapies in editable mode (-e). This means the package is directly linked to the source code in the ocf_datapies repo. + +```bash +pip install -e /PATH/TO/ocf_datapipes +``` + +## Generating pre-made batches of data for training/validation of PVNet + +PVNet contains a script for generating batches of data suitable for training the PVNet models. To run the script you will need to make some modifications to the datamodule configuration. + +Make sure you have copied the example configs (as already stated above): ``` +cp -r configs.example configs +``` + +### Set up and config example for batch creation + +We will use the example of creating batches using data from gcp: +`/PVNet/configs/datamodule/configuration/gcp_configuration.yaml` +Ensure that the file paths are set to the correct locations in +`gcp_configuration.yaml`. + +`PLACEHOLDER` is used to indicate where to input the location of the files. + +For OCF use cases, file locations can be found in `template_configuration.yaml` located alongside `gcp_configuration.yaml`. + +In these configurations you can update the train, val & test periods to cover the data you have access to. + With your configuration in place, you can proceed to create batches. PVNet uses [hydra](https://hydra.cc/) which enables us to pass variables via the command line that will override the configuration defined in the `./configs` directory. -Run the save_batches.py script to create batches with the following arguments as -a minimum: +When creating batches, an additional config is used which is passed into the batch creation script. This is the datamodule config located in `PVNet/configs/datamodule`. + +For this example we will be using the `streamed_batches.yaml` config. 
Like before, a placeholder variable is used when specifying which configuration to use: + +`configuration: "PLACEHOLDER.yaml"` + +This should be given the whole path to the config on your local machine, such as for our example it should be changed to: + +`configuration: "/FULL-PATH-TO-REPO/PVNet/configs/datamodule/configuration/gcp_configuration.yaml" +` + +Where `FULL-PATH-TO-REPO` represent the whole path to the PVNet repo on your local machine. + +### Running the batch creation script + +Run the save_batches.py script to create batches with the following example arguments as: + +``` +python scripts/save_batches.py datamodule=streamed_batches +batch_output_dir="./output" +num_train_batches=10 +num_val_batches=5 +``` + +In this function the datamodule argument looks for a config under `PVNet/configs/datamodule`. The examples here are either to use "premade_batches" or "streamed_batches". + +Its important that the dates set for the training, validation and testing in the datamodule (`streamed_batches.yaml`) config are within the ranges of the dates set for the input features in the configuration (`gcp_configuration.yaml`). + +If downloading data from gcp bucket or satellite data make sure to authenticate gcloud: + +``` +gcloud auth login +``` + +For files stored in multiple locations they can be added as list. 
For example from the gcp_configuration.yaml file we can change from satellite data stored on a bucket: + +``` +satellite: + satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr + +``` + +To satellite data hosted by Google: ``` -python scripts/save_batches.py datamodule=local_ocf_datapipes +batch_output_dir="./output" +num_train_batches=10 +num_val_batches=5 +satellite: + satellite_zarr_path: + - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr" + - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr" ``` +Datapipes is currently set up to use 12 channels from the satellite data which is the YEAR_nonhrv.zarr rather than YEAR_hrv.zarr. + ### Training PVNet diff --git a/configs.example/datamodule/configuration/template_configuration.yaml b/configs.example/datamodule/configuration/template_configuration.yaml index 559f9d7f..2180af5f 100644 --- a/configs.example/datamodule/configuration/template_configuration.yaml +++ b/configs.example/datamodule/configuration/template_configuration.yaml @@ -15,29 +15,386 @@ input_data: end_datetime: "2021-09-01T00:00:00" nwp: - nwp_zarr_path: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr - history_minutes: 60 - forecast_minutes: 120 - time_resolution_minutes: 60 - nwp_channels: - - t # live = t2m - - dswrf - - dlwrf - - hcc - - mcc - - lcc - - vis - - r # live = r2 - - prate # live ~= rprate - - si10 # 10-metre wind speed | live = unknown - nwp_image_size_pixels_height: 24 - nwp_image_size_pixels_width: 24 + ukv: + nwp_zarr_path: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr + history_minutes: 60 + forecast_minutes: 120 + time_resolution_minutes: 60 + nwp_channels: # comment out channels as appropriate + - t # 2-metre temperature + - dswrf # downwards short-wave radiation flux + - dlwrf # downwards long-wave radiation flux + - 
hcc # high cloud cover + - mcc # medium cloud cover + - lcc # low cloud cover + - vis # visability + - r # relative humidity + - prate # precipitation rate + - si10 # 10-metre wind speed | live = unknown + nwp_image_size_pixels_height: 24 + nwp_image_size_pixels_width: 24 + nwp_provider: ukv + start_datetime: "2020-01-01T00:00:00" + end_datetime: "2021-09-01T00:00:00" pv: pv_files_groups: - label: solar_sheffield_passiv pv_filename: gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf pv_metadata_filename: gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata_OCF_ONLY.csv + # This is the list of pv_ml_ids to be sliced from the PV site level data + pv_ml_ids: + [ + 154, + 155, + 156, + 158, + 159, + 160, + 162, + 164, + 165, + 166, + 167, + 168, + 169, + 171, + 173, + 177, + 178, + 179, + 181, + 182, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 197, + 198, + 199, + 200, + 202, + 204, + 205, + 206, + 208, + 209, + 211, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 225, + 229, + 230, + 232, + 233, + 234, + 236, + 242, + 243, + 245, + 252, + 254, + 255, + 256, + 257, + 258, + 260, + 261, + 262, + 265, + 267, + 268, + 272, + 273, + 275, + 276, + 277, + 280, + 281, + 282, + 283, + 287, + 289, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 301, + 302, + 303, + 304, + 306, + 307, + 309, + 310, + 311, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 325, + 326, + 329, + 332, + 333, + 335, + 336, + 338, + 340, + 342, + 344, + 345, + 346, + 348, + 349, + 352, + 354, + 355, + 356, + 357, + 360, + 362, + 363, + 368, + 369, + 370, + 371, + 372, + 374, + 375, + 376, + 378, + 380, + 382, + 384, + 385, + 388, + 390, + 391, + 393, + 396, + 397, + 398, + 399, + 400, + 401, + 403, + 404, + 405, + 406, + 407, + 409, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 429, + 431, + 435, + 437, + 438, + 440, + 441, + 444, + 447, + 450, + 
451, + 453, + 456, + 457, + 458, + 459, + 464, + 465, + 466, + 467, + 468, + 470, + 471, + 473, + 474, + 476, + 477, + 479, + 480, + 481, + 482, + 485, + 486, + 488, + 490, + 491, + 492, + 493, + 496, + 498, + 501, + 503, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 515, + 516, + 517, + 519, + 520, + 521, + 522, + 524, + 526, + 527, + 528, + 531, + 532, + 536, + 537, + 538, + 540, + 541, + 542, + 543, + 544, + 545, + 549, + 550, + 551, + 552, + 553, + 554, + 556, + 557, + 560, + 561, + 563, + 566, + 568, + 571, + 572, + 575, + 576, + 577, + 579, + 580, + 581, + 582, + 584, + 585, + 588, + 590, + 594, + 595, + 597, + 600, + 602, + 603, + 604, + 606, + 611, + 613, + 614, + 616, + 618, + 620, + 622, + 623, + 624, + 625, + 626, + 628, + 629, + 630, + 631, + 636, + 637, + 638, + 640, + 641, + 642, + 644, + 645, + 646, + 650, + 651, + 652, + 653, + 654, + 655, + 657, + 660, + 661, + 662, + 663, + 666, + 667, + 668, + 670, + 675, + 676, + 679, + 681, + 683, + 684, + 685, + 687, + 696, + 698, + 701, + 702, + 703, + 704, + 706, + 710, + 722, + 723, + 724, + 725, + 727, + 728, + 729, + 730, + 732, + 733, + 734, + 735, + 736, + 737, + ] history_minutes: 60 forecast_minutes: 120 time_resolution_minutes: 5 @@ -50,7 +407,9 @@ input_data: is_live: false satellite: - satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr + satellite_zarr_path: + - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr" + - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr" history_minutes: 60 forecast_minutes: 0 live_delay_minutes: 30 @@ -69,6 +428,3 @@ input_data: - WV_073 satellite_image_size_pixels_height: 24 satellite_image_size_pixels_width: 24 - -output_data: - filepath: "not-needed" diff --git a/configs.example/datamodule/streamed_batches.yaml b/configs.example/datamodule/streamed_batches.yaml index 1b05f323..a6afe12d 100644 --- 
a/configs.example/datamodule/streamed_batches.yaml +++ b/configs.example/datamodule/streamed_batches.yaml @@ -1,6 +1,8 @@ _target_: pvnet.data.datamodule.DataModule # Path to the data configuration yaml file. You can find examples in the configuration subdirectory # in configs.example/datamodule/configuration +# Use the full local path such as: /FULL/PATH/PVNet/configs/datamodule/configuration/gcp_configuration.yaml" +num_workers: 20 configuration: "PLACEHOLDER.yaml" num_workers: 20 prefetch_factor: 2 diff --git a/configs.example/readme.md b/configs.example/readme.md index 36bddee5..831e5dd8 100644 --- a/configs.example/readme.md +++ b/configs.example/readme.md @@ -1,8 +1,5 @@ -This directory contains example configuration files for the PVNet project. Many paths will need to -be each user. YOu can find these paths by searching for PLACEHOLDER within these logs. Not all of -the values with a placeholder need to be set. For example in the logger subdirectory there are -many different loggers with PLACEHOLDERS. If only one logger is used, then only that placeholder -need be set. +This directory contains example configuration files for the PVNet project. Many paths will need to be unique to each user. You can find these paths by searching for PLACEHOLDER within these logs. Not all of +the values with a placeholder need to be set. For example in the logger subdirectory there are many different loggers with PLACEHOLDERS. If only one logger is used, then only that placeholder needs to be set. 
run experiments by: `python run.py experiment=example_simple ` diff --git a/requirements.txt b/requirements.txt index 68d72982..b1cd28ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,3 +23,4 @@ hydra-core python-dotenv hydra-optuna-sweeper rich +gcsfs \ No newline at end of file From e8c60ede802abadd0b67dc887144d07973d5d899 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Jan 2024 18:46:35 +0000 Subject: [PATCH 3/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 8 +++---- .../configuration/template_configuration.yaml | 24 +++++++++---------- requirements.txt | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 9f97655b..1787fef9 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ conda create -n ocf_datapipes python=3.10 Then go inside the ocf_datapipes repo to add packages ```bash -pip install -r requirements.txt requirements-dev.txt +pip install -r requirements.txt requirements-dev.txt ``` Then exit this environment, and enter back into the pvnet conda environment and install ocf_datapies in editable mode (-e). This means the package is directly linked to the source code in the ocf_datapies repo. @@ -144,7 +144,7 @@ This should be given the whole path to the config on your local machine, such as Where `FULL-PATH-TO-REPO` represent the whole path to the PVNet repo on your local machine. -### Running the batch creation script +### Running the batch creation script Run the save_batches.py script to create batches with the following example arguments as: @@ -167,7 +167,7 @@ For files stored in multiple locations they can be added as list. 
For example fr ``` satellite: satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr - + ``` To satellite data hosted by Google: @@ -178,7 +178,7 @@ satellite: - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr" - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr" ``` -Datapipes is currently set up to use 12 channels from the satellite data which is the YEAR_nonhrv.zarr rather than YEAR_hrv.zarr. +Datapipes is currently set up to use 12 channels from the satellite data which is the YEAR_nonhrv.zarr rather than YEAR_hrv.zarr. ### Training PVNet diff --git a/configs.example/datamodule/configuration/template_configuration.yaml b/configs.example/datamodule/configuration/template_configuration.yaml index 2180af5f..d07844ac 100644 --- a/configs.example/datamodule/configuration/template_configuration.yaml +++ b/configs.example/datamodule/configuration/template_configuration.yaml @@ -21,16 +21,16 @@ input_data: forecast_minutes: 120 time_resolution_minutes: 60 nwp_channels: # comment out channels as appropriate - - t # 2-metre temperature - - dswrf # downwards short-wave radiation flux - - dlwrf # downwards long-wave radiation flux - - hcc # high cloud cover - - mcc # medium cloud cover - - lcc # low cloud cover - - vis # visability - - r # relative humidity - - prate # precipitation rate - - si10 # 10-metre wind speed | live = unknown + - t # 2-metre temperature + - dswrf # downwards short-wave radiation flux + - dlwrf # downwards long-wave radiation flux + - hcc # high cloud cover + - mcc # medium cloud cover + - lcc # low cloud cover + - vis # visability + - r # relative humidity + - prate # precipitation rate + - si10 # 10-metre wind speed | live = unknown nwp_image_size_pixels_height: 24 nwp_image_size_pixels_width: 24 nwp_provider: ukv @@ -42,7 +42,7 @@ input_data: - label: solar_sheffield_passiv pv_filename: 
gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf pv_metadata_filename: gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata_OCF_ONLY.csv - # This is the list of pv_ml_ids to be sliced from the PV site level data + # This is the list of pv_ml_ids to be sliced from the PV site level data pv_ml_ids: [ 154, @@ -409,7 +409,7 @@ input_data: satellite: satellite_zarr_path: - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr" - - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr" + - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr" history_minutes: 60 forecast_minutes: 0 live_delay_minutes: 30 diff --git a/requirements.txt b/requirements.txt index b1cd28ca..7310878e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,4 +23,4 @@ hydra-core python-dotenv hydra-optuna-sweeper rich -gcsfs \ No newline at end of file +gcsfs From d2e51b2065a24164cadbb238c04b566b1e58ce03 Mon Sep 17 00:00:00 2001 From: megawattz Date: Thu, 18 Jan 2024 10:00:04 +0000 Subject: [PATCH 4/6] small corrections to documentation and template --- README.md | 5 ++--- configs.example/datamodule/streamed_batches.yaml | 1 - requirements.txt | 1 + 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1787fef9..d42bc049 100644 --- a/README.md +++ b/README.md @@ -156,7 +156,7 @@ In this function the datamodule argument looks for a config under `PVNet/configs Its important that the dates set for the training, validation and testing in the datamodule (`streamed_batches.yaml`) config are within the ranges of the dates set for the input features in the configuration (`gcp_configuration.yaml`). 
-If downloading data from gcp bucket or satellite data make sure to authenticate gcloud: +If downloading private data from a gcp bucket make sure to authenticate gcloud (the public satellite data does not need authentication): ``` gcloud auth login @@ -167,7 +167,6 @@ For files stored in multiple locations they can be added as list. For example fr ``` satellite: satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr - ``` To satellite data hosted by Google: @@ -178,7 +177,7 @@ satellite: - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr" - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr" ``` -Datapipes is currently set up to use 12 channels from the satellite data which is the YEAR_nonhrv.zarr rather than YEAR_hrv.zarr. +Datapipes is currently set up to use 11 channels from the satellite data, the 12th of which is HRV and is not included in these. ### Training PVNet diff --git a/configs.example/datamodule/streamed_batches.yaml b/configs.example/datamodule/streamed_batches.yaml index a6afe12d..d703b741 100644 --- a/configs.example/datamodule/streamed_batches.yaml +++ b/configs.example/datamodule/streamed_batches.yaml @@ -2,7 +2,6 @@ _target_: pvnet.data.datamodule.DataModule # Path to the data configuration yaml file. 
You can find examples in the configuration subdirectory # in configs.example/datamodule/configuration # Use the full local path such as: /FULL/PATH/PVNet/configs/datamodule/configuration/gcp_configuration.yaml" -num_workers: 20 configuration: "PLACEHOLDER.yaml" num_workers: 20 prefetch_factor: 2 diff --git a/requirements.txt b/requirements.txt index 7310878e..a4b090d6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,4 +23,5 @@ hydra-core python-dotenv hydra-optuna-sweeper rich +# gcsfs is only needed when getting data from Google Cloud Storage gcsfs From 5f026089f8335a3c0bd031e605d267188723fd5a Mon Sep 17 00:00:00 2001 From: megawattz Date: Thu, 18 Jan 2024 10:04:40 +0000 Subject: [PATCH 5/6] keep 5 ml ids in template config --- .../configuration/template_configuration.yaml | 344 ------------------ 1 file changed, 344 deletions(-) diff --git a/configs.example/datamodule/configuration/template_configuration.yaml b/configs.example/datamodule/configuration/template_configuration.yaml index d07844ac..9923ffe6 100644 --- a/configs.example/datamodule/configuration/template_configuration.yaml +++ b/configs.example/datamodule/configuration/template_configuration.yaml @@ -45,355 +45,11 @@ input_data: # This is the list of pv_ml_ids to be sliced from the PV site level data pv_ml_ids: [ - 154, - 155, 156, 158, 159, 160, 162, - 164, - 165, - 166, - 167, - 168, - 169, - 171, - 173, - 177, - 178, - 179, - 181, - 182, - 185, - 186, - 187, - 188, - 189, - 190, - 191, - 192, - 193, - 197, - 198, - 199, - 200, - 202, - 204, - 205, - 206, - 208, - 209, - 211, - 214, - 215, - 216, - 217, - 218, - 219, - 220, - 221, - 225, - 229, - 230, - 232, - 233, - 234, - 236, - 242, - 243, - 245, - 252, - 254, - 255, - 256, - 257, - 258, - 260, - 261, - 262, - 265, - 267, - 268, - 272, - 273, - 275, - 276, - 277, - 280, - 281, - 282, - 283, - 287, - 289, - 291, - 292, - 293, - 294, - 295, - 296, - 297, - 298, - 301, - 302, - 303, - 304, - 306, - 307, - 309, - 310, - 311, - 317, - 318, - 
319, - 320, - 321, - 322, - 323, - 325, - 326, - 329, - 332, - 333, - 335, - 336, - 338, - 340, - 342, - 344, - 345, - 346, - 348, - 349, - 352, - 354, - 355, - 356, - 357, - 360, - 362, - 363, - 368, - 369, - 370, - 371, - 372, - 374, - 375, - 376, - 378, - 380, - 382, - 384, - 385, - 388, - 390, - 391, - 393, - 396, - 397, - 398, - 399, - 400, - 401, - 403, - 404, - 405, - 406, - 407, - 409, - 411, - 412, - 413, - 414, - 415, - 416, - 417, - 418, - 419, - 420, - 421, - 422, - 423, - 424, - 425, - 426, - 427, - 429, - 431, - 435, - 437, - 438, - 440, - 441, - 444, - 447, - 450, - 451, - 453, - 456, - 457, - 458, - 459, - 464, - 465, - 466, - 467, - 468, - 470, - 471, - 473, - 474, - 476, - 477, - 479, - 480, - 481, - 482, - 485, - 486, - 488, - 490, - 491, - 492, - 493, - 496, - 498, - 501, - 503, - 506, - 507, - 508, - 509, - 510, - 511, - 512, - 513, - 515, - 516, - 517, - 519, - 520, - 521, - 522, - 524, - 526, - 527, - 528, - 531, - 532, - 536, - 537, - 538, - 540, - 541, - 542, - 543, - 544, - 545, - 549, - 550, - 551, - 552, - 553, - 554, - 556, - 557, - 560, - 561, - 563, - 566, - 568, - 571, - 572, - 575, - 576, - 577, - 579, - 580, - 581, - 582, - 584, - 585, - 588, - 590, - 594, - 595, - 597, - 600, - 602, - 603, - 604, - 606, - 611, - 613, - 614, - 616, - 618, - 620, - 622, - 623, - 624, - 625, - 626, - 628, - 629, - 630, - 631, - 636, - 637, - 638, - 640, - 641, - 642, - 644, - 645, - 646, - 650, - 651, - 652, - 653, - 654, - 655, - 657, - 660, - 661, - 662, - 663, - 666, - 667, - 668, - 670, - 675, - 676, - 679, - 681, - 683, - 684, - 685, - 687, - 696, - 698, - 701, - 702, - 703, - 704, - 706, - 710, - 722, - 723, - 724, - 725, - 727, - 728, - 729, - 730, - 732, - 733, - 734, - 735, - 736, - 737, ] history_minutes: 60 forecast_minutes: 120 From f5400fe2468e1f6d0a32d64ddb26af1199f24a3d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Jan 2024 10:04:53 +0000 Subject: [PATCH 6/6] 
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../datamodule/configuration/template_configuration.yaml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/configs.example/datamodule/configuration/template_configuration.yaml b/configs.example/datamodule/configuration/template_configuration.yaml index 9923ffe6..d2354155 100644 --- a/configs.example/datamodule/configuration/template_configuration.yaml +++ b/configs.example/datamodule/configuration/template_configuration.yaml @@ -43,14 +43,7 @@ input_data: pv_filename: gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf pv_metadata_filename: gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata_OCF_ONLY.csv # This is the list of pv_ml_ids to be sliced from the PV site level data - pv_ml_ids: - [ - 156, - 158, - 159, - 160, - 162, - ] + pv_ml_ids: [156, 158, 159, 160, 162] history_minutes: 60 forecast_minutes: 120 time_resolution_minutes: 5