diff --git a/README.md b/README.md
index 1787fef9..d42bc049 100644
--- a/README.md
+++ b/README.md
@@ -156,7 +156,7 @@ In this function the datamodule argument looks for a config under `PVNet/configs
 
 Its important that the dates set for the training, validation and testing in the datamodule (`streamed_batches.yaml`) config are within the ranges of the dates set for the input features in the configuration (`gcp_configuration.yaml`).
 
-If downloading data from gcp bucket or satellite data make sure to authenticate gcloud:
+If downloading private data from a gcp bucket make sure to authenticate gcloud (the public satellite data does not need authentication):
 
 ```
 gcloud auth login
@@ -167,7 +167,6 @@ For files stored in multiple locations they can be added as list. For example fr
 ```
 satellite:
   satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
-
 ```
 
 To satellite data hosted by Google:
@@ -178,7 +177,7 @@ satellite:
   satellite_zarr_path:
     - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr"
     - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr"
 ```
-Datapipes is currently set up to use 12 channels from the satellite data which is the YEAR_nonhrv.zarr rather than YEAR_hrv.zarr.
+Datapipes is currently set up to use 11 channels from the satellite data, the 12th of which is HRV and is not included in these.
 
 ### Training PVNet
diff --git a/configs.example/datamodule/streamed_batches.yaml b/configs.example/datamodule/streamed_batches.yaml
index a6afe12d..d703b741 100644
--- a/configs.example/datamodule/streamed_batches.yaml
+++ b/configs.example/datamodule/streamed_batches.yaml
@@ -2,7 +2,6 @@ _target_: pvnet.data.datamodule.DataModule
 # Path to the data configuration yaml file. You can find examples in the configuration subdirectory
 # in configs.example/datamodule/configuration
 # Use the full local path such as: /FULL/PATH/PVNet/configs/datamodule/configuration/gcp_configuration.yaml"
-num_workers: 20
 configuration: "PLACEHOLDER.yaml"
 num_workers: 20
 prefetch_factor: 2
diff --git a/requirements.txt b/requirements.txt
index 7310878e..a4b090d6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,4 +23,5 @@ hydra-core
 python-dotenv
 hydra-optuna-sweeper
 rich
+# gcsfs is only needed when getting data from Google Cloud Storage
 gcsfs