openclimatefix
diff --git a/.flake8 b/.flake8
Lines changed: 4 additions & 0 deletions
diff --git a/.isort.cfg b/.isort.cfg
Lines changed: 2 additions & 0 deletions
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 55 additions & 10 deletions
diff --git a/predict_pv_yield/data/dataloader.py b/predict_pv_yield/data/dataloader.py
Lines changed: 15 additions & 13 deletions
diff --git a/predict_pv_yield/models/base_model.py b/predict_pv_yield/models/base_model.py
Lines changed: 48 additions & 30 deletions
diff --git a/predict_pv_yield/models/conv3d/model_sat_nwp.py b/predict_pv_yield/models/conv3d/model_sat_nwp.py
Lines changed: 29 additions & 12 deletions
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 88
+exclude = .tox,.eggs,ci/templates,build,dist, __init__.py
+ignore = E741,F403,E265,W504,E226,W503,E501,E203
@@ -0,0 +1,2 @@
+[settings]
+profile=black
@@ -1,13 +1,58 @@
+exclude: '^(\.tox|ci/templates|\.bumpversion\.cfg)(/|$)'
 default_language_version:
-  python: python3.9
+  python: python3
 
 repos:
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.1.0
-    hooks:
-      # list of supported hooks: https://pre-commit.com/hooks.html
-      - id: trailing-whitespace
-      - id: end-of-file-fixer
-      - id: check-yaml
-      - id: debug-statements
-      - id: detect-private-key
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.3.0
+  hooks:
+  - id: trailing-whitespace
+  - id: check-docstring-first
+  - id: check-added-large-files
+  - id: check-ast
+  - id: check-merge-conflict
+  - id: debug-statements
+  - id: end-of-file-fixer
+  - id: mixed-line-ending
+    args: ['--fix=lf']
+
+- repo: https://github.com/asottile/pyupgrade
+  rev: v2.37.3
+  hooks:
+  - id: pyupgrade
+    args: ['--py39-plus']
+
+- repo: https://github.com/myint/autoflake
+  rev: v1.5.3
+  hooks:
+  - id: autoflake
+    args: [
+      --in-place,
+      --remove-all-unused-imports,
+      --remove-unused-variables,
+    ]
+
+- repo: https://github.com/pycqa/isort
+  rev: 5.10.1
+  hooks:
+  - id: isort
+    args: [
+      --sp=.isort.cfg,
+    ]
+
+- repo: https://github.com/psf/black
+  rev: 22.8.0
+  hooks:
+  - id: black
+  - id: black-jupyter
+
+- repo: https://github.com/PyCQA/flake8
+  rev: 5.0.4
+  hooks:
+  - id: flake8
+
+- repo: https://github.com/srstevenson/nb-clean
+  rev: 2.2.1
+  hooks:
+  - id: nb-clean
+    args: ['--remove-empty-cells']
@@ -1,14 +1,12 @@
+import logging
 import os
+
+import torch
 from nowcasting_dataloader.datasets import NetCDFDataset, worker_init_fn
 from nowcasting_dataloader.fake import FakeDataset
 from nowcasting_dataset.config.load import load_yaml_configuration
-from typing import Tuple
-import logging
-import torch
 from pytorch_lightning import LightningDataModule
 
-
-
 _LOG = logging.getLogger(__name__)
 _LOG.setLevel(logging.DEBUG)
 
@@ -21,12 +19,16 @@ def get_dataloaders(
     cloud: str = "gcp",
     temp_path=".",
     data_path="prepared_ML_training_data/v4/",
-) -> Tuple:
+) -> tuple:
 
-    configuration = load_yaml_configuration(filename=f'{data_path}/configuration.yaml')
+    # configuration = load_yaml_configuration(filename=f"{data_path}/configuration.yaml")
 
     data_module = NetCDFDataModule(
-        temp_path=temp_path, data_path=data_path, cloud=cloud, n_train_data=n_train_data, n_val_data=n_validation_data
+        temp_path=temp_path,
+        data_path=data_path,
+        cloud=cloud,
+        n_train_data=n_train_data,
+        n_val_data=n_validation_data,
     )
 
     train_dataloader = data_module.train_dataloader()
@@ -75,8 +77,8 @@ def __init__(
         self.pin_memory = pin_memory
         self.fake_data = fake_data
 
-        filename = os.path.join(data_path, 'configuration.yaml')
-        _LOG.debug(f'Will be loading the configuration file {filename}')
+        filename = os.path.join(data_path, "configuration.yaml")
+        _LOG.debug(f"Will be loading the configuration file {filename}")
         self.configuration = load_yaml_configuration(filename=filename)
 
         self.dataloader_config = dict(
@@ -98,7 +100,7 @@ def train_dataloader(self):
                 self.n_train_data,
                 os.path.join(self.data_path, "train"),
                 os.path.join(self.temp_path, "train"),
-                configuration=self.configuration
+                configuration=self.configuration,
             )
 
         return torch.utils.data.DataLoader(train_dataset, **self.dataloader_config)
@@ -111,7 +113,7 @@ def val_dataloader(self):
                 self.n_val_data,
                 os.path.join(self.data_path, "test"),
                 os.path.join(self.temp_path, "test"),
-                configuration=self.configuration
+                configuration=self.configuration,
             )
 
         return torch.utils.data.DataLoader(val_dataset, **self.dataloader_config)
@@ -125,7 +127,7 @@ def test_dataloader(self):
                 self.n_val_data,
                 os.path.join(self.data_path, "test"),
                 os.path.join(self.temp_path, "test"),
-                configuration=self.configuration
+                configuration=self.configuration,
             )
 
         return torch.utils.data.DataLoader(test_dataset, **self.dataloader_config)
@@ -1,19 +1,23 @@
+import logging
+
+import numpy as np
+import pandas as pd
 import pytorch_lightning as pl
 import torch
 import torch.nn.functional as F
-
-from nowcasting_utils.visualization.visualization import plot_example
-from nowcasting_utils.visualization.line import plot_batch_results
+from nowcasting_dataloader.batch import BatchML
 from nowcasting_dataset.data_sources.nwp.nwp_data_source import NWP_VARIABLE_NAMES
+from nowcasting_utils.metrics.validation import (
+    make_validation_results,
+    save_validation_results_to_logger,
+)
 from nowcasting_utils.models.loss import WeightedLosses
-from nowcasting_utils.models.metrics import mae_each_forecast_horizon, mse_each_forecast_horizon
-from nowcasting_dataloader.batch import BatchML
-from nowcasting_utils.metrics.validation import make_validation_results, save_validation_results_to_logger
-
-import pandas as pd
-import numpy as np
-
-import logging
+from nowcasting_utils.models.metrics import (
+    mae_each_forecast_horizon,
+    mse_each_forecast_horizon,
+)
+from nowcasting_utils.visualization.line import plot_batch_results
+from nowcasting_utils.visualization.visualization import plot_example
 
 logger = logging.getLogger(__name__)
 
@@ -75,7 +79,9 @@ def __init__(self):
 
         self.weighted_losses = WeightedLosses(forecast_length=self.forecast_len)
 
-    def _training_or_validation_step(self, batch, tag: str, return_model_outputs: bool = False):
+    def _training_or_validation_step(
+        self, batch, tag: str, return_model_outputs: bool = False
+    ):
         """
         batch: The batch data
         tag: either 'Train', 'Validation' , 'Test'
@@ -120,8 +126,12 @@ def _training_or_validation_step(self, batch, tag: str, return_model_outputs: bo
 
         if tag != "Train":
             # add metrics for each forecast horizon
-            mse_each_forecast_horizon_metric = mse_each_forecast_horizon(output=y_hat, target=y)
-            mae_each_forecast_horizon_metric = mae_each_forecast_horizon(output=y_hat, target=y)
+            mse_each_forecast_horizon_metric = mse_each_forecast_horizon(
+                output=y_hat, target=y
+            )
+            mae_each_forecast_horizon_metric = mae_each_forecast_horizon(
+                output=y_hat, target=y
+            )
 
             metrics_mse = {
                 f"MSE_forecast_horizon_{i}/{tag}": mse_each_forecast_horizon_metric[i]
@@ -167,7 +177,9 @@ def validation_step(self, batch: BatchML, batch_idx):
         if batch_idx in [0, 1, 2, 3, 4]:
 
             # make sure the interesting example doesnt go above the batch size
-            INTERESTING_EXAMPLES = (i for i in INTERESTING_EXAMPLES if i < self.batch_size)
+            INTERESTING_EXAMPLES = (
+                i for i in INTERESTING_EXAMPLES if i < self.batch_size
+            )
 
             for example_i in INTERESTING_EXAMPLES:
                 # 1. Plot example
@@ -187,7 +199,7 @@ def validation_step(self, batch: BatchML, batch_idx):
                     self.logger.experiment[-1].log_image(name, fig)
                     try:
                         fig.close()
-                    except Exception as _:
+                    except Exception:
                         # could not close figure
                         pass
 
@@ -212,26 +224,30 @@ def validation_step(self, batch: BatchML, batch_idx):
             ]
 
             # plot and save to logger
-            fig = plot_batch_results(model_name=self.name, y=y, y_hat=y_hat, x=time, x_hat=time_hat)
+            fig = plot_batch_results(
+                model_name=self.name, y=y, y_hat=y_hat, x=time, x_hat=time_hat
+            )
             fig.write_html(f"temp_{batch_idx}.html")
             try:
                 self.logger.experiment[-1][name].upload(f"temp_{batch_idx}.html")
-            except:
+            except Exception:
                 pass
 
         # save validation results
-        capacity = batch.gsp.gsp_capacity[:,-self.forecast_len_30:,0].cpu().numpy()
+        capacity = batch.gsp.gsp_capacity[:, -self.forecast_len_30 :, 0].cpu().numpy()
         predictions = model_output.cpu().numpy()
-        truths = batch.gsp.gsp_yield[:, -self.forecast_len_30:, 0].cpu().numpy()
+        truths = batch.gsp.gsp_yield[:, -self.forecast_len_30 :, 0].cpu().numpy()
         predictions = predictions * capacity
         truths = truths * capacity
 
-        results = make_validation_results(truths_mw=truths,
-                                          predictions_mw=predictions,
-                                          capacity_mwp=capacity,
-                                          gsp_ids=batch.gsp.gsp_id[:, 0].cpu(),
-                                          batch_idx=batch_idx,
-                                          t0_datetimes_utc=pd.to_datetime(batch.metadata.t0_datetime_utc))
+        results = make_validation_results(
+            truths_mw=truths,
+            predictions_mw=predictions,
+            capacity_mwp=capacity,
+            gsp_ids=batch.gsp.gsp_id[:, 0].cpu(),
+            batch_idx=batch_idx,
+            t0_datetimes_utc=pd.to_datetime(batch.metadata.t0_datetime_utc),
+        )
 
         # append so in 'validation_epoch_end' the file is saved
         if batch_idx == 0:
@@ -244,10 +260,12 @@ def validation_epoch_end(self, outputs):
 
         logger.info("Validation epoch end")
 
-        save_validation_results_to_logger(results_dfs=self.results_dfs,
-                                          results_file_name=self.results_file_name,
-                                          current_epoch=self.current_epoch,
-                                          logger=self.logger)
+        save_validation_results_to_logger(
+            results_dfs=self.results_dfs,
+            results_file_name=self.results_file_name,
+            current_epoch=self.current_epoch,
+            logger=self.logger,
+        )
 
     def test_step(self, batch, batch_idx):
         self._training_or_validation_step(batch, tag="Test")
 
@@ -2,10 +2,10 @@
 
 import torch
 import torch.nn.functional as F
+from nowcasting_dataloader.batch import BatchML
 from torch import nn
 
 from predict_pv_yield.models.base_model import BaseModel
-from nowcasting_dataloader.batch import BatchML
 
 logging.basicConfig()
 _LOG = logging.getLogger("predict_pv_yield")
@@ -139,10 +139,12 @@ def __init__(
                 setattr(self, f"nwp_conv{i + 1}", layer)
 
             self.nwp_fc1 = nn.Linear(
-                in_features=self.nwp_cnn_output_size, out_features=self.fc1_output_features
+                in_features=self.nwp_cnn_output_size,
+                out_features=self.fc1_output_features,
             )
             self.nwp_fc2 = nn.Linear(
-                in_features=self.fc1_output_features, out_features=self.number_of_nwp_features
+                in_features=self.fc1_output_features,
+                out_features=self.number_of_nwp_features,
             )
 
         if self.embedding_dem:
@@ -152,22 +154,29 @@ def __init__(
 
         if self.include_pv_yield_history:
             self.pv_fc1 = nn.Linear(
-                in_features=self.number_of_pv_samples_per_batch * (self.history_len_5 + 1),
+                in_features=self.number_of_pv_samples_per_batch
+                * (self.history_len_5 + 1),
                 out_features=128,
             )
 
         fc3_in_features = self.fc2_output_features
         if include_pv_or_gsp_yield_history:
-            fc3_in_features += self.number_of_samples_per_batch * (self.history_len_30 + 1)
+            fc3_in_features += self.number_of_samples_per_batch * (
+                self.history_len_30 + 1
+            )
         if include_nwp:
             fc3_in_features += 128
         if self.embedding_dem:
             fc3_in_features += self.embedding_dem
         if self.include_pv_yield_history:
             fc3_in_features += 128
 
-        self.fc3 = nn.Linear(in_features=fc3_in_features, out_features=self.fc3_output_features)
-        self.fc4 = nn.Linear(in_features=self.fc3_output_features, out_features=self.forecast_len)
+        self.fc3 = nn.Linear(
+            in_features=fc3_in_features, out_features=self.fc3_output_features
+        )
+        self.fc4 = nn.Linear(
+            in_features=self.fc3_output_features, out_features=self.forecast_len
+        )
         # self.fc5 = nn.Linear(in_features=32, out_features=8)
         # self.fc6 = nn.Linear(in_features=8, out_features=1)
 
@@ -201,15 +210,20 @@ def forward(self, x):
         if self.include_pv_or_gsp_yield_history:
             if self.output_variable == "gsp_yield":
                 pv_yield_history = (
-                    x.gsp.gsp_yield[:, : self.history_len_30 + 1].nan_to_num(nan=0.0).float()
+                    x.gsp.gsp_yield[:, : self.history_len_30 + 1]
+                    .nan_to_num(nan=0.0)
+                    .float()
                 )
             else:
                 pv_yield_history = (
-                    x.pv.pv_yield[:, : self.history_len_30 + 1].nan_to_num(nan=0.0).float()
+                    x.pv.pv_yield[:, : self.history_len_30 + 1]
+                    .nan_to_num(nan=0.0)
+                    .float()
                 )
 
             pv_yield_history = pv_yield_history.reshape(
-                pv_yield_history.shape[0], pv_yield_history.shape[1] * pv_yield_history.shape[2]
+                pv_yield_history.shape[0],
+                pv_yield_history.shape[1] * pv_yield_history.shape[2],
             )
             # join up
             out = torch.cat((out, pv_yield_history), dim=1)
@@ -218,11 +232,14 @@ def forward(self, x):
         if self.include_pv_yield_history:
             # just take the first 128
             pv_yield_history = (
-                x.pv.pv_yield[:, : self.history_len_5 + 1, :128].nan_to_num(nan=0.0).float()
+                x.pv.pv_yield[:, : self.history_len_5 + 1, :128]
+                .nan_to_num(nan=0.0)
+                .float()
             )
 
             pv_yield_history = pv_yield_history.reshape(
-                pv_yield_history.shape[0], pv_yield_history.shape[1] * pv_yield_history.shape[2]
+                pv_yield_history.shape[0],
+                pv_yield_history.shape[1] * pv_yield_history.shape[2],
             )
             pv_yield_history = F.relu(self.pv_fc1(pv_yield_history))