
Commit 08a55ab

linting and clean up
1 parent 9faaeec commit 08a55ab

11 files changed: +100 -35 lines

pvnet/__init__.py (+1)

@@ -1 +1,2 @@
+"""PVNet"""
 __version__ = "0.0.8"

pvnet/callbacks.py (+30 -2)

@@ -1,25 +1,31 @@
-"""Custom callbacks developed to be able to use early stopping and learning rate finder even when
-pretraining parts of the network.
+"""Custom callbacks
 """
 from lightning.pytorch import Trainer
 from lightning.pytorch.callbacks import BaseFinetuning, EarlyStopping, LearningRateFinder
 from lightning.pytorch.trainer.states import TrainerFn


 class PhaseEarlyStopping(EarlyStopping):
+    """Monitor a validation metric and stop training when it stops improving.
+
+    Only functions in a specific phase of training.
+    """

     training_phase = None

     def switch_phase(self, phase: str):
+        """Switch phase of callback"""
         if phase == self.training_phase:
             self.activate()
         else:
             self.deactivate()

     def deactivate(self):
+        """Deactivate callback"""
         self.active = False

     def activate(self):
+        """Activate callback"""
         self.active = True

     def _should_skip_check(self, trainer: Trainer) -> bool:
@@ -30,21 +36,34 @@ def _should_skip_check(self, trainer: Trainer) -> bool:


 class PretrainEarlyStopping(EarlyStopping):
+    """Monitor a validation metric and stop training when it stops improving.
+
+    Only functions in the 'pretrain' phase of training.
+    """
     training_phase = "pretrain"


 class MainEarlyStopping(EarlyStopping):
+    """Monitor a validation metric and stop training when it stops improving.
+
+    Only functions in the 'main' phase of training.
+    """
     training_phase = "main"


 class PretrainFreeze(BaseFinetuning):
+    """Freeze the satellite and NWP encoders during pretraining
+    """

     training_phase = "pretrain"

     def __init__(self):
+        """Freeze the satellite and NWP encoders during pretraining
+        """
         super().__init__()

     def freeze_before_training(self, pl_module):
+        """Freeze satellite and NWP encoders before training start"""
         # freeze any module you want
         modules = []
         if pl_module.include_sat:
@@ -54,6 +73,7 @@ def freeze_before_training(self, pl_module):
         self.freeze(modules)

     def finetune_function(self, pl_module, current_epoch, optimizer):
+        """Unfreeze satellite and NWP encoders"""
         if not self.active:
             modules = []
             if pl_module.include_sat:
@@ -67,15 +87,18 @@ def finetune_function(self, pl_module, current_epoch, optimizer):
             )

     def switch_phase(self, phase: str):
+        """Switch phase of callback"""
         if phase == self.training_phase:
             self.activate()
         else:
             self.deactivate()

     def deactivate(self):
+        """Deactivate callback"""
         self.active = False

     def activate(self):
+        """Activate callback"""
         self.active = True


@@ -85,18 +108,23 @@ class PhasedLearningRateFinder(LearningRateFinder):
     active = True

     def on_fit_start(self, *args, **kwargs):
+        """Do nothing"""
         return

     def on_train_epoch_start(self, trainer, pl_module):
+        """Run learning rate finder on epoch start and then deactivate"""
         if self.active:
             self.lr_find(trainer, pl_module)
             self.deactivate()

     def switch_phase(self, phase: str):
+        """Switch training phase"""
         self.activate()

     def deactivate(self):
+        """Deactivate callback"""
         self.active = False

     def activate(self):
+        """Activate callback"""
         self.active = True

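For context, here is a minimal usage sketch (not part of this commit) showing how the phase-aware callbacks above could be wired into a Lightning Trainer; the monitored metric name "MAE/val", the patience value, and the commented-out model/datamodule are placeholders.

# Minimal usage sketch, not from this commit; metric name and model/datamodule are assumed.
from lightning.pytorch import Trainer

from pvnet.callbacks import PhasedLearningRateFinder, PretrainEarlyStopping, PretrainFreeze

callbacks = [
    PretrainEarlyStopping(monitor="MAE/val", patience=10),  # only acts in the "pretrain" phase
    PretrainFreeze(),                                        # freezes satellite/NWP encoders
    PhasedLearningRateFinder(),                              # re-runs LR find when a phase starts
]

# Point every phase-aware callback at the current training phase.
for callback in callbacks:
    if hasattr(callback, "switch_phase"):
        callback.switch_phase("pretrain")

trainer = Trainer(callbacks=callbacks)
# trainer.fit(model=model, datamodule=datamodule)
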
pvnet/data/__init__.py

Whitespace-only changes.

pvnet/data/datamodule.py (+1 -6)

@@ -20,12 +20,6 @@ def batch_to_tensor(batch):
     return batch


-def print_yaml(path):
-    print(f"{path} :")
-    with open(path, mode="r") as stream:
-        print("".join(stream.readlines()))
-
-
 def split_batches(batch):
     """Splits a single batch of data."""
     n_samples = batch[BatchKey.gsp].shape[0]
@@ -46,6 +40,7 @@ class BatchSplitter(IterDataPipe):
     """Pipeline step to split batches of data and yield single examples"""

     def __init__(self, source_datapipe: IterDataPipe):
+        """Pipeline step to split batches of data and yield single examples"""
         self.source_datapipe = source_datapipe

     def __iter__(self):

pvnet/models/utils.py (+23 -10)

@@ -1,17 +1,19 @@
+"""Utility functions"""
+
 import numpy as np
 import torch
 from ocf_datapipes.utils.consts import BatchKey


 class PredAccumulator:
-    """A class for accumulating y-predictions when using grad accumulation and the batch size is
-    small.
+    """A class for accumulating y-predictions using grad accumulation and small batch size.

     Attributes:
         _y_hats (list[torch.Tensor]): List of prediction tensors
     """

     def __init__(self):
+        """Prediction accumulator"""
         self._y_hats = []

     def __bool__(self):
@@ -22,43 +24,51 @@ def append(self, y_hat: torch.Tensor):
         self._y_hats += [y_hat]

     def flush(self) -> torch.Tensor:
+        """Return all appended predictions as a single torch tensor and remove from accumulated store.
+        """
         y_hat = torch.cat(self._y_hats, dim=0)
         self._y_hats = []
         return y_hat


 class DictListAccumulator:
+    """Abstract class for accumulating dictionaries of lists"""
     @staticmethod
-    def dict_list_append(d1, d2):
+    def _dict_list_append(d1, d2):
         for k, v in d2.items():
             d1[k] += [v]

     @staticmethod
-    def dict_init_list(d):
+    def _dict_init_list(d):
         return {k: [v] for k, v in d.items()}


 class MetricAccumulator(DictListAccumulator):
-    """A class for accumulating, and finding the mean of logging metrics when using grad
+    """Dictionary of metrics accumulator.
+
+    A class for accumulating, and finding the mean of logging metrics when using grad
     accumulation and the batch size is small.

     Attributes:
         _metrics (Dict[str, list[float]]): Dictionary containing lists of metrics.
     """

     def __init__(self):
+        """Dictionary of metrics accumulator."""
         self._metrics = {}

     def __bool__(self):
         return self._metrics != {}

     def append(self, loss_dict: dict[str, float]):
+        """Append dictionary of metrics to self"""
         if not self:
-            self._metrics = self.dict_init_list(loss_dict)
+            self._metrics = self._dict_init_list(loss_dict)
         else:
-            self.dict_list_append(self._metrics, loss_dict)
+            self._dict_list_append(self._metrics, loss_dict)

     def flush(self) -> dict[str, float]:
+        """Calculate mean of all accumulated metrics and clear"""
         mean_metrics = {k: np.mean(v) for k, v in self._metrics.items()}
         self._metrics = {}
         return mean_metrics
@@ -72,23 +82,26 @@ class BatchAccumulator(DictListAccumulator):
     """

     def __init__(self):
+        """Batch accumulator"""
         self._batches = {}

     def __bool__(self):
         return self._batches != {}

     @staticmethod
-    def filter_batch_dict(d):
+    def _filter_batch_dict(d):
         keep_keys = [BatchKey.gsp, BatchKey.gsp_id, BatchKey.gsp_t0_idx, BatchKey.gsp_time_utc]
         return {k: v for k, v in d.items() if k in keep_keys}

     def append(self, batch: dict[BatchKey, list[torch.Tensor]]):
+        """Append batch to self"""
         if not self:
-            self._batches = self.dict_init_list(self.filter_batch_dict(batch))
+            self._batches = self._dict_init_list(self._filter_batch_dict(batch))
         else:
-            self.dict_list_append(self._batches, self.filter_batch_dict(batch))
+            self._dict_list_append(self._batches, self._filter_batch_dict(batch))

     def flush(self) -> dict[BatchKey, list[torch.Tensor]]:
+        """Concatenate all accumulated batches, return, and clear self"""
         batch = {}
         for k, v in self._batches.items():
             if k == BatchKey.gsp_t0_idx:

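As a quick illustration of how these accumulators behave under gradient accumulation, the sketch below appends per-micro-batch values and flushes once per optimizer step; it is not from the commit, and the metric names, tensor shapes, and loop are invented.

# Illustrative sketch only; metric names and shapes are made up.
import torch

from pvnet.models.utils import MetricAccumulator, PredAccumulator

preds = PredAccumulator()
metrics = MetricAccumulator()

for micro_batch in range(4):
    preds.append(torch.zeros(2, 16))  # fake y_hat with shape (batch, forecast_len)
    metrics.append({"MAE/train": 0.1 * micro_batch, "MSE/train": 0.01 * micro_batch})

y_hat = preds.flush()           # concatenated along dim 0 -> shape (8, 16)
mean_metrics = metrics.flush()  # {"MAE/train": 0.15, "MSE/train": 0.015}
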
pvnet/optimizers.py (+13 -1)

@@ -7,7 +7,9 @@


 class AbstractOptimizer(ABC):
-    """Optimizer classes will be used by model like:
+    """Abstract class for optimizer
+
+    Optimizer classes will be used by model like:
     > OptimizerGenerator = AbstractOptimizer()
     > optimizer = OptimizerGenerator(model.parameters())
     The returned object `optimizer` must be something that may be returned by `pytorch_lightning`'s
@@ -19,36 +21,46 @@ class AbstractOptimizer(ABC):

     @abstractmethod
     def __call__(self):
+        """Abstract call"""
         pass


 class Adam(AbstractOptimizer):
+    """Adam optimizer"""
     def __init__(self, lr=0.0005, **kwargs):
+        """Adam optimizer"""
         self.lr = lr
         self.kwargs = kwargs

     def __call__(self, model_parameters):
+        """Return optimizer"""
         return torch.optim.Adam(model_parameters, lr=self.lr, **self.kwargs)


 class AdamW(AbstractOptimizer):
+    """AdamW optimizer"""
     def __init__(self, lr=0.0005, **kwargs):
+        """AdamW optimizer"""
         self.lr = lr
         self.kwargs = kwargs

     def __call__(self, model_parameters):
+        """Return optimizer"""
         return torch.optim.AdamW(model_parameters, lr=self.lr, **self.kwargs)


 class AdamWReduceLROnPlateau(AbstractOptimizer):
+    """AdamW optimizer and reduce on plateau scheduler"""
     def __init__(self, lr=0.0005, patience=3, factor=0.5, threshold=2e-4, **opt_kwargs):
+        """AdamW optimizer and reduce on plateau scheduler"""
         self.lr = lr
         self.patience = patience
         self.factor = factor
         self.threshold = threshold
         self.opt_kwargs = opt_kwargs

     def __call__(self, model_parameters):
+        """Return optimizer"""
         opt = torch.optim.AdamW(model_parameters, lr=self.lr, **self.opt_kwargs)
         sch = torch.optim.lr_scheduler.ReduceLROnPlateau(
             opt,

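The AbstractOptimizer docstring describes a factory pattern: the config instantiates one of these classes, and the model later calls it with its parameters. A hedged sketch of that pattern, with a toy linear model and arbitrary hyperparameters standing in for the real network and config:

# Sketch of the factory pattern from the AbstractOptimizer docstring; the toy
# model and chosen hyperparameters are placeholders, not values from the repo.
import torch

from pvnet.optimizers import AdamWReduceLROnPlateau

model = torch.nn.Linear(10, 1)

optimizer_generator = AdamWReduceLROnPlateau(lr=1e-4, patience=5)
# Presumably called from the model's configure_optimizers; the return value is
# whatever pytorch_lightning accepts there (optimizer plus scheduler in this case).
optimizer_and_scheduler = optimizer_generator(model.parameters())
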
pvnet/training.py (+6 -3)

@@ -1,3 +1,5 @@
+"""Training"""
+
 from typing import Optional

 import hydra
@@ -19,14 +21,15 @@
 torch.set_default_dtype(torch.float32)


-def callbacks_to_phase(callbacks, phase):
+def _callbacks_to_phase(callbacks, phase):
     for c in callbacks:
         if hasattr(c, "switch_phase"):
             c.switch_phase(phase)


 def train(config: DictConfig) -> Optional[float]:
     """Contains training pipeline.
+
     Instantiates all PyTorch Lightning objects from config.

     Args:
@@ -69,7 +72,7 @@ def train(config: DictConfig) -> Optional[float]:
         should_pretrain |= hasattr(c, "training_phase") and c.training_phase == "pretrain"

     if should_pretrain:
-        callbacks_to_phase(callbacks, "pretrain")
+        _callbacks_to_phase(callbacks, "pretrain")

     trainer: Trainer = hydra.utils.instantiate(
         config.trainer,
@@ -83,7 +86,7 @@ def train(config: DictConfig) -> Optional[float]:
         datamodule.block_nwp_and_sat = True
         trainer.fit(model=model, datamodule=datamodule)

-    callbacks_to_phase(callbacks, "main")
+    _callbacks_to_phase(callbacks, "main")

     datamodule.block_nwp_and_sat = False
     trainer.should_stop = False

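To put the renamed `_callbacks_to_phase` helper in context, the sketch below condenses the two-phase flow visible in `train()`: pretrain with satellite/NWP inputs blocked, then switch every phase-aware callback to "main" and fit again. It is a simplified, hypothetical helper, not the commit's `train()` function; the trainer, model, and datamodule are assumed to come from the hydra config.

# Hypothetical condensed version of the flow in pvnet.training.train.
def _callbacks_to_phase(callbacks, phase):
    for c in callbacks:
        if hasattr(c, "switch_phase"):
            c.switch_phase(phase)


def run_two_phase_training(trainer, model, datamodule, callbacks):
    """Pretrain with blocked inputs, then run the main training phase."""
    _callbacks_to_phase(callbacks, "pretrain")
    datamodule.block_nwp_and_sat = True   # pretrain without satellite/NWP data
    trainer.fit(model=model, datamodule=datamodule)

    _callbacks_to_phase(callbacks, "main")
    datamodule.block_nwp_and_sat = False  # main phase sees the full inputs
    trainer.should_stop = False           # allow the second fit to run
    trainer.fit(model=model, datamodule=datamodule)
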
pvnet/utils.py (+8 -1)

@@ -1,3 +1,5 @@
+"""Utils"""
+
 import logging
 import os
 import warnings
@@ -60,7 +62,9 @@ def get_logger(name=__name__, level=logging.INFO) -> logging.Logger:


 def extras(config: DictConfig) -> None:
-    """A couple of optional utilities, controlled by main config file:
+    """A couple of optional utilities.
+
+    Controlled by main config file:
     - disabling warnings
     - easier access to debug mode
     - forcing debug friendly configuration
@@ -143,6 +147,7 @@ def print_config(


 def empty(*args, **kwargs):
+    """Returns nothing"""
     pass


@@ -209,6 +214,7 @@ def finish(


 def plot_batch_forecasts(batch, y_hat, batch_idx=None):
+    """Plot a batch of data and the forecast from that batch"""
     def _get_numpy(key):
         return batch[key].cpu().numpy().squeeze()

@@ -254,6 +260,7 @@ def _get_numpy(key):


 def construct_ocf_ml_metrics_batch_df(batch, y, y_hat):
+    """Helper function to construct DataFrame for ocf_ml_metrics"""
     def _repeat(x):
         return np.repeat(x.squeeze(), n_times)
259266
