Commit db81147

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 4f0a3dd commit db81147

5 files changed: +59 additions, -83 deletions


pvnet/data/datamodule.py

Lines changed: 14 additions & 24 deletions

@@ -1,15 +1,11 @@
 """ Data module for pytorch lightning """
-from datetime import datetime
 from glob import glob
 
-from lightning.pytorch import LightningDataModule
-from torch.utils.data import Dataset, DataLoader
 import torch
-
-from ocf_datapipes.batch import batch_to_tensor, stack_np_examples_into_batch, NumpyBatch
-from ocf_data_sampler.torch_datasets.pvnet_uk_regional import (
-    PVNetUKRegionalDataset
-)
+from lightning.pytorch import LightningDataModule
+from ocf_data_sampler.torch_datasets.pvnet_uk_regional import PVNetUKRegionalDataset
+from ocf_datapipes.batch import NumpyBatch, batch_to_tensor, stack_np_examples_into_batch
+from torch.utils.data import DataLoader, Dataset
 
 
 def fill_nans_in_arrays(batch):
@@ -29,30 +25,28 @@ def fill_nans_in_arrays(batch):
     return batch
 
 
-
 class NumpybatchPremadeSamplesDataset(Dataset):
     """Dataset to load NumpyBatch samples"""
-
+
     def __init__(self, sample_dir):
         """Dataset to load NumpyBatch samples
-
+
         Args:
             sample_dir: Path to the directory of pre-saved samples.
         """
         self.sample_paths = glob(f"{sample_dir}/*.pt")
-
-
+
     def __len__(self):
         return len(self.sample_paths)
-
+
     def __getitem__(self, idx):
         return fill_nans_in_arrays(torch.load(self.sample_paths[idx]))
-
+
 
 def collate_fn(samples: list[NumpyBatch]):
     """Convert a list of NumpyBatch samples to a tensor batch"""
     return batch_to_tensor(stack_np_examples_into_batch(samples))
-
+
 
 class DataModule(LightningDataModule):
     """Datamodule for training pvnet and using pvnet pipeline in `ocf_datapipes`."""
@@ -64,9 +58,8 @@ def __init__(
         batch_size: int = 16,
         num_workers: int = 0,
         prefetch_factor: int | None = None,
-        train_period: list[str|None] = [None, None],
-        val_period: list[str|None] = [None, None],
-
+        train_period: list[str | None] = [None, None],
+        val_period: list[str | None] = [None, None],
     ):
         """Datamodule for training pvnet architecture.
 
@@ -85,7 +78,6 @@ def __init__(
         """
         super().__init__()
 
-
         if not ((sample_dir is not None) ^ (configuration is not None)):
             raise ValueError("Exactly one of `sample_dir` or `configuration` must be set.")
 
@@ -118,21 +110,19 @@ def _get_streamed_samples_dataset(self, start_time, end_time) -> Dataset:
     def _get_premade_samples_dataset(self, subdir) -> Dataset:
         split_dir = f"{self.sample_dir}/{subdir}"
         return NumpybatchPremadeSamplesDataset(split_dir)
-
+
     def train_dataloader(self) -> DataLoader:
         """Construct train dataloader"""
         if self.sample_dir is not None:
             dataset = self._get_premade_samples_dataset("train")
         else:
             dataset = self._get_streamed_samples_dataset(*self.train_period)
         return DataLoader(dataset, shuffle=True, **self._common_dataloader_kwargs)
-
+
     def val_dataloader(self) -> DataLoader:
         """Construct val dataloader"""
         if self.sample_dir is not None:
             dataset = self._get_premade_samples_dataset("val")
         else:
             dataset = self._get_streamed_samples_dataset(*self.val_period)
         return DataLoader(dataset, shuffle=False, **self._common_dataloader_kwargs)
-
-
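
For orientation, a minimal usage sketch of this datamodule after the tidy-up. The sample directory path and worker count are illustrative, and it is assumed the remaining constructor arguments keep their defaults, so passing only `sample_dir` satisfies the XOR check shown above:

    from pvnet.data.datamodule import DataModule

    # Hypothetical directory containing pre-saved train/ and val/ *.pt samples
    datamodule = DataModule(
        sample_dir="/path/to/presaved/samples",
        batch_size=16,
        num_workers=2,
    )
    # Loads *.pt files via NumpybatchPremadeSamplesDataset, filling NaNs per sample
    train_loader = datamodule.train_dataloader()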

pvnet/models/base_model.py

Lines changed: 6 additions & 14 deletions

@@ -18,11 +18,7 @@
 from huggingface_hub.constants import CONFIG_NAME, PYTORCH_WEIGHTS_NAME
 from huggingface_hub.file_download import hf_hub_download
 from huggingface_hub.hf_api import HfApi
-
-from ocf_datapipes.batch import BatchKey
-from ocf_datapipes.batch import copy_batch_to_device
-
-from ocf_ml_metrics.evaluation.evaluation import evaluation
+from ocf_datapipes.batch import BatchKey, copy_batch_to_device
 
 from pvnet.models.utils import (
     BatchAccumulator,
@@ -32,8 +28,6 @@
 from pvnet.optimizers import AbstractOptimizer
 from pvnet.utils import plot_batch_forecasts
 
-
-
 DATA_CONFIG_NAME = "data_config.yaml"
 
 
@@ -239,13 +233,11 @@ def get_data_config(
         )
 
         return data_config_file
-
-
+
     def _save_pretrained(self, save_directory: Path) -> None:
         """Save weights from a Pytorch model to a local directory."""
         model_to_save = self.module if hasattr(self, "module") else self  # type: ignore
         torch.save(model_to_save.state_dict(), save_directory / PYTORCH_WEIGHTS_NAME)
-
 
     def save_pretrained(
         self,
@@ -416,14 +408,14 @@ def __init__(
             self.num_output_features = self.forecast_len * len(self.output_quantiles)
         else:
             self.num_output_features = self.forecast_len
-
+
         # save all validation results to array, so we can save these to weights n biases
         self.validation_epoch_results = []
 
     def transfer_batch_to_device(self, batch, device, dataloader_idx):
         """Method to move custom batches to a given device"""
         return copy_batch_to_device(batch, device)
-
+
     def _quantiles_to_prediction(self, y_quantiles):
         """
         Convert network prediction into a point prediction.
@@ -465,7 +457,7 @@ def _calculate_quantile_loss(self, y_quantiles, y):
             errors = y - y_quantiles[..., i]
             losses.append(torch.max((q - 1) * errors, q * errors).unsqueeze(-1))
         losses = 2 * torch.cat(losses, dim=2)
-
+
         return losses.mean()
 
     def _calculate_common_losses(self, y, y_hat):
@@ -659,7 +651,7 @@ def validation_step(self, batch: dict, batch_idx):
         accum_batch_num = batch_idx // self.trainer.accumulate_grad_batches
 
         y_hat = self(batch)
-
+
         y = batch[self._target_key][:, -self.forecast_len :]
 
         if (batch_idx + 1) % self.trainer.accumulate_grad_batches == 0:
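
The `_calculate_quantile_loss` hunk above shows the core of the model's pinball loss. For clarity, a self-contained sketch of the same computation, with the enclosing loop reconstructed from the visible context lines and the tensor shapes stated as assumptions:

    import torch

    def quantile_loss(y_quantiles: torch.Tensor, y: torch.Tensor, quantiles: list[float]) -> torch.Tensor:
        """Pinball loss mirroring the pattern in _calculate_quantile_loss.

        Assumed shapes: y_quantiles is (batch, forecast_len, n_quantiles),
        y is (batch, forecast_len).
        """
        losses = []
        for i, q in enumerate(quantiles):
            errors = y - y_quantiles[..., i]
            # Under-prediction is weighted by q, over-prediction by (1 - q)
            losses.append(torch.max((q - 1) * errors, q * errors).unsqueeze(-1))
        losses = 2 * torch.cat(losses, dim=2)
        return losses.mean()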

pvnet/models/utils.py

Lines changed: 0 additions & 2 deletions

@@ -1,8 +1,6 @@
 """Utility functions"""
 
 import logging
-import math
-from typing import Optional
 
 import numpy as np
 import torch

pvnet/utils.py

Lines changed: 1 addition & 3 deletions

@@ -6,7 +6,6 @@
 
 import lightning.pytorch as pl
 import matplotlib.pyplot as plt
-import numpy as np
 import pandas as pd
 import pylab
 import rich.syntax
@@ -16,7 +15,6 @@
 from lightning.pytorch.utilities import rank_zero_only
 from ocf_datapipes.batch import BatchKey
 from ocf_datapipes.utils import Location
-from ocf_datapipes.utils.geospatial import osgb_to_lon_lat
 from omegaconf import DictConfig, OmegaConf
 
 
@@ -322,4 +320,4 @@ def _get_numpy(key):
     plt.suptitle(title)
     plt.tight_layout()
 
-    return fig
+    return fig

scripts/save_samples.py

Lines changed: 38 additions & 40 deletions

@@ -20,41 +20,38 @@
 ```
 if wanting to override these values for example
 """
-
+
 # Ensure this block of code runs only in the main process to avoid issues with worker processes.
 if __name__ == "__main__":
     import torch.multiprocessing as mp
-
-    # Set the start method for torch multiprocessing. Choose either "forkserver" or "spawn" to be
-    # compatible with dask's multiprocessing.
+
+    # Set the start method for torch multiprocessing. Choose either "forkserver" or "spawn" to be
+    # compatible with dask's multiprocessing.
     mp.set_start_method("forkserver")
-
-    # Set the sharing strategy to 'file_system' to handle file descriptor limitations. This is
-    # important because libraries like Zarr may open many files, which can exhaust the file
+
+    # Set the sharing strategy to 'file_system' to handle file descriptor limitations. This is
+    # important because libraries like Zarr may open many files, which can exhaust the file
     # descriptor limit if too many workers are used.
-    mp.set_sharing_strategy('file_system')
+    mp.set_sharing_strategy("file_system")
 
 
+import logging
 import os
-import sys
 import shutil
-import logging
+import sys
 import warnings
 
+import dask
 import hydra
+import torch
+from ocf_data_sampler.torch_datasets.pvnet_uk_regional import PVNetUKRegionalDataset
 from omegaconf import DictConfig, OmegaConf
 from sqlalchemy import exc as sa_exc
+from torch.utils.data import DataLoader, Dataset
 from tqdm import tqdm
 
-import torch
-from torch.utils.data import Dataset, DataLoader
-
-from ocf_data_sampler.torch_datasets.pvnet_uk_regional import PVNetUKRegionalDataset
-
 from pvnet.utils import print_config
 
-import dask
-
 dask.config.set(scheduler="threads", num_workers=4)
 
 
@@ -71,6 +68,7 @@
 
 class SaveFuncFactory:
     """Factory for creating a function to save a sample to disk."""
+
     def __init__(self, save_dir: str, renewable: str = "pv"):
         self.save_dir = save_dir
         self.renewable = renewable
@@ -86,22 +84,22 @@ def __call__(self, sample, sample_num: int):
 
 def get_dataset(config_path: str, start_time: str, end_time: str, renewable: str = "pv") -> Dataset:
     """Get the dataset for the given renewable type."""
-    if renewable== "pv":
-        dataset_cls = PVNetUKRegionalDataset
+    if renewable == "pv":
+        dataset_cls = PVNetUKRegionalDataset
     elif renewable in ["wind", "pv_india", "pv_site"]:
         raise NotImplementedError
     else:
         raise ValueError(f"Unknown renewable: {renewable}")
-
+
     return dataset_cls(config_path, start_time=start_time, end_time=end_time)
 
 
 def save_samples_with_dataloader(
-    dataset: Dataset,
-    save_dir: str,
-    num_samples: int,
-    dataloader_kwargs: dict,
-    renewable: str = "pv"
+    dataset: Dataset,
+    save_dir: str,
+    num_samples: int,
+    dataloader_kwargs: dict,
+    renewable: str = "pv",
 ) -> None:
     """Save samples from a dataset using a dataloader."""
     save_func = SaveFuncFactory(save_dir, renewable=renewable)
@@ -124,7 +122,7 @@ def main(config: DictConfig) -> None:
 
     # Set up directory
     os.makedirs(config_dm.sample_output_dir, exist_ok=False)
-
+
     # Copy across configs which define the samples into the new sample directory
     with open(f"{config_dm.sample_output_dir}/datamodule.yaml", "w") as f:
         f.write(OmegaConf.to_yaml(config_dm))
@@ -141,29 +139,29 @@ def main(config: DictConfig) -> None:
         batch_sampler=None,
         num_workers=config_dm.num_workers,
         collate_fn=None,
-        pin_memory=False, # Only using CPU to prepare samples so pinning is not beneficial
+        pin_memory=False,  # Only using CPU to prepare samples so pinning is not beneficial
         drop_last=False,
         timeout=0,
         worker_init_fn=None,
         prefetch_factor=config_dm.prefetch_factor,
-        persistent_workers=False, # Not needed since we only enter the dataloader loop once
+        persistent_workers=False,  # Not needed since we only enter the dataloader loop once
     )
 
     if config_dm.num_val_samples > 0:
         print("----- Saving val samples -----")
-
+
         val_output_dir = f"{config_dm.sample_output_dir}/val"
-
+
         # Make directory for val samples
         os.mkdir(val_output_dir)
-
-        # Get the dataset
+
+        # Get the dataset
         val_dataset = get_dataset(
             config_dm.configuration,
             *config_dm.val_period,
             renewable=config.renewable,
         )
-
+
         # Save samples
         save_samples_with_dataloader(
             dataset=val_dataset,
@@ -172,24 +170,24 @@ def main(config: DictConfig) -> None:
             dataloader_kwargs=dataloader_kwargs,
             renewable=config.renewable,
         )
-
+
         del val_dataset
 
     if config_dm.num_train_samples > 0:
         print("----- Saving train samples -----")
-
+
         train_output_dir = f"{config_dm.sample_output_dir}/train"
-
+
         # Make directory for train samples
         os.mkdir(train_output_dir)
-
-        # Get the dataset
+
+        # Get the dataset
         train_dataset = get_dataset(
             config_dm.configuration,
             *config_dm.train_period,
             renewable=config.renewable,
        )
-
+
         # Save samples
         save_samples_with_dataloader(
             dataset=train_dataset,
@@ -198,7 +196,7 @@ def main(config: DictConfig) -> None:
             dataloader_kwargs=dataloader_kwargs,
             renewable=config.renewable,
         )
-
+
         del train_dataset
 
     print("----- Saving complete -----")
