openclimatefix
diff --git a/‎.bumpversion.cfg
Lines changed: 1 addition & 1 deletion b/‎.bumpversion.cfg
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md
Lines changed: 4 additions & 1 deletion b/‎README.md
Lines changed: 4 additions & 1 deletion
diff --git a/‎experiments/analysis.py renamed to ‎experiments/mae_analysis.py
Lines changed: 32 additions & 16 deletions b/‎experiments/analysis.py renamed to ‎experiments/mae_analysis.py
Lines changed: 32 additions & 16 deletions
diff --git a/‎pvnet/__init__.py
Lines changed: 1 addition & 1 deletion b/‎pvnet/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎pvnet/models/base_model.py
Lines changed: 75 additions & 5 deletions b/‎pvnet/models/base_model.py
Lines changed: 75 additions & 5 deletions
@@ -1,7 +1,7 @@
 [bumpversion]
 commit = True
 tag = True
-current_version = 3.0.53
+current_version = 3.0.63
 message = Bump version: {current_version} → {new_version} [skip ci]
 
 [bumpversion:file:pvnet/__init__.py]
 
@@ -1,6 +1,7 @@
 # PVNet 2.1
 
-[![Python Bump Version & release](https://github.com/openclimatefix/PVNet/actions/workflows/release.yml/badge.svg)](https://github.com/openclimatefix/PVNet/actions/workflows/release.yml)
+ [![Python Bump Version & release](https://github.com/openclimatefix/PVNet/actions/workflows/release.yml/badge.svg)](https://github.com/openclimatefix/PVNet/actions/workflows/release.yml) [![ease of contribution: hard](https://img.shields.io/badge/ease%20of%20contribution:%20hard-bb2629)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)
+
 
 This project is used for training PVNet and running PVNet on live data.
 
@@ -85,6 +86,8 @@ OCF maintains a Zarr formatted version of the German Weather Service's (DWD)
 ICON-EU NWP model here:
 https://huggingface.co/datasets/openclimatefix/dwd-icon-eu which includes the UK
 
+Please note that the current version of the [ICON loader](https://github.com/openclimatefix/ocf_datapipes/blob/9ec252eeee44937c12ab52699579bdcace76e72f/ocf_datapipes/load/nwp/providers/icon.py#L9-L30) supports a different format. If you want to use our ICON-EU dataset or your own NWP source, you can create a loader for it using [the instructions here](https://github.com/openclimatefix/ocf_datapipes/tree/main/ocf_datapipes/load#nwp).
+
 **PV**\
 OCF maintains a dataset of PV generation from 1311 private PV installations
 here: https://huggingface.co/datasets/openclimatefix/uk_pv
 
@@ -1,5 +1,8 @@
 """
-Script to generate a table comparing two run for MAE values for 48 hour 15 minute forecast
+Script to generate analysis of MAE values for multiple model forecasts
+
+Does this for 48 hour horizon forecasts with 15 minute granularity
+
 """
 
 import argparse
@@ -10,16 +13,23 @@
 import wandb
 
 
-def main(runs: list[str], run_names: list[str]) -> None:
+def main(project: str, runs: list[str], run_names: list[str]) -> None:
     """
-    Compare two runs for MAE values for 48 hour 15 minute forecast
+    Compare MAE values for multiple model forecasts for 48 hour horizon with 15 minute granularity
+
+    Args:
+            project: name of W&B project
+            runs: W&B ids of runs
+            run_names: user specified names for runs
+
     """
     api = wandb.Api()
     dfs = []
+    epoch_num = []
     for run in runs:
-        run = api.run(f"openclimatefix/india/{run}")
+        run = api.run(f"openclimatefix/{project}/{run}")
 
-        df = run.history()
+        df = run.history(samples=run.lastHistoryStep + 1)
         # Get the columns that are in the format 'MAE_horizon/step_<number>/val`
         mae_cols = [col for col in df.columns if "MAE_horizon/step_" in col and "val" in col]
         # Sort them
@@ -40,6 +50,7 @@ def main(runs: list[str], run_names: list[str]) -> None:
         # Get the step from the column name
         column_timesteps = [int(col.split("_")[-1].split("/")[0]) * 15 for col in mae_cols]
         dfs.append(df)
+        epoch_num.append(min_row_idx)
     # Get the timedelta for each group
     groupings = [
         [0, 0],
@@ -86,36 +97,41 @@ def main(runs: list[str], run_names: list[str]) -> None:
     for idx, df in enumerate(dfs):
         print(f"{run_names[idx]}: {df.mean()*100:0.3f}")
 
-    # Plot the error on per timestep, and all timesteps
+    # Plot the error per timestep
     plt.figure()
     for idx, df in enumerate(dfs):
-        plt.plot(column_timesteps, df, label=run_names[idx])
+        plt.plot(
+            column_timesteps, df, label=f"{run_names[idx]}, epoch: {epoch_num[idx]}", linestyle="-"
+        )
     plt.legend()
     plt.xlabel("Timestep (minutes)")
     plt.ylabel("MAE %")
     plt.title("MAE % for each timestep")
     plt.savefig("mae_per_timestep.png")
     plt.show()
 
-    # Plot the error on per timestep, and grouped timesteps
+    # Plot the error per grouped timestep
     plt.figure()
-    for run_name in run_names:
-        plt.plot(groups_df[run_name], label=run_name)
+    for idx, run_name in enumerate(run_names):
+        plt.plot(
+            groups_df[run_name],
+            label=f"{run_name}, epoch: {epoch_num[idx]}",
+            marker="o",
+            linestyle="-",
+        )
     plt.legend()
     plt.xlabel("Timestep (minutes)")
     plt.ylabel("MAE %")
-    plt.title("MAE % for each timestep")
-    plt.savefig("mae_per_timestep.png")
+    plt.title("MAE % for each grouped timestep")
+    plt.savefig("mae_per_grouped_timestep.png")
     plt.show()
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    "5llq8iw6"
-    parser.add_argument("--first_run", type=str, default="xdlew7ib")
-    parser.add_argument("--second_run", type=str, default="v3mja33d")
+    parser.add_argument("--project", type=str, default="")
     # Add arguments that are lists of strings
     parser.add_argument("--list_of_runs", nargs="+")
     parser.add_argument("--run_names", nargs="+")
     args = parser.parse_args()
-    main(args.list_of_runs, args.run_names)
+    main(args.project, args.list_of_runs, args.run_names)
@@ -1,2 +1,2 @@
 """PVNet"""
-__version__ = "3.0.53"
+__version__ = "3.0.63"
@@ -2,6 +2,7 @@
 import json
 import logging
 import os
+import tempfile
 from pathlib import Path
 from typing import Dict, Optional, Union
 
@@ -13,7 +14,7 @@
 import torch.nn.functional as F
 import wandb
 import yaml
-from huggingface_hub import ModelCard, ModelCardData
+from huggingface_hub import ModelCard, ModelCardData, PyTorchModelHubMixin
 from huggingface_hub.constants import CONFIG_NAME, PYTORCH_WEIGHTS_NAME
 from huggingface_hub.file_download import hf_hub_download
 from huggingface_hub.hf_api import HfApi
@@ -144,7 +145,7 @@ def minimize_data_config(input_path, output_path, model):
         yaml.dump(config, outfile, default_flow_style=False)
 
 
-class PVNetModelHubMixin:
+class PVNetModelHubMixin(PyTorchModelHubMixin):
     """
     Implementation of [`PyTorchModelHubMixin`] to provide model Hub upload/download capabilities.
     """
@@ -415,7 +416,10 @@ def __init__(
             self.num_output_features = self.forecast_len * len(self.output_quantiles)
         else:
             self.num_output_features = self.forecast_len
-    
+        
+        # save all validation results to a list, so we can save these to Weights & Biases
+        self.validation_epoch_results = []
+
     def transfer_batch_to_device(self, batch, device, dataloader_idx):
         """Method to move custom batches to a given device"""
         return copy_batch_to_device(batch, device)
@@ -605,12 +609,62 @@ def _log_forecast_plot(self, batch, y_hat, accum_batch_num, timesteps_to_plot, p
             print(e)
         plt.close(fig)
 
+    def _log_validation_results(self, batch, y_hat, accum_batch_num):
+        """Append validation results to self.validation_epoch_results"""
+
+        # get truth values, shape (b, forecast_len)
+        y = batch[self._target_key][:, -self.forecast_len :, 0]
+        y = y.detach().cpu().numpy()
+        batch_size = y.shape[0]
+
+        # get prediction values, shape (b, forecast_len, quantiles?)
+        y_hat = y_hat.detach().cpu().numpy()
+
+        # get time_utc, shape (b, forecast_len)
+        time_utc_key = BatchKey[f"{self._target_key_name}_time_utc"]
+        time_utc = batch[time_utc_key][:, -self.forecast_len :].detach().cpu().numpy()
+
+        # get target id and change from (b,1) to (b,)
+        id_key = BatchKey[f"{self._target_key_name}_id"]
+        target_id = batch[id_key].detach().cpu().numpy()
+        target_id = target_id.squeeze()
+
+        for i in range(batch_size):
+            y_i = y[i]
+            y_hat_i = y_hat[i]
+            time_utc_i = time_utc[i]
+            target_id_i = target_id[i]
+
+            results_dict = {
+                "y": y_i,
+                "time_utc": time_utc_i,
+            }
+            if self.use_quantile_regression:
+                results_dict.update(
+                    {f"y_quantile_{q}": y_hat_i[:, i] for i, q in enumerate(self.output_quantiles)}
+                )
+            else:
+                results_dict["y_hat"] = y_hat_i
+
+            results_df = pd.DataFrame(results_dict)
+            results_df["id"] = target_id_i
+            results_df["batch_idx"] = accum_batch_num
+            results_df["example_idx"] = i
+
+            self.validation_epoch_results.append(results_df)
+
     def validation_step(self, batch: dict, batch_idx):
         """Run validation step"""
+
+        accum_batch_num = batch_idx // self.trainer.accumulate_grad_batches
+
         y_hat = self(batch)
 
         y = batch[self._target_key][:, -self.forecast_len :]
 
+        if (batch_idx + 1) % self.trainer.accumulate_grad_batches == 0:
+            self._log_validation_results(batch, y_hat, accum_batch_num)
+
         # Expand persistence to be the same shape as y
         losses = self._calculate_common_losses(y, y_hat)
         losses.update(self._calculate_val_losses(y, y_hat))
@@ -628,8 +682,6 @@ def validation_step(self, batch: dict, batch_idx):
             on_epoch=True,
         )
 
-        accum_batch_num = batch_idx // self.trainer.accumulate_grad_batches
-
         # Make plots only if using wandb logger
         if isinstance(self.logger, pl.loggers.WandbLogger) and accum_batch_num in [0, 1]:
             # Store these temporarily under self
@@ -671,6 +723,24 @@ def validation_step(self, batch: dict, batch_idx):
     def on_validation_epoch_end(self):
         """Run on epoch end"""
 
+        try:
+            # join together validation results, and save to wandb
+            validation_results_df = pd.concat(self.validation_epoch_results)
+            with tempfile.TemporaryDirectory() as tempdir:
+                filename = os.path.join(tempdir, f"validation_results_{self.current_epoch}.csv")
+                validation_results_df.to_csv(filename, index=False)
+
+                # make and log wandb artifact
+                validation_artifact = wandb.Artifact(
+                    f"validation_results_epoch_{self.current_epoch}", type="dataset"
+                )
+                validation_artifact.add_file(filename)
+                wandb.log_artifact(validation_artifact)
+        except Exception as e:
+            print("Failed to log validation results to wandb")
+            print(e)
+
+        self.validation_epoch_results = []
         horizon_maes_dict = self._horizon_maes.flush()
 
         # Create the horizon accuracy curve
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`"""PVNet"""`
`2`		`-__version__ = "3.0.53"`
	`2`	`+__version__ = "3.0.63"`