From a304738f40e17a47faac5532e70b0706eab8de82 Mon Sep 17 00:00:00 2001
From: Felix Peretz
Date: Sat, 18 Jan 2025 08:04:17 +0000
Subject: [PATCH] Output networks updated

---
 .../linear_networks/output_networks.py | 55 +++++++++++++++----
 1 file changed, 43 insertions(+), 12 deletions(-)

diff --git a/pvnet/models/multimodal/linear_networks/output_networks.py b/pvnet/models/multimodal/linear_networks/output_networks.py
index 1f636080..7aaf9178 100644
--- a/pvnet/models/multimodal/linear_networks/output_networks.py
+++ b/pvnet/models/multimodal/linear_networks/output_networks.py
@@ -21,7 +21,8 @@ class DynamicOutputNetwork(AbstractLinearNetwork):
     """
     Dynamic output network definition
     """
-    # Input ant output dimension specified here
+    # Input and output dimensions specified here
+    # Defines feature mapping ℝ^n → ℝ^m
     def __init__(
         self,
         in_features: int,
@@ -36,8 +37,10 @@ def __init__(
     ):
         # Initialisation of dynamic output network
         super().__init__(in_features=in_features, out_features=out_features)
+        self.out_features = out_features
 
         # Default hidden architecture
+        # h_i ∈ ℝ^{d_i}, where d_i = [2n, n]
         if hidden_dims is None:
             hidden_dims = [in_features * 2, in_features]
 
@@ -45,26 +48,28 @@ def __init__(
             raise ValueError("hidden_dims must be positive")
 
         # Construction of network layers - config
+        # Network architecture parameters θ
         self.use_layer_norm = use_layer_norm
         self.use_residual = use_residual
         self.quantile_output = quantile_output
         self.num_forecast_steps = num_forecast_steps
 
         # Construction of hidden layers
-        # Sequence: Linear → LayerNorm → ReLU → Dropout
+        # H_i: ℝ^{d_i} → ℝ^{d_{i+1}}
+        # Sequential transformation φ(x) = Dropout(ReLU(LayerNorm(Wx + b)))
         self.layers = nn.ModuleList()
         prev_dim = in_features
 
         for dim in hidden_dims:
 
-            # Linear transformation / normalisatiom
+            # Affine transformation followed by distribution normalisation
             layer_block = []
             layer_block.append(nn.Linear(prev_dim, dim))
 
             if use_layer_norm:
                 layer_block.append(nn.LayerNorm(dim))
 
-            # Non linearity / regularisation
+            # Non-linear activation and stochastic regularisation
             layer_block.extend([
                 nn.ReLU(),
                 nn.Dropout(dropout)
@@ -73,8 +78,9 @@ def __init__(
             self.layers.append(nn.Sequential(*layer_block))
             prev_dim = dim
 
-        # Output layer definition
-        # Projection for quantile preds over timesteps or standard
+        # Output layer transformation definition
+        # f: ℝ^{d_L} → ℝ^m
+        # Projection mapping P: ℝ^d → ℝ^{m×t} for temporal quantile predictions
         if quantile_output and num_forecast_steps:
             final_out_features = out_features * num_forecast_steps
         else:
@@ -83,16 +89,23 @@ def __init__(
         self.output_layer = nn.Linear(prev_dim, final_out_features)
 
         # Output activation definition
+        # ψ: ℝ^m → [0,1]^m
         if output_activation == "softmax":
             self.output_activation = nn.Softmax(dim=-1)
         elif output_activation == "sigmoid":
             self.output_activation = nn.Sigmoid()
         else:
             self.output_activation = None
-
-        # Optional layer norm for residual connection
+
+        # Optional layer norm and residual projection
+        # g: ℝ^n → ℝ^m
         if use_residual:
-            self.residual_norm = nn.LayerNorm(out_features)
+            if quantile_output and num_forecast_steps:
+                self.residual_norm = nn.LayerNorm(out_features)
+            else:
+                final_out_features = out_features * num_forecast_steps if quantile_output and num_forecast_steps else out_features
+                self.residual_norm = nn.LayerNorm(final_out_features)
+            self.residual_proj = nn.Linear(in_features, out_features)
 
 
     def reshape_quantile_output(self, x: torch.Tensor) -> torch.Tensor:
@@ -124,11 +137,29 @@ def forward(
 
         # Output transform, reshape and apply residual connection
         x = self.output_layer(x)
-        x = self.reshape_quantile_output(x)
-        if self.use_residual and x.shape == residual.shape:
-            x = self.residual_norm(x + residual)
+        x = self.reshape_quantile_output(x)
+
+        if self.use_residual:
+            # Apply residual projection transformation
+            projected_residual = self.residual_proj(residual)
+            if self.quantile_output and self.num_forecast_steps:
+
+                # Apply residual mapping followed by normalisation
+                projected_residual = projected_residual.reshape(x.shape[0], x.shape[2])
+
+                # Collapse temporal dimensions for normalisation
+                # ℝ^{B×T×F} → ℝ^{BT×F}
+                x = x.reshape(-1, x.shape[2])
+                x = self.residual_norm(x + projected_residual.repeat(self.num_forecast_steps, 1))
+
+                # Restore tensor dimensionality
+                # ℝ^{BT×F} → ℝ^{B×T×F}
+                x = x.reshape(-1, self.num_forecast_steps, self.out_features)
+            else:
+                x = self.residual_norm(x + projected_residual)
 
         # Apply output activation
+        # Non-linear transformation ψ
         if self.output_activation:
             x = self.output_activation(x)
 
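
Usage sketch (not part of the patch): a minimal, hypothetical example of constructing the updated DynamicOutputNetwork and running a forward pass, included to make the reshaping behaviour easier to review. The argument names come from the hunks above, but the full __init__ and forward signatures (defaults, extra arguments, expected input layout) are not visible in this patch, so the values chosen and the single-tensor call below are assumptions for illustration only.

    # Hypothetical usage sketch: argument names are taken from the diff above,
    # but signatures and defaults are not shown in the patch, so treat these
    # values and the single-tensor forward call as assumptions.
    import torch

    from pvnet.models.multimodal.linear_networks.output_networks import DynamicOutputNetwork

    model = DynamicOutputNetwork(
        in_features=128,
        out_features=3,              # e.g. three quantiles per forecast step
        hidden_dims=[256, 128],
        dropout=0.1,
        use_layer_norm=True,
        use_residual=True,
        quantile_output=True,
        num_forecast_steps=16,
        output_activation=None,      # or "softmax" / "sigmoid"
    )

    x = torch.randn(32, 128)         # assumed input layout: (batch, in_features)
    y = model(x)                     # expected shape per the reshape above: (32, 16, 3)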