fix(training,config)!: refactor node_weights #102

Draft · wants to merge 44 commits into base: 7-pressure-level-scalings-only-applied-in-specific-circumstances

Changes shown from 6 of 44 commits.

Commits
9cc30e8  update config file (JPXKQX, Jan 29, 2025)
9d18ec5  bring graph node attribute scaler (JPXKQX, Jan 29, 2025)
594c3f0  update (JPXKQX, Jan 30, 2025)
2e0c476  rename Loss (JPXKQX, Jan 30, 2025)
54289fc  refactor (JPXKQX, Jan 31, 2025)
e37927f  Merge branch '7-pressure-level-scalings-only-applied-in-specific-circ… (JPXKQX, Jan 31, 2025)
06c5ea7  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 31, 2025)
c9baca5  FunctionalLoss (JPXKQX, Jan 31, 2025)
e9d566d  Merge branch 'feature/move-extra-scalers' of https://github.com/ecmwf… (JPXKQX, Jan 31, 2025)
4b0810d  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 31, 2025)
9e7289e  rename (JPXKQX, Jan 31, 2025)
3a02e73  Merge branch 'feature/move-extra-scalers' of https://github.com/ecmwf… (JPXKQX, Jan 31, 2025)
06f7081  clean (JPXKQX, Jan 31, 2025)
7555ece  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 31, 2025)
5a7a3a3  more (JPXKQX, Jan 31, 2025)
a0861a1  Merge branch 'feature/move-extra-scalers' of https://github.com/ecmwf… (JPXKQX, Jan 31, 2025)
e34d207  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 31, 2025)
d6f2cab  add unit-norm for node_weights (JPXKQX, Jan 31, 2025)
4087814  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 31, 2025)
1f3ba85  fix config (JPXKQX, Jan 31, 2025)
9ca3f02  Merge branch 'feature/move-extra-scalers' of https://github.com/ecmwf… (JPXKQX, Jan 31, 2025)
081691b  support output_mask applied to scalers over spatial dims (JPXKQX, Jan 31, 2025)
09a462d  refactor from BaseLoss to FunctionalLoss (JPXKQX, Jan 31, 2025)
b4c76b0  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 31, 2025)
0d59731  rename (JPXKQX, Jan 31, 2025)
f16ba04  rrefactor LAM MSE (JPXKQX, Jan 31, 2025)
4c5e8d6  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 31, 2025)
e93e81c  more rename (JPXKQX, Jan 31, 2025)
ae61573  working (WIP) (JPXKQX, Jan 31, 2025)
8319cf3  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 31, 2025)
98be990  nan_mask refactored (working) (JPXKQX, Jan 31, 2025)
9a2b7ed  Merge branch 'feature/move-extra-scalers' of https://github.com/ecmwf… (JPXKQX, Jan 31, 2025)
aaa126a  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 31, 2025)
7957885  update configs (JPXKQX, Jan 31, 2025)
3a56ce3  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jan 31, 2025)
b6b44fa  update config (JPXKQX, Jan 31, 2025)
995a417  Merge branch 'feature/move-extra-scalers' of https://github.com/ecmwf… (JPXKQX, Jan 31, 2025)
b08989a  improve lam_wmse (JPXKQX, Feb 3, 2025)
6f2d887  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Feb 3, 2025)
ad7d533  make scale_dims more explicit (metaclass) (JPXKQX, Feb 3, 2025)
af75bdd  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Feb 3, 2025)
3d6d4d3  clean (JPXKQX, Feb 3, 2025)
4dbe3b5  optionals (JPXKQX, Feb 3, 2025)
871e3df  fix pre-commit (JPXKQX, Feb 3, 2025)
4 changes: 2 additions & 2 deletions training/docs/modules/losses.rst
@@ -5,7 +5,7 @@
This module is used to define the loss function used to train the model.

Anemoi-training exposes a couple of loss functions by default to be
-used, all of which are subclassed from ``BaseWeightedLoss``. This class
+used, all of which are subclassed from ``BaseLoss``. This class
enables scaler multiplication, and graph node weighting.

.. automodule:: anemoi.training.losses.weightedloss
@@ -110,7 +110,7 @@ By default, only `all` is kept in the normalised space and scaled.
***********************

Additionally, you can define your own loss function by subclassing
-``BaseWeightedLoss`` and implementing the ``forward`` method, or by
+``BaseLoss`` and implementing the ``forward`` method, or by
subclassing ``FunctionalWeightedLoss`` and implementing the
``calculate_difference`` function. The latter abstracts the scaling, and
node weighting, and allows you to just specify the difference
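As a concrete companion to the docs change above, a minimal sketch of a custom loss built the second way, by subclassing ``FunctionalWeightedLoss`` and implementing ``calculate_difference``. The class body is illustrative only; the exact base-class interface is not shown in this diff.

import torch

from anemoi.training.losses.weightedloss import FunctionalWeightedLoss


class WeightedL1Loss(FunctionalWeightedLoss):
    """Hypothetical node-weighted L1 loss."""

    name = "wl1"

    def calculate_difference(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        # Only the pointwise difference is specified here; scaling and
        # node weighting are handled by the base class.
        return torch.abs(pred - target)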
4 changes: 2 additions & 2 deletions training/src/anemoi/training/config/training/default.yaml
@@ -1,6 +1,6 @@
---
defaults:
-  - scalers: scalers
+  - scalers: global

# resume or fork a training from a checkpoint last.ckpt or specified in hardware.files.warm_start
run_id: null
@@ -54,7 +54,7 @@ training_loss:
  # A selection of available scalers is listed in training/scalers/scalers.yaml
  # '*' is a valid entry to use all `scalers` given; if a scaler is to be excluded,
  # add `!scaler_name`, e.g. ['*', '!scaler_1'], and `scaler_1` will not be added.
-  scalers: ['pressure_level', 'general_variable', 'nan_mask_weights']
+  scalers: ['pressure_level', 'general_variable', 'nan_mask_weights', 'node_weights']
  ignore_nans: False

loss_gradient_scaling: False
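For clarity, a standalone sketch of how the `'*'` / `'!scaler_name'` entries above are resolved; it mirrors the selection logic in `get_loss_function` shown later in this diff, and the helper name is illustrative:

def resolve_scalers(requested: list[str], available: dict[str, object]) -> list[str]:
    # '*' selects every available scaler that is not excluded via '!name'.
    if "*" in requested:
        return [name for name in available if f"!{name}" not in requested]
    # Explicit lists are taken as-is (unknown names raise in get_loss_function).
    return list(requested)


# Example: all scalers except 'scaler_1'.
resolve_scalers(["*", "!scaler_1"], {"scaler_1": None, "scaler_2": None})  # -> ['scaler_2']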
@@ -4,6 +4,9 @@ variable_groups:

# Several scalers can be added here. In order to be applied, their names must be included in the loss.
# The scaler name must be included in `scalers` in the losses for this to be applied.
+# All scalers need a `scale_dim` argument representing the dimension(s) on which they are applied:
+#   -1 : channels dimension (timesteps, variables, ...)
+#   -2 : grid dimension
builders:
  general_variable:
    # Variable groups definition for scaling by variable level.
@@ -49,10 +52,10 @@ builders:
    _target_: anemoi.training.losses.scaling.variable_tendency.VarTendencyScaler
    scale_dim: -1 # dimension on which scaling applied

-  # Scalers from node attributes
-  node_weights:
-    _target_: anemoi.training.losses.nodeweights.GraphNodeAttribute
-    target_nodes: ${graph.data}
-    node_attribute: area_weight
-    scale_dim: 2 # dimension on which scaling applied
+  # limited_area_mask
+  node_weights:
+    _target_: anemoi.training.losses.scaling.node_attributes.GraphNodeAttributeScaler
+    nodes_name: ${graph.data}
+    nodes_attribute_name: area_weight
+    apply_output_mask: True
+    scale_dim: -2 # dimension on which scaling applied
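For intuition about the `scale_dim` convention used by these builders, a small sketch assuming the loss-tensor layout (batch, ensemble, grid, variable) implied by the -1/-2 comments above; the shapes are illustrative:

import torch

out = torch.randn(2, 1, 40320, 98)          # (batch, ensemble, grid, variable)
area_weight = torch.rand(40320)             # node-attribute scaler, scale_dim: -2
var_weight = torch.rand(98)                 # per-variable scaler, scale_dim: -1

out = out * area_weight.view(1, 1, -1, 1)   # broadcast along the grid dimension
out = out * var_weight.view(1, 1, 1, -1)    # broadcast along the channel dimension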
@@ -0,0 +1,66 @@
variable_groups:
  default: sfc
  pl: [q, t, u, v, w, z]

# Several scalers can be added here. In order to be applied, their names must be included in the loss.
# The scaler name must be included in `scalers` in the losses for this to be applied.
# All scalers need a `scale_dim` argument representing the dimension(s) on which they are applied:
#   -1 : channels dimension (timesteps, variables, ...)
#   -2 : grid dimension
builders:
  general_variable:
    # Variable groups definition for scaling by variable level.
    # The variable level scaling methods are defined under additional_scalers.
    # A default group is required and is appended as prefix to the metric of all variables not assigned to a group.
    _target_: anemoi.training.losses.scaling.variable.GeneralVariableLossScaler
    scale_dim: -1 # dimension on which scaling applied
    weights:
      default: 1
      q: 0.6 #1
      t: 6 #1
      u: 0.8 #0.5
      v: 0.5 #0.33
      w: 0.001
      z: 12 #1
      sp: 10
      10u: 0.1
      10v: 0.1
      2d: 0.5
      tp: 0.025
      cp: 0.0025

  pressure_level:
    _target_: anemoi.training.losses.scaling.variable_level.ReluVariableLevelScaler
    group: pl
    y_intercept: 0.2
    slope: 0.001
    scale_dim: -1 # dimension on which scaling applied

  # mask NaNs with zeros in the loss function
  nan_mask_weights:
    _target_: anemoi.training.losses.scaling.loss_weights_mask.NaNMaskScaler
    scale_dim: (-2, -1) # dimensions on which scaling applied

  # tendency scalers
  # scale the prognostic losses by the stdev of the variable tendencies (e.g. the 6-hourly differences of the data)
  # useful if including slow- vs fast-evolving variables in the training (e.g. Land/Ocean vs Atmosphere)
  # if using this option, 'variable_loss_scalings' should all be set close to 1.0 for prognostic variables
  stdev_tendency:
    _target_: anemoi.training.losses.scaling.variable_tendency.StdevTendencyScaler
    scale_dim: -1 # dimension on which scaling applied
  var_tendency:
    _target_: anemoi.training.losses.scaling.variable_tendency.VarTendencyScaler
    scale_dim: -1 # dimension on which scaling applied

  # Scalers from node attributes
  node_weights:
    _target_: anemoi.training.losses.scaling.node_attributes.GraphNodeAttributeScaler
    nodes_name: ${graph.data}
    nodes_attribute_name: area_weight
    scale_dim: -2 # dimension on which scaling applied

  limited_area_mask:
    _target_: anemoi.training.losses.scaling.node_attributes.GraphNodeAttributeScaler
    nodes_name: ${graph.data}
    nodes_attribute_name: cutout_mask
    scale_dim: -2
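The `ReluVariableLevelScaler` above is configured only through `y_intercept` and `slope`; its implementation is not part of this diff. One plausible reading of those parameters, shown purely as an illustration, is a ReLU-shaped weight over the pressure level:

def relu_level_weight(level: float, y_intercept: float = 0.2, slope: float = 0.001) -> float:
    # Hypothetical form: e.g. level=850 -> max(0.2, 0.85) = 0.85; level=100 -> max(0.2, 0.1) = 0.2
    return max(y_intercept, slope * level)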
6 changes: 3 additions & 3 deletions training/src/anemoi/training/diagnostics/callbacks/plot.py
@@ -42,7 +42,7 @@
from anemoi.training.diagnostics.plots import plot_loss
from anemoi.training.diagnostics.plots import plot_power_spectrum
from anemoi.training.diagnostics.plots import plot_predicted_multilevel_flat_sample
-from anemoi.training.losses.weightedloss import BaseWeightedLoss
+from anemoi.training.losses.weightedloss import BaseLoss

if TYPE_CHECKING:
    from typing import Any
@@ -855,9 +855,9 @@ def _plot(
        )
        self.parameter_names = [self.parameter_names[i] for i in argsort_indices]

-        if not isinstance(pl_module.loss, BaseWeightedLoss):
+        if not isinstance(pl_module.loss, BaseLoss):
            LOGGER.warning(
-                "Loss function must be a subclass of BaseWeightedLoss, or provide `squash`.",
+                "Loss function must be a subclass of BaseLoss, or provide `squash`.",
                RuntimeWarning,
            )
4 changes: 2 additions & 2 deletions training/src/anemoi/training/losses/huber.py
@@ -13,12 +13,12 @@

import torch

-from anemoi.training.losses.weightedloss import BaseWeightedLoss
+from anemoi.training.losses.weightedloss import BaseLoss

LOGGER = logging.getLogger(__name__)


-class WeightedHuberLoss(BaseWeightedLoss):
+class WeightedHuberLoss(BaseLoss):
    """Node-weighted Huber loss."""

    name = "whuber"
4 changes: 2 additions & 2 deletions training/src/anemoi/training/losses/limitedarea.py
@@ -14,12 +14,12 @@

import torch

-from anemoi.training.losses.weightedloss import BaseWeightedLoss
+from anemoi.training.losses.weightedloss import BaseLoss

LOGGER = logging.getLogger(__name__)


-class WeightedMSELossLimitedArea(BaseWeightedLoss):
+class WeightedMSELossLimitedArea(BaseLoss):
    """Node-weighted MSE loss, calculated only within or outside the limited area.

    Further, the loss can be computed for the specified region (default),
13 changes: 6 additions & 7 deletions training/src/anemoi/training/losses/logcosh.py
@@ -42,7 +42,6 @@ class WeightedLogCoshLoss(BaseWeightedLoss):

    def __init__(
        self,
-        node_weights: torch.Tensor,
        ignore_nans: bool = False,
        **kwargs,
    ) -> None:
@@ -56,11 +55,7 @@ def __init__(
            Allow nans in the loss and apply methods ignoring nans for measuring the loss, by default False

        """
-        super().__init__(
-            node_weights=node_weights,
-            ignore_nans=ignore_nans,
-            **kwargs,
-        )
+        super().__init__(ignore_nans=ignore_nans, **kwargs)

    def forward(
        self,
@@ -94,4 +89,8 @@ def forward(
"""
out = LogCosh.apply(pred - target)
out = self.scale(out, scaler_indices, without_scalers=without_scalers)
return self.scale_by_node_weights(out, squash)

if squash:
out = self.avg_function(out, dim=-1)

return self.sum_function(out, dim=(0, 1, 2))
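The same reduction replaces `scale_by_node_weights` in the MAE and MSE losses below. As a standalone sketch, assuming `out` has shape (batch, ensemble, grid, variable) and that `avg_function`/`sum_function` resolve to `torch.mean`/`torch.sum` (or their nan-ignoring variants when `ignore_nans` is set):

import torch


def reduce_loss(out: torch.Tensor, squash: bool = True) -> torch.Tensor:
    if squash:
        out = torch.mean(out, dim=-1)     # average over the variable dimension
    # squash=True yields a scalar; squash=False yields one value per variable.
    return torch.sum(out, dim=(0, 1, 2))  # sum over batch, ensemble and grid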
83 changes: 83 additions & 0 deletions training/src/anemoi/training/losses/loss.py
@@ -0,0 +1,83 @@
# (C) Copyright 2025- Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
from __future__ import annotations

import logging

import torch
from hydra.utils import instantiate
from omegaconf import DictConfig
from omegaconf import OmegaConf

from anemoi.training.losses.weightedloss import BaseLoss

LOGGER = logging.getLogger(__name__)


# Future import breaks other type hints TODO Harrison Cook
def get_loss_function(
    config: DictConfig,
    scalers: dict[str, tuple[int | tuple[int, ...], torch.Tensor]] | None = None,
    **kwargs,
) -> BaseLoss | torch.nn.ModuleList:
    """Get loss functions from config.

    Can be a ModuleList if multiple losses are specified.

    Parameters
    ----------
    config : DictConfig
        Loss function configuration; should include `scalers` if scalers are to be added to the loss function.
    scalers : dict[str, tuple[int | tuple[int, ...], torch.Tensor]], optional
        Scalers which can be added to the loss function, by default None.
        If a scaler is to be added to the loss, ensure it is listed in `scalers` in the loss config.
        E.g. if `scalers: ['variable']` is set in the config and `variable` is a key of `scalers`,
        `variable` will be added to the scalers of the loss function.
    kwargs : Any
        Additional arguments to pass to the loss function

    Returns
    -------
    Union[BaseLoss, torch.nn.ModuleList]
        Loss function, or list of metrics

    Raises
    ------
    TypeError
        If not a subclass of `BaseLoss`
    ValueError
        If a scaler is not found in the valid scalers
    """
    config_container = OmegaConf.to_container(config, resolve=False)
    if isinstance(config_container, list):
        return torch.nn.ModuleList(
            [get_loss_function(OmegaConf.create(loss_config), scalers=scalers, **kwargs) for loss_config in config],
        )

    loss_config = OmegaConf.to_container(config, resolve=True)
    scalers_to_include = loss_config.pop("scalers", [])

    if "*" in scalers_to_include:
        scalers_to_include = [s for s in list(scalers.keys()) if f"!{s}" not in scalers_to_include]

    # Instantiate the loss function with the loss_init_config
    loss_function = instantiate(loss_config, **kwargs)

    if not isinstance(loss_function, BaseLoss):
        error_msg = f"Loss must be a subclass of 'BaseLoss', not {type(loss_function)}"
        raise TypeError(error_msg)

    for key in scalers_to_include:
        if key not in (scalers or {}):
            error_msg = f"Scaler {key!r} not found in valid scalers: {list((scalers or {}).keys())}"
            raise ValueError(error_msg)
        loss_function.add_scaler(*scalers[key], name=key)

    return loss_function
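A hypothetical usage sketch for `get_loss_function`, assuming each scaler tuple pairs a `scale_dim` with a weight tensor, matching the `add_scaler(*scalers[key], name=key)` call above; the grid size is illustrative:

import torch
from omegaconf import OmegaConf

from anemoi.training.losses.loss import get_loss_function

config = OmegaConf.create(
    {
        "_target_": "anemoi.training.losses.mse.WeightedMSELoss",
        "scalers": ["node_weights"],
    },
)
scalers = {"node_weights": (-2, torch.rand(40320))}  # (scale_dim, weights)
loss = get_loss_function(config, scalers=scalers)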
19 changes: 8 additions & 11 deletions training/src/anemoi/training/losses/mae.py
@@ -14,19 +14,18 @@

import torch

-from anemoi.training.losses.weightedloss import BaseWeightedLoss
+from anemoi.training.losses.weightedloss import BaseLoss

LOGGER = logging.getLogger(__name__)


-class WeightedMAELoss(BaseWeightedLoss):
+class WeightedMAELoss(BaseLoss):
    """Node-weighted MAE loss."""

    name = "wmae"

    def __init__(
        self,
-        node_weights: torch.Tensor,
        ignore_nans: bool = False,
        **kwargs,
    ) -> None:
@@ -36,17 +35,11 @@ def __init__(

        Parameters
        ----------
-        node_weights : torch.Tensor of shape (N, )
-            Weight of each node in the loss function
        ignore_nans : bool, optional
            Allow nans in the loss and apply methods ignoring nans for measuring the loss, by default False

        """
-        super().__init__(
-            node_weights=node_weights,
-            ignore_nans=ignore_nans,
-            **kwargs,
-        )
+        super().__init__(ignore_nans=ignore_nans, **kwargs)

    def forward(
        self,
@@ -80,4 +73,8 @@ def forward(
"""
out = torch.abs(pred - target)
out = self.scale(out, scaler_indices, without_scalers=without_scalers)
return self.scale_by_node_weights(out, squash)

if squash:
out = self.avg_function(out, dim=-1)

return self.sum_function(out, dim=(0, 1, 2))
19 changes: 8 additions & 11 deletions training/src/anemoi/training/losses/mse.py
@@ -14,37 +14,30 @@

import torch

-from anemoi.training.losses.weightedloss import BaseWeightedLoss
+from anemoi.training.losses.weightedloss import BaseLoss

LOGGER = logging.getLogger(__name__)


-class WeightedMSELoss(BaseWeightedLoss):
+class WeightedMSELoss(BaseLoss):
    """Node-weighted MSE loss."""

    name = "wmse"

    def __init__(
        self,
-        node_weights: torch.Tensor,
        ignore_nans: bool = False,
        **kwargs,
    ) -> None:
"""Node- and feature weighted MSE Loss.

Parameters
----------
node_weights : torch.Tensor of shape (N, )
Weight of each node in the loss function
ignore_nans : bool, optional
Allow nans in the loss and apply methods ignoring nans for measuring the loss, by default False

"""
super().__init__(
node_weights=node_weights,
ignore_nans=ignore_nans,
**kwargs,
)
super().__init__(ignore_nans=ignore_nans, **kwargs)

    def forward(
        self,
@@ -77,4 +70,8 @@ def forward(
"""
out = torch.square(pred - target)
out = self.scale(out, scaler_indices, without_scalers=without_scalers)
return self.scale_by_node_weights(out, squash)

if squash:
out = self.avg_function(out, dim=-1)

return self.sum_function(out, dim=(0, 1, 2))