Commit 015373f

Tested the ensemble layers
1 parent 44f9eba commit 015373f

4 files changed: +274 −14 lines

graphium/nn/base_layers.py

Lines changed: 6 additions & 6 deletions
@@ -236,7 +236,7 @@ class MuReadoutGraphium(MuReadout):
 
     def __init__(self, in_features, *args, **kwargs):
         super().__init__(in_features, *args, **kwargs)
-        self.base_width = in_features
+        self._base_width = in_features
 
     @property
     def absolute_width(self):
@@ -442,7 +442,7 @@ def __init__(
         in_dim: int,
         hidden_dims: Union[Iterable[int], int],
         out_dim: int,
-        depth: int,
+        depth: Optional[int] = None,
         activation: Union[str, Callable] = "relu",
         last_activation: Union[str, Callable] = "none",
         dropout: float = 0.0,
@@ -530,12 +530,12 @@ def __init__(
 
         all_dims = [in_dim] + self.hidden_dims + [out_dim]
         fully_connected = []
-        if depth == 0:
+        if self.depth == 0:
            self.fully_connected = None
            return
        else:
-            for ii in range(depth):
-                if ii < (depth - 1):
+            for ii in range(self.depth):
+                if ii < (self.depth - 1):
                    # Define the parameters for all intermediate layers
                    this_activation = activation
                    this_normalization = normalization
@@ -551,7 +551,7 @@ def __init__(
            if constant_droppath_rate:
                this_drop_rate = droppath_rate
            else:
-                this_drop_rate = DropPath.get_stochastic_drop_rate(droppath_rate, ii, depth)
+                this_drop_rate = DropPath.get_stochastic_drop_rate(droppath_rate, ii, self.depth)
 
            # Add a fully-connected layer
            fully_connected.append(
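
Note on the first hunk: the constructor now writes to `self._base_width`, presumably because `base_width` is defined as a read-only property on these readout classes (the ensemble readout in the next file defines one explicitly), so assigning to `self.base_width` would collide with the property. A minimal sketch of the pattern; the `Readout` class here is illustrative only, not part of the codebase:

    # Hypothetical minimal example; only `_base_width` and the `base_width`
    # property mirror the diff above.
    class Readout:
        def __init__(self, in_features: int):
            # A plain `self.base_width = ...` would hit the read-only
            # property and raise AttributeError.
            self._base_width = in_features

        @property
        def base_width(self) -> float:
            return float(self._base_width)

The second change, `depth: Optional[int] = None`, lets `depth` be omitted when `hidden_dims` is given as a list; per the docstring in ensemble_layers.py below, `depth` must then be `None` or equal to `len(hidden_dims) + 1`, and the loop body reads `self.depth` so the value resolved from `hidden_dims` is used consistently.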

graphium/nn/ensemble_layers.py

Lines changed: 10 additions & 7 deletions
@@ -50,6 +50,7 @@ def reset_parameters(self):
         """
         Reset the parameters of the linear layer using the `init_fn`.
         """
+        set_base_shapes(self, None, rescale_params=False)  # Set the shapes of the tensors, useful for mup
         # Initialize weight using the provided initialization function
         self.init_fn(self.weight)
 
@@ -169,7 +170,7 @@ def __init__(
                 in_dim, out_dim, num_ensemble=num_ensemble, bias=bias, init_fn=init_fn
             )
         else:
-            self.linear = EnsembleMuReadoutGraphium(in_dim, out_dim, bias=bias)
+            self.linear = EnsembleMuReadoutGraphium(in_dim, out_dim, num_ensemble=num_ensemble, bias=bias)
 
         self.reset_parameters()
 
@@ -202,9 +203,10 @@ def __init__(
         readout_zero_init=False,
         output_mult=1.0,
     ):
+        self.in_dim = in_dim
         self.output_mult = output_mult
         self.readout_zero_init = readout_zero_init
-        self.base_width = in_dim
+        self._base_width = in_dim
         super().__init__(
             in_dim=in_dim,
             out_dim=out_dim,
@@ -254,7 +256,7 @@ def forward(self, x):
 
     @property
     def absolute_width(self):
-        return float(self.in_features)
+        return float(self.in_dim)
 
     @property
     def base_width(self):
@@ -279,8 +281,8 @@ def __init__(
         in_dim: int,
         hidden_dims: Union[Iterable[int], int],
         out_dim: int,
-        depth: int,
         num_ensemble: int,
+        depth: Optional[int] = None,
         reduction: Optional[Union[str, Callable]] = "none",
         activation: Union[str, Callable] = "relu",
         last_activation: Union[str, Callable] = "none",
@@ -304,13 +306,13 @@ def __init__(
                 or a list of dimensions in the hidden layers.
             out_dim:
                 Output dimension of the MLP.
+            num_ensemble:
+                Number of MLPs that run in parallel.
             depth:
                 If `hidden_dims` is an integer, `depth` is 1 + the number of
                 hidden layers to use.
                 If `hidden_dims` is a list, then
                 `depth` must be `None` or equal to `len(hidden_dims) + 1`
-            num_ensemble:
-                Number of MLPs that run in parallel.
             reduction:
                 Reduction to use at the end of the MLP. Choices:
@@ -358,7 +360,6 @@ def __init__(
             hidden_dims=hidden_dims,
             out_dim=out_dim,
             depth=depth,
-            num_ensemble=num_ensemble,
             activation=activation,
             last_activation=last_activation,
             dropout=dropout,
@@ -369,6 +370,8 @@ def __init__(
             last_layer_is_readout=last_layer_is_readout,
             droppath_rate=droppath_rate,
             constant_droppath_rate=constant_droppath_rate,
+            fc_layer=EnsembleFCLayer,
+            fc_layer_kwargs={"num_ensemble": num_ensemble},
         )
 
         self.reduction = self._parse_reduction(reduction)
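
The last two hunks change how the ensemble dimension reaches the layers: instead of forwarding `num_ensemble` through the parent constructor's signature, `EnsembleMLP` now passes `fc_layer=EnsembleFCLayer` and `fc_layer_kwargs={"num_ensemble": num_ensemble}` so the parent `MLP` builds ensemble layers itself. A hedged sketch of that factory-style plumbing; `MiniMLP` is illustrative only, not the real `MLP`:

    from typing import Any, Callable, Dict, List, Optional

    class MiniMLP:
        # The parent builds each layer from a configurable class, so a
        # subclass only supplies the class and its extra kwargs.
        def __init__(
            self,
            dims: List[int],
            fc_layer: Callable[..., Any],
            fc_layer_kwargs: Optional[Dict[str, Any]] = None,
        ):
            fc_layer_kwargs = fc_layer_kwargs or {}
            # Each layer gets the shared kwargs, e.g. {"num_ensemble": 3}
            self.fully_connected = [
                fc_layer(d_in, d_out, **fc_layer_kwargs)
                for d_in, d_out in zip(dims[:-1], dims[1:])
            ]

The earlier hunks fix the readout path to match: `EnsembleMuReadoutGraphium` now receives `num_ensemble`, stores `in_dim`, and reports `absolute_width` from `self.in_dim` rather than `self.in_features`, matching the ensemble layer's attribute names.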

graphium/utils/spaces.py

Lines changed: 2 additions & 1 deletion
@@ -4,6 +4,7 @@
 import torchmetrics.functional as TorchMetrics
 
 import graphium.nn.base_layers as BaseLayers
+import graphium.nn.ensemble_layers as EnsembleLayers
 from graphium.nn.architectures import FeedForwardNN, FeedForwardPyg, TaskHeads
 import graphium.utils.custom_lr as CustomLR
 import graphium.data.datamodule as Datamodules
@@ -28,7 +29,7 @@
 }
 
 ENSEMBLE_FC_LAYERS_DICT = {
-    "ens-fc": BaseLayers.EnsembleFCLayer,
+    "ens-fc": EnsembleLayers.EnsembleFCLayer,
 }
 
 PYG_LAYERS_DICT = {
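
`EnsembleFCLayer` lives in `graphium.nn.ensemble_layers`, not `base_layers`, so the old registry entry would fail to resolve; the new import fixes the lookup. A short usage sketch, assuming configs resolve layer classes through these dicts (the constructor call mirrors the tests below):

    from graphium.utils.spaces import ENSEMBLE_FC_LAYERS_DICT

    layer_cls = ENSEMBLE_FC_LAYERS_DICT["ens-fc"]  # -> EnsembleLayers.EnsembleFCLayer
    layer = layer_cls(11, 5, num_ensemble=3)       # same arguments as in the tests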

tests/test_ensemble_layers.py

Lines changed: 256 additions & 0 deletions
@@ -0,0 +1,256 @@
+"""
+Unit tests for the different layers of graphium/nn/ensemble_layers
+"""
+
+import numpy as np
+import torch
+from torch.nn import Linear
+import unittest as ut
+
+from graphium.nn.base_layers import FCLayer, MLP
+from graphium.nn.ensemble_layers import EnsembleLinear, EnsembleFCLayer, EnsembleMLP, EnsembleMuReadoutGraphium
+
+
+class test_Ensemble_Layers(ut.TestCase):
+
+    # Check that EnsembleLinear produces the right output shape and matches
+    # a set of independent `Linear` layers with the same weights
+    def check_ensemble_linear(self, in_dim: int, out_dim: int, num_ensemble: int, batch_size: int, more_batch_dim: int):
+
+        msg = f"Testing EnsembleLinear with in_dim={in_dim}, out_dim={out_dim}, num_ensemble={num_ensemble}, batch_size={batch_size}, more_batch_dim={more_batch_dim}"
+
+        # Create EnsembleLinear instance
+        ensemble_linear = EnsembleLinear(in_dim, out_dim, num_ensemble)
+
+        # Create equivalent separate Linear layers with synchronized weights and biases
+        linear_layers = [Linear(in_dim, out_dim) for _ in range(num_ensemble)]
+        for i, linear_layer in enumerate(linear_layers):
+            linear_layer.weight.data = ensemble_linear.weight.data[i]
+            if ensemble_linear.bias is not None:
+                linear_layer.bias.data = ensemble_linear.bias.data[i].squeeze()
+
+        # Test with a sample input
+        input_tensor = torch.randn(batch_size, in_dim)
+        ensemble_output = ensemble_linear(input_tensor)
+
+        # Check for the output shape
+        self.assertEqual(ensemble_output.shape, (num_ensemble, batch_size, out_dim), msg=msg)
+
+        # Make sure that the outputs of the individual layers are the same as the ensemble output
+        for i, linear_layer in enumerate(linear_layers):
+            individual_output = linear_layer(input_tensor)
+            individual_output = individual_output.detach().numpy()
+            ensemble_output_i = ensemble_output[i].detach().numpy()
+            np.testing.assert_allclose(ensemble_output_i, individual_output, atol=1e-5, err_msg=msg)
+
+        # Test with a sample input with the extra `num_ensemble` and `more_batch_dim` dimensions
+        if more_batch_dim:
+            out_shape = (more_batch_dim, num_ensemble, batch_size, out_dim)
+            input_tensor = torch.randn(more_batch_dim, num_ensemble, batch_size, in_dim)
+        else:
+            out_shape = (num_ensemble, batch_size, out_dim)
+            input_tensor = torch.randn(num_ensemble, batch_size, in_dim)
+        ensemble_output = ensemble_linear(input_tensor)
+
+        # Check for the output shape
+        self.assertEqual(ensemble_output.shape, out_shape, msg=msg)
+
+        # Make sure that the outputs of the individual layers are the same as the ensemble output
+        for i, linear_layer in enumerate(linear_layers):
+            if more_batch_dim:
+                individual_output = linear_layer(input_tensor[:, i])
+                ensemble_output_i = ensemble_output[:, i]
+            else:
+                individual_output = linear_layer(input_tensor[i])
+                ensemble_output_i = ensemble_output[i]
+            individual_output = individual_output.detach().numpy()
+            ensemble_output_i = ensemble_output_i.detach().numpy()
+            np.testing.assert_allclose(ensemble_output_i, individual_output, atol=1e-5, err_msg=msg)
+
+    def test_ensemble_linear(self):
+        # more_batch_dim=0
+        self.check_ensemble_linear(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=0)
+        self.check_ensemble_linear(in_dim=11, out_dim=5, num_ensemble=3, batch_size=1, more_batch_dim=0)
+        self.check_ensemble_linear(in_dim=11, out_dim=5, num_ensemble=1, batch_size=13, more_batch_dim=0)
+
+        # more_batch_dim=1
+        self.check_ensemble_linear(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=1)
+        self.check_ensemble_linear(in_dim=11, out_dim=5, num_ensemble=3, batch_size=1, more_batch_dim=1)
+        self.check_ensemble_linear(in_dim=11, out_dim=5, num_ensemble=1, batch_size=13, more_batch_dim=1)
+
+        # more_batch_dim=7
+        self.check_ensemble_linear(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=7)
+        self.check_ensemble_linear(in_dim=11, out_dim=5, num_ensemble=3, batch_size=1, more_batch_dim=7)
+        self.check_ensemble_linear(in_dim=11, out_dim=5, num_ensemble=1, batch_size=13, more_batch_dim=7)
+
+    # Check that EnsembleFCLayer matches independent FCLayer instances
+    def check_ensemble_fclayer(self, in_dim: int, out_dim: int, num_ensemble: int, batch_size: int, more_batch_dim: int, is_readout_layer=False):
+
+        msg = f"Testing EnsembleFCLayer with in_dim={in_dim}, out_dim={out_dim}, num_ensemble={num_ensemble}, batch_size={batch_size}, more_batch_dim={more_batch_dim}"
+
+        # Create EnsembleFCLayer instance
+        ensemble_fclayer = EnsembleFCLayer(in_dim, out_dim, num_ensemble, is_readout_layer=is_readout_layer)
+
+        # Create equivalent separate FCLayer layers with synchronized weights and biases
+        fc_layers = [FCLayer(in_dim, out_dim, is_readout_layer=is_readout_layer) for _ in range(num_ensemble)]
+        for i, fc_layer in enumerate(fc_layers):
+            fc_layer.linear.weight.data = ensemble_fclayer.linear.weight.data[i]
+            if ensemble_fclayer.bias is not None:
+                fc_layer.linear.bias.data = ensemble_fclayer.linear.bias.data[i].squeeze()
+
+        # Test with a sample input
+        input_tensor = torch.randn(batch_size, in_dim)
+        ensemble_output = ensemble_fclayer(input_tensor)
+
+        # Check for the output shape
+        self.assertEqual(ensemble_output.shape, (num_ensemble, batch_size, out_dim), msg=msg)
+
+        # Make sure that the outputs of the individual layers are the same as the ensemble output
+        for i, fc_layer in enumerate(fc_layers):
+            individual_output = fc_layer(input_tensor)
+            individual_output = individual_output.detach().numpy()
+            ensemble_output_i = ensemble_output[i].detach().numpy()
+            np.testing.assert_allclose(ensemble_output_i, individual_output, atol=1e-5, err_msg=msg)
+
+        # Test with a sample input with the extra `num_ensemble` and `more_batch_dim` dimensions
+        if more_batch_dim:
+            out_shape = (more_batch_dim, num_ensemble, batch_size, out_dim)
+            input_tensor = torch.randn(more_batch_dim, num_ensemble, batch_size, in_dim)
+        else:
+            out_shape = (num_ensemble, batch_size, out_dim)
+            input_tensor = torch.randn(num_ensemble, batch_size, in_dim)
+        ensemble_output = ensemble_fclayer(input_tensor)
+
+        # Check for the output shape
+        self.assertEqual(ensemble_output.shape, out_shape, msg=msg)
+
+        # Make sure that the outputs of the individual layers are the same as the ensemble output
+        for i, fc_layer in enumerate(fc_layers):
+            if more_batch_dim:
+                individual_output = fc_layer(input_tensor[:, i])
+                ensemble_output_i = ensemble_output[:, i]
+            else:
+                individual_output = fc_layer(input_tensor[i])
+                ensemble_output_i = ensemble_output[i]
+            individual_output = individual_output.detach().numpy()
+            ensemble_output_i = ensemble_output_i.detach().numpy()
+            np.testing.assert_allclose(ensemble_output_i, individual_output, atol=1e-5, err_msg=msg)
+
+    def test_ensemble_fclayer(self):
+        # more_batch_dim=0
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=0)
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=3, batch_size=1, more_batch_dim=0)
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=1, batch_size=13, more_batch_dim=0)
+
+        # more_batch_dim=1
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=1)
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=3, batch_size=1, more_batch_dim=1)
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=1, batch_size=13, more_batch_dim=1)
+
+        # more_batch_dim=7
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=7)
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=3, batch_size=1, more_batch_dim=7)
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=1, batch_size=13, more_batch_dim=7)
+
+        # Test `is_readout_layer`
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=0, is_readout_layer=True)
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=1, is_readout_layer=True)
+        self.check_ensemble_fclayer(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=7, is_readout_layer=True)
+
+    # Check that EnsembleMLP matches independent MLP instances
+    def check_ensemble_mlp(self, in_dim: int, out_dim: int, num_ensemble: int, batch_size: int, more_batch_dim: int, last_layer_is_readout=False):
+
+        msg = f"Testing EnsembleMLP with in_dim={in_dim}, out_dim={out_dim}, num_ensemble={num_ensemble}, batch_size={batch_size}, more_batch_dim={more_batch_dim}"
+
+        # Create EnsembleMLP instance
+        hidden_dims = [17, 17, 17]
+        ensemble_mlp = EnsembleMLP(in_dim, hidden_dims, out_dim, num_ensemble, last_layer_is_readout=last_layer_is_readout)
+
+        # Create equivalent separate MLPs with synchronized weights and biases
+        mlps = [MLP(in_dim, hidden_dims, out_dim, last_layer_is_readout=last_layer_is_readout) for _ in range(num_ensemble)]
+        for i, mlp in enumerate(mlps):
+            for j, layer in enumerate(mlp.fully_connected):
+                layer.linear.weight.data = ensemble_mlp.fully_connected[j].linear.weight.data[i]
+                if layer.bias is not None:
+                    layer.linear.bias.data = ensemble_mlp.fully_connected[j].linear.bias.data[i].squeeze()
+
+        # Test with a sample input
+        input_tensor = torch.randn(batch_size, in_dim)
+        ensemble_output = ensemble_mlp(input_tensor)
+
+        # Check for the output shape
+        self.assertEqual(ensemble_output.shape, (num_ensemble, batch_size, out_dim), msg=msg)
+
+        # Make sure that the outputs of the individual MLPs are the same as the ensemble output
+        for i, mlp in enumerate(mlps):
+            individual_output = mlp(input_tensor)
+            individual_output = individual_output.detach().numpy()
+            ensemble_output_i = ensemble_output[i].detach().numpy()
+            np.testing.assert_allclose(ensemble_output_i, individual_output, atol=1e-5, err_msg=msg)
+
+        # Test with a sample input with the extra `num_ensemble` and `more_batch_dim` dimensions
+        if more_batch_dim:
+            out_shape = (more_batch_dim, num_ensemble, batch_size, out_dim)
+            input_tensor = torch.randn(more_batch_dim, num_ensemble, batch_size, in_dim)
+        else:
+            out_shape = (num_ensemble, batch_size, out_dim)
+            input_tensor = torch.randn(num_ensemble, batch_size, in_dim)
+        ensemble_output = ensemble_mlp(input_tensor)
+
+        # Check for the output shape
+        self.assertEqual(ensemble_output.shape, out_shape, msg=msg)
+
+        # Make sure that the outputs of the individual MLPs are the same as the ensemble output
+        for i, mlp in enumerate(mlps):
+            if more_batch_dim:
+                individual_output = mlp(input_tensor[:, i])
+                ensemble_output_i = ensemble_output[:, i]
+            else:
+                individual_output = mlp(input_tensor[i])
+                ensemble_output_i = ensemble_output[i]
+            individual_output = individual_output.detach().numpy()
+            ensemble_output_i = ensemble_output_i.detach().numpy()
+            np.testing.assert_allclose(ensemble_output_i, individual_output, atol=1e-5, err_msg=msg)
+
+    def test_ensemble_mlp(self):
+        # more_batch_dim=0
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=0)
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=3, batch_size=1, more_batch_dim=0)
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=1, batch_size=13, more_batch_dim=0)
+
+        # more_batch_dim=1
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=1)
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=3, batch_size=1, more_batch_dim=1)
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=1, batch_size=13, more_batch_dim=1)
+
+        # more_batch_dim=7
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=7)
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=3, batch_size=1, more_batch_dim=7)
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=1, batch_size=13, more_batch_dim=7)
+
+        # Test `last_layer_is_readout`
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=0, last_layer_is_readout=True)
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=1, last_layer_is_readout=True)
+        self.check_ensemble_mlp(in_dim=11, out_dim=5, num_ensemble=3, batch_size=13, more_batch_dim=7, last_layer_is_readout=True)
+
+
+if __name__ == "__main__":
+    ut.main()
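
Taken together, the tests pin down the broadcasting contract shared by all three ensemble layers. A compact restatement of the shapes the assertions check (a sketch derived from the tests, not separate API documentation):

    import torch
    from graphium.nn.ensemble_layers import EnsembleLinear

    ens = EnsembleLinear(11, 5, 3)  # in_dim=11, out_dim=5, num_ensemble=3

    # 2D input: one batch shared by all ensemble members
    assert ens(torch.randn(13, 11)).shape == (3, 13, 5)

    # 3D input: one batch per ensemble member
    assert ens(torch.randn(3, 13, 11)).shape == (3, 13, 5)

    # 4D input: extra leading batch dimension (`more_batch_dim`)
    assert ens(torch.randn(7, 3, 13, 11)).shape == (7, 3, 13, 5)

Since the file calls `ut.main()`, the suite can be run directly with `python tests/test_ensemble_layers.py` or through pytest.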
