@@ -10,14 +10,16 @@

from graphium.nn.base_layers import FCLayer, MLP

+
class EnsembleLinear(nn.Module):
-    def __init__(self,
-                 in_dim: int,
-                 out_dim: int,
-                 num_ensemble: int,
-                 bias: bool = True,
-                 init_fn: Optional[Callable] = None,
-                 ):
+    def __init__(
+        self,
+        in_dim: int,
+        out_dim: int,
+        num_ensemble: int,
+        bias: bool = True,
+        init_fn: Optional[Callable] = None,
+    ):
        r"""
        Multiple linear layers that are applied in parallel with batched matrix multiplication with `torch.matmul`.

@@ -38,7 +40,7 @@ def __init__(self,
        if bias:
            self.bias = nn.Parameter(torch.Tensor(num_ensemble, 1, out_dim))
        else:
-            self.register_parameter('bias', None)
+            self.register_parameter("bias", None)

        # Initialize parameters
        self.init_fn = init_fn if init_fn is not None else mupi.xavier_uniform_
@@ -79,6 +81,7 @@ def forward(self, h: torch.Tensor) -> torch.Tensor:

        return h

+
class EnsembleFCLayer(FCLayer):
    def __init__(
        self,
@@ -162,7 +165,9 @@ def __init__(

        # Linear layer, or MuReadout layer
        if not is_readout_layer:
-            self.linear = EnsembleLinear(in_dim, out_dim, num_ensemble=num_ensemble, bias=bias, init_fn=init_fn)
+            self.linear = EnsembleLinear(
+                in_dim, out_dim, num_ensemble=num_ensemble, bias=bias, init_fn=init_fn
+            )
        else:
            self.linear = EnsembleMuReadoutGraphium(in_dim, out_dim, bias=bias)

@@ -180,20 +185,23 @@ def __repr__(self):
        rep = rep[:-1] + f", num_ensemble={self.linear.weight.shape[0]})"
        return rep

+
class EnsembleMuReadoutGraphium(EnsembleLinear):
    """
    This layer implements an ensemble version of μP with a 1/width multiplier and a
    constant variance initialization for both weights and biases.
    """
-    def __init__(self,
-                 in_dim: int,
-                 out_dim: int,
-                 num_ensemble: int,
-                 bias: bool = True,
-                 init_fn: Optional[Callable] = None,
-                 readout_zero_init=False,
-                 output_mult=1.0
-                 ):
+
+    def __init__(
+        self,
+        in_dim: int,
+        out_dim: int,
+        num_ensemble: int,
+        bias: bool = True,
+        init_fn: Optional[Callable] = None,
+        readout_zero_init=False,
+        output_mult=1.0,
+    ):
        self.output_mult = output_mult
        self.readout_zero_init = readout_zero_init
        self.base_width = in_dim
@@ -214,35 +222,35 @@ def reset_parameters(self) -> None:
        super().reset_parameters()

    def width_mult(self):
-        assert hasattr(self.weight, 'infshape'), (
-            'Please call set_base_shapes(...). If using torch.nn.DataParallel, '
-            'switch to distributed training with '
-            'torch.nn.parallel.DistributedDataParallel instead'
+        assert hasattr(self.weight, "infshape"), (
+            "Please call set_base_shapes(...). If using torch.nn.DataParallel, "
+            "switch to distributed training with "
+            "torch.nn.parallel.DistributedDataParallel instead"
        )
        return self.weight.infshape.width_mult()

    def _rescale_parameters(self):
-        '''Rescale parameters to convert SP initialization to μP initialization.
+        """Rescale parameters to convert SP initialization to μP initialization.

        Warning: This method is NOT idempotent and should be called only once
        unless you know what you are doing.
-        '''
-        if hasattr(self, '_has_rescaled_params') and self._has_rescaled_params:
+        """
+        if hasattr(self, "_has_rescaled_params") and self._has_rescaled_params:
            raise RuntimeError(
                "`_rescale_parameters` has been called once before already. "
                "Unless you know what you are doing, usually you should not be calling `_rescale_parameters` more than once.\n"
                "If you called `set_base_shapes` on a model loaded from a checkpoint, "
                "or just want to re-set the base shapes of an existing model, "
                "make sure to set the flag `rescale_params=False`.\n"
-                "To bypass this error and *still rescale parameters*, set `self._has_rescaled_params=False` before this call.")
+                "To bypass this error and *still rescale parameters*, set `self._has_rescaled_params=False` before this call."
+            )
        if self.bias is not None:
-            self.bias.data *= self.width_mult()**0.5
-        self.weight.data *= self.width_mult()**0.5
+            self.bias.data *= self.width_mult() ** 0.5
+        self.weight.data *= self.width_mult() ** 0.5
        self._has_rescaled_params = True

    def forward(self, x):
-        return super().forward(
-            self.output_mult * x / self.width_mult())
+        return super().forward(self.output_mult * x / self.width_mult())

    @property
    def absolute_width(self):
@@ -361,7 +369,6 @@ def __init__(
            last_layer_is_readout=last_layer_is_readout,
            droppath_rate=droppath_rate,
            constant_droppath_rate=constant_droppath_rate,
-
        )

        self.reduction = self._parse_reduction(reduction)
@@ -415,4 +422,3 @@ def __repr__(self):
        """
        rep = super().__repr__()
        rep = rep[:-1] + f", num_ensemble={self.layers[0].linear.weight.shape[0]})"
-
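As an aside, here is a minimal sketch of the batched matrix multiplication that the `EnsembleLinear` docstring describes. The bias shape `(num_ensemble, 1, out_dim)` is taken from the diff above; the weight shape `(num_ensemble, in_dim, out_dim)` is an assumption inferred from the `torch.matmul` broadcasting, and the snippet is an illustration rather than the layer's actual forward code.

```python
import torch

# Illustrative only: a single batched matmul applies every ensemble member in parallel.
num_ensemble, batch, in_dim, out_dim = 4, 8, 16, 32
weight = torch.randn(num_ensemble, in_dim, out_dim)  # assumed weight layout
bias = torch.zeros(num_ensemble, 1, out_dim)         # bias shape as allocated above

h = torch.randn(num_ensemble, batch, in_dim)          # one input slice per ensemble member
out = torch.matmul(h, weight) + bias                  # -> (num_ensemble, batch, out_dim)
print(out.shape)  # torch.Size([4, 8, 32])
```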
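The μP readout hunks divide the input by `width_mult()` before the linear map, which gives the 1/width multiplier mentioned in the class docstring. Below is a back-of-the-envelope sketch of that scaling with a hypothetical helper, approximating `width_mult()` as `in_dim / base_width`; in the actual layer this value comes from mup's `set_base_shapes` via the weight's `infshape`.

```python
import torch


def mup_readout_scale(x: torch.Tensor, in_dim: int, base_width: int, output_mult: float = 1.0) -> torch.Tensor:
    # Hypothetical helper: approximates the input scaling done by EnsembleMuReadoutGraphium.forward,
    # where width_mult() would normally be read from the muP infshape rather than computed here.
    width_mult = in_dim / base_width
    return output_mult * x / width_mult


x = torch.randn(4, 8, 64)  # (num_ensemble, batch, in_dim)
scaled = mup_readout_scale(x, in_dim=64, base_width=64)  # width_mult == 1 at the base width
print(torch.allclose(scaled, x))  # True: no rescaling when the model is at its base width
```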