Commit ad5ff03

michaeldeistler authored and janfb committed
Match API between BNRE and NRE-C
1 parent 0ae6126 commit ad5ff03

6 files changed · +102 −38 lines changed

sbi/inference/snre/bnre.py

Lines changed: 67 additions & 16 deletions
@@ -1,4 +1,4 @@
-from typing import Callable, Optional, Union
+from typing import Callable, Dict, Optional, Union
 
 import torch
 from torch import Tensor, nn, ones
@@ -18,14 +18,14 @@ def __init__(
         logging_level: Union[int, str] = "warning",
         summary_writer: Optional[TensorboardSummaryWriter] = None,
         show_progress_bars: bool = True,
-        regularization_strength: float = 100.0,
     ):
 
-        r"""Balanced neural ratio estimation (BNRE)[1]. BNRE is a variation of NRE aiming to
-        produce more conservative posterior approximations
+        r"""Balanced neural ratio estimation (BNRE)[1]. BNRE is a variation of NRE
+        aiming to produce more conservative posterior approximations
 
         [1] Delaunoy, A., Hermans, J., Rozet, F., Wehenkel, A., & Louppe, G..
-        Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation.
+        Towards Reliable Simulation-Based Inference with Balanced Neural Ratio
+        Estimation.
         NeurIPS 2022. https://arxiv.org/abs/2208.13624
 
         Args:
@@ -36,27 +36,78 @@ def __init__(
                 a string, use a pre-configured network of the provided type (one of
                 linear, mlp, resnet). Alternatively, a function that builds a custom
                 neural network can be provided. The function will be called with the
-                first batch of simulations $(\theta, x)$, which can thus be used for shape
-                inference and potentially for z-scoring. It needs to return a PyTorch
-                `nn.Module` implementing the classifier.
+                first batch of simulations $(\theta, x)$, which can thus be used for
+                shape inference and potentially for z-scoring. It needs to return a
+                PyTorch `nn.Module` implementing the classifier.
             device: Training device, e.g., "cpu", "cuda" or "cuda:{0, 1, ...}".
             logging_level: Minimum severity of messages to log. One of the strings
                 INFO, WARNING, DEBUG, ERROR and CRITICAL.
             summary_writer: A tensorboard `SummaryWriter` to control, among others, log
                 file location (default is `<current working directory>/logs`.)
             show_progress_bars: Whether to show a progressbar during simulation and
                 sampling.
-            regularization_strength: The multiplicative coefficient applied to the
-                balancing regularizer ($\lambda$)
         """
 
-        self.regularization_strength = regularization_strength
-        kwargs = del_entries(
-            locals(), entries=("self", "__class__", "regularization_strength")
-        )
+        kwargs = del_entries(locals(), entries=("self", "__class__"))
         super().__init__(**kwargs)
 
-    def _loss(self, theta: Tensor, x: Tensor, num_atoms: int) -> Tensor:
+    def train(
+        self,
+        regularization_strength: float = 100.0,
+        training_batch_size: int = 50,
+        learning_rate: float = 5e-4,
+        validation_fraction: float = 0.1,
+        stop_after_epochs: int = 20,
+        max_num_epochs: int = 2**31 - 1,
+        clip_max_norm: Optional[float] = 5.0,
+        resume_training: bool = False,
+        discard_prior_samples: bool = False,
+        retrain_from_scratch: bool = False,
+        show_train_summary: bool = False,
+        dataloader_kwargs: Optional[Dict] = None,
+    ) -> nn.Module:
+        r"""Return classifier that approximates the ratio $p(\theta,x)/p(\theta)p(x)$.
+
+        Args:
+            regularization_strength: The multiplicative coefficient applied to the
+                balancing regularizer ($\lambda$).
+            training_batch_size: Training batch size.
+            learning_rate: Learning rate for Adam optimizer.
+            validation_fraction: The fraction of data to use for validation.
+            stop_after_epochs: The number of epochs to wait for improvement on the
+                validation set before terminating training.
+            max_num_epochs: Maximum number of epochs to run. If reached, we stop
+                training even when the validation loss is still decreasing. Otherwise,
+                we train until validation loss increases (see also `stop_after_epochs`).
+            clip_max_norm: Value at which to clip the total gradient norm in order to
+                prevent exploding gradients. Use None for no clipping.
+            exclude_invalid_x: Whether to exclude simulation outputs `x=NaN` or `x=±∞`
+                during training. Expect errors, silent or explicit, when `False`.
+            resume_training: Can be used in case training time is limited, e.g. on a
+                cluster. If `True`, the split between train and validation set, the
+                optimizer, the number of epochs, and the best validation log-prob will
+                be restored from the last time `.train()` was called.
+            discard_prior_samples: Whether to discard samples simulated in round 1, i.e.
+                from the prior. Training may be sped up by ignoring such less targeted
+                samples.
+            retrain_from_scratch: Whether to retrain the conditional density
+                estimator for the posterior from scratch each round.
+            show_train_summary: Whether to print the number of epochs and validation
+                loss and leakage after the training.
+            dataloader_kwargs: Additional or updated kwargs to be passed to the training
+                and validation dataloaders (like, e.g., a collate_fn)
+        Returns:
+            Classifier that approximates the ratio $p(\theta,x)/p(\theta)p(x)$.
+        """
+        kwargs = del_entries(locals(), entries=("self", "__class__"))
+        kwargs["loss_kwargs"] = {
+            "regularization_strength": kwargs.pop("regularization_strength")
+        }
+        return super().train(**kwargs)
+
+    def _loss(
+        self, theta: Tensor, x: Tensor, num_atoms: int, regularization_strength: float
+    ) -> Tensor:
         """Returns the binary cross-entropy loss for the trained classifier.
 
         The classifier takes as input a $(\theta,x)$ pair. It is trained to predict 1
@@ -87,4 +138,4 @@ def _loss(self, theta: Tensor, x: Tensor, num_atoms: int) -> Tensor:
             .square()
         )
 
-        return bce + self.regularization_strength * regularizer
+        return bce + regularization_strength * regularizer
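
To make the API change concrete, here is a minimal usage sketch of BNRE after this commit, mirroring the updated tutorial cell further down: `regularization_strength` is no longer passed to the constructor but to `.train()`. The toy `prior` and `simulator` below are illustrative assumptions, not part of this diff.

import torch
from sbi.inference import BNRE
from sbi.utils import BoxUniform

# Toy setup (assumed for illustration; any prior/simulator works the same way).
prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))

def simulator(theta):
    # Toy Gaussian simulator.
    return theta + 0.1 * torch.randn_like(theta)

theta = prior.sample((1000,))
x = simulator(theta)

inference = BNRE(prior)  # no `regularization_strength` in __init__ anymore
_ = inference.append_simulations(theta, x).train(
    regularization_strength=100.0  # now a `.train()` argument (default 100.0)
)
posterior = inference.build_posterior()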

sbi/inference/snre/snre_a.py

Lines changed: 3 additions & 1 deletion
@@ -1,4 +1,4 @@
-from typing import Callable, Dict, Optional, Union
+from typing import Any, Callable, Dict, Optional, Union
 
 import torch
 from torch import Tensor, nn, ones
@@ -60,6 +60,7 @@ def train(
         retrain_from_scratch: bool = False,
         show_train_summary: bool = False,
         dataloader_kwargs: Optional[Dict] = None,
+        loss_kwargs: Dict[str, Any] = {},
     ) -> nn.Module:
         r"""Return classifier that approximates the ratio $p(\theta,x)/p(\theta)p(x)$.
 
@@ -87,6 +88,7 @@ def train(
                 loss and leakage after the training.
             dataloader_kwargs: Additional or updated kwargs to be passed to the training
                 and validation dataloaders (like, e.g., a collate_fn)
+            loss_kwargs: Additional or updated kwargs to be passed to the self._loss fn.
 
         Returns:
             Classifier that approximates the ratio $p(\theta,x)/p(\theta)p(x)$.
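
The new `loss_kwargs` argument is what lets a subclass expose loss-specific options on `train()` instead of `__init__`. Below is a stripped-down sketch of that pattern with toy classes (not sbi's actual implementation): the base trainer simply unpacks `loss_kwargs` into `self._loss`, exactly as BNRE does above with `regularization_strength`.

from typing import Any, Dict

import torch
from torch import Tensor


class ToyRatioEstimator:
    """Stand-in for the base trainer with the new `loss_kwargs` argument."""

    def train(self, loss_kwargs: Dict[str, Any] = {}) -> Tensor:
        theta, x = torch.randn(8, 2), torch.randn(8, 3)  # dummy batch
        # Extra keyword arguments are forwarded straight to the loss.
        return self._loss(theta, x, num_atoms=2, **loss_kwargs)

    def _loss(self, theta: Tensor, x: Tensor, num_atoms: int) -> Tensor:
        return torch.zeros(())  # placeholder for the BCE term


class ToyBNRE(ToyRatioEstimator):
    def train(self, regularization_strength: float = 100.0) -> Tensor:
        # Pack the loss-specific option into `loss_kwargs`, as BNRE.train does.
        return super().train(
            loss_kwargs={"regularization_strength": regularization_strength}
        )

    def _loss(self, theta, x, num_atoms, regularization_strength: float) -> Tensor:
        bce = super()._loss(theta, x, num_atoms)
        regularizer = torch.zeros(())  # placeholder for the balancing term
        return bce + regularization_strength * regularizer


loss = ToyBNRE().train(regularization_strength=20.0)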

sbi/inference/snre/snre_base.py

Lines changed: 0 additions & 1 deletion
@@ -6,7 +6,6 @@
 from torch import Tensor, eye, nn, ones, optim
 from torch.distributions import Distribution
 from torch.nn.utils.clip_grad import clip_grad_norm_
-from torch.utils import data
 from torch.utils.tensorboard.writer import SummaryWriter
 
 from sbi import utils as utils

sbi/inference/snre/snre_c.py

Lines changed: 22 additions & 14 deletions
@@ -6,7 +6,7 @@
 
 from sbi.inference.snre.snre_base import RatioEstimator
 from sbi.types import TensorboardSummaryWriter
-from sbi.utils import del_entries, repeat_rows
+from sbi.utils import del_entries
 
 
 class SNRE_C(RatioEstimator):
@@ -85,9 +85,10 @@ def train(
                 `num_atoms` for SNRE_B except SNRE_C has an additional independently
                 drawn sample. The total number of alternative parameters `NRE-C` "sees"
                 is $2K-1$ or `2 * num_classes - 1` divided between two loss terms.
-            gamma: Determines the relative weight of the sum of all $K$ dependently drawn
-                classes against the marginally drawn one. Specifically, $p(y=k) := p_K$,
-                $p(y=0) := p_0$, $p_0 = 1 - K p_K$, and finally $\gamma := K p_K / p_0$.
+            gamma: Determines the relative weight of the sum of all $K$ dependently
+                drawn classes against the marginally drawn one. Specifically,
+                $p(y=k) := p_K$, $p(y=0) := p_0$, $p_0 = 1 - K p_K$, and finally
+                $\gamma := K p_K / p_0$.
             training_batch_size: Training batch size.
             learning_rate: Learning rate for Adam optimizer.
             validation_fraction: The fraction of data to use for validation.
@@ -125,26 +126,31 @@ def train(
125126
def _loss(
126127
self, theta: Tensor, x: Tensor, num_atoms: int, gamma: float
127128
) -> torch.Tensor:
128-
r"""Return cross-entropy loss (via ''multi-class sigmoid'' activation) for 1-out-of-`K + 1` classification.
129+
r"""Return cross-entropy loss (via ''multi-class sigmoid'' activation) for
130+
1-out-of-`K + 1` classification.
129131
130-
At optimum, this loss function returns the exact likelihood-to-evidence ratio in the first round.
131-
Details of loss computation are described in Contrastive Neural Ratio Estimation[1]. The paper
132-
does not discuss the sequential case.
132+
At optimum, this loss function returns the exact likelihood-to-evidence ratio
133+
in the first round.
134+
Details of loss computation are described in Contrastive Neural Ratio
135+
Estimation[1]. The paper does not discuss the sequential case.
133136
134137
[1] _Contrastive Neural Ratio Estimation_, Benajmin Kurt Miller, et. al.,
135138
NeurIPS 2022, https://arxiv.org/abs/2210.06170
136139
"""
137140

138141
# Reminder: K = num_classes
139-
# The algorithm is written with K, so we convert back to K format rather than reasoning in num_atoms.
142+
# The algorithm is written with K, so we convert back to K format rather than
143+
# reasoning in num_atoms.
140144
num_classes = num_atoms - 1
141145
assert num_classes >= 1, f"num_classes = {num_classes} must be greater than 1."
142146

143147
assert theta.shape[0] == x.shape[0], "Batch sizes for theta and x must match."
144148
batch_size = theta.shape[0]
145149

146-
# We append an contrastive theta to the marginal case because we will remove the jointly drawn
147-
# sample in the logits_marginal[:, 0] position. That makes the remaining sample marginally drawn.
150+
# We append an contrastive theta to the marginal case because we will remove
151+
# the jointly drawn
152+
# sample in the logits_marginal[:, 0] position. That makes the remaining sample
153+
# marginally drawn.
148154
# We have a batch of `batch_size` datapoints.
149155
logits_marginal = self._classifier_logits(theta, x, num_classes + 1).reshape(
150156
batch_size, num_classes + 1
@@ -191,10 +197,12 @@ def _loss(
     def _get_prior_probs_marginal_and_joint(
         num_classes: int, gamma: float
     ) -> Tuple[float, float]:
-        """Return a tuple (p_marginal, p_joint) where `p_marginal := `$p_0$, `p_joint := `$p_K$.
+        """Return a tuple (p_marginal, p_joint) where `p_marginal := `$p_0$,
+        `p_joint := `$p_K$.
 
-        We let the joint (dependently drawn) class to be equally likely across K options.
-        The marginal class is therefore restricted to get the remaining probability.
+        We let the joint (dependently drawn) class to be equally likely across K
+        options. The marginal class is therefore restricted to get the remaining
+        probability.
         """
         assert num_classes >= 1
         p_joint = gamma / (1 + gamma * num_classes)
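
As a quick sanity check on the class-prior bookkeeping in the `gamma` docstring above ($p(y=0) := p_0$, $p(y=k) := p_K$, $p_0 = 1 - K p_K$): the rest of this helper lies outside the hunk, so the snippet below reconstructs `p_marginal` from that docstring relation rather than quoting sbi's code, and only verifies that the probabilities normalize.

def prior_probs_marginal_and_joint(num_classes: int, gamma: float):
    # `p_joint` is the line visible in the diff; `p_marginal` is recovered from
    # the docstring relation p_0 = 1 - K * p_K (an assumption, not sbi's code).
    assert num_classes >= 1
    p_joint = gamma / (1 + gamma * num_classes)
    p_marginal = 1 - num_classes * p_joint
    return p_marginal, p_joint


p_marginal, p_joint = prior_probs_marginal_and_joint(num_classes=5, gamma=1.0)
assert abs(p_marginal + 5 * p_joint - 1.0) < 1e-12  # p_0 + K * p_K == 1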

tests/linearGaussian_snre_test.py

Lines changed: 6 additions & 2 deletions
@@ -195,20 +195,24 @@ def simulator(theta):
 
     if method_str == "sre":
         inference = SNRE_B(**kwargs)
+        train_kwargs = {}
     elif method_str == "aalr":
         inference = AALR(**kwargs)
+        train_kwargs = {}
    elif method_str == "bnre":
-        inference = BNRE(regularization_strength=20, **kwargs)
+        inference = BNRE(**kwargs)
+        train_kwargs = {"regularization_strength": 20}
     elif method_str == "nrec":
         inference = SNRE_C(**kwargs)
+        train_kwargs = {}
     else:
         raise ValueError(f"{method_str} is not an allowed option")
 
     # Should use default `num_atoms=10` for SRE; `num_atoms=2` for AALR
     theta, x = simulate_for_sbi(
         simulator, prior, num_simulations, simulation_batch_size=50
     )
-    ratio_estimator = inference.append_simulations(theta, x).train()
+    ratio_estimator = inference.append_simulations(theta, x).train(**train_kwargs)
     potential_fn, theta_transform = ratio_estimator_based_potential(
         ratio_estimator=ratio_estimator, prior=prior, x_o=x_o
     )

tutorials/16_implemented_methods.ipynb

Lines changed: 4 additions & 4 deletions
@@ -272,10 +272,10 @@
    "source": [
     "from sbi.inference import BNRE\n",
     "\n",
-    "inference = BNRE(prior, regularization_strength=100.)\n",
+    "inference = BNRE(prior)\n",
     "theta = prior.sample((num_sims,))\n",
     "x = simulator(theta)\n",
-    "_ = inference.append_simulations(theta, x).train()\n",
+    "_ = inference.append_simulations(theta, x).train(regularization_strength=100.)\n",
     "posterior = inference.build_posterior().set_default_x(x_o)"
    ]
   },
@@ -420,7 +420,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.9.7 (conda)",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -434,7 +434,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.8.12"
  },
  "vscode": {
   "interpreter": {
