From d2690483fa914bb374a1d7c078f08ef6c06c2985 Mon Sep 17 00:00:00 2001
From: aloctavodia <aloctavodia@gmail.com>
Date: Fri, 18 Sep 2020 12:01:59 -0300
Subject: [PATCH 1/6] change metropolis kernel for independent metropolis
 kernel

---
 pymc3/smc/sample_smc.py | 14 +++++++-----
 pymc3/smc/smc.py        | 49 +++++++++++++++++++----------------------
 2 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/pymc3/smc/sample_smc.py b/pymc3/smc/sample_smc.py
index 31b6500d1b..5803081276 100644
--- a/pymc3/smc/sample_smc.py
+++ b/pymc3/smc/sample_smc.py
@@ -35,7 +35,7 @@ def sample_smc(
     n_steps=25,
     start=None,
     tune_steps=True,
-    p_acc_rate=0.99,
+    p_acc_rate=0.85,
     threshold=0.5,
     save_sim_data=False,
     model=None,
@@ -61,7 +61,7 @@ def sample_smc(
         acceptance rate and `p_acc_rate`, the max number of steps is ``n_steps``.
     start: dict, or array of dict
         Starting point in parameter space. It should be a list of dict with length `chains`.
-        When None (default) the starting point is sampled from the prior distribution. 
+        When None (default) the starting point is sampled from the prior distribution.
     tune_steps: bool
         Whether to compute the number of steps automatically or not. Defaults to True
     p_acc_rate: float
@@ -147,8 +147,10 @@ def sample_smc(
         cores = 1
 
     _log.info(
-            f"Multiprocess sampling ({chains} chain{'s' if chains > 1 else ''} "
-            f"in {cores} job{'s' if cores > 1 else ''})"
+        (
+            f"Sampling {chains} chain{'s' if chains > 1 else ''} "
+            f"in {cores} job{'s' if cores > 1 else ''}"
+        )
     )
 
     if random_seed == -1:
@@ -194,17 +196,17 @@ def sample_smc(
     else:
         results = []
         for i in range(chains):
-            results.append(sample_smc_int(*params, random_seed[i], i, _log))
+            results.append((sample_smc_int(*params, random_seed[i], i, _log)))
 
     traces, sim_data, log_marginal_likelihoods, betas, accept_ratios, nsteps = zip(*results)
     trace = MultiTrace(traces)
     trace.report._n_draws = draws
     trace.report._n_tune = 0
-    trace.report._t_sampling = time.time() - t1
     trace.report.log_marginal_likelihood = np.array(log_marginal_likelihoods)
     trace.report.betas = betas
     trace.report.accept_ratios = accept_ratios
     trace.report.nsteps = nsteps
+    trace.report._t_sampling = time.time() - t1
 
     if save_sim_data:
         return trace, {modelcontext(model).observed_RVs[0].name: np.array(sim_data)}
diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py
index f0a7f00020..d93252f6f2 100644
--- a/pymc3/smc/smc.py
+++ b/pymc3/smc/smc.py
@@ -16,6 +16,7 @@
 
 import numpy as np
 from scipy.special import logsumexp
+from scipy.stats import multivariate_normal
 from theano import function as theano_function
 import theano.tensor as tt
 
@@ -33,7 +34,7 @@ def __init__(
         n_steps=25,
         start=None,
         tune_steps=True,
-        p_acc_rate=0.99,
+        p_acc_rate=0.85,
         threshold=0.5,
         save_sim_data=False,
         model=None,
@@ -42,7 +43,7 @@ def __init__(
     ):
 
         self.draws = draws
-        self.kernel = kernel
+        self.kernel = kernel.lower()
         self.n_steps = n_steps
         self.start = start
         self.tune_steps = tune_steps
@@ -64,8 +65,6 @@ def __init__(
         self.acc_rate = 1
         self.acc_per_chain = np.ones(self.draws)
         self.variables = inputvars(self.model.vars)
-        self.dimension = sum(v.dsize for v in self.variables)
-        self.scalings = np.ones(self.draws) * 2.38 / (self.dimension) ** 0.5
         self.weights = np.ones(self.draws) / self.draws
         self.log_marginal_likelihood = 0
         self.sim_data = []
@@ -78,7 +77,9 @@ def initialize_population(self):
         var_info = OrderedDict()
         if self.start is None:
             init_rnd = sample_prior_predictive(
-                self.draws, var_names=[v.name for v in self.model.unobserved_RVs], model=self.model,
+                self.draws,
+                var_names=[v.name for v in self.model.unobserved_RVs],
+                model=self.model,
             )
         else:
             init_rnd = self.start
@@ -102,7 +103,7 @@ def setup_kernel(self):
         """
         shared = make_shared_replacements(self.variables, self.model)
 
-        if self.kernel.lower() == "abc":
+        if self.kernel == "abc":
             factors = [var.logpt for var in self.model.free_RVs]
             factors += [tt.sum(factor) for factor in self.model.potentials]
             self.prior_logp_func = logp_forw([tt.sum(factors)], self.variables, shared)
@@ -122,7 +123,7 @@ def setup_kernel(self):
                 self.draws,
                 self.save_sim_data,
             )
-        elif self.kernel.lower() == "metropolis":
+        elif self.kernel == "metropolis":
             self.prior_logp_func = logp_forw([self.model.varlogpt], self.variables, shared)
             self.likelihood_logp_func = logp_forw([self.model.datalogpt], self.variables, shared)
 
@@ -136,7 +137,7 @@ def initialize_logp(self):
         self.prior_logp = np.array(priors).squeeze()
         self.likelihood_logp = np.array(likelihoods).squeeze()
 
-        if self.save_sim_data:
+        if self.kernel == "abc" and self.save_sim_data:
             self.sim_data = self.likelihood_logp_func.get_data()
 
     def update_weights_beta(self):
@@ -181,7 +182,6 @@ def resample(self):
         self.likelihood_logp = self.likelihood_logp[resampling_indexes]
         self.posterior_logp = self.prior_logp + self.likelihood_logp * self.beta
         self.acc_per_chain = self.acc_per_chain[resampling_indexes]
-        self.scalings = self.scalings[resampling_indexes]
         if self.save_sim_data:
             self.sim_data = self.sim_data[resampling_indexes]
 
@@ -198,17 +198,13 @@ def update_proposal(self):
 
     def tune(self):
         """
-        Tune scaling and n_steps based on the acceptance rate.
+        Tune n_steps based on the acceptance rate.
         """
-        ave_scaling = np.exp(np.log(self.scalings.mean()) + (self.acc_per_chain.mean() - 0.234))
-        self.scalings = 0.5 * (
-            ave_scaling + np.exp(np.log(self.scalings) + (self.acc_per_chain - 0.234))
-        )
-
         if self.tune_steps:
             acc_rate = max(1.0 / self.proposed, self.acc_rate)
             self.n_steps = min(
-                self.max_steps, max(2, int(np.log(1 - self.p_acc_rate) / np.log(1 - acc_rate))),
+                self.max_steps,
+                max(2, int(np.log(1 - self.p_acc_rate) / np.log(1 - acc_rate))),
             )
 
         self.proposed = self.draws * self.n_steps
@@ -216,29 +212,29 @@ def tune(self):
     def mutate(self):
         ac_ = np.empty((self.n_steps, self.draws))
 
-        proposals = (
-            np.random.multivariate_normal(
-                np.zeros(self.dimension), self.cov, size=(self.n_steps, self.draws)
-            )
-            * self.scalings[:, None]
-        )
         log_R = np.log(np.random.rand(self.n_steps, self.draws))
 
+        dist = multivariate_normal(self.posterior.mean(axis=0), self.cov)
+
         for n_step in range(self.n_steps):
-            proposal = floatX(self.posterior + proposals[n_step])
+            proposal = dist.rvs(size=self.draws)
+            proposal = proposal.reshape(len(proposal), -1)
+            forward = dist.logpdf(proposal)
+            backward = multivariate_normal(proposal.mean(axis=0), self.cov).logpdf(self.posterior)
             ll = np.array([self.likelihood_logp_func(prop) for prop in proposal])
             pl = np.array([self.prior_logp_func(prop) for prop in proposal])
             proposal_logp = pl + ll * self.beta
-            accepted = log_R[n_step] < (proposal_logp - self.posterior_logp)
+            accepted = log_R[n_step] < (
+                (proposal_logp + backward) - (self.posterior_logp + forward)
+            )
             ac_[n_step] = accepted
             self.posterior[accepted] = proposal[accepted]
             self.posterior_logp[accepted] = proposal_logp[accepted]
             self.prior_logp[accepted] = pl[accepted]
             self.likelihood_logp[accepted] = ll[accepted]
-            if self.save_sim_data:
+            if self.kernel == "abc" and self.save_sim_data:
                 self.sim_data[accepted] = self.likelihood_logp_func.get_data()[accepted]
 
-        self.acc_per_chain = np.mean(ac_, axis=0)
         self.acc_rate = np.mean(ac_)
 
     def posterior_to_trace(self):
@@ -341,6 +337,7 @@ def __init__(
         self.observations = self.sum_stat(observations)
 
     def posterior_to_function(self, posterior):
+        model = self.model
         var_info = self.var_info
 
         varvalues = []

From 851f6d6cad6689f1736cd5b939c4a0a22f4b0526 Mon Sep 17 00:00:00 2001
From: aloctavodia <aloctavodia@gmail.com>
Date: Fri, 18 Sep 2020 12:33:49 -0300
Subject: [PATCH 2/6] fix pyupgrade

---
 pymc3/smc/sample_smc.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pymc3/smc/sample_smc.py b/pymc3/smc/sample_smc.py
index 5803081276..3b462cc3c8 100644
--- a/pymc3/smc/sample_smc.py
+++ b/pymc3/smc/sample_smc.py
@@ -147,10 +147,8 @@ def sample_smc(
         cores = 1
 
     _log.info(
-        (
             f"Sampling {chains} chain{'s' if chains > 1 else ''} "
             f"in {cores} job{'s' if cores > 1 else ''}"
-        )
     )
 
     if random_seed == -1:
@@ -196,7 +194,7 @@ def sample_smc(
     else:
         results = []
         for i in range(chains):
-            results.append((sample_smc_int(*params, random_seed[i], i, _log)))
+            results.append(sample_smc_int(*params, random_seed[i], i, _log))
 
     traces, sim_data, log_marginal_likelihoods, betas, accept_ratios, nsteps = zip(*results)
     trace = MultiTrace(traces)

From 123d822d7930f0b83498549af9906850158a24e6 Mon Sep 17 00:00:00 2001
From: aloctavodia <aloctavodia@gmail.com>
Date: Fri, 18 Sep 2020 13:05:31 -0300
Subject: [PATCH 3/6] fix float32

---
 pymc3/smc/smc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py
index d93252f6f2..67e96387b2 100644
--- a/pymc3/smc/smc.py
+++ b/pymc3/smc/smc.py
@@ -217,7 +217,7 @@ def mutate(self):
         dist = multivariate_normal(self.posterior.mean(axis=0), self.cov)
 
         for n_step in range(self.n_steps):
-            proposal = dist.rvs(size=self.draws)
+            proposal = floatX(dist.rvs(size=self.draws))
             proposal = proposal.reshape(len(proposal), -1)
             forward = dist.logpdf(proposal)
             backward = multivariate_normal(proposal.mean(axis=0), self.cov).logpdf(self.posterior)

From 8afb171515fc2de4aebb8273cc6e8145b8130fbd Mon Sep 17 00:00:00 2001
From: aloctavodia <aloctavodia@gmail.com>
Date: Fri, 18 Sep 2020 16:58:39 -0300
Subject: [PATCH 4/6] clean code

---
 RELEASE-NOTES.md | 1 +
 pymc3/smc/smc.py | 3 ---
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index e3dd7e72ca..a8a272acaa 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -12,6 +12,7 @@
 ### New features
 - `sample_posterior_predictive_w` can now feed on `xarray.Dataset` - e.g. from `InferenceData.posterior`. (see [#4042](https://github.com/pymc-devs/pymc3/pull/4042))
 - Add MLDA, a new stepper for multilevel sampling. MLDA can be used when a hierarchy of approximate posteriors of varying accuracy is available, offering improved sampling efficiency especially in high-dimensional problems and/or where gradients are not available (see [#3926](https://github.com/pymc-devs/pymc3/pull/3926))
+- Change SMC metropolis kernel to independent metropolis kernel [#4115](https://github.com/pymc-devs/pymc3/pull/3926))
 
 
 ## PyMC3 3.9.3 (11 August 2020)
diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py
index 67e96387b2..52e17f245c 100644
--- a/pymc3/smc/smc.py
+++ b/pymc3/smc/smc.py
@@ -63,7 +63,6 @@ def __init__(
         self.max_steps = n_steps
         self.proposed = draws * n_steps
         self.acc_rate = 1
-        self.acc_per_chain = np.ones(self.draws)
         self.variables = inputvars(self.model.vars)
         self.weights = np.ones(self.draws) / self.draws
         self.log_marginal_likelihood = 0
@@ -181,7 +180,6 @@ def resample(self):
         self.prior_logp = self.prior_logp[resampling_indexes]
         self.likelihood_logp = self.likelihood_logp[resampling_indexes]
         self.posterior_logp = self.prior_logp + self.likelihood_logp * self.beta
-        self.acc_per_chain = self.acc_per_chain[resampling_indexes]
         if self.save_sim_data:
             self.sim_data = self.sim_data[resampling_indexes]
 
@@ -337,7 +335,6 @@ def __init__(
         self.observations = self.sum_stat(observations)
 
     def posterior_to_function(self, posterior):
-        model = self.model
         var_info = self.var_info
 
         varvalues = []

From 41ee74fc59b92c1a3b06b67364de761fd91d9ce0 Mon Sep 17 00:00:00 2001
From: aloctavodia <aloctavodia@gmail.com>
Date: Mon, 21 Sep 2020 08:36:47 -0300
Subject: [PATCH 5/6] add inline comments and update docstring

---
 pymc3/smc/sample_smc.py | 17 +++++++++--------
 pymc3/smc/smc.py        |  8 ++++++--
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/pymc3/smc/sample_smc.py b/pymc3/smc/sample_smc.py
index 3b462cc3c8..4a5fbed53b 100644
--- a/pymc3/smc/sample_smc.py
+++ b/pymc3/smc/sample_smc.py
@@ -105,17 +105,18 @@ def sample_smc(
 
      1. Initialize :math:`\beta` at zero and stage at zero.
      2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the
-         tempered posterior is the prior).
+        tempered posterior is the prior).
      3. Increase :math:`\beta` in order to make the effective sample size equals some predefined
         value (we use :math:`Nt`, where :math:`t` is 0.5 by default).
      4. Compute a set of N importance weights W. The weights are computed as the ratio of the
         likelihoods of a sample at stage i+1 and stage i.
      5. Obtain :math:`S_{w}` by re-sampling according to W.
-     6. Use W to compute the covariance for the proposal distribution.
-     7. For stages other than 0 use the acceptance rate from the previous stage to estimate the
-        scaling of the proposal distribution and `n_steps`.
-     8. Run N Metropolis chains (each one of length `n_steps`), starting each one from a different
-        sample in :math:`S_{w}`.
+     6. Use W to compute the mean and covariance for the proposal distribution, a MVNormal.
+     7. For stages other than 0 use the acceptance rate from the previous stage to estimate
+        `n_steps`.
+     8. Run N independent Metropolis-Hastings (IMH) chains (each one of length `n_steps`),
+        starting each one from a different sample in :math:`S_{w}`. Samples are IMH as the proposal
+        mean is the of the previous posterior stage and not the current point in parameter space.
      9. Repeat from step 3 until :math:`\beta \ge 1`.
      10. The final result is a collection of N samples from the posterior.
 
@@ -147,8 +148,8 @@ def sample_smc(
         cores = 1
 
     _log.info(
-            f"Sampling {chains} chain{'s' if chains > 1 else ''} "
-            f"in {cores} job{'s' if cores > 1 else ''}"
+        f"Sampling {chains} chain{'s' if chains > 1 else ''} "
+        f"in {cores} job{'s' if cores > 1 else ''}"
     )
 
     if random_seed == -1:
diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py
index 52e17f245c..0ebadec7ee 100644
--- a/pymc3/smc/smc.py
+++ b/pymc3/smc/smc.py
@@ -212,12 +212,16 @@ def mutate(self):
 
         log_R = np.log(np.random.rand(self.n_steps, self.draws))
 
+        # The proposal distribution is a MVNormal, with mean and covariance computed from the previous tempered posterior
         dist = multivariate_normal(self.posterior.mean(axis=0), self.cov)
 
         for n_step in range(self.n_steps):
-            proposal = floatX(dist.rvs(size=self.draws))
-            proposal = proposal.reshape(len(proposal), -1)
+            # The proposal is independent from the current point.
+            # We have to take that into account to compute the Metropolis-Hastings acceptance
+            proposal = floatX(dist.rvs(size=self.draws).reshape(len(proposal), -1))
+            # To do that we compute the logp of moving to a new point
             forward = dist.logpdf(proposal)
+            # And to going back from that new point
             backward = multivariate_normal(proposal.mean(axis=0), self.cov).logpdf(self.posterior)
             ll = np.array([self.likelihood_logp_func(prop) for prop in proposal])
             pl = np.array([self.prior_logp_func(prop) for prop in proposal])

From f1cb5ca99fef4327ab7ab99f6482943d0919035e Mon Sep 17 00:00:00 2001
From: aloctavodia <aloctavodia@gmail.com>
Date: Mon, 21 Sep 2020 10:00:55 -0300
Subject: [PATCH 6/6] add inline comments and update docstring

---
 pymc3/smc/smc.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py
index 0ebadec7ee..83136f3c58 100644
--- a/pymc3/smc/smc.py
+++ b/pymc3/smc/smc.py
@@ -218,7 +218,8 @@ def mutate(self):
         for n_step in range(self.n_steps):
             # The proposal is independent from the current point.
             # We have to take that into account to compute the Metropolis-Hastings acceptance
-            proposal = floatX(dist.rvs(size=self.draws).reshape(len(proposal), -1))
+            proposal = floatX(dist.rvs(size=self.draws))
+            proposal = proposal.reshape(len(proposal), -1)
             # To do that we compute the logp of moving to a new point
             forward = dist.logpdf(proposal)
             # And to going back from that new point