diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index ebe696e824..a313d6cc33 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -20,6 +20,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang - `OrderedProbit` distribution added (see [#4232](https://github.com/pymc-devs/pymc3/pull/4232)). - `plot_posterior_predictive_glm` now works with `arviz.InferenceData` as well (see [#4234](https://github.com/pymc-devs/pymc3/pull/4234)) - Add `logcdf` method to all univariate discrete distributions (see [#4387](https://github.com/pymc-devs/pymc3/pull/4387)). +- Add `random` method to `MvGaussianRandomWalk` (see [#4388](https://github.com/pymc-devs/pymc3/pull/4388)) ### Maintenance - Fixed bug whereby partial traces returns after keyboard interrupt during parallel sampling had fewer draws than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318) diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py index 5174851547..e3e1aa15bc 100644 --- a/pymc3/distributions/timeseries.py +++ b/pymc3/distributions/timeseries.py @@ -275,7 +275,6 @@ def _random(self, sigma, mu, size, sample_shape): """Implement a Gaussian random walk as a cumulative sum of normals. axis = len(size) - 1 denotes the axis along which cumulative sum would be calculated. This might need to be corrected in future when issue #4010 is fixed. - Lines 318-322 ties the starting point of each instance of random walk to 0" """ if size[len(sample_shape)] == sample_shape: axis = len(sample_shape) @@ -284,6 +283,8 @@ def _random(self, sigma, mu, size, sample_shape): rv = stats.norm(mu, sigma) data = rv.rvs(size).cumsum(axis=axis) data = np.array(data) + + # the following lines center the random walk to start at the origin. if len(data.shape) > 1: for i in range(data.shape[0]): data[i] = data[i] - data[i][0] @@ -435,7 +436,7 @@ def __init__( self.init = init self.innovArgs = (mu, cov, tau, chol, lower) - self.innov = multivariate.MvNormal.dist(*self.innovArgs) + self.innov = multivariate.MvNormal.dist(*self.innovArgs, shape=self.shape) self.mean = tt.as_tensor_variable(0.0) def logp(self, x): @@ -452,6 +453,10 @@ def logp(self, x): ------- TensorVariable """ + + if x.ndim == 1: + x = x[np.newaxis, :] + x_im1 = x[:-1] x_i = x[1:] @@ -460,6 +465,70 @@ def logp(self, x): def _distr_parameters_for_repr(self): return ["mu", "cov"] + def random(self, point=None, size=None): + """ + Draw random values from MvGaussianRandomWalk. + + Parameters + ---------- + point: dict, optional + Dict of variable values on which random values are to be + conditioned (uses default point if not specified). + size: int or tuple of ints, optional + Desired size of random sample (returns one sample if not + specified). + + Returns + ------- + array + + + Examples + ------- + .. code-block:: python + + with pm.Model(): + mu = np.array([1.0, 0.0]) + cov = np.array([[1.0, 0.0], + [0.0, 2.0]]) + + # draw one sample from a 2-dimensional Gaussian random walk with 10 timesteps + sample = MvGaussianRandomWalk(mu, cov, shape=(10, 2)).random() + + # draw three samples from a 2-dimensional Gaussian random walk with 10 timesteps + sample = MvGaussianRandomWalk(mu, cov, shape=(10, 2)).random(size=3) + + # draw four samples from a 2-dimensional Gaussian random walk with 10 timesteps, + # indexed with a (2, 2) array + sample = MvGaussianRandomWalk(mu, cov, shape=(10, 2)).random(size=(2, 2)) + """ + + # for each draw specified by the size input, we need to draw time_steps many + # samples from MvNormal. + + size = to_tuple(size) + multivariate_samples = self.innov.random(point=point, size=size) + # this has shape (size, self.shape) + if len(self.shape) == 2: + # have time dimension in first slot of shape. Therefore the time + # component can be accessed with the index equal to the length of size. + time_axis = len(size) + multivariate_samples = multivariate_samples.cumsum(axis=time_axis) + if time_axis != 0: + # this for loop covers the case where size is a tuple + for idx in np.ndindex(size): + multivariate_samples[idx] = ( + multivariate_samples[idx] - multivariate_samples[idx][0] + ) + else: + # size was passed as None + multivariate_samples = multivariate_samples - multivariate_samples[0] + + # if the above statement fails, then only a spatial dimension was passed in for self.shape. + # Therefore don't subtract off the initial value since otherwise you get all zeros + # as your output. + return multivariate_samples + class MvStudentTRandomWalk(MvGaussianRandomWalk): r""" diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 8d19836174..5e735b6aed 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -1720,3 +1720,53 @@ def test_matrix_normal_random_with_random_variables(): prior = pm.sample_prior_predictive(2) assert prior["mu"].shape == (2, D, K) + + +class TestMvGaussianRandomWalk(SeededTest): + @pytest.mark.parametrize( + ["sample_shape", "dist_shape", "mu_shape", "param"], + generate_shapes(include_params=True), + ids=str, + ) + def test_with_np_arrays(self, sample_shape, dist_shape, mu_shape, param): + dist = pm.MvGaussianRandomWalk.dist( + mu=np.ones(mu_shape), **{param: np.eye(3)}, shape=dist_shape + ) + output_shape = to_tuple(sample_shape) + dist_shape + assert dist.random(size=sample_shape).shape == output_shape + + @pytest.mark.xfail + @pytest.mark.parametrize( + ["sample_shape", "dist_shape", "mu_shape"], + generate_shapes(include_params=False), + ids=str, + ) + def test_with_chol_rv(self, sample_shape, dist_shape, mu_shape): + with pm.Model() as model: + mu = pm.Normal("mu", 0.0, 1.0, shape=mu_shape) + sd_dist = pm.Exponential.dist(1.0, shape=3) + chol, corr, stds = pm.LKJCholeskyCov( + "chol_cov", n=3, eta=2, sd_dist=sd_dist, compute_corr=True + ) + mv = pm.MvGaussianRandomWalk("mv", mu, chol=chol, shape=dist_shape) + prior = pm.sample_prior_predictive(samples=sample_shape) + + assert prior["mv"].shape == to_tuple(sample_shape) + dist_shape + + @pytest.mark.xfail + @pytest.mark.parametrize( + ["sample_shape", "dist_shape", "mu_shape"], + generate_shapes(include_params=False), + ids=str, + ) + def test_with_cov_rv(self, sample_shape, dist_shape, mu_shape): + with pm.Model() as model: + mu = pm.Normal("mu", 0.0, 1.0, shape=mu_shape) + sd_dist = pm.Exponential.dist(1.0, shape=3) + chol, corr, stds = pm.LKJCholeskyCov( + "chol_cov", n=3, eta=2, sd_dist=sd_dist, compute_corr=True + ) + mv = pm.MvGaussianRandomWalk("mv", mu, cov=pm.math.dot(chol, chol.T), shape=dist_shape) + prior = pm.sample_prior_predictive(samples=sample_shape) + + assert prior["mv"].shape == to_tuple(sample_shape) + dist_shape