pymc-devs · junpenglao · Feb 16, 2019 · Jan 30, 2019 · Jan 30, 2019 · Jan 31, 2019
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -4,6 +4,8 @@
 
 ### New features
 
+- `Mixture` now supports mixtures of multidimensional probability distributions, not just lists of 1D distributions.
+
 ### Maintenance
 
 - All occurances of `sd` as a parameter name have been renamed to `sigma`. `sd` will continue to function for backwards compatibility.
@@ -13,6 +15,13 @@
 - Added a fix to allow the imputation of single missing values of observed data, which previously would fail (Fix issue #3122).
 - Fix for #3346. The `draw_values` function was too permissive with what could be grabbed from inside `point`, which lead to an error when sampling posterior predictives of variables that depended on shared variables that had changed their shape after `pm.sample()` had been called.
 - Fix for #3354. `draw_values` now adds the theano graph descendants of `TensorConstant` or `SharedVariables` to the named relationship nodes stack, only if these descendants are `ObservedRV` or `MultiObservedRV` instances.
+- Fixed bug in broadcast_distrution_samples, which did not handle correctly cases in which some samples did not have the size tuple prepended.
+- Changed `MvNormal.random`'s usage of `tensordot` for Cholesky encoded covariances. This lead to wrong axis broadcasting and seemed to be the cause for issue #3343.
+- Fixed defect in `Mixture.random` when multidimensional mixtures were involved. The mixture component was not preserved across all the elements of the dimensions of the mixture. This meant that the correlations across elements within a given draw of the mixture were partly broken.
+- Restructured `Mixture.random` to allow better use of vectorized calls to `comp_dists.random`.
+- Added tests for mixtures of multidimensional distributions to the test suite.
+- Fixed incorrect usage of `broadcast_distribution_samples` in `DiscreteWeibull`.
+- `Mixture`'s default dtype is now determined by `theano.config.floatX`.
 
 ### Deprecations
 

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
@@ -347,12 +347,12 @@ def _ppf(self, p):
 
     def _random(self, q, beta, size=None):
         p = np.random.uniform(size=size)
-        p, q, beta = broadcast_distribution_samples([p, q, beta], size=size)
 
         return np.ceil(np.power(np.log(1 - p) / np.log(q), 1. / beta)) - 1
 
     def random(self, point=None, size=None):
         q, beta = draw_values([self.q, self.beta], point=point, size=size)
+        q, beta = broadcast_distribution_samples([q, beta], size=size)
 
         return generate_samples(self._random, q, beta,
                                 dist_shape=self.shape,

diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
@@ -254,7 +254,7 @@ class _DrawValuesContextBlocker(_DrawValuesContext, metaclass=InitContextMeta):
     """
     def __new__(cls, *args, **kwargs):
         # resolves the parent instance
-        instance = super(_DrawValuesContextBlocker, cls).__new__(cls)
+        instance = super().__new__(cls)
         instance._parent = None
         return instance
 
@@ -599,13 +599,19 @@ def generate_samples(generator, *args, **kwargs):
         parameters. This may be required when the parameter shape
         does not determine the shape of a single sample, for example,
         the shape of the probabilities in the Categorical distribution.
+    not_broadcast_kwargs: dict or None
+        Key word argument dictionary to provide to the random generator, which
+        must not be broadcasted with the rest of the *args and **kwargs.
 
     Any remaining *args and **kwargs are passed on to the generator function.
     """
     dist_shape = kwargs.pop('dist_shape', ())
     one_d = _is_one_d(dist_shape)
     size = kwargs.pop('size', None)
     broadcast_shape = kwargs.pop('broadcast_shape', None)
+    not_broadcast_kwargs = kwargs.pop('not_broadcast_kwargs', None)
+    if not_broadcast_kwargs is None:
+        not_broadcast_kwargs = dict()
     if size is None:
         size = 1
 
@@ -625,6 +631,8 @@ def generate_samples(generator, *args, **kwargs):
             kwargs = {k: v.reshape(v.shape + (1,) * (max_dims - v.ndim)) for k, v in kwargs.items()}
             inputs = args + tuple(kwargs.values())
             broadcast_shape = np.broadcast(*inputs).shape  # size of generator(size=1)
+    # Update kwargs with the keyword arguments that were not broadcasted
+    kwargs.update(not_broadcast_kwargs)
 
     dist_shape = to_tuple(dist_shape)
     broadcast_shape = to_tuple(broadcast_shape)
@@ -639,20 +647,30 @@ def generate_samples(generator, *args, **kwargs):
             samples = generator(size=broadcast_shape, *args, **kwargs)
         elif dist_shape == broadcast_shape:
             samples = generator(size=size_tup + dist_shape, *args, **kwargs)
-        elif len(dist_shape) == 0 and size_tup and broadcast_shape[:len(size_tup)] == size_tup:
-            # Input's dist_shape is scalar, but it has size repetitions.
-            # So now the size matches but we have to manually broadcast to
-            # the right dist_shape
-            samples = [generator(*args, **kwargs)]
-            if samples[0].shape == broadcast_shape:
-                samples = samples[0]
+        elif len(dist_shape) == 0 and size_tup and broadcast_shape:
+            # There is no dist_shape (scalar distribution) but the parameters
+            # broadcast shape and size_tup determine the size to provide to
+            # the generator
+            if broadcast_shape[:len(size_tup)] == size_tup:
+                # Input's dist_shape is scalar, but it has size repetitions.
+                # So now the size matches but we have to manually broadcast to
+                # the right dist_shape
+                samples = [generator(*args, **kwargs)]
+                if samples[0].shape == broadcast_shape:
+                    samples = samples[0]
+                else:
+                    suffix = broadcast_shape[len(size_tup):] + dist_shape
+                    samples.extend([generator(*args, **kwargs).
+                                    reshape(broadcast_shape)[..., np.newaxis]
+                                    for _ in range(np.prod(suffix,
+                                                           dtype=int) - 1)])
+                    samples = np.hstack(samples).reshape(size_tup + suffix)
             else:
-                suffix = broadcast_shape[len(size_tup):] + dist_shape
-                samples.extend([generator(*args, **kwargs).
-                                reshape(broadcast_shape)[..., np.newaxis]
-                                for _ in range(np.prod(suffix,
-                                                       dtype=int) - 1)])
-                samples = np.hstack(samples).reshape(size_tup + suffix)
+                # The parameter shape is given, but we have to concatenate it
+                # with the size tuple
+                samples = generator(size=size_tup + broadcast_shape,
+                                    *args,
+                                    **kwargs)
         else:
             samples = None
     # Args have been broadcast correctly, can just ask for the right shape out
@@ -686,27 +704,68 @@ def generate_samples(generator, *args, **kwargs):
 
 
 def broadcast_distribution_samples(samples, size=None):
+    """Broadcast samples drawn from distributions taking into account the
+    size (i.e. the number of samples) of the draw, which is prepended to
+    the sample's shape.
+
+    Parameters
+    ----------
+    samples: Iterable of ndarrays holding the sampled values
+    size: None, int or tuple (optional)
+        size of the sample set requested.
+
+    Returns
+    -------
+    List of broadcasted sample arrays
+
+    Examples
+    --------
+    .. code-block:: python
+        size = 100
+        sample0 = np.random.randn(size)
+        sample1 = np.random.randn(size, 5)
+        sample2 = np.random.randn(size, 4, 5)
+        out = broadcast_distribution_samples([sample0, sample1, sample2],
+                                             size=size)
+        assert all((o.shape == (size, 4, 5) for o in out))
+        assert np.all(sample0[:, None, None] == out[0])
+        assert np.all(sample1[:, None, :] == out[1])
+        assert np.all(sample2 == out[2])
+
+    .. code-block:: python
+        size = 100
+        sample0 = np.random.randn(size)
+        sample1 = np.random.randn(5)
+        sample2 = np.random.randn(4, 5)
+        out = broadcast_distribution_samples([sample0, sample1, sample2],
+                                             size=size)
+        assert all((o.shape == (size, 4, 5) for o in out))
+        assert np.all(sample0[:, None, None] == out[0])
+        assert np.all(sample1 == out[1])
+        assert np.all(sample2 == out[2])
+    """
     if size is None:
         return np.broadcast_arrays(*samples)
     _size = to_tuple(size)
-    try:
-        broadcasted_samples = np.broadcast_arrays(*samples)
-    except ValueError:
-        # Raw samples shapes
-        p_shapes = [p.shape for p in samples]
-        # samples shapes without the size prepend
-        sp_shapes = [s[len(_size):] if _size == s[:len(_size)] else s
-                     for s in p_shapes]
-        broadcast_shape = np.broadcast(*[np.empty(s) for s in sp_shapes]).shape
-        broadcasted_samples = []
-        for param, p_shape, sp_shape in zip(samples, p_shapes, sp_shapes):
-            if _size == p_shape[:len(_size)]:
-                slicer_head = [slice(None)] * len(_size)
-            else:
-                slicer_head = [np.newaxis] * len(_size)
+    # Raw samples shapes
+    p_shapes = [p.shape for p in samples]
+    # samples shapes without the size prepend
+    sp_shapes = [s[len(_size):] if _size == s[:len(_size)] else s
+                 for s in p_shapes]
+    broadcast_shape = np.broadcast(*[np.empty(s) for s in sp_shapes]).shape
+    broadcasted_samples = []
+    for param, p_shape, sp_shape in zip(samples, p_shapes, sp_shapes):
+        if _size == p_shape[:len(_size)]:
+            # If size prepends the shape, then we have to add broadcasting axis
+            # in the middle
+            slicer_head = [slice(None)] * len(_size)
             slicer_tail = ([np.newaxis] * (len(broadcast_shape) -
                                            len(sp_shape)) +
                            [slice(None)] * len(sp_shape))
-            broadcasted_samples.append(param[tuple(slicer_head + slicer_tail)])
-        broadcasted_samples = np.broadcast_arrays(*broadcasted_samples)
-    return broadcasted_samples
+        else:
+            # If size does not prepend the shape, then we have leave the
+            # parameter as is
+            slicer_head = []
+            slicer_tail = [slice(None)] * len(sp_shape)
+        broadcasted_samples.append(param[tuple(slicer_head + slicer_tail)])
+    return np.broadcast_arrays(*broadcasted_samples)