Skip to content

Commit d4894a7

Browse files
Revert functionality change of size kwarg
Corresponding tests were reverted, or edited to use other parametrization flavors. The Ellipsis feature now works with all three dimensionality kwargs. The MultinomialRV implementation was removed, because the broadcasting behavior was implemented in Aesara. Closes pymc-devs#4662
1 parent d604881 commit d4894a7

10 files changed

+241
-205
lines changed

RELEASE-NOTES.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99
### New Features
1010
- The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models.
1111
- The dimensionality of model variables can now be parametrized through either of `shape`, `dims` or `size` (see [#4625](https://github.com/pymc-devs/pymc3/pull/4625)):
12-
- With `shape` the length of dimensions must be given numerically or as scalar Aesara `Variables`. A `SpecifyShape` `Op` is added automatically unless `Ellipsis` is used. Using `shape` restricts the model variable to the exact length and re-sizing is no longer possible.
12+
- With `shape` the length of all dimensions must be given numerically or as scalar Aesara `Variables`. A `SpecifyShape` `Op` is added automatically unless `Ellipsis` is used. Using `shape` restricts the model variable to the exact length and re-sizing is no longer possible.
1313
- `dims` keeps model variables re-sizeable (for example through `pm.Data`) and leads to well defined coordinates in `InferenceData` objects.
14-
- The `size` kwarg creates new dimensions in addition to what is implied by RV parameters.
15-
- An `Ellipsis` (`...`) in the last position of `shape` or `dims` can be used as short-hand notation for implied dimensions.
14+
- The `size` kwarg resembles the behavior found in Aesara and NumPy: It does not include _support_ dimensions.
15+
- An `Ellipsis` (`...`) in the last position of either kwarg can be used as short-hand notation for implied dimensions.
1616
- ...
1717

1818
### Maintenance

pymc3/distributions/distribution.py

+63-46
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@
4949

5050
PLATFORM = sys.platform
5151

52-
Shape = Union[int, Sequence[Union[str, type(Ellipsis)]], Variable]
53-
Dims = Union[str, Sequence[Union[str, None, type(Ellipsis)]]]
54-
Size = Union[int, Tuple[int, ...]]
52+
Shape = Union[int, Sequence[Union[int, type(Ellipsis), Variable]], Variable]
53+
Dims = Union[str, Sequence[Union[str, type(Ellipsis), None]]]
54+
Size = Union[int, Sequence[Union[int, type(Ellipsis), Variable]], Variable]
5555

5656

5757
class _Unpickling:
@@ -146,7 +146,7 @@ def _validate_shape_dims_size(
146146
raise ValueError("The `shape` parameter must be an int, list or tuple.")
147147
if not isinstance(dims, (type(None), str, list, tuple)):
148148
raise ValueError("The `dims` parameter must be a str, list or tuple.")
149-
if not isinstance(size, (type(None), int, list, tuple)):
149+
if not isinstance(size, (type(None), int, list, tuple, Variable)):
150150
raise ValueError("The `size` parameter must be an int, list or tuple.")
151151

152152
# Auto-convert non-tupled parameters
@@ -162,17 +162,14 @@ def _validate_shape_dims_size(
162162
shape = tuple(shape)
163163
if not isinstance(dims, (type(None), tuple)):
164164
dims = tuple(dims)
165-
if not isinstance(size, (type(None), tuple)):
165+
if not isinstance(size, (type(None), tuple, Variable)):
166166
size = tuple(size)
167167

168-
if not _valid_ellipsis_position(shape):
169-
raise ValueError(
170-
f"Ellipsis in `shape` may only appear in the last position. Actual: {shape}"
171-
)
172-
if not _valid_ellipsis_position(dims):
173-
raise ValueError(f"Ellipsis in `dims` may only appear in the last position. Actual: {dims}")
174-
if size is not None and Ellipsis in size:
175-
raise ValueError(f"The `size` parameter cannot contain an Ellipsis. Actual: {size}")
168+
for kwarg, val in dict(shape=shape, dims=dims, size=size).items():
169+
if not _valid_ellipsis_position(val):
170+
raise ValueError(
171+
f"Ellipsis in `{kwarg}` may only appear in the last position. Actual: {val}"
172+
)
176173
return shape, dims, size
177174

178175

@@ -247,12 +244,12 @@ def __new__(
247244
rng = model.default_rng
248245

249246
_, dims, _ = _validate_shape_dims_size(dims=dims)
250-
resize = None
247+
batch_shape = None
251248

252249
# Create the RV without specifying testval, because the testval may have a shape
253250
# that only matches after replicating with a size implied by dims (see below).
254251
rv_out = cls.dist(*args, rng=rng, testval=None, **kwargs)
255-
n_implied = rv_out.ndim
252+
ndim_implied = rv_out.ndim
256253

257254
# The `.dist()` can wrap automatically with a SpecifyShape Op which brings informative
258255
# error messages earlier in model construction.
@@ -268,33 +265,33 @@ def __new__(
268265
# Auto-complete the dims tuple to the full length
269266
dims = (*dims[:-1], *[None] * rv_out.ndim)
270267

271-
n_resize = len(dims) - n_implied
268+
ndim_batch = len(dims) - ndim_implied
272269

273-
# All resize dims must be known already (numerically or symbolically).
274-
unknown_resize_dims = set(dims[:n_resize]) - set(model.dim_lengths)
275-
if unknown_resize_dims:
270+
# All batch dims must be known already (numerically or symbolically).
271+
unknown_batch_dims = set(dims[:ndim_batch]) - set(model.dim_lengths)
272+
if unknown_batch_dims:
276273
raise KeyError(
277-
f"Dimensions {unknown_resize_dims} are unknown to the model and cannot be used to specify a `size`."
274+
f"Dimensions {unknown_batch_dims} are unknown to the model and cannot be used to specify a `size`."
278275
)
279276

280-
# The numeric/symbolic resize tuple can be created using model.RV_dim_lengths
281-
resize = tuple(model.dim_lengths[dname] for dname in dims[:n_resize])
277+
# The numeric/symbolic batch shape can be created using model.RV_dim_lengths
278+
batch_shape = tuple(model.dim_lengths[dname] for dname in dims[:ndim_batch])
282279
elif observed is not None:
283280
if not hasattr(observed, "shape"):
284281
observed = pandas_to_array(observed)
285-
n_resize = observed.ndim - n_implied
286-
resize = tuple(observed.shape[d] for d in range(n_resize))
282+
ndim_batch = observed.ndim - ndim_implied
283+
batch_shape = tuple(observed.shape[d] for d in range(ndim_batch))
287284

288-
if resize:
285+
if batch_shape:
289286
# A batch size was specified through `dims`, or implied by `observed`.
290-
rv_out = change_rv_size(rv_var=rv_out, new_size=resize, expand=True)
287+
rv_out = change_rv_size(rv_var=rv_out, new_size=batch_shape, expand=True)
291288

292289
if dims is not None:
293290
# Now that we have a handle on the output RV, we can register named implied dimensions that
294291
# were not yet known to the model, such that they can be used for size further downstream.
295-
for di, dname in enumerate(dims[n_resize:]):
292+
for di, dname in enumerate(dims[ndim_batch:]):
296293
if not dname in model.dim_lengths:
297-
model.add_coord(dname, values=None, length=rv_out.shape[n_resize + di])
294+
model.add_coord(dname, values=None, length=rv_out.shape[ndim_batch + di])
298295

299296
if testval is not None:
300297
# Assigning the testval earlier causes trouble because the RV may not be created with the final shape already.
@@ -336,6 +333,9 @@ def dist(
336333
size : int, tuple, Variable, optional
337334
A scalar or tuple for replicating the RV in addition
338335
to its implied shape/dimensionality.
336+
337+
Ellipsis (...) may be used in the last position of the tuple,
338+
such that only batch dimensions must be specified.
339339
testval : optional
340340
Test value to be attached to the output RV.
341341
Must match its shape exactly.
@@ -351,29 +351,46 @@ def dist(
351351
shape, _, size = _validate_shape_dims_size(shape=shape, size=size)
352352
assert_shape = None
353353

354-
# Create the RV without specifying size or testval.
355-
# The size will be expanded later (if necessary) and only then the testval fits.
354+
# Create the RV without specifying size or testval, because we don't know
355+
# a-priori if `size` contains a batch shape.
356+
# In the end `testval` must match the final shape, but it is
357+
# not taken into consideration when creating the RV.
358+
# Batch-shape expansion (if necessary) of the RV happens later.
356359
rv_native = cls.rv_op(*dist_params, size=None, **kwargs)
357360

358-
if shape is None and size is None:
359-
size = ()
361+
# Now we know the implied dimensionality and can figure out the batch shape
362+
batch_shape = ()
363+
if size is not None:
364+
if not isinstance(size, Variable) and Ellipsis in size:
365+
batch_shape = size[:-1]
366+
else:
367+
# Parametrization through size does not include support dimensions,
368+
# but may include a batch shape.
369+
ndim_support = rv_native.owner.op.ndim_supp
370+
ndim_inputs = rv_native.ndim - ndim_support
371+
# Be careful to avoid len(size) because it may be symbolic
372+
if ndim_inputs == 0:
373+
batch_shape = size
374+
else:
375+
batch_shape = size[:-ndim_inputs]
360376
elif shape is not None:
361-
# SpecifyShape is automatically applied for symbolic and non-Ellipsis shapes
362-
if isinstance(shape, Variable):
363-
assert_shape = shape
364-
size = ()
377+
if not isinstance(shape, Variable) and Ellipsis in shape:
378+
# Can't assert a shape without knowing all dimension lengths.
379+
# The batch shape is made up of all entries before the ...
380+
batch_shape = tuple(shape[:-1])
365381
else:
366-
if Ellipsis in shape:
367-
size = tuple(shape[:-1])
382+
# Fully symbolic shapes, or shapes without an Ellipsis, can be asserted.
383+
assert_shape = shape
384+
# The batch shape is made up of the entries that precede the implied dimensions.
385+
# Be careful to avoid len(shape) because it may be symbolic
386+
if rv_native.ndim == 0:
387+
batch_shape = shape
368388
else:
369-
size = tuple(shape[: len(shape) - rv_native.ndim])
370-
assert_shape = shape
371-
# no-op conditions:
372-
# `elif size is not None` (User already specified how to expand the RV)
373-
# `else` (Unreachable)
374-
375-
if size:
376-
rv_out = change_rv_size(rv_var=rv_native, new_size=size, expand=True)
389+
batch_shape = shape[: -rv_native.ndim]
390+
# else: both dimensionality kwargs are None
391+
392+
if batch_shape:
393+
rv_out = change_rv_size(rv_var=rv_native, new_size=batch_shape, expand=True)
377394
else:
378395
rv_out = rv_native
379396

pymc3/distributions/multivariate.py

-22
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
from aesara.graph.op import Op
2727
from aesara.tensor.nlinalg import det, eigh, matrix_inverse, trace
2828
from aesara.tensor.random.basic import MultinomialRV, dirichlet, multivariate_normal
29-
from aesara.tensor.random.utils import broadcast_params
3029
from aesara.tensor.slinalg import (
3130
Cholesky,
3231
Solve,
@@ -427,27 +426,6 @@ def _distr_parameters_for_repr(self):
427426
return ["a"]
428427

429428

430-
class MultinomialRV(MultinomialRV):
431-
"""Aesara's `MultinomialRV` doesn't broadcast; this one does."""
432-
433-
@classmethod
434-
def rng_fn(cls, rng, n, p, size):
435-
if n.ndim > 0 or p.ndim > 1:
436-
n, p = broadcast_params([n, p], cls.ndims_params)
437-
size = tuple(size or ())
438-
439-
if size:
440-
n = np.broadcast_to(n, size + n.shape)
441-
p = np.broadcast_to(p, size + p.shape)
442-
443-
res = np.empty(p.shape)
444-
for idx in np.ndindex(p.shape[:-1]):
445-
res[idx] = rng.multinomial(n[idx], p[idx])
446-
return res
447-
else:
448-
return rng.multinomial(n, p, size=size)
449-
450-
451429
multinomial = MultinomialRV()
452430

453431

pymc3/tests/test_data_container.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def test_shared_data_as_rv_input(self):
163163
with pm.Model() as m:
164164
x = pm.Data("x", [1.0, 2.0, 3.0])
165165
assert x.eval().shape == (3,)
166-
y = pm.Normal("y", mu=x, size=2)
166+
y = pm.Normal("y", mu=x, shape=(2, ...))
167167
assert y.eval().shape == (2, 3)
168168
idata = pm.sample(
169169
chains=1,

pymc3/tests/test_distributions.py

+16-12
Original file line numberDiff line numberDiff line change
@@ -1986,29 +1986,33 @@ def test_multinomial_mode(self, p, n):
19861986
@pytest.mark.parametrize(
19871987
"p, size, n",
19881988
[
1989-
[[0.25, 0.25, 0.25, 0.25], (4,), 2],
1990-
[[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
1989+
[[0.25, 0.25, 0.25, 0.25], (7,), 2],
1990+
[[0.25, 0.25, 0.25, 0.25], (1, 7), 3],
19911991
# 3: expect to fail
1992-
# [[.25, .25, .25, .25], (10, 4)],
1993-
[[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5],
1992+
# [[.25, .25, .25, .25], (10, 7)],
1993+
[[0.25, 0.25, 0.25, 0.25], (10, 1, 7), 5],
19941994
# 5: expect to fail
1995-
# [[[.25, .25, .25, .25]], (2, 4), [7, 11]],
1996-
[[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13],
1997-
[[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]],
1995+
# [[[.25, .25, .25, .25]], (2, 5), [7, 11]],
1996+
[[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (5, 2), 13],
1997+
[[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (5, 7, 2), [23, 29]],
19981998
[
19991999
[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]],
2000-
(10, 2, 4),
2000+
(10, 8, 2),
20012001
[31, 37],
20022002
],
2003-
[[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]],
2003+
[[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (3, 2), [17, 19]],
20042004
],
20052005
)
20062006
def test_multinomial_random(self, p, size, n):
20072007
p = np.asarray(p)
20082008
with Model() as model:
20092009
m = Multinomial("m", n=n, p=p, size=size)
2010-
2011-
assert m.eval().shape == size + p.shape
2010+
# The support has length 4 in all test parametrizations!
2011+
# Broadcasting of the `p` parameter does not affect the ndim_supp
2012+
# of the Op, hence the broadcasted p must be included in `size`.
2013+
support_shape = (p.shape[-1],)
2014+
assert support_shape == (4,)
2015+
assert m.eval().shape == size + support_shape
20122016

20132017
@pytest.mark.skip(reason="Moment calculations have not been refactored yet")
20142018
def test_multinomial_mode_with_shape(self):
@@ -2109,7 +2113,7 @@ def test_batch_multinomial(self):
21092113
decimal=select_by_precision(float64=6, float32=3),
21102114
)
21112115

2112-
dist = Multinomial.dist(n=n, p=p, size=2)
2116+
dist = Multinomial.dist(n=n, p=p, size=(2, ...))
21132117
sample = dist.eval()
21142118
assert_allclose(sample, np.stack([vals, vals], axis=0))
21152119

pymc3/tests/test_model.py

+5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from pymc3 import Deterministic, Potential
3636
from pymc3.blocking import DictToArrayBijection, RaveledVars
3737
from pymc3.distributions import Normal, logpt_sum, transforms
38+
from pymc3.exceptions import ShapeError
3839
from pymc3.model import Point, ValueGradFunction
3940
from pymc3.tests.helpers import SeededTest
4041

@@ -465,6 +466,10 @@ def test_make_obs_var():
465466
# Create the testval attribute simply for the sake of model testing
466467
fake_distribution.name = input_name
467468

469+
# The function requires data and RV dimensionality to be compatible
470+
with pytest.raises(ShapeError, match="Dimensionality of data and RV don't match."):
471+
fake_model.make_obs_var(fake_distribution, np.ones((3, 3, 1)), None, None)
472+
468473
# Check function behavior using the various inputs
469474
# dense, sparse: Ensure that the missing values are appropriately set to None
470475
# masked: a deterministic variable is returned

pymc3/tests/test_sampling.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -967,14 +967,14 @@ def test_multivariate2(self):
967967

968968
def test_layers(self):
969969
with pm.Model() as model:
970-
a = pm.Uniform("a", lower=0, upper=1, size=5)
971-
b = pm.Binomial("b", n=1, p=a, size=7)
970+
a = pm.Uniform("a", lower=0, upper=1, size=10)
971+
b = pm.Binomial("b", n=1, p=a)
972972

973973
model.default_rng.get_value(borrow=True).seed(232093)
974974

975975
b_sampler = aesara.function([], b)
976976
avg = np.stack([b_sampler() for i in range(10000)]).mean(0)
977-
npt.assert_array_almost_equal(avg, 0.5 * np.ones((7, 5)), decimal=2)
977+
npt.assert_array_almost_equal(avg, 0.5 * np.ones((10,)), decimal=2)
978978

979979
def test_transformed(self):
980980
n = 18

0 commit comments

Comments
 (0)