From 8a6b9b3ea6eb595b1cc963109ac6b081acb98d9b Mon Sep 17 00:00:00 2001
From: lucianopaz <luciano.paz.neuro@gmail.com>
Date: Sat, 25 Sep 2021 01:05:30 +0200
Subject: [PATCH 1/2] Refactor DensityDist into v4

---
 RELEASE-NOTES.md                          |   5 +
 docs/source/Probability_Distributions.rst |   8 +-
 pymc/distributions/distribution.py        | 266 +++++++++++++++++-----
 pymc/gp/gp.py                             |  28 ++-
 pymc/tests/test_distributions.py          |  39 +++-
 pymc/tests/test_distributions_random.py   | 107 ++-------
 pymc/tests/test_gp.py                     |   8 +-
 pymc/tests/test_idata_conversion.py       |  16 +-
 pymc/tests/test_model.py                  |   3 +-
 pymc/tests/test_moment.py                 |  38 ++++
 pymc/tests/test_parallel_sampling.py      |  36 +--
 pymc/tests/test_sampling.py               |   6 +-
 12 files changed, 364 insertions(+), 196 deletions(-)
 create mode 100644 pymc/tests/test_moment.py

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 97261f82d8..742f5d9462 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -10,6 +10,9 @@
 - `pm.sample` now returns results as `InferenceData` instead of `MultiTrace` by default (see [#4744](https://github.com/pymc-devs/pymc/pull/4744)).
 - `pm.sample_prior_predictive` no longer returns transformed variable values by default. Pass them by name in `var_names` if you want to obtain these draws (see [4769](https://github.com/pymc-devs/pymc/pull/4769)).
 - ⚠ `pm.Bound` interface no longer accepts a callable class as argument, instead it requires an instantiated distribution (created via the `.dist()` API) to be passed as an argument. In addition, Bound no longer returns a class instance but works as a normal PyMC distribution. Finally, it is no longer possible to do predictive random sampling from Bounded variables. Please, consult the new documentation for details on how to use Bounded variables (see [4815](https://github.com/pymc-devs/pymc/pull/4815)).
+- `pm.DensityDist` no longer accepts the `logp` as its first position argument. It is now an optional keyword argument. If you pass a callable as the first positional argument, a `TypeError` will be raised (see [5026](https://github.com/pymc-devs/pymc3/pull/5026)).
+- `pm.DensityDist` now accepts distribution parameters as positional arguments. Passing them as a dictionary in the `observed` keyword argument is no longer supported and will raise an error (see [5026](https://github.com/pymc-devs/pymc3/pull/5026)).
+- The signature of the `logp` and `random` functions that can be passed into a `pm.DensityDist` has been changed (see [5026](https://github.com/pymc-devs/pymc3/pull/5026)).
 - ...
 
 ### New Features
@@ -25,6 +28,8 @@
 - The `Polya-Gamma` distribution has been added (see [#4531](https://github.com/pymc-devs/pymc/pull/4531)). To make use of this distribution, the [`polyagamma>=1.3.1`](https://pypi.org/project/polyagamma/) library must be installed and available in the user's environment.
 - A small change to the mass matrix tuning methods jitter+adapt_diag (the default) and adapt_diag improves performance early on during tuning for some models. [#5004](https://github.com/pymc-devs/pymc/pull/5004)
 - New experimental mass matrix tuning method jitter+adapt_diag_grad. [#5004](https://github.com/pymc-devs/pymc/pull/5004)
+- `pm.DensityDist` can now accept an optional `logcdf` keyword argument to pass in a function to compute the cummulative density function of the distribution (see [5026](https://github.com/pymc-devs/pymc3/pull/5026)).
+- `pm.DensityDist` can now accept an optional `get_moment` keyword argument to pass in a function to compute the moment of the distribution (see [5026](https://github.com/pymc-devs/pymc3/pull/5026)).
 - ...
 
 ### Maintenance
diff --git a/docs/source/Probability_Distributions.rst b/docs/source/Probability_Distributions.rst
index 63d339e499..7b8999e487 100644
--- a/docs/source/Probability_Distributions.rst
+++ b/docs/source/Probability_Distributions.rst
@@ -58,16 +58,16 @@ An exponential survival function, where :math:`c=0` denotes failure (or non-surv
     f(c, t) = \left\{ \begin{array}{l} \exp(-\lambda t), \text{if c=1} \\
                \lambda \exp(-\lambda t), \text{if c=0}  \end{array} \right.
 
-Such a function can be implemented as a PyMC distribution by writing a function that specifies the log-probability, then passing that function as an argument to the ``DensityDist`` function, which creates an instance of a PyMC distribution with the custom function as its log-probability.
+Such a function can be implemented as a PyMC distribution by writing a function that specifies the log-probability, then passing that function as a keyword argument to the ``DensityDist`` function, which creates an instance of a PyMC3 distribution with the custom function as its log-probability.
 
 For the exponential survival function, this is:
 
 ::
 
-    def logp(failure, value):
-        return (failure * log(λ) - λ * value).sum()
+    def logp(value, t, λ):
+        return (value * log(λ) - λ * t).sum()
 
-    exp_surv = pm.DensityDist('exp_surv', logp, observed={'failure':failure, 'value':t})
+    exp_surv = pm.DensityDist('exp_surv', t, λ, logp=logp, observed=failure)
 
 Similarly, if a random number generator is required, a function returning random numbers corresponding to the probability distribution can be passed as the ``random`` argument.
 
diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py
index 2ffa092cd6..3b9b91593e 100644
--- a/pymc/distributions/distribution.py
+++ b/pymc/distributions/distribution.py
@@ -13,17 +13,18 @@
 #   limitations under the License.
 import contextvars
 import functools
-import multiprocessing
 import sys
 import types
 import warnings
 
 from abc import ABCMeta
 from functools import singledispatch
-from typing import Optional
+from typing import Callable, Optional, Sequence
 
 import aesara
+import numpy as np
 
+from aesara.tensor.basic import as_tensor_variable
 from aesara.tensor.random.op import RandomVariable
 from aesara.tensor.random.var import RandomStateSharedVariable
 from aesara.tensor.var import TensorVariable
@@ -41,12 +42,14 @@
     maybe_resize,
     resize_from_dims,
     resize_from_observed,
+    to_tuple,
 )
 from pymc.printing import str_for_dist
 from pymc.util import UNSET
 from pymc.vartypes import string_types
 
 __all__ = [
+    "DensityDistRV",
     "DensityDist",
     "Distribution",
     "Continuous",
@@ -387,96 +390,241 @@ class NoDistribution(Distribution):
     """
 
 
-class DensityDist(Distribution):
-    """Distribution based on a given log density function.
+class DensityDistRV(RandomVariable):
+    """
+    Base class for DensityDistRV
+
+    This should be subclassed when defining custom DensityDist objects.
+    """
+
+    name = "DensityDistRV"
+    _print_name = ("DensityDist", "\\operatorname{DensityDist}")
+
+    @classmethod
+    def rng_fn(cls, rng, *args):
+        args = list(args)
+        size = args.pop(-1)
+        return cls._random_fn(*args, rng=rng, size=size)
 
-    A distribution with the passed log density function is created.
-    Requires a custom random function passed as kwarg `random` to
-    enable prior or posterior predictive sampling.
 
+class DensityDist(NoDistribution):
+    """A distribution that can be used to wrap black-box log density functions.
+
+    Creates a Distribution and registers the supplied log density function to be used
+    for inference. It is also possible to supply a `random` method in order to be able
+    to sample from the prior or posterior predictive distributions.
     """
 
-    def __init__(
-        self,
-        logp,
-        shape=(),
-        dtype=None,
-        initval=0,
-        random=None,
-        wrap_random_with_dist_shape=True,
-        check_shape_in_random=True,
-        *args,
+    def __new__(
+        cls,
+        name: str,
+        *dist_params,
+        logp: Optional[Callable] = None,
+        logcdf: Optional[Callable] = None,
+        random: Optional[Callable] = None,
+        get_moment: Optional[Callable] = None,
+        ndim_supp: int = 0,
+        ndims_params: Optional[Sequence[int]] = None,
+        dtype: str = "floatX",
         **kwargs,
     ):
         """
         Parameters
         ----------
-
-        logp: callable
-            A callable that has the following signature ``logp(value)`` and
-            returns an Aesara tensor that represents the distribution's log
-            probability density.
-        shape: tuple (Optional): defaults to `()`
-            The shape of the distribution. The default value indicates a scalar.
-            If the distribution is *not* scalar-valued, the programmer should pass
-            a value here.
-        dtype: None, str (Optional)
-            The dtype of the distribution.
-        initval: number or array (Optional)
-            The ``initval`` of the RV's tensor that follow the ``DensityDist``
-            distribution.
-        args, kwargs: (Optional)
-            These are passed to the parent class' ``__init__``.
+        name : str
+        dist_params : Tuple
+            A sequence of the distribution's parameter. These will be converted into
+            aesara tensors internally. These parameters could be other ``RandomVariable``
+            instances.
+        logp : Optional[Callable]
+            A callable that calculates the log density of some given observed ``value``
+            conditioned on certain distribution parameter values. It must have the
+            following signature: ``logp(value, *dist_params)``, where ``value`` is
+            an Aesara tensor that represents the observed value, and ``dist_params``
+            are the tensors that hold the values of the distribution parameters.
+            This function must return an Aesara tensor. If ``None``, a ``NotImplemented``
+            error will be raised when trying to compute the distribution's logp.
+        logcdf : Optional[Callable]
+            A callable that calculates the log cummulative probability of some given observed
+            ``value`` conditioned on certain distribution parameter values. It must have the
+            following signature: ``logcdf(value, *dist_params)``, where ``value`` is
+            an Aesara tensor that represents the observed value, and ``dist_params``
+            are the tensors that hold the values of the distribution parameters.
+            This function must return an Aesara tensor. If ``None``, a ``NotImplemented``
+            error will be raised when trying to compute the distribution's logcdf.
+        random : Optional[Callable]
+            A callable that can be used to generate random draws from the distribution.
+            It must have the following signature: ``random(*dist_params, rng=None, size=None)``.
+            The distribution parameters are passed as positional arguments in the
+            same order as they are supplied when the ``DensityDist`` is constructed.
+            The keyword arguments are ``rnd``, which will provide the random variable's
+            associated :py:class:`~numpy.random.Generator`, and ``size``, that will represent
+            the desired size of the random draw. If ``None``, a ``NotImplemented``
+            error will be raised when trying to draw random samples from the distribution's
+            prior or posterior predictive.
+        get_moment : Optional[Callable]
+            A callable that can be used to compute the moments of the distribution.
+            It must have the following signature: ``get_moment(rv, size, *rv_inputs)``.
+            The distribution's :class:`~aesara.tensor.random.op.RandomVariable` is passed
+            as the first argument ``rv``. ``size`` is the random variable's size implied
+            by the ``dims``, ``size`` and parameters supplied to the distribution. Finally,
+            ``rv_inputs`` is the sequence of the distribution parameters, in the same order
+            as they were supplied when the DensityDist was created. If ``None``, a
+            ``NotImplemented`` error will be raised when trying to draw random samples from
+            the distribution's prior or posterior predictive.
+        ndim_supp : int
+            The number of dimensions in the support of the distribution. Defaults to assuming
+            a scalar distribution, i.e. ``ndim_supp = 0``.
+        ndims_params : Optional[Sequence[int]]
+            The list of number of dimensions in the support of each of the distribution's
+            parameters. If ``None``, it is assumed that all parameters are scalars, hence
+            the number of dimensions of their support will be 0.
+        dtype : str
+            The dtype of the distribution. All draws and observations passed into the distribution
+            will be casted onto this dtype.
+        kwargs :
+            Extra keyword arguments are passed to the parent's class ``__new__`` method.
 
         Examples
         --------
             .. code-block:: python
 
+                def logp(value, mu):
+                    return -(value - mu)**2
+
                 with pm.Model():
                     mu = pm.Normal('mu',0,1)
-                    normal_dist = pm.Normal.dist(mu, 1)
                     pm.DensityDist(
                         'density_dist',
-                        normal_dist.logp,
+                        mu,
+                        logp=logp,
                         observed=np.random.randn(100),
                     )
                     idata = pm.sample(100)
 
             .. code-block:: python
 
+                def logp(value, mu):
+                    return -(value - mu)**2
+
+                def random(mu, rng=None, size=None):
+                    return rng.normal(loc=mu, scale=1, size=size)
+
                 with pm.Model():
                     mu = pm.Normal('mu', 0 , 1)
-                    normal_dist = pm.Normal.dist(mu, 1, shape=3)
                     dens = pm.DensityDist(
                         'density_dist',
-                        normal_dist.logp,
+                        mu,
+                        logp=logp,
+                        random=random,
                         observed=np.random.randn(100, 3),
-                        shape=3,
+                        size=(100, 3),
                     )
                     prior = pm.sample_prior_predictive(10)['density_dist']
                 assert prior.shape == (10, 100, 3)
 
         """
-        if dtype is None:
+
+        if dist_params is None:
+            dist_params = []
+        elif len(dist_params) > 0 and callable(dist_params[0]):
+            raise TypeError(
+                "The DensityDist API has changed, you are using the old API "
+                "where logp was the first positional argument. In the current API, "
+                "the logp is a keyword argument, amongst other changes. Please refer "
+                "to the API documentation for more information on how to use the "
+                "new DensityDist API."
+            )
+        dist_params = [as_tensor_variable(param) for param in dist_params]
+
+        # Assume scalar ndims_params
+        if ndims_params is None:
+            ndims_params = [0] * len(dist_params)
+
+        if logp is None:
+            logp = default_not_implemented(name, "logp")
+
+        if logcdf is None:
+            logcdf = default_not_implemented(name, "logcdf")
+
+        if random is None:
+            random = default_not_implemented(name, "random")
+
+        if get_moment is None:
+            get_moment = default_not_implemented(name, "get_moment")
+
+        rv_op = type(
+            f"DensityDist_{name}",
+            (DensityDistRV,),
+            dict(
+                name=f"DensityDist_{name}",
+                inplace=False,
+                ndim_supp=ndim_supp,
+                ndims_params=ndims_params,
+                dtype=dtype,
+                # Specifc to DensityDist
+                _random_fn=random,
+            ),
+        )()
+
+        # Register custom logp
+        rv_type = type(rv_op)
+
+        @_logp.register(rv_type)
+        def density_dist_logp(op, rv, rvs_to_values, *dist_params, **kwargs):
+            value_var = rvs_to_values.get(rv, rv)
+            return logp(
+                value_var,
+                *dist_params,
+            )
+
+        @_logcdf.register(rv_type)
+        def density_dist_logcdf(op, var, rvs_to_values, *dist_params, **kwargs):
+            value_var = rvs_to_values.get(var, var)
+            return logcdf(value_var, *dist_params, **kwargs)
+
+        @_get_moment.register(rv_type)
+        def density_dist_get_moment(op, rv, size, *rv_inputs):
+            return get_moment(rv, size, *rv_inputs)
+
+        cls.rv_op = rv_op
+        return super().__new__(cls, name, *dist_params, **kwargs)
+
+    @classmethod
+    def dist(cls, *args, **kwargs):
+        output = super().dist(args, **kwargs)
+        if cls.rv_op.dtype == "floatX":
             dtype = aesara.config.floatX
-        super().__init__(shape, dtype, initval, *args, **kwargs)
-        self.logp = logp
-        if type(self.logp) == types.MethodType:
-            if PLATFORM != "linux":
-                warnings.warn(
-                    "You are passing a bound method as logp for DensityDist, this can lead to "
-                    "errors when sampling on platforms other than Linux. Consider using a "
-                    "plain function instead, or subclass Distribution."
-                )
-            elif type(multiprocessing.get_context()) != multiprocessing.context.ForkContext:
-                warnings.warn(
-                    "You are passing a bound method as logp for DensityDist, this can lead to "
-                    "errors when sampling when multiprocessing cannot rely on forking. Consider using a "
-                    "plain function instead, or subclass Distribution."
-                )
-        self.rand = random
-        self.wrap_random_with_dist_shape = wrap_random_with_dist_shape
-        self.check_shape_in_random = check_shape_in_random
+        else:
+            dtype = cls.rv_op.dtype
+        ndim_supp = cls.rv_op.ndim_supp
+        if not hasattr(output.tag, "test_value"):
+            size = to_tuple(kwargs.get("size", None)) + (1,) * ndim_supp
+            output.tag.test_value = np.zeros(size, dtype)
+        return output
+
+
+def default_not_implemented(rv_name, method_name):
+    if method_name == "random":
+        # This is a hack to catch the NotImplementedError when creating the RV without random
+        # If the message starts with "Cannot sample from", then it uses the test_value as
+        # the initial_val.
+        message = (
+            f"Cannot sample from the DensityDist '{rv_name}' because the {method_name} "
+            "keyword argument was not provided when the distribution was "
+            f"but this method had not been provided when the distribution was "
+            f"constructed. Please re-build your model and provide a callable "
+            f"to '{rv_name}'s {method_name} keyword argument.\n"
+        )
+    else:
+        message = (
+            f"Attempted to run {method_name} on the DensityDist '{rv_name}', "
+            f"but this method had not been provided when the distribution was "
+            f"constructed. Please re-build your model and provide a callable "
+            f"to '{rv_name}'s {method_name} keyword argument.\n"
+        )
+
+    def func(*args, **kwargs):
+        raise NotImplementedError(message)
 
-    def _distr_parameters_for_repr(self):
-        return []
+    return func
diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py
index b1a1dde678..9f913474d4 100644
--- a/pymc/gp/gp.py
+++ b/pymc/gp/gp.py
@@ -12,7 +12,6 @@
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
 
-import functools
 import warnings
 
 import aesara.tensor as at
@@ -730,12 +729,31 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw
                 )
         else:
             self.sigma = noise
-        logp = functools.partial(self._build_marginal_likelihood_logp, X=X, Xu=Xu, sigma=noise)
         if is_observed:
-            return pm.DensityDist(name, logp, observed=y, **kwargs)
+            return pm.DensityDist(
+                name,
+                X,
+                Xu,
+                self.sigma,
+                logp=self._build_marginal_likelihood_logp,
+                observed=y,
+                ndims_params=[2, 2, 0],
+                size=X.shape[0],
+                **kwargs,
+            )
         else:
-            shape = infer_shape(X, kwargs.pop("shape", None))
-            return pm.DensityDist(name, logp, size=shape, **kwargs)
+            return pm.DensityDist(
+                name,
+                X,
+                Xu,
+                self.sigma,
+                logp=self._build_marginal_likelihood_logp,
+                observed=y,
+                ndims_params=[2, 2, 0],
+                # ndim_supp=1,
+                size=X.shape[0],
+                **kwargs,
+            )
 
     def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
         sigma2 = at.square(sigma)
diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py
index ba1aef3f15..a272a5658b 100644
--- a/pymc/tests/test_distributions.py
+++ b/pymc/tests/test_distributions.py
@@ -120,6 +120,7 @@ def polyagamma_cdf(*args, **kwargs):
     logpt,
     logpt_sum,
 )
+from pymc.distributions.shape_utils import to_tuple
 from pymc.math import kronecker
 from pymc.model import Deterministic, Model, Point, Potential
 from pymc.tests.helpers import select_by_precision
@@ -1580,7 +1581,7 @@ def test_binomial(self):
             {"n": NatSmall, "p": Unit},
         )
 
-    @pytest.mark.xfail(reason="checkd tests has not been refactored")
+    @pytest.mark.xfail(reason="checkd tests have not been refactored")
     @pytest.mark.skipif(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
     def test_beta_binomial_distribution(self):
         self.checkd(
@@ -2448,7 +2449,7 @@ def test_orderedprobit(self, n):
             lambda value, eta, cutpoints: orderedprobit_logpdf(value, eta, cutpoints),
         )
 
-    @pytest.mark.xfail(reason="DensityDist no longer supported")
+    @pytest.mark.xfail(reason="checkd tests have not been refactored")
     def test_densitydist(self):
         def logp(x):
             return -log(2 * 0.5) - abs(x - 0.5) / 0.5
@@ -3227,14 +3228,13 @@ def test_issue_4499(self):
         assert_almost_equal(m.logp({"x": np.ones(10)}), 0 * 10)
 
 
-@pytest.mark.xfail(reason="DensityDist no longer supported")
 def test_serialize_density_dist():
     def func(x):
         return -2 * (x ** 2).sum()
 
     with pm.Model():
         pm.Normal("x")
-        y = pm.DensityDist("y", func)
+        y = pm.DensityDist("y", logp=func)
         pm.sample(draws=5, tune=1, mp_ctx="spawn")
 
     import cloudpickle
@@ -3287,3 +3287,34 @@ def test_logp_gives_migration_instructions(method, newcode):
         with pytest.raises(AttributeError, match=rf"use `{newcode}`"):
             f()
     pass
+
+
+def test_density_dist_old_api_error():
+    with pm.Model():
+        with pytest.raises(
+            TypeError, match="The DensityDist API has changed, you are using the old API"
+        ):
+            pm.DensityDist("a", lambda x: x)
+
+
+@pytest.mark.parametrize("size", [None, (), (2,)], ids=str)
+def test_density_dist_multivariate_logp(size):
+    supp_shape = 5
+    with pm.Model() as model:
+
+        def logp(value, mu):
+            return pm.MvNormal.logp(value, mu, at.eye(mu.shape[0]))
+
+        mu = pm.Normal("mu", size=supp_shape)
+        a = pm.DensityDist("a", mu, logp=logp, ndims_params=[1], ndim_supp=1, size=size)
+    mu_val = np.random.normal(loc=0, scale=1, size=supp_shape).astype(aesara.config.floatX)
+    a_val = np.random.normal(loc=mu_val, scale=1, size=to_tuple(size) + (supp_shape,)).astype(
+        aesara.config.floatX
+    )
+    log_densityt = pm.logpt(a, a.tag.value_var, sum=False)
+    assert (
+        log_densityt.eval(
+            {a.tag.value_var: a_val, mu.tag.value_var: mu_val},
+        ).shape
+        == to_tuple(size)
+    )
diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py
index a8c8e7cf45..b6990c6a38 100644
--- a/pymc/tests/test_distributions_random.py
+++ b/pymc/tests/test_distributions_random.py
@@ -1838,111 +1838,54 @@ def test_mixture_random_shape_fast():
     assert rand3.shape == (100, 20)
 
 
-@pytest.mark.xfail(reason="This distribution has not been refactored for v4")
 class TestDensityDist:
-    @pytest.mark.parametrize("shape", [(), (3,), (3, 2)], ids=str)
-    def test_density_dist_with_random_sampleable(self, shape):
+    @pytest.mark.parametrize("size", [(), (3,), (3, 2)], ids=str)
+    def test_density_dist_with_random(self, size):
         with pm.Model() as model:
             mu = pm.Normal("mu", 0, 1)
-            normal_dist = pm.Normal.dist(mu, 1, shape=shape)
             obs = pm.DensityDist(
                 "density_dist",
-                normal_dist.logp,
-                observed=np.random.randn(100, *shape),
-                shape=shape,
-                random=normal_dist.random,
+                mu,
+                random=lambda mu, rng=None, size=None: rng.normal(loc=mu, scale=1, size=size),
+                observed=np.random.randn(100, *size),
+                size=size,
             )
-            idata = pm.sample(100, cores=1)
 
-        samples = 500
-        size = 100
-        ppc = pm.sample_posterior_predictive(idata, samples=samples, model=model, size=size)
-        assert ppc["density_dist"].shape == (samples, size) + obs.distribution.shape
+        assert obs.eval().shape == (100,) + size
 
-    @pytest.mark.parametrize("shape", [(), (3,), (3, 2)], ids=str)
-    def test_density_dist_with_random_sampleable_failure(self, shape):
+    def test_density_dist_without_random(self):
         with pm.Model() as model:
             mu = pm.Normal("mu", 0, 1)
-            normal_dist = pm.Normal.dist(mu, 1, shape=shape)
             pm.DensityDist(
                 "density_dist",
-                normal_dist.logp,
-                observed=np.random.randn(100, *shape),
-                shape=shape,
-                random=normal_dist.random,
-                wrap_random_with_dist_shape=False,
-            )
-            idata = pm.sample(100, cores=1)
-
-        samples = 500
-        with pytest.raises(RuntimeError):
-            pm.sample_posterior_predictive(idata, samples=samples, model=model, size=100)
-
-    @pytest.mark.parametrize("shape", [(), (3,), (3, 2)], ids=str)
-    def test_density_dist_with_random_sampleable_hidden_error(self, shape):
-        with pm.Model() as model:
-            mu = pm.Normal("mu", 0, 1)
-            normal_dist = pm.Normal.dist(mu, 1, shape=shape)
-            obs = pm.DensityDist(
-                "density_dist",
-                normal_dist.logp,
-                observed=np.random.randn(100, *shape),
-                shape=shape,
-                random=normal_dist.random,
-                wrap_random_with_dist_shape=False,
-                check_shape_in_random=False,
-            )
-            idata = pm.sample(100, cores=1)
-
-        samples = 500
-        ppc = pm.sample_posterior_predictive(idata, samples=samples, model=model)
-        assert len(ppc["density_dist"]) == samples
-        assert ((samples,) + obs.distribution.shape) != ppc["density_dist"].shape
-
-    def test_density_dist_with_random_sampleable_handcrafted_success(self):
-        with pm.Model() as model:
-            mu = pm.Normal("mu", 0, 1)
-            normal_dist = pm.Normal.dist(mu, 1)
-            rvs = pm.Normal.dist(mu, 1, shape=100).random
-            obs = pm.DensityDist(
-                "density_dist",
-                normal_dist.logp,
+                mu,
+                logp=lambda value, mu: pm.Normal.logp(value, mu, 1),
                 observed=np.random.randn(100),
-                random=rvs,
-                wrap_random_with_dist_shape=False,
             )
             idata = pm.sample(100, cores=1)
 
         samples = 500
-        size = 100
-        ppc = pm.sample_posterior_predictive(idata, samples=samples, model=model, size=size)
-        assert ppc["density_dist"].shape == (samples, size) + obs.distribution.shape
+        with pytest.raises(NotImplementedError):
+            pm.sample_posterior_predictive(idata, samples=samples, model=model, size=100)
 
-    @pytest.mark.xfail
-    def test_density_dist_with_random_sampleable_handcrafted_success_fast(self):
+    @pytest.mark.parametrize("size", [(), (3,), (3, 2)], ids=str)
+    def test_density_dist_with_random_multivariate(self, size):
+        supp_shape = 5
         with pm.Model() as model:
-            mu = pm.Normal("mu", 0, 1)
-            normal_dist = pm.Normal.dist(mu, 1)
-            rvs = pm.Normal.dist(mu, 1, shape=100).random
+            mu = pm.Normal("mu", 0, 1, size=supp_shape)
             obs = pm.DensityDist(
                 "density_dist",
-                normal_dist.logp,
-                observed=np.random.randn(100),
-                random=rvs,
-                wrap_random_with_dist_shape=False,
+                mu,
+                random=lambda mu, rng=None, size=None: rng.multivariate_normal(
+                    mean=mu, cov=np.eye(len(mu)), size=size
+                ),
+                observed=np.random.randn(100, *size, supp_shape),
+                size=size,
+                ndims_params=[1],
+                ndim_supp=1,
             )
-            pm.sample(100, cores=1)
 
-    def test_density_dist_without_random_not_sampleable(self):
-        with pm.Model() as model:
-            mu = pm.Normal("mu", 0, 1)
-            normal_dist = pm.Normal.dist(mu, 1)
-            pm.DensityDist("density_dist", normal_dist.logp, observed=np.random.randn(100))
-            idata = pm.sample(100, cores=1)
-
-        samples = 500
-        with pytest.raises(ValueError):
-            pm.sample_posterior_predictive(idata, samples=samples, model=model, size=100)
+        assert obs.eval().shape == (100,) + size + (supp_shape,)
 
 
 class TestNestedRandom(SeededTest):
diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py
index 5ae4d25b25..424e87e5e8 100644
--- a/pymc/tests/test_gp.py
+++ b/pymc/tests/test_gp.py
@@ -915,7 +915,7 @@ def testAdditiveMarginal(self):
         fp = np.random.randn(self.Xnew.shape[0])
         npt.assert_allclose(fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2)
 
-    @pytest.mark.xfail(reason="DensityDist was not yet refactored")
+    @pytest.mark.xfail(reason="MarginalSparse must be refactored to not use DensityDist")
     @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"])
     def testAdditiveMarginalSparse(self, approx):
         Xu = np.random.randn(10, 3)
@@ -946,8 +946,10 @@ def testAdditiveMarginalSparse(self, approx):
         with model2:
             fp2 = gptot.conditional("fp2", self.Xnew)
 
-        fp = np.random.randn(self.Xnew.shape[0])
-        npt.assert_allclose(fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2)
+        fp = np.random.randn(self.Xnew.shape[0], 1)
+        npt.assert_allclose(
+            pm.logp(fp1, fp1).eval({fp1: fp}), pm.logp(fp2, fp2).eval({fp2: fp}), atol=0, rtol=1e-2
+        )
 
     @pytest.mark.xfail(reason="MvNormal was not yet refactored")
     def testAdditiveLatent(self):
diff --git a/pymc/tests/test_idata_conversion.py b/pymc/tests/test_idata_conversion.py
index a4dfffede2..93b38c0ccf 100644
--- a/pymc/tests/test_idata_conversion.py
+++ b/pymc/tests/test_idata_conversion.py
@@ -366,12 +366,15 @@ def test_multiple_observed_rv(self, log_likelihood):
         fails = check_multiple_attrs(test_dict, inference_data)
         assert not fails
 
-    @pytest.mark.xfail(reason="DensityDist not refactored for v4")
+    @pytest.mark.xfail(reason="MultiObservedRV is no longer used in v4")
     def test_multiple_observed_rv_without_observations(self):
         with pm.Model():
             mu = pm.Normal("mu")
             x = pm.DensityDist(  # pylint: disable=unused-variable
-                "x", logpt(pm.Normal.dist(mu, 1.0)), observed={"value": 0.1}
+                "x",
+                mu,
+                logp=lambda value, mu: pm.Normal.logp(value, mu, 1),
+                observed=0.1,
             )
             inference_data = pm.sample(100, chains=2, return_inferencedata=True)
         test_dict = {
@@ -384,7 +387,7 @@ def test_multiple_observed_rv_without_observations(self):
         assert not fails
         assert inference_data.observed_data.value.dtype.kind == "f"
 
-    @pytest.mark.xfail(reason="DensityDist not refactored for v4")
+    @pytest.mark.xfail(reason="MultiObservedRV is no longer used in v4")
     @pytest.mark.parametrize("multiobs", (True, False))
     def test_multiobservedrv_to_observed_data(self, multiobs):
         # fake regression data, with weights (W)
@@ -400,12 +403,13 @@ def test_multiobservedrv_to_observed_data(self, multiobs):
             b = pm.Normal("b", 0, 10)
             mu = a + b * X
             sigma = pm.HalfNormal("sigma", 1)
+            w = W
 
-            def weighted_normal(y, w):
-                return w * logpt(pm.Normal.dist(mu=mu, sd=sigma), y)
+            def weighted_normal(value, mu, sigma, w):
+                return w * pm.Normal.logp(value, mu, sigma)
 
             y_logp = pm.DensityDist(  # pylint: disable=unused-variable
-                "y_logp", weighted_normal, observed={"y": Y, "w": W}
+                "y_logp", mu, sigma, w, logp=weighted_normal, observed=Y, size=N
             )
             idata = pm.sample(
                 20, tune=20, return_inferencedata=True, idata_kwargs={"density_dist_obs": multiobs}
diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py
index 61a23fc97c..c005f29990 100644
--- a/pymc/tests/test_model.py
+++ b/pymc/tests/test_model.py
@@ -346,7 +346,6 @@ def test_aesara_switch_broadcast_edge_cases_2(self):
         npt.assert_allclose(m.dlogp([m.rvs_to_values[mu]])({"mu": 0}), 2.499424682024436, rtol=1e-5)
 
 
-@pytest.mark.xfail(reason="DensityDist not refactored for v4")
 def test_multiple_observed_rv():
     "Test previously buggy multi-observed RV comparison code."
     y1_data = np.random.randn(10)
@@ -354,7 +353,7 @@ def test_multiple_observed_rv():
     with pm.Model() as model:
         mu = pm.Normal("mu")
         x = pm.DensityDist(  # pylint: disable=unused-variable
-            "x", pm.Normal.dist(mu, 1.0).logp, observed={"value": 0.1}
+            "x", mu, logp=lambda value, mu: pm.Normal.logp(value, mu, 1.0), observed=0.1
         )
     assert not model["x"] == model["mu"]
     assert model["x"] == model["x"]
diff --git a/pymc/tests/test_moment.py b/pymc/tests/test_moment.py
new file mode 100644
index 0000000000..ec8fd9450d
--- /dev/null
+++ b/pymc/tests/test_moment.py
@@ -0,0 +1,38 @@
+import aesara
+import numpy as np
+import pytest
+
+from aesara import tensor as at
+
+import pymc as pm
+
+from pymc.distributions.distribution import get_moment
+from pymc.distributions.shape_utils import to_tuple
+
+
+@pytest.mark.parametrize("size", [None, (), (2,), (3, 2)], ids=str)
+def test_density_dist_moment_scalar(size):
+    def moment(rv, size, mu):
+        return (at.ones(size) * mu).astype(rv.dtype)
+
+    mu_val = np.array(np.random.normal(loc=2, scale=1)).astype(aesara.config.floatX)
+    with pm.Model():
+        mu = pm.Normal("mu")
+        a = pm.DensityDist("a", mu, get_moment=moment, size=size)
+    evaled_moment = get_moment(a).eval({mu: mu_val})
+    assert evaled_moment.shape == to_tuple(size)
+    assert np.all(evaled_moment == mu_val)
+
+
+@pytest.mark.parametrize("size", [None, (), (2,), (3, 2)], ids=str)
+def test_density_dist_moment_multivariate(size):
+    def moment(rv, size, mu):
+        return (at.ones(size)[..., None] * mu).astype(rv.dtype)
+
+    mu_val = np.random.normal(loc=2, scale=1, size=5).astype(aesara.config.floatX)
+    with pm.Model():
+        mu = pm.Normal("mu", size=5)
+        a = pm.DensityDist("a", mu, get_moment=moment, ndims_params=[1], ndim_supp=1, size=size)
+    evaled_moment = get_moment(a).eval({mu: mu_val})
+    assert evaled_moment.shape == to_tuple(size) + (5,)
+    assert np.all(evaled_moment == mu_val)
diff --git a/pymc/tests/test_parallel_sampling.py b/pymc/tests/test_parallel_sampling.py
index bdbfc9854f..645103d985 100644
--- a/pymc/tests/test_parallel_sampling.py
+++ b/pymc/tests/test_parallel_sampling.py
@@ -186,7 +186,6 @@ def test_iterator():
             pass
 
 
-@pytest.mark.xfail(reason="DensityDist was not yet refactored for v4")
 def test_spawn_densitydist_function():
     with pm.Model() as model:
         mu = pm.Normal("mu", 0, 1)
@@ -194,38 +193,19 @@ def test_spawn_densitydist_function():
         def func(x):
             return -2 * (x ** 2).sum()
 
-        obs = pm.DensityDist("density_dist", func, observed=np.random.randn(100))
+        obs = pm.DensityDist("density_dist", logp=func, observed=np.random.randn(100))
         pm.sample(draws=10, tune=10, step=pm.Metropolis(), cores=2, mp_ctx="spawn")
 
 
-@pytest.mark.xfail(reason="DensityDist was not yet refactored for v4")
 def test_spawn_densitydist_bound_method():
+    N = 100
     with pm.Model() as model:
         mu = pm.Normal("mu", 0, 1)
-        normal_dist = pm.Normal.dist(mu, 1)
-        logp = lambda x: pm.logp(normal_dist, x, transformed=False)
-        obs = pm.DensityDist("density_dist", logp, observed=np.random.randn(100))
-        msg = "logp for DensityDist is a bound method, leading to RecursionError while serializing"
-        with pytest.raises(ValueError, match=msg):
-            pm.sample(draws=10, tune=10, step=pm.Metropolis(), cores=2, mp_ctx="spawn")
+        normal_dist = pm.Normal.dist(mu, 1, size=N)
 
+        def logp(x):
+            out = pm.logp(normal_dist, x, transformed=False)
+            return out
 
-@pytest.mark.xfail(reason="DensityDist was not yet refactored for v4")
-def test_spawn_densitydist_syswarning(monkeypatch):
-    monkeypatch.setattr("pymc.distributions.distribution.PLATFORM", "win32")
-    with pm.Model() as model:
-        mu = pm.Normal("mu", 0, 1)
-        normal_dist = pm.Normal.dist(mu, 1)
-        with pytest.warns(UserWarning, match="errors when sampling on platforms"):
-            obs = pm.DensityDist("density_dist", normal_dist.logp, observed=np.random.randn(100))
-
-
-@pytest.mark.xfail(reason="DensityDist was not yet refactored for v4")
-def test_spawn_densitydist_mpctxwarning(monkeypatch):
-    ctx = multiprocessing.get_context("spawn")
-    monkeypatch.setattr(multiprocessing, "get_context", lambda: ctx)
-    with pm.Model() as model:
-        mu = pm.Normal("mu", 0, 1)
-        normal_dist = pm.Normal.dist(mu, 1)
-        with pytest.warns(UserWarning, match="errors when sampling when multiprocessing"):
-            obs = pm.DensityDist("density_dist", normal_dist.logp, observed=np.random.randn(100))
+        obs = pm.DensityDist("density_dist", logp=logp, observed=np.random.randn(N), size=N)
+        pm.sample(draws=10, tune=10, step=pm.Metropolis(), cores=2, mp_ctx="spawn")
diff --git a/pymc/tests/test_sampling.py b/pymc/tests/test_sampling.py
index 7d929a428b..ad65e4c164 100644
--- a/pymc/tests/test_sampling.py
+++ b/pymc/tests/test_sampling.py
@@ -1043,7 +1043,6 @@ def test_shared(self):
         assert gen2["y"].shape == (draws, n2)
         assert gen2["o"].shape == (draws, n2)
 
-    @pytest.mark.xfail(reason="DensityDist not refactored for v4")
     def test_density_dist(self):
         obs = np.random.normal(-1, 0.1, size=10)
         with pm.Model():
@@ -1051,8 +1050,9 @@ def test_density_dist(self):
             sd = pm.Gamma("sd", 1, 2)
             a = pm.DensityDist(
                 "a",
-                pm.Normal.dist(mu, sd).logp,
-                random=pm.Normal.dist(mu, sd).random,
+                mu,
+                sd,
+                random=lambda mu, sd, rng=None, size=None: rng.normal(loc=mu, scale=sd, size=size),
                 observed=obs,
             )
             prior = pm.sample_prior_predictive()

From b9b9ea1f0d2032b9b220ad7deb7df582cc0a06f4 Mon Sep 17 00:00:00 2001
From: lucianopaz <luciano.paz.neuro@gmail.com>
Date: Tue, 28 Sep 2021 16:38:12 +0200
Subject: [PATCH 2/2] Addressed review comments

---
 docs/source/Probability_Distributions.rst | 8 ++++----
 pymc/distributions/distribution.py        | 2 +-
 pymc/tests/test_moment.py                 | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/source/Probability_Distributions.rst b/docs/source/Probability_Distributions.rst
index 7b8999e487..22af81667a 100644
--- a/docs/source/Probability_Distributions.rst
+++ b/docs/source/Probability_Distributions.rst
@@ -58,16 +58,16 @@ An exponential survival function, where :math:`c=0` denotes failure (or non-surv
     f(c, t) = \left\{ \begin{array}{l} \exp(-\lambda t), \text{if c=1} \\
                \lambda \exp(-\lambda t), \text{if c=0}  \end{array} \right.
 
-Such a function can be implemented as a PyMC distribution by writing a function that specifies the log-probability, then passing that function as a keyword argument to the ``DensityDist`` function, which creates an instance of a PyMC3 distribution with the custom function as its log-probability.
+Such a function can be implemented as a PyMC distribution by writing a function that specifies the log-probability, then passing that function as a keyword argument to the ``DensityDist`` function, which creates an instance of a PyMC distribution with the custom function as its log-probability.
 
 For the exponential survival function, this is:
 
 ::
 
-    def logp(value, t, λ):
-        return (value * log(λ) - λ * t).sum()
+    def logp(value, t, lam):
+        return (value * log(lam) - lam * t).sum()
 
-    exp_surv = pm.DensityDist('exp_surv', t, λ, logp=logp, observed=failure)
+    exp_surv = pm.DensityDist('exp_surv', t, lam, logp=logp, observed=failure)
 
 Similarly, if a random number generator is required, a function returning random numbers corresponding to the probability distribution can be passed as the ``random`` argument.
 
diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py
index 3b9b91593e..05f996bab0 100644
--- a/pymc/distributions/distribution.py
+++ b/pymc/distributions/distribution.py
@@ -434,7 +434,7 @@ def __new__(
         name : str
         dist_params : Tuple
             A sequence of the distribution's parameter. These will be converted into
-            aesara tensors internally. These parameters could be other ``RandomVariable``
+            Aesara tensors internally. These parameters could be other ``RandomVariable``
             instances.
         logp : Optional[Callable]
             A callable that calculates the log density of some given observed ``value``
diff --git a/pymc/tests/test_moment.py b/pymc/tests/test_moment.py
index ec8fd9450d..17ab2423a8 100644
--- a/pymc/tests/test_moment.py
+++ b/pymc/tests/test_moment.py
@@ -10,7 +10,7 @@
 from pymc.distributions.shape_utils import to_tuple
 
 
-@pytest.mark.parametrize("size", [None, (), (2,), (3, 2)], ids=str)
+@pytest.mark.parametrize("size", [(), (2,), (3, 2)], ids=str)
 def test_density_dist_moment_scalar(size):
     def moment(rv, size, mu):
         return (at.ones(size) * mu).astype(rv.dtype)
@@ -24,7 +24,7 @@ def moment(rv, size, mu):
     assert np.all(evaled_moment == mu_val)
 
 
-@pytest.mark.parametrize("size", [None, (), (2,), (3, 2)], ids=str)
+@pytest.mark.parametrize("size", [(), (2,), (3, 2)], ids=str)
 def test_density_dist_moment_multivariate(size):
     def moment(rv, size, mu):
         return (at.ones(size)[..., None] * mu).astype(rv.dtype)