
Commit d848b9c

refactor kumaraswamy (#4706)

* refactor kumaraswamy
* add logcdf method

Co-authored-by: Farhan Reynaldo <[email protected]>
Co-authored-by: Ricardo <[email protected]>

1 parent 1a2da3d commit d848b9c

File tree: 4 files changed (+80 −47 lines)

RELEASE-NOTES.md (+1)

@@ -9,6 +9,7 @@
 
 ### New Features
 - The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models.
+- Add `logcdf` method to Kumaraswamy distribution (see [#4706](https://github.com/pymc-devs/pymc3/pull/4706)).
 - ...
 
 ### Maintenance
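Note: the new `logcdf` follows the closed-form Kumaraswamy CDF, F(x) = 1 - (1 - x^a)^b, rewritten as log1mexp(-b * log1p(-x^a)) for numerical stability. A minimal NumPy sketch (not part of the commit; the `log1mexp` helper below only approximates what `pm.math.log1mexp` does) checking that the rewritten form agrees with the direct one:

import numpy as np

def log1mexp(x):
    # Stable log(1 - exp(-x)) for x > 0; switches branches near log(2),
    # roughly mirroring pm.math.log1mexp / log1mexp_numpy.
    return np.where(x < np.log(2), np.log(-np.expm1(-x)), np.log1p(-np.exp(-x)))

def naive_logcdf(x, a, b):
    # Direct log of the Kumaraswamy CDF: F(x) = 1 - (1 - x**a)**b
    return np.log(1 - (1 - x ** a) ** b)

def stable_logcdf(x, a, b):
    # The rewritten form used in the diff: log1mexp(-b * log1p(-x**a))
    return log1mexp(-(b * np.log1p(-(x ** a))))

x = np.array([0.1, 0.5, 0.9])
print(np.allclose(naive_logcdf(x, 2.0, 5.0), stable_logcdf(x, 2.0, 5.0)))  # True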

pymc3/distributions/continuous.py (+41 −38)

@@ -1271,6 +1271,22 @@ def logcdf(value, alpha, beta):
         )
 
 
+class KumaraswamyRV(RandomVariable):
+    name = "kumaraswamy"
+    ndim_supp = 0
+    ndims_params = [0, 0]
+    dtype = "floatX"
+    _print_name = ("Kumaraswamy", "\\operatorname{Kumaraswamy}")
+
+    @classmethod
+    def rng_fn(cls, rng, a, b, size):
+        u = rng.uniform(size=size)
+        return (1 - (1 - u) ** (1 / b)) ** (1 / a)
+
+
+kumaraswamy = KumaraswamyRV()
+
+
 class Kumaraswamy(UnitContinuous):
     r"""
     Kumaraswamy log-likelihood.
@@ -1313,67 +1329,54 @@ class Kumaraswamy(UnitContinuous):
     b: float
         b > 0.
     """
+    rv_op = kumaraswamy
 
-    def __init__(self, a, b, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        self.a = a = at.as_tensor_variable(floatX(a))
-        self.b = b = at.as_tensor_variable(floatX(b))
-
-        ln_mean = at.log(b) + at.gammaln(1 + 1 / a) + at.gammaln(b) - at.gammaln(1 + 1 / a + b)
-        self.mean = at.exp(ln_mean)
-        ln_2nd_raw_moment = (
-            at.log(b) + at.gammaln(1 + 2 / a) + at.gammaln(b) - at.gammaln(1 + 2 / a + b)
-        )
-        self.variance = at.exp(ln_2nd_raw_moment) - self.mean ** 2
+    @classmethod
+    def dist(cls, a, b, *args, **kwargs):
+        a = at.as_tensor_variable(floatX(a))
+        b = at.as_tensor_variable(floatX(b))
 
         assert_negative_support(a, "a", "Kumaraswamy")
         assert_negative_support(b, "b", "Kumaraswamy")
 
-    def _random(self, a, b, size=None):
-        u = np.random.uniform(size=size)
-        return (1 - (1 - u) ** (1 / b)) ** (1 / a)
+        return super().dist([a, b], *args, **kwargs)
 
-    def random(self, point=None, size=None):
+    def logp(value, a, b):
         """
-        Draw random values from Kumaraswamy distribution.
+        Calculate log-probability of Kumaraswamy distribution at specified value.
 
         Parameters
         ----------
-        point: dict, optional
-            Dict of variable values on which random values are to be
-            conditioned (uses default point if not specified).
-        size: int, optional
-            Desired size of random sample (returns one sample if not
-            specified).
+        value: numeric
+            Value(s) for which log-probability is calculated. If the log probabilities for multiple
+            values are desired the values must be provided in a numpy array or Aesara tensor
 
         Returns
        -------
-        array
+        TensorVariable
         """
-        # a, b = draw_values([self.a, self.b], point=point, size=size)
-        # return generate_samples(self._random, a, b, dist_shape=self.shape, size=size)
+        logp = at.log(a) + at.log(b) + (a - 1) * at.log(value) + (b - 1) * at.log(1 - value ** a)
 
-    def logp(self, value):
-        """
-        Calculate log-probability of Kumaraswamy distribution at specified value.
+        return bound(logp, value >= 0, value <= 1, a > 0, b > 0)
+
+    def logcdf(value, a, b):
+        r"""
+        Compute the log of cumulative distribution function for the Kumaraswamy distribution
+        at the specified value.
 
         Parameters
         ----------
-        value: numeric
-            Value(s) for which log-probability is calculated. If the log probabilities for multiple
-            values are desired the values must be provided in a numpy array or Aesara tensor
+        value: numeric or np.ndarray or aesara.tensor
+            Value(s) for which log CDF is calculated. If the log CDF for
+            multiple values are desired the values must be provided in a numpy
+            array or Aesara tensor.
 
         Returns
         -------
         TensorVariable
         """
-        a = self.a
-        b = self.b
-
-        logp = at.log(a) + at.log(b) + (a - 1) * at.log(value) + (b - 1) * at.log(1 - value ** a)
-
-        return bound(logp, value >= 0, value <= 1, a > 0, b > 0)
+        logcdf = log1mexp(-(b * at.log1p(-(value ** a))))
+        return bound(at.switch(value < 1, logcdf, 0), value >= 0, a > 0, b > 0)
 
 
 class Exponential(PositiveContinuous):
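Note: `KumaraswamyRV.rng_fn` above is inverse-transform sampling. Inverting the CDF F(x) = 1 - (1 - x^a)^b gives x = (1 - (1 - u)^(1/b))^(1/a) for u ~ Uniform(0, 1). A small NumPy sketch (illustrative only, not from the commit) comparing the empirical CDF of such draws with the analytic CDF:

import numpy as np

rng = np.random.default_rng(123)
a, b = 2.0, 5.0

# Inverse-transform sampling, same formula as KumaraswamyRV.rng_fn
u = rng.uniform(size=50_000)
draws = (1 - (1 - u) ** (1 / b)) ** (1 / a)

# Compare the empirical CDF against F(x) = 1 - (1 - x**a)**b at a few points
for x in (0.1, 0.3, 0.5, 0.7):
    empirical = (draws <= x).mean()
    analytic = 1 - (1 - x ** a) ** b
    print(f"x={x:.1f}  empirical={empirical:.3f}  analytic={analytic:.3f}")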

pymc3/tests/test_distributions.py (+16 −3)

@@ -1110,15 +1110,28 @@ def test_beta(self):
             decimal=select_by_precision(float64=5, float32=3),
         )
 
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     def test_kumaraswamy(self):
-        # Scipy does not have a built-in Kumaraswamy pdf
+        # Scipy does not have a built-in Kumaraswamy
         def scipy_log_pdf(value, a, b):
             return (
                 np.log(a) + np.log(b) + (a - 1) * np.log(value) + (b - 1) * np.log(1 - value ** a)
             )
 
-        self.check_logp(Kumaraswamy, Unit, {"a": Rplus, "b": Rplus}, scipy_log_pdf)
+        def scipy_log_cdf(value, a, b):
+            return pm.math.log1mexp_numpy(-(b * np.log1p(-(value ** a))))
+
+        self.check_logp(
+            Kumaraswamy,
+            Unit,
+            {"a": Rplus, "b": Rplus},
+            scipy_log_pdf,
+        )
+        self.check_logcdf(
+            Kumaraswamy,
+            Unit,
+            {"a": Rplus, "b": Rplus},
+            scipy_log_cdf,
+        )
 
     def test_exponential(self):
         self.check_logp(

pymc3/tests/test_distributions_random.py (+22 −6)

@@ -277,12 +277,6 @@ class TestWald(BaseTestCases.BaseTestCase):
     params = {"mu": 1.0, "lam": 1.0, "alpha": 0.0}
 
 
-@pytest.mark.xfail(reason="This distribution has not been refactored for v4")
-class TestKumaraswamy(BaseTestCases.BaseTestCase):
-    distribution = pm.Kumaraswamy
-    params = {"a": 1.0, "b": 1.0}
-
-
 @pytest.mark.xfail(reason="This distribution has not been refactored for v4")
 class TestAsymmetricLaplace(BaseTestCases.BaseTestCase):
     distribution = pm.AsymmetricLaplace
@@ -498,6 +492,28 @@ class TestMoyal(BaseTestDistribution):
     ]
 
 
+class TestKumaraswamy(BaseTestDistribution):
+    def kumaraswamy_rng_fn(self, a, b, size, uniform_rng_fct):
+        return (1 - (1 - uniform_rng_fct(size=size)) ** (1 / b)) ** (1 / a)
+
+    def seeded_kumaraswamy_rng_fn(self):
+        uniform_rng_fct = functools.partial(
+            getattr(np.random.RandomState, "uniform"), self.get_random_state()
+        )
+        return functools.partial(self.kumaraswamy_rng_fn, uniform_rng_fct=uniform_rng_fct)
+
+    pymc_dist = pm.Kumaraswamy
+    pymc_dist_params = {"a": 1.0, "b": 1.0}
+    expected_rv_op_params = {"a": 1.0, "b": 1.0}
+    reference_dist_params = {"a": 1.0, "b": 1.0}
+    reference_dist = seeded_kumaraswamy_rng_fn
+    tests_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_pymc_draws_match_reference",
+        "check_rv_size",
+    ]
+
+
 class TestStudentTLam(BaseTestDistribution):
     pymc_dist = pm.StudentT
     lam, sigma = get_tau_sigma(tau=2.0)
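
Note: a minimal usage sketch of the refactored distribution, assuming the v4 development branch this commit targets, where `.dist()` returns an Aesara RandomVariable graph that can be evaluated directly (illustrative only, not part of the commit):

import pymc3 as pm

# After the refactor, Kumaraswamy.dist() builds an Aesara RandomVariable,
# so a single forward draw can be taken by evaluating the graph.
x = pm.Kumaraswamy.dist(a=2.0, b=5.0)
print(x.eval())  # one draw on the unit interval

# The distribution can still be used as a prior inside a model as before.
with pm.Model():
    k = pm.Kumaraswamy("k", a=2.0, b=5.0)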
