From aa6ae059414816541596ed1caf7d2d06a7b957dc Mon Sep 17 00:00:00 2001 From: lucianopaz Date: Fri, 15 May 2020 18:40:38 +0200 Subject: [PATCH 1/6] TST: Added test for the failing sampler --- pymc3/distributions/dist_math.py | 47 ++++++++++++++++++++++++++++++++ pymc3/tests/test_dist_math.py | 12 +++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index 81c595c921..6c2c772453 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -33,6 +33,11 @@ f = floatX c = - .5 * np.log(2. * np.pi) +_beta_clip_values = { + dtype: (np.nextafter(0, 1, dtype=dtype), np.nextafter(1, 0, dtype=dtype)) + for dtype in ["float16", "float32", "float64", "float128"] +} + def bound(logp, *conditions, **kwargs): @@ -548,3 +553,45 @@ def incomplete_beta(a, b, value): tt.and_(tt.le(b * value, one), tt.le(value, 0.95)), ps, t)) + + +def clipped_beta_rvs(a, b, size=None, dtype="float64"): + """Draw beta distributed random samples in the open :math:`(0, 1)` interval. + + The samples are generated with ``numpy.random.beta``, but any value that + is equal to 0 or 1 will be shifted towards the next floating point in the + interval :math:`[0, 1]`, depending on the floating point precision that is + given by ``dtype``. + + Parameters + ---------- + a : float or array_like of floats + Alpha, positive (>0). + b : float or array_like of floats + Beta, positive (>0). + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a single value is returned if ``a`` and ``b`` are both scalars. + Otherwise, ``np.broadcast(a, b).size`` samples are drawn. + dtype : str or dtype instance + The floating point precision that the samples should have. This also + determines the value that will be used to shift any samples returned + by the numpy random number generator that are zero or one. + + Returns + ------- + out : ndarray or scalar + Drawn samples from the parameterized beta distribution. The numpy + implementation can yield values that are equal to zero or one. We + assume the support of the Beta distribution to be in the open interval + :math:`(0, 1)`, so we shift any sample that is equal to 0 to + ``np.nextafter(0, 1, dtype=dtype)`` and any sample that is equal to 1 + if shifted to ``np.nextafter(1, 0, dtype=dtype)``. + + """ + out = np.random.beta(a, b, size=size).astype(dtype) + lower, upper = _beta_clip_values[dtype] + out[out == 0] = lower + out[out == 1] = upper + return out \ No newline at end of file diff --git a/pymc3/tests/test_dist_math.py b/pymc3/tests/test_dist_math.py index b55e9792bc..f54f91bc2e 100644 --- a/pymc3/tests/test_dist_math.py +++ b/pymc3/tests/test_dist_math.py @@ -24,7 +24,9 @@ from ..theanof import floatX from ..distributions import Discrete from ..distributions.dist_math import ( - bound, factln, alltrue_scalar, MvNormalLogp, SplineWrapper, i0e) + bound, factln, alltrue_scalar, MvNormalLogp, SplineWrapper, i0e, + clipped_beta_rvs, +) def test_bound(): @@ -216,3 +218,11 @@ def test_grad(self): utt.verify_grad(i0e, [-2.]) utt.verify_grad(i0e, [[0.5, -2.]]) utt.verify_grad(i0e, [[[0.5, -2.]]]) + + +@pytest.mark.parametrize("dtype", ["float16", "float32", "float64", "float128"]) +def test_clipped_beta_rvs(dtype): + # Verify that the samples drawn from the beta distribution are never + # equal to zero or one (issue #3898) + values = clipped_beta_rvs(0.01, 0.01, size=1000000, dtype=dtype) + assert not (np.any(values == 0) or np.any(values == 1)) \ No newline at end of file From 4c62f6179fa6a005877682e0516e70bfbe8dd7b8 Mon Sep 17 00:00:00 2001 From: lucianopaz Date: Fri, 15 May 2020 18:41:16 +0200 Subject: [PATCH 2/6] BUG: Make Beta.random to use the clipped random number generator --- pymc3/distributions/continuous.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index c1c511e835..dfa82dc70b 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -32,6 +32,7 @@ from .dist_math import ( alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow, normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue, + clipped_beta_rvs, ) from .distribution import (Continuous, draw_values, generate_samples) @@ -1290,7 +1291,7 @@ def random(self, point=None, size=None): """ alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) - return generate_samples(stats.beta.rvs, alpha, beta, + return generate_samples(clipped_beta_rvs, alpha, beta, dist_shape=self.shape, size=size) From fd713be249f1b2954b524add4a98ed4eec3d1e25 Mon Sep 17 00:00:00 2001 From: lucianopaz Date: Fri, 15 May 2020 18:45:18 +0200 Subject: [PATCH 3/6] DOC: Added to release notes --- RELEASE-NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index d593ad2141..34e0b68836 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -25,6 +25,7 @@ - End of sampling report now uses `arviz.InferenceData` internally and avoids storing pointwise log likelihood (see [#3883](https://github.com/pymc-devs/pymc3/pull/3883)). - The multiprocessing start method on MacOS is now set to "forkserver", to avoid crashes (see issue [#3849](https://github.com/pymc-devs/pymc3/issues/3849), solved by [#3919](https://github.com/pymc-devs/pymc3/pull/3919)). +- Forced the `Beta` distribution's `random` method to generate samples that are in the open interval $(0, 1)$, i.e. no value can be equal to zero or equal to one (issue [#3898](https://github.com/pymc-devs/pymc3/issues/3898) fixed by [#3922](https://github.com/pymc-devs/pymc3/pull/3919)). ### Deprecations - Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6. From e53c1c488d5074565f4e2da152fc526906854676 Mon Sep 17 00:00:00 2001 From: lucianopaz Date: Fri, 15 May 2020 18:50:24 +0200 Subject: [PATCH 4/6] Fix the PR link in the release notes --- RELEASE-NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 34e0b68836..6ebf36917c 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -25,7 +25,7 @@ - End of sampling report now uses `arviz.InferenceData` internally and avoids storing pointwise log likelihood (see [#3883](https://github.com/pymc-devs/pymc3/pull/3883)). - The multiprocessing start method on MacOS is now set to "forkserver", to avoid crashes (see issue [#3849](https://github.com/pymc-devs/pymc3/issues/3849), solved by [#3919](https://github.com/pymc-devs/pymc3/pull/3919)). -- Forced the `Beta` distribution's `random` method to generate samples that are in the open interval $(0, 1)$, i.e. no value can be equal to zero or equal to one (issue [#3898](https://github.com/pymc-devs/pymc3/issues/3898) fixed by [#3922](https://github.com/pymc-devs/pymc3/pull/3919)). +- Forced the `Beta` distribution's `random` method to generate samples that are in the open interval $(0, 1)$, i.e. no value can be equal to zero or equal to one (issue [#3898](https://github.com/pymc-devs/pymc3/issues/3898) fixed by [#3924](https://github.com/pymc-devs/pymc3/pull/3924)). ### Deprecations - Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6. From 12e30f1bbc0c383885503f5fe873b26745ce35ee Mon Sep 17 00:00:00 2001 From: lucianopaz Date: Fri, 15 May 2020 21:03:11 +0200 Subject: [PATCH 5/6] FIX: use scipy.stats and change ref_rand to point to clipped_beta_rvs --- pymc3/distributions/dist_math.py | 13 +++++++------ pymc3/tests/test_distributions_random.py | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index 6c2c772453..175df921dd 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -19,6 +19,7 @@ ''' import numpy as np import scipy.linalg +import scipy.stats import theano.tensor as tt import theano from theano.scalar import UnaryScalarOp, upgrade_to_float_no_complex @@ -558,7 +559,7 @@ def incomplete_beta(a, b, value): def clipped_beta_rvs(a, b, size=None, dtype="float64"): """Draw beta distributed random samples in the open :math:`(0, 1)` interval. - The samples are generated with ``numpy.random.beta``, but any value that + The samples are generated with ``scipy.stats.beta.rvs``, but any value that is equal to 0 or 1 will be shifted towards the next floating point in the interval :math:`[0, 1]`, depending on the floating point precision that is given by ``dtype``. @@ -566,9 +567,9 @@ def clipped_beta_rvs(a, b, size=None, dtype="float64"): Parameters ---------- a : float or array_like of floats - Alpha, positive (>0). + Alpha, strictly positive (>0). b : float or array_like of floats - Beta, positive (>0). + Beta, strictly positive (>0). size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -582,15 +583,15 @@ def clipped_beta_rvs(a, b, size=None, dtype="float64"): Returns ------- out : ndarray or scalar - Drawn samples from the parameterized beta distribution. The numpy + Drawn samples from the parameterized beta distribution. The scipy implementation can yield values that are equal to zero or one. We assume the support of the Beta distribution to be in the open interval :math:`(0, 1)`, so we shift any sample that is equal to 0 to ``np.nextafter(0, 1, dtype=dtype)`` and any sample that is equal to 1 - if shifted to ``np.nextafter(1, 0, dtype=dtype)``. + is shifted to ``np.nextafter(1, 0, dtype=dtype)``. """ - out = np.random.beta(a, b, size=size).astype(dtype) + out = scipy.stats.beta.rvs(a, b, size=size).astype(dtype) lower, upper = _beta_clip_values[dtype] out[out == 0] = lower out[out == 1] = upper diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 5da7c4ce5e..17e7c29140 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -22,6 +22,7 @@ import theano import pymc3 as pm +from pymc3.distributions.dist_math import clipped_beta_rvs from pymc3.distributions.distribution import (draw_values, _DrawValuesContext, _DrawValuesContextBlocker) @@ -548,7 +549,7 @@ def ref_rand(size, mu, lam, alpha): def test_beta(self): def ref_rand(size, alpha, beta): - return st.beta.rvs(a=alpha, b=beta, size=size) + return clipped_beta_rvs(a=alpha, b=beta, size=size) pymc3_random(pm.Beta, {'alpha': Rplus, 'beta': Rplus}, ref_rand=ref_rand) def test_exponential(self): From cb04b6ba7c0251df2ab6ba3fde28c1c6cd656d1c Mon Sep 17 00:00:00 2001 From: lucianopaz Date: Fri, 15 May 2020 22:15:04 +0200 Subject: [PATCH 6/6] FIX: Use np.maximum and np.minimum to work with scalars and arrays --- pymc3/distributions/dist_math.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index 175df921dd..2a7e5c1bb9 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -593,6 +593,4 @@ def clipped_beta_rvs(a, b, size=None, dtype="float64"): """ out = scipy.stats.beta.rvs(a, b, size=size).astype(dtype) lower, upper = _beta_clip_values[dtype] - out[out == 0] = lower - out[out == 1] = upper - return out \ No newline at end of file + return np.maximum(np.minimum(out, upper), lower) \ No newline at end of file