Skip to content

Commit f5d8324

Browse files
lucianopazColCarroll
authored and committed
Improve shape handling in generate_samples (#3456)
* Added shape_utils script * Import from shape_utils and change generate samples * Fixed lint and errors * Still errors with broadcast shapes * Fixed triangular errors * Fixed distributions random errors. * Fixed Multinomial error * Fixed multinomial n shape error * Added shape broadcasting tests * Added broadcast samples tests * Made all tests use fixtures. Finished shape broadcasting tests. * Fixed shape_utils docstrings. * Added random variable sampling to test_shape testsuite. * Removed redundant broadcast_distribution_samples from random methods. This is now handled in generate_samples. * Fixed lint * Moved test_shape_handling to last travis environment * Changed test skip to xfail * Fixed Bound random error * Changes to increase code coverage * Moved to_tuple to shape_utils * Used numpy vectorize to fix 3422 * Fixed signature for empty input and one_d reshaping. * Removed unused fixture and added more details to RELEASE-NOTES * Addressed ColCarroll's comments
1 parent a7f62a0 commit f5d8324

13 files changed

+793
-287
lines changed

.travis.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,14 @@ install:
2222
- conda list && pip freeze
2323

2424
env:
25-
- FLOATX='float32' TESTCMD="--durations=10 --ignore=pymc3/tests/test_examples.py --cov-append --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py"
25+
- FLOATX='float32' TESTCMD="--durations=10 --ignore=pymc3/tests/test_examples.py --cov-append --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py --ignore=pymc3/tests/test_shape_handling.py"
2626
- FLOATX='float32' RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append pymc3/tests/test_distributions_random.py pymc3/tests/test_shared.py pymc3/tests/test_smc.py pymc3/tests/test_sampling.py pymc3/tests/test_parallel_sampling.py pymc3/tests/test_dist_math.py pymc3/tests/test_distribution_defaults.py pymc3/tests/test_distributions_timeseries.py pymc3/tests/test_random.py"
2727
- FLOATX='float32' TESTCMD="--durations=10 --cov-append pymc3/tests/test_examples.py pymc3/tests/test_posteriors.py pymc3/tests/test_gp.py"
28-
- FLOATX='float32' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py"
29-
- FLOATX='float64' TESTCMD="--durations=10 --cov-append --ignore=pymc3/tests/test_examples.py --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py"
28+
- FLOATX='float32' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py pymc3/tests/test_shape_handling.py"
29+
- FLOATX='float64' TESTCMD="--durations=10 --cov-append --ignore=pymc3/tests/test_examples.py --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py --ignore=pymc3/tests/test_shape_handling.py"
3030
- FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_distributions_random.py pymc3/tests/test_shared.py pymc3/tests/test_smc.py pymc3/tests/test_sampling.py pymc3/tests/test_parallel_sampling.py pymc3/tests/test_dist_math.py pymc3/tests/test_distribution_defaults.py pymc3/tests/test_distributions_timeseries.py pymc3/tests/test_random.py"
3131
- FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_examples.py pymc3/tests/test_posteriors.py pymc3/tests/test_gp.py"
32-
- FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py"
32+
- FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py pymc3/tests/test_shape_handling.py"
3333

3434
script:
3535
- . ./scripts/test.sh $TESTCMD

RELEASE-NOTES.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
- Add function `set_data` to update variables defined as `Data`.
99
- `Mixture` now supports mixtures of multidimensional probability distributions, not just lists of 1D distributions.
1010
- `GLM.from_formula` and `LinearComponent.from_formula` can extract variables from the calling scope. Customizable via the new `eval_env` argument. Fixing #3382.
11+
- Added the `distributions.shape_utils` module with functions used to help broadcast samples drawn from distributions using the `size` keyword argument.
12+
- Used `numpy.vectorize` in `distributions.distribution._compile_theano_function`. This enables `sample_prior_predictive` and `sample_posterior_predictive` to ask for tuples of samples instead of just integers. This fixes issue #3422.
1113

1214
### Maintenance
1315
- All occurrences of `sd` as a parameter name have been renamed to `sigma`. `sd` will continue to function for backwards compatibility.
@@ -31,6 +33,10 @@
3133
- Add `sigma`, `tau`, and `sd` to signature of `NormalMixture`.
3234
- Resolved issue #3248. Set default lower and upper values of -inf and inf for pm.distributions.continuous.TruncatedNormal. This avoids errors caused by their previous values of None.
3335
- Resolved issue #3399. Converted all calls to `pm.distributions.bound._ContinuousBounded` and `pm.distributions.bound._DiscreteBounded` to use only and all positional arguments.
36+
- Restructured `distributions.distribution.generate_samples` to use the `shape_utils` module. This solves issues #3421 and #3147 by using the `size` aware broadcasting functions in `shape_utils`.
37+
- Fixed the `Multinomial.random` and `Multinomial.random_` methods to make them compatible with the new `generate_samples` function. In the process, a bug of the `Multinomial.random_` shape handling was discovered and fixed.
38+
- Fixed a defect found in `Bound.random` where the `point` dictionary was passed to `generate_samples` as an `arg` instead of in `not_broadcast_kwargs`.
39+
- Fixed a defect found in `Bound.random_` where `total_size` could end up as a `float64` instead of being an integer if given `size=tuple()`.
3440

3541
### Deprecations
3642

pymc3/distributions/bound.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def _random(self, lower, upper, point=None, size=None):
5959
"Drawing samples from distributions with "
6060
"array-valued bounds is not supported."
6161
)
62-
total_size = np.prod(size)
62+
total_size = np.prod(size).astype(np.int)
6363
samples = []
6464
s = 0
6565
while s < total_size:
@@ -81,17 +81,32 @@ def random(self, point=None, size=None):
8181
elif self.lower is not None and self.upper is not None:
8282
lower, upper = draw_values([self.lower, self.upper], point=point, size=size)
8383
return generate_samples(
84-
self._random, lower, upper, point, dist_shape=self.shape, size=size
84+
self._random,
85+
lower,
86+
upper,
87+
dist_shape=self.shape,
88+
size=size,
89+
not_broadcast_kwargs={'point': point},
8590
)
8691
elif self.lower is not None:
8792
lower = draw_values([self.lower], point=point, size=size)
8893
return generate_samples(
89-
self._random, lower, np.inf, point, dist_shape=self.shape, size=size
94+
self._random,
95+
lower,
96+
np.inf,
97+
dist_shape=self.shape,
98+
size=size,
99+
not_broadcast_kwargs={'point': point},
90100
)
91101
else:
92102
upper = draw_values([self.upper], point=point, size=size)
93103
return generate_samples(
94-
self._random, -np.inf, upper, point, dist_shape=self.shape, size=size
104+
self._random,
105+
-np.inf,
106+
upper,
107+
dist_shape=self.shape,
108+
size=size,
109+
not_broadcast_kwargs={'point': point},
95110
)
96111

97112

pymc3/distributions/continuous.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@
1919
alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow,
2020
normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue,
2121
)
22-
from .distribution import (Continuous, draw_values, generate_samples,
23-
broadcast_distribution_samples)
22+
from .distribution import (Continuous, draw_values, generate_samples)
2423

2524
__all__ = ['Uniform', 'Flat', 'HalfFlat', 'Normal', 'TruncatedNormal', 'Beta',
2625
'Kumaraswamy', 'Exponential', 'Laplace', 'StudentT', 'Cauchy',
@@ -966,8 +965,6 @@ def random(self, point=None, size=None):
966965
"""
967966
mu, lam, alpha = draw_values([self.mu, self.lam, self.alpha],
968967
point=point, size=size)
969-
mu, lam, alpha = broadcast_distribution_samples([mu, lam, alpha],
970-
size=size)
971968
return generate_samples(self._random,
972969
mu, lam, alpha,
973970
dist_shape=self.shape,
@@ -1297,7 +1294,6 @@ def random(self, point=None, size=None):
12971294
"""
12981295
a, b = draw_values([self.a, self.b],
12991296
point=point, size=size)
1300-
a, b = broadcast_distribution_samples([a, b], size=size)
13011297
return generate_samples(self._random, a, b,
13021298
dist_shape=self.shape,
13031299
size=size)
@@ -1674,7 +1670,6 @@ def random(self, point=None, size=None):
16741670
array
16751671
"""
16761672
mu, tau = draw_values([self.mu, self.tau], point=point, size=size)
1677-
mu, tau = broadcast_distribution_samples([mu, tau], size=size)
16781673
return generate_samples(self._random, mu, tau,
16791674
dist_shape=self.shape,
16801675
size=size)
@@ -1965,7 +1960,6 @@ def random(self, point=None, size=None):
19651960
"""
19661961
alpha, m = draw_values([self.alpha, self.m],
19671962
point=point, size=size)
1968-
alpha, m = broadcast_distribution_samples([alpha, m], size=size)
19691963
return generate_samples(self._random, alpha, m,
19701964
dist_shape=self.shape,
19711965
size=size)
@@ -2090,7 +2084,6 @@ def random(self, point=None, size=None):
20902084
"""
20912085
alpha, beta = draw_values([self.alpha, self.beta],
20922086
point=point, size=size)
2093-
alpha, beta = broadcast_distribution_samples([alpha, beta], size=size)
20942087
return generate_samples(self._random, alpha, beta,
20952088
dist_shape=self.shape,
20962089
size=size)
@@ -2669,7 +2662,6 @@ def random(self, point=None, size=None):
26692662
"""
26702663
alpha, beta = draw_values([self.alpha, self.beta],
26712664
point=point, size=size)
2672-
alpha, beta = broadcast_distribution_samples([alpha, beta], size=size)
26732665

26742666
def _random(a, b, size=None):
26752667
return b * (-np.log(np.random.uniform(size=size)))**(1 / a)
@@ -2963,8 +2955,6 @@ def random(self, point=None, size=None):
29632955
"""
29642956
mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu],
29652957
point=point, size=size)
2966-
mu, sigma, nu = broadcast_distribution_samples([mu, sigma, nu],
2967-
size=size)
29682958

29692959
def _random(mu, sigma, nu, size=None):
29702960
return (np.random.normal(mu, sigma, size=size)
@@ -3369,7 +3359,7 @@ def random(self, point=None, size=None):
33693359
scale = upper - lower
33703360
c_ = (c - lower) / scale
33713361
return generate_samples(stats.triang.rvs, c=c_, loc=lower, scale=scale,
3372-
size=size, dist_shape=self.shape, random_state=None)
3362+
size=size, dist_shape=self.shape)
33733363

33743364
def logp(self, value):
33753365
"""

pymc3/distributions/discrete.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55

66
from pymc3.util import get_variable_name
77
from .dist_math import bound, factln, binomln, betaln, logpow, random_choice
8-
from .distribution import (Discrete, draw_values, generate_samples,
9-
broadcast_distribution_samples)
8+
from .distribution import Discrete, draw_values, generate_samples
9+
from .shape_utils import broadcast_distribution_samples
1010
from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp
1111
from ..theanof import floatX, intX
1212

@@ -351,7 +351,6 @@ def _random(self, q, beta, size=None):
351351

352352
def random(self, point=None, size=None):
353353
q, beta = draw_values([self.q, self.beta], point=point, size=size)
354-
q, beta = broadcast_distribution_samples([q, beta], size=size)
355354

356355
return generate_samples(self._random, q, beta,
357356
dist_shape=self.shape,

pymc3/distributions/dist_math.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from theano.tensor.slinalg import Cholesky
1212
from theano.scan_module import until
1313
from theano import scan
14+
from .shape_utils import to_tuple
1415

1516
from .special import gammaln
1617
from pymc3.theanof import floatX
@@ -20,17 +21,6 @@
2021
c = - .5 * np.log(2. * np.pi)
2122

2223

23-
def to_tuple(shape):
24-
"""Convert ints, arrays, and Nones to tuples"""
25-
if shape is None:
26-
return tuple()
27-
temp = np.atleast_1d(shape)
28-
if temp.size == 0:
29-
return tuple()
30-
else:
31-
return tuple(temp)
32-
33-
3424
def bound(logp, *conditions, **kwargs):
3525
"""
3626
Bounds a log probability density with several conditions.

0 commit comments

Comments
 (0)