Skip to content

Commit 7485aa0

Browse files
committed
Merge branch 'aloctavodia-sample_ppc_ma'
2 parents 8c81624 + db0da22 commit 7485aa0

32 files changed

+874
-107
lines changed

.travis.yml

+12-10
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,25 @@
11
language: python
2+
sudo: false
23

34
before_install:
45
- . ./scripts/install_miniconda.sh
5-
- "export DISPLAY=:99.0"
6-
- "sh -e /etc/init.d/xvfb start"
7-
- if [ ${MATPLOTLIB} = "1.2" ]; then mkdir $HOME/.matplotlib; fi
8-
- if [ ${MATPLOTLIB} = "1.2" ]; then cp ${SRCDIR}/tools/matplotlibrc $HOME/.matplotlib/matplotlibrc; fi
6+
- sh -e /etc/init.d/xvfb start
7+
- export DISPLAY=":99.0"
98

109
install:
1110
- . ./scripts/create_testenv.sh
1211
- pip install coveralls pylint
1312

1413
env:
15-
- PYTHON_VERSION=2.7 TESTCMD="--durations=10 --ignore=pymc3/tests/test_examples.py --cov-append --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py"
16-
- PYTHON_VERSION=2.7 RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append pymc3/tests/test_distributions_random.py pymc3/tests/test_shared.py pymc3/tests/test_smc.py"
17-
- PYTHON_VERSION=2.7 TESTCMD="--durations=10 --cov-append pymc3/tests/test_examples.py pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py"
18-
- PYTHON_VERSION=3.6 TESTCMD="--durations=10 --cov-append --ignore=pymc3/tests/test_examples.py --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py"
19-
- PYTHON_VERSION=3.6 TESTCMD="--durations=10 --cov-append pymc3/tests/test_distributions_random.py pymc3/tests/test_shared.py pymc3/tests/test_smc.py"
20-
- PYTHON_VERSION=3.6 TESTCMD="--durations=10 --cov-append pymc3/tests/test_examples.py pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py"
14+
- PYTHON_VERSION=2.7 FLOATX='float32' TESTCMD="--durations=10 --ignore=pymc3/tests/test_examples.py --cov-append --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py"
15+
- PYTHON_VERSION=2.7 FLOATX='float32' RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append pymc3/tests/test_distributions_random.py pymc3/tests/test_shared.py pymc3/tests/test_smc.py"
16+
- PYTHON_VERSION=2.7 FLOATX='float32' TESTCMD="--durations=10 --cov-append pymc3/tests/test_examples.py pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py"
17+
- PYTHON_VERSION=2.7 FLOATX='float64' TESTCMD="--durations=10 --ignore=pymc3/tests/test_examples.py --cov-append --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py"
18+
- PYTHON_VERSION=2.7 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append pymc3/tests/test_distributions_random.py pymc3/tests/test_shared.py pymc3/tests/test_smc.py"
19+
- PYTHON_VERSION=2.7 FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_examples.py pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py"
20+
- PYTHON_VERSION=3.6 FLOATX='float64' TESTCMD="--durations=10 --cov-append --ignore=pymc3/tests/test_examples.py --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py"
21+
- PYTHON_VERSION=3.6 FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_distributions_random.py pymc3/tests/test_shared.py pymc3/tests/test_smc.py"
22+
- PYTHON_VERSION=3.6 FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_examples.py pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py"
2123
script:
2224
- . ./scripts/test.sh $TESTCMD
2325

docs/source/examples.rst

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Howto
1212
notebooks/sampler-stats.ipynb
1313
notebooks/Diagnosing_biased_Inference_with_Divergences.ipynb
1414
notebooks/posterior_predictive.ipynb
15+
notebooks/model_comparison.ipynb
16+
notebooks/model_averaging.ipynb
1517
notebooks/howto_debugging.ipynb
1618
notebooks/PyMC3_tips_and_heuristic.ipynb
1719
notebooks/LKJ.ipynb

docs/source/notebooks/model_averaging.ipynb

+495
Large diffs are not rendered by default.

pymc3/backends/smc_text.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -177,13 +177,13 @@ def highest_sampled_stage(self):
177177
-------
178178
stage number : int
179179
"""
180-
return max(self.stage_number(s) for s in glob(self.path('*')))
180+
return max(self.stage_number(s) for s in glob(self.stage_path('*')))
181181

182182
def atmip_path(self, stage_number):
183183
"""Consistent naming for atmip params."""
184184
return os.path.join(self.stage_path(stage_number), 'atmip.params.pkl')
185185

186-
def load_atmip_params(self, stage_number):
186+
def load_atmip_params(self, stage_number, model):
187187
"""Load saved parameters from last sampled ATMIP stage.
188188
189189
Parameters
@@ -196,8 +196,14 @@ def load_atmip_params(self, stage_number):
196196
else:
197197
prev = stage_number - 1
198198
pm._log.info('Loading parameters from completed stage {}'.format(prev))
199-
with open(self.atmip_path(prev), 'rb') as buff:
200-
return pickle.load(buff)
199+
200+
with model:
201+
with open(self.atmip_path(prev), 'rb') as buff:
202+
step = pickle.load(buff)
203+
204+
# update step stage to current stage
205+
step.stage = stage_number
206+
return step
201207

202208
def dump_atmip_params(self, step):
203209
"""Save atmip params to file."""
@@ -278,7 +284,7 @@ def recover_existing_results(self, stage, draws, step, n_jobs, model=None):
278284
# load incomplete stage results
279285
pm._log.info('Reloading existing results ...')
280286
mtrace = self.load_multitrace(stage, model=model)
281-
if len(mtrace) > 0:
287+
if len(mtrace.chains) > 0:
282288
# continue sampling if traces exist
283289
pm._log.info('Checking for corrupted files ...')
284290
return self.check_multitrace(mtrace, draws=draws, n_chains=step.n_chains)

pymc3/distributions/distribution.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def getattr_value(self, val):
8787

8888
def _repr_latex_(self, name=None, dist=None):
8989
return None
90-
90+
9191

9292
def TensorType(dtype, shape):
9393
return tt.TensorType(str(dtype), np.atleast_1d(shape) == 1)
@@ -123,6 +123,11 @@ def __init__(self, shape=(), dtype=None, defaults=('mode', ),
123123
dtype = 'int64'
124124
if dtype != 'int16' and dtype != 'int64':
125125
raise TypeError('Discrete classes expect dtype to be int16 or int64.')
126+
127+
if kwargs.get('transform', None) is not None:
128+
raise ValueError("Transformations for discrete distributions "
129+
"are not allowed.")
130+
126131
super(Discrete, self).__init__(
127132
shape, dtype, defaults=defaults, *args, **kwargs)
128133

pymc3/distributions/transforms.py

-7
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,6 @@ def __init__(self, dist, transform, *args, **kwargs):
7070
# force the last dim not broadcastable
7171
self.type = tt.TensorType(v.dtype, b)
7272

73-
def _repr_latex_(self, name=None, dist=None):
74-
if name is None:
75-
name = self.name
76-
if dist is None:
77-
dist = self.dist
78-
return dist._repr_latex_(self, name=name, dist=dist)
79-
8073
def logp(self, x):
8174
return (self.dist.logp(self.transform_used.backward(x)) +
8275
self.transform_used.jacobian_det(x))

pymc3/examples/data/milk.csv

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
kcal.per.g,neocortex,log_mass
2+
0.490,0.552,0.668
3+
0.470,0.645,1.658
4+
0.560,0.645,1.681
5+
0.890,0.676,0.920
6+
0.920,0.688,-0.386
7+
0.800,0.589,-2.120
8+
0.460,0.617,-0.755
9+
0.710,0.603,-1.139
10+
0.680,0.700,0.438
11+
0.970,0.704,1.176
12+
0.840,0.734,2.510
13+
0.620,0.675,1.681
14+
0.540,0.713,3.569
15+
0.490,0.726,4.375
16+
0.480,0.702,3.707
17+
0.550,0.763,3.500
18+
0.710,0.755,4.006

pymc3/model.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,7 @@ def Var(self, name, dist, data=None, total_size=None):
539539
name=name,
540540
orig_name=get_transformed_name(name, dist.transform)))
541541
self.deterministics.append(var)
542+
self.add_random_variable(var)
542543
return var
543544
elif isinstance(data, dict):
544545
with self:
@@ -985,7 +986,7 @@ def Deterministic(name, var, model=None):
985986
986987
Returns
987988
-------
988-
n : var but with name name
989+
var : var, with name attribute
989990
"""
990991
model = modelcontext(model)
991992
var.name = model.name_for(name)
@@ -1009,6 +1010,7 @@ def Potential(name, var, model=None):
10091010
model = modelcontext(model)
10101011
var.name = model.name_for(name)
10111012
model.potentials.append(var)
1013+
model.add_random_variable(var)
10121014
return var
10131015

10141016

pymc3/sampling.py

+129-11
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import sys
1919
sys.setrecursionlimit(10000)
2020

21-
__all__ = ['sample', 'iter_sample', 'sample_ppc', 'init_nuts']
21+
__all__ = ['sample', 'iter_sample', 'sample_ppc', 'sample_ppc_w', 'init_nuts']
2222

2323
STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
2424
BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis)
@@ -484,14 +484,15 @@ def _update_start_vals(a, b, model):
484484

485485
a.update({k: v for k, v in b.items() if k not in a})
486486

487+
487488
def sample_ppc(trace, samples=None, model=None, vars=None, size=None,
488489
random_seed=None, progressbar=True):
489490
"""Generate posterior predictive samples from a model given a trace.
490491
491492
Parameters
492493
----------
493494
trace : backend, list, or MultiTrace
494-
Trace generated from MCMC sampling
495+
Trace generated from MCMC sampling.
495496
samples : int
496497
Number of posterior predictive samples to generate. Defaults to the
497498
length of `trace`
@@ -503,12 +504,19 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None,
503504
size : int
504505
The number of random draws from the distribution specified by the
505506
parameters in each sample of the trace.
507+
random_seed : int
508+
Seed for the random number generator.
509+
progressbar : bool
510+
Whether or not to display a progress bar in the command line. The
511+
bar shows the percentage of completion, the sampling speed in
512+
samples per second (SPS), and the estimated remaining time until
513+
completion ("estimated time of arrival"; ETA).
506514
507515
Returns
508516
-------
509517
samples : dict
510-
Dictionary with the variables as keys. The values corresponding
511-
to the posterior predictive samples.
518+
Dictionary with the variables as keys. The values are the
519+
posterior predictive samples.
512520
"""
513521
if samples is None:
514522
samples = len(trace)
@@ -521,18 +529,128 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None,
521529

522530
seed(random_seed)
523531

532+
indices = randint(0, len(trace), samples)
524533
if progressbar:
525-
indices = tqdm(randint(0, len(trace), samples), total=samples)
526-
else:
527-
indices = randint(0, len(trace), samples)
534+
indices = tqdm(indices, total=samples)
535+
536+
try:
537+
ppc = defaultdict(list)
538+
for idx in indices:
539+
param = trace[idx]
540+
for var in vars:
541+
ppc[var.name].append(var.distribution.random(point=param,
542+
size=size))
543+
544+
except KeyboardInterrupt:
545+
pass
546+
547+
finally:
548+
if progressbar:
549+
indices.close()
550+
551+
return {k: np.asarray(v) for k, v in ppc.items()}
552+
553+
554+
def sample_ppc_w(traces, samples=None, models=None, size=None, weights=None,
555+
random_seed=None, progressbar=True):
556+
"""Generate weighted posterior predictive samples from a list of models and
557+
a list of traces according to a set of weights.
558+
559+
Parameters
560+
----------
561+
traces : list
562+
List of traces generated from MCMC sampling. The number of traces should
563+
be equal to the number of weights.
564+
samples : int
565+
Number of posterior predictive samples to generate. Defaults to the
566+
length of the shortest trace in traces.
567+
models : list
568+
List of models used to generate the list of traces. The number of models
569+
should be equal to the number of weights and the number of observed RVs
570+
should be the same for all models.
571+
By default a single model will be inferred from the `with` context; in this
572+
case results will only be meaningful if all models share the same
573+
distributions for the observed RVs.
574+
size : int
575+
The number of random draws from the distributions specified by the
576+
parameters in each sample of the trace.
577+
weights : array-like
578+
Individual weights for each trace. Defaults to the same weight for each model.
579+
random_seed : int
580+
Seed for the random number generator.
581+
progressbar : bool
582+
Whether or not to display a progress bar in the command line. The
583+
bar shows the percentage of completion, the sampling speed in
584+
samples per second (SPS), and the estimated remaining time until
585+
completion ("estimated time of arrival"; ETA).
586+
587+
Returns
588+
-------
589+
samples : dict
590+
Dictionary with the variables as keys. The values are the
591+
posterior predictive samples from the weighted models.
592+
"""
593+
seed(random_seed)
594+
595+
if models is None:
596+
models = [modelcontext(models)] * len(traces)
597+
598+
if weights is None:
599+
weights = [1] * len(traces)
600+
601+
if len(traces) != len(weights):
602+
raise ValueError('The number of traces and weights should be the same')
603+
604+
if len(models) != len(weights):
605+
raise ValueError('The number of models and weights should be the same')
606+
607+
lenght_morv = len(models[0].observed_RVs)
608+
if not all(len(i.observed_RVs) == lenght_morv for i in models):
609+
raise ValueError(
610+
'The number of observed RVs should be the same for all models')
611+
612+
weights = np.asarray(weights)
613+
p = weights / np.sum(weights)
614+
615+
min_tr = min([len(i) for i in traces])
616+
617+
n = (min_tr * p).astype('int')
618+
# ensure n sums up to min_tr
619+
idx = np.argmax(n)
620+
n[idx] = n[idx] + min_tr - np.sum(n)
528621

529-
ppc = defaultdict(list)
530-
for idx in indices:
531-
param = trace[idx]
532-
for var in vars:
622+
trace = np.concatenate([np.random.choice(traces[i], j)
623+
for i, j in enumerate(n)])
624+
625+
variables = []
626+
for i, m in enumerate(models):
627+
variables.extend(m.observed_RVs * n[i])
628+
629+
len_trace = len(trace)
630+
631+
if samples is None:
632+
samples = len_trace
633+
634+
indices = randint(0, len_trace, samples)
635+
636+
if progressbar:
637+
indices = tqdm(indices, total=samples)
638+
639+
try:
640+
ppc = defaultdict(list)
641+
for idx in indices:
642+
param = trace[idx]
643+
var = variables[idx]
533644
ppc[var.name].append(var.distribution.random(point=param,
534645
size=size))
535646

647+
except KeyboardInterrupt:
648+
pass
649+
650+
finally:
651+
if progressbar:
652+
indices.close()
653+
536654
return {k: np.asarray(v) for k, v in ppc.items()}
537655

538656

pymc3/step_methods/smc.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,7 @@ def ATMIP_sample(n_steps, step=None, start=None, homepath=None, chain=0, stage=0
520520
step.stage = stage
521521
draws = 1
522522
else:
523-
step = stage_handler.load_atmip_params(stage)
523+
step = stage_handler.load_atmip_params(stage, model=model)
524524
draws = step.n_steps
525525

526526
stage_handler.clean_directory(stage, None, rm_flag)

pymc3/tests/backend_fixtures.py

+4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from pymc3.tests import models
88
from pymc3.backends import base
99
import pytest
10+
import theano
1011

1112

1213
class ModelBackendSetupTestCase(object):
@@ -227,6 +228,7 @@ def record_point(self, val):
227228
else:
228229
self.strace.record(point=point)
229230

231+
@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
230232
def test_standard_close(self):
231233
for idx in range(self.draws):
232234
self.record_point(idx)
@@ -266,13 +268,15 @@ class SelectionTestCase(ModelBackendSampledTestCase):
266268
- shape
267269
"""
268270

271+
@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
269272
def test_get_values_default(self):
270273
for varname in self.test_point.keys():
271274
expected = np.concatenate([self.expected[chain][varname]
272275
for chain in [0, 1]])
273276
result = self.mtrace.get_values(varname)
274277
npt.assert_equal(result, expected)
275278

279+
@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
276280
def test_get_values_nocombine_burn_keyword(self):
277281
burn = 2
278282
for varname in self.test_point.keys():

0 commit comments

Comments
 (0)