
Commit f9917a2

Merge pull request #2254 from junpenglao/update_examples: Update examples
Author: Junpeng Lao
2 parents e8da421 + 5ecc539

12 files changed: +130 -161 lines

pymc3/distributions/distribution.py  (+13 -3)

@@ -483,8 +483,17 @@ class Bound(object):
 
     Example
     -------
-    boundedNormal = pymc3.Bound(pymc3.Normal, lower=0.0)
-    par = boundedNormal(mu=0.0, sd=1.0, testval=1.0)
+    # Bounded distribution can be defined before the model context
+    PositiveNormal = pm.Bound(pm.Normal, lower=0.0)
+    with pm.Model():
+        par1 = PositiveNormal('par1', mu=0.0, sd=1.0, testval=1.0)
+        # or within the model context
+        NegativeNormal = pm.Bound(pm.Normal, upper=0.0)
+        par2 = NegativeNormal('par2', mu=0.0, sd=1.0, testval=1.0)
+
+        # or you can define it implicitly within the model context
+        par3 = pm.Bound(pm.Normal, lower=-1.0, upper=1.0)(
+            'par3', mu=0.0, sd=1.0, testval=1.0)
     """
 
     def __init__(self, distribution, lower=-np.inf, upper=np.inf):
@@ -497,7 +506,8 @@ def __call__(self, *args, **kwargs):
         raise ValueError('Observed Bound distributions are not allowed. '
                          'If you want to model truncated data '
                          'you can use a pm.Potential in combination '
-                         'with the cumulative probability function.')
+                         'with the cumulative probability function. See '
+                         'pymc3/examples/censored_data.py for an example.')
         first, args = args[0], args[1:]
 
         return Bounded(first, self.distribution, self.lower, self.upper,
pymc3/examples/LKJ_correlation.py  (+23 -28)

@@ -1,59 +1,54 @@
 import theano.tensor as tt
 import numpy as np
 from numpy.random import multivariate_normal
-
 import pymc3 as pm
 
 # Generate some multivariate normal data:
 n_obs = 1000
 
 # Mean values:
-mu = np.linspace(0, 2, num=4)
-n_var = len(mu)
+mu_r = np.linspace(0, 2, num=4)
+n_var = len(mu_r)
 
 # Standard deviations:
 stds = np.ones(4) / 2.0
 
 # Correlation matrix of 4 variables:
-corr = np.array([[1., 0.75, 0., 0.15],
-                 [0.75, 1., -0.06, 0.19],
-                 [0., -0.06, 1., -0.04],
-                 [0.15, 0.19, -0.04, 1.]])
-cov_matrix = np.diag(stds).dot(corr.dot(np.diag(stds)))
-
-dataset = multivariate_normal(mu, cov_matrix, size=n_obs)
-
+corr_r = np.array([[1., 0.75, 0., 0.15],
+                   [0.75, 1., -0.06, 0.19],
+                   [0., -0.06, 1., -0.04],
+                   [0.15, 0.19, -0.04, 1.]])
+cov_matrix = np.diag(stds).dot(corr_r.dot(np.diag(stds)))
 
-# In order to convert the upper triangular correlation values to a complete
-# correlation matrix, we need to construct an index matrix:
-n_elem = int(n_var * (n_var - 1) / 2)
-tri_index = np.zeros([n_var, n_var], dtype=int)
-tri_index[np.triu_indices(n_var, k=1)] = np.arange(n_elem)
-tri_index[np.triu_indices(n_var, k=1)[::-1]] = np.arange(n_elem)
+dataset = multivariate_normal(mu_r, cov_matrix, size=n_obs)
 
 with pm.Model() as model:
 
     mu = pm.Normal('mu', mu=0, sd=1, shape=n_var)
 
-    # We can specify separate priors for sigma and the correlation matrix:
-    sigma = pm.Uniform('sigma', shape=n_var)
-    corr_triangle = pm.LKJCorr('corr', n=1, p=n_var)
-    corr_matrix = corr_triangle[tri_index]
-    corr_matrix = tt.fill_diagonal(corr_matrix, 1)
+    # Note that we access the distribution for the standard
+    # deviations, and do not create a new random variable.
+    sd_dist = pm.HalfCauchy.dist(beta=2.5)
+    packed_chol = pm.LKJCholeskyCov('chol_cov', n=n_var, eta=1, sd_dist=sd_dist)
+    # compute the covariance matrix
+    chol = pm.expand_packed_triangular(n_var, packed_chol, lower=True)
+    cov = tt.dot(chol, chol.T)
 
-    cov_matrix = tt.diag(sigma).dot(corr_matrix.dot(tt.diag(sigma)))
+    # Extract the standard deviations etc
+    sd = pm.Deterministic('sd', tt.sqrt(tt.diag(cov)))
+    corr = tt.diag(sd**-1).dot(cov.dot(tt.diag(sd**-1)))
+    r = pm.Deterministic('r', corr[np.triu_indices(n_var, k=1)])
 
-    like = pm.MvNormal('likelihood', mu=mu, cov=cov_matrix, observed=dataset)
+    like = pm.MvNormal('likelihood', mu=mu, chol=chol, observed=dataset)
 
 
 def run(n=1000):
     if n == "short":
         n = 50
     with model:
-        start = pm.find_MAP()
-        step = pm.NUTS(scaling=start)
-        trace = pm.sample(n, step=step, start=start)
-    return trace
+        trace = pm.sample(n)
+    pm.traceplot(trace, varnames=['mu', 'r'],
+                 lines={'mu': mu_r, 'r': corr_r[np.triu_indices(n_var, k=1)]})
 
 if __name__ == '__main__':
     run()
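
For readers new to LKJCholeskyCov: it returns the Cholesky factor as a flat vector, and pm.expand_packed_triangular unpacks it into a matrix. A small numpy illustration of the packing convention (our sketch, assuming the row-major lower-triangle layout PyMC3 documents for packed triangular matrices):

    import numpy as np

    n = 3
    packed = np.arange(1., n * (n + 1) / 2 + 1.)  # [1., 2., 3., 4., 5., 6.]
    chol = np.zeros((n, n))
    chol[np.tril_indices(n)] = packed  # fill the lower triangle row by row
    # chol == [[1., 0., 0.],
    #          [2., 3., 0.],
    #          [4., 5., 6.]]
    cov = chol.dot(chol.T)  # the model forms the same product with tt.dot(chol, chol.T)

Parametrizing the covariance through its Cholesky factor also lets MvNormal take chol= directly, avoiding the tri_index bookkeeping the old LKJCorr version needed.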

pymc3/examples/arbitrary_stochastic.py  (+12 -10)

@@ -4,23 +4,25 @@
 
 
 def build_model():
+    # data
+    failure = np.array([0., 1.])
+    value = np.array([1., 0.])
+
+    # custom log-liklihood
+    def logp(failure, value):
+        return tt.sum(failure * tt.log(lam) - lam * value)
+
+    # model
     with pm.Model() as model:
-        lam = pm.Exponential('lam', 1)
-        failure = np.array([0, 1])
-        value = np.array([1, 0])
-
-        def logp(failure, value):
-            return tt.sum(failure * np.log(lam) - lam * value)
+        lam = pm.Exponential('lam', 1.)
         pm.DensityDist('x', logp, observed={'failure': failure, 'value': value})
     return model
 
 
 def run(n_samples=3000):
     model = build_model()
-    start = model.test_point
-    h = pm.find_hessian(start, model=model)
-    step = pm.Metropolis(model.vars, h, blocked=True, model=model)
-    trace = pm.sample(n_samples, step=step, start=start, model=model)
+    with model:
+        trace = pm.sample(n_samples)
     return trace
 
 if __name__ == "__main__":
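
With observed passed as a dict, DensityDist hands the dict entries to logp as named arguments, so 'failure' and 'value' are bound to the matching parameters. A quick sanity check (our addition, not in the commit) is to evaluate the joint log-probability at the test point:

    model = build_model()
    # the observed arrays flow into logp(failure=..., value=...)
    print(model.logp(model.test_point))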

pymc3/examples/arma_example.py  (+11 -12)

@@ -1,10 +1,10 @@
-from pymc3 import Normal, sample, Model, plots, Potential, variational, HalfCauchy
+import pymc3 as pm
 from theano import scan, shared
 
 import numpy as np
 """
 ARMA example
-It is interesting to note just how much more compact this is that the original STAN example
+It is interesting to note just how much more compact this is than the original STAN example
 
 The original implementation is in the STAN documentation by Gelman et al and is reproduced below
 
@@ -52,11 +52,11 @@
 
 def build_model():
     y = shared(np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float32))
-    with Model() as arma_model:
-        sigma = HalfCauchy('sigma', 5)
-        theta = Normal('theta', 0, sd=2)
-        phi = Normal('phi', 0, sd=2)
-        mu = Normal('mu', 0, sd=10)
+    with pm.Model() as arma_model:
+        sigma = pm.HalfCauchy('sigma', 5.)
+        theta = pm.Normal('theta', 0., sd=2.)
+        phi = pm.Normal('phi', 0., sd=2.)
+        mu = pm.Normal('mu', 0., sd=10.)
 
         err0 = y[0] - (mu + phi * mu)
 
@@ -69,19 +69,18 @@ def calc_next(last_y, this_y, err, mu, phi, theta):
                           outputs_info=[err0],
                           non_sequences=[mu, phi, theta])
 
-        Potential('like', Normal.dist(0, sd=sigma).logp(err))
-        variational.advi(n=2000)
+        pm.Potential('like', pm.Normal.dist(0, sd=sigma).logp(err))
     return arma_model
 
 
 def run(n_samples=1000):
     model = build_model()
     with model:
-        trace = sample(draws=n_samples)
+        trace = pm.sample(draws=n_samples)
 
     burn = n_samples // 10
-    plots.traceplot(trace[burn:])
-    plots.forestplot(trace[burn:])
+    pm.plots.traceplot(trace[burn:])
+    pm.plots.forestplot(trace[burn:])
 
 
 if __name__ == '__main__':
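
The hunk above only shows the tail of the scan call; a self-contained toy version of the same recursion may help (our sketch: the sequences/taps line is our guess at the elided part of the file, and the parameter values are made up):

    import numpy as np
    import theano
    import theano.tensor as tt
    from theano import scan, shared

    y = shared(np.array([15., 10., 16., 11.]))
    mu, phi, theta = tt.constant(12.), tt.constant(0.5), tt.constant(0.1)

    def calc_next(last_y, this_y, err, mu, phi, theta):
        nu_t = mu + phi * last_y + theta * err  # one-step ARMA(1,1) prediction
        return this_y - nu_t                    # innovation (error) at time t

    err0 = y[0] - (mu + phi * mu)
    err, _ = scan(fn=calc_next,
                  sequences=dict(input=y, taps=[-1, 0]),  # pairs (y[t-1], y[t])
                  outputs_info=[err0],                    # err feeds back each step
                  non_sequences=[mu, phi, theta])
    print(theano.function([], err)())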

pymc3/examples/baseball.py  (+23 -22)

@@ -5,33 +5,34 @@
 
 import pymc3 as pm
 import numpy as np
-import theano
 
-data = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'), delimiter="\t", skiprows=1, usecols=(2,3))
-
-atBats = data[:,0].astype(theano.config.floatX)
-hits = data[:,1].astype(theano.config.floatX)
-
-N = len( hits )
-
-model = pm.Model()
-
-# we want to bound the kappa below
-BoundedKappa = pm.Bound( pm.Pareto, lower=1.0 )
-
-with model:
-    phi = pm.Uniform( 'phi', lower=0.0, upper=1.0 )
-    kappa = BoundedKappa( 'kappa', alpha=1.0001, m=1.5 )
-    thetas = pm.Beta( 'thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=N )
-    ys = pm.Binomial( 'ys', n=atBats, p=thetas, observed=hits )
-
-def run( n=100000 ):
+def build_model():
+    data = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'), delimiter="\t",
+                      skiprows=1, usecols=(2,3))
+
+    atbats = pm.floatX(data[:,0])
+    hits = pm.floatX(data[:,1])
+
+    N = len(hits)
+
+    # we want to bound the kappa below
+    BoundedKappa = pm.Bound(pm.Pareto, lower=1.0)
+
+    with pm.Model() as model:
+        phi = pm.Uniform('phi', lower=0.0, upper=1.0)
+        kappa = BoundedKappa('kappa', alpha=1.0001, m=1.5)
+        thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=N)
+        ys = pm.Binomial('ys', n=atbats, p=thetas, observed=hits)
+    return model
+
+def run(n=2000):
+    model = build_model()
     with model:
         # initialize NUTS() with ADVI under the hood
-        trace = pm.sample( n )
+        trace = pm.sample(n, nuts_kwargs={'target_accept':.99})
 
         # drop some first samples as burnin
-        pm.traceplot( trace[1000:] )
+        pm.traceplot(trace[1000:])
 
 if __name__ == '__main__':
     run()
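
The nuts_kwargs dict is forwarded to the NUTS constructor; raising target_accept to .99 forces smaller step sizes, which helps in funnel-prone hierarchical models like this one. A roughly equivalent spelling (a sketch under the same pymc3 API; note it bypasses pm.sample's automatic initialization) builds the step method explicitly:

    model = build_model()
    with model:
        step = pm.NUTS(target_accept=0.99)  # same knob as nuts_kwargs={'target_accept': .99}
        trace = pm.sample(2000, step=step)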

pymc3/examples/censored_data.py  (+2 -2)

@@ -38,7 +38,7 @@ def normal_lcdf(mu, sigma, x):
     z = (x - mu) / sigma
     return tt.switch(
         tt.lt(z, -1.0),
-        tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2,
+        tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
         tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
     )
 
@@ -47,7 +47,7 @@ def normal_lccdf(mu, sigma, x):
     z = (x - mu) / sigma
     return tt.switch(
         tt.gt(z, 1.0),
-        tt.log(tt.erfcx(z / tt.sqrt(2.)) / 2) - tt.sqr(z) / 2.,
+        tt.log(tt.erfcx(z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
         tt.log1p(-tt.erfc(-z / tt.sqrt(2.)) / 2.)
     )
 
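
The added decimal points are a consistency fix; the substance of these helpers is the erfcx identity they rely on: for z far in the left tail, Phi(z) underflows while erfcx(-z/sqrt(2)) stays representable, since Phi(z) = erfcx(-z/sqrt(2)) * exp(-z**2/2) / 2. A quick numerical check of that identity (our addition, using scipy rather than theano):

    import numpy as np
    from scipy.special import erfcx, log_ndtr, ndtr

    z = -40.0
    print(ndtr(z))  # 0.0 -- the CDF itself underflows, so log(ndtr(z)) is -inf
    stable = np.log(erfcx(-z / np.sqrt(2.)) / 2.) - z**2 / 2.
    print(stable, log_ndtr(z))  # both approx. -804.608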

pymc3/examples/custom_dists.py  (+14 -11)

@@ -11,7 +11,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
-import pymc3
+import pymc3 as pm
 import theano.tensor as tt
 
 np.random.seed(42)
@@ -24,20 +24,23 @@
 ydata = np.random.normal(ydata, 10)
 data = {'x': xdata, 'y': ydata}
 
-with pymc3.Model() as model:
-    alpha = pymc3.Uniform('intercept', -100, 100)
+# define loglikelihood outside of the model context, otherwise njobs wont work:
+# Lambdas defined in local namespace are not picklable (see issue #1995)
+def loglike1(value):
+    return -1.5 * tt.log(1 + value**2)
+def loglike2(value):
+    return -tt.log(tt.abs_(value))
+
+with pm.Model() as model:
+    alpha = pm.Normal('intercept', mu=0, sd=100)
     # Create custom densities
-    beta = pymc3.DensityDist('slope', lambda value: -
-                             1.5 * tt.log(1 + value**2), testval=0)
-    sigma = pymc3.DensityDist(
-        'sigma', lambda value: -tt.log(tt.abs_(value)), testval=1)
+    beta = pm.DensityDist('slope', loglike1, testval=0)
+    sigma = pm.DensityDist('sigma', loglike2, testval=1)
     # Create likelihood
-    like = pymc3.Normal('y_est', mu=alpha + beta *
+    like = pm.Normal('y_est', mu=alpha + beta *
                      xdata, sd=sigma, observed=ydata)
 
-    start = pymc3.find_MAP()
-    step = pymc3.NUTS(scaling=start)  # Instantiate sampler
-    trace = pymc3.sample(10000, step, start=start)
+    trace = pm.sample(2000, njobs=2)
 
 
 #################################################
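
Why hoisting the log-likelihoods to module scope matters: njobs=2 makes pm.sample spawn worker processes that pickle parts of the model, and pickle serialises a module-level function by reference to its importable name, while a locally defined lambda has no such name. A toy demonstration (our sketch, independent of the model above):

    import pickle

    def loglike_named(value):
        return -abs(value)

    pickle.dumps(loglike_named)  # works: pickled by module-qualified name

    try:
        pickle.dumps(lambda value: -abs(value))
    except (pickle.PicklingError, AttributeError) as e:
        print('lambda not picklable:', e)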

pymc3/examples/disaster_model_arbitrary_deterministic.py  (+8 -26)

@@ -7,8 +7,7 @@
 
 import pymc3 as pm
 import theano.tensor as tt
-from theano import as_op
-from numpy import arange, array, empty
+from numpy import arange, array
 
 __all__ = ['disasters_data', 'switchpoint', 'early_mean', 'late_mean', 'rate',
            'disasters']
@@ -23,18 +22,6 @@
            0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
 years = len(disasters_data)
 
-# here is the trick
-
-
-@as_op(itypes=[tt.lscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
-def rateFunc(switchpoint, early_mean, late_mean):
-    ''' Concatenate Poisson means '''
-    out = empty(years)
-    out[:switchpoint] = early_mean
-    out[switchpoint:] = late_mean
-    return out
-
-
 with pm.Model() as model:
 
     # Prior for distribution of switchpoint location
@@ -46,23 +33,18 @@ def rateFunc(switchpoint, early_mean, late_mean):
     # Allocate appropriate Poisson rates to years before and after current
     # switchpoint location
     idx = arange(years)
-    # theano style:
-    # rate = switch(switchpoint >= idx, early_mean, late_mean)
-    # non-theano style
-    rate = rateFunc(switchpoint, early_mean, late_mean)
-
+    rate = tt.switch(switchpoint >= idx, early_mean, late_mean)
+
     # Data likelihood
     disasters = pm.Poisson('disasters', rate, observed=disasters_data)
 
-    # Initial values for stochastic nodes
-    start = {'early_mean': 2., 'late_mean': 3.}
-
     # Use slice sampler for means
     step1 = pm.Slice([early_mean, late_mean])
     # Use Metropolis for switchpoint, since it accomodates discrete variables
     step2 = pm.Metropolis([switchpoint])
-
-    # njobs>1 works only with most recent (mid August 2014) Theano version:
-    # https://github.com/Theano/Theano/pull/2021
-    tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], njobs=1)
+
+    # Initial values for stochastic nodes
+    start = {'early_mean': 2., 'late_mean': 3.}
+
+    tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], njobs=2)
 pm.traceplot(tr)
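
The tt.switch line replaces the numpy-filling rateFunc with a symbolic element-wise select, keeping the rate inside the Theano graph instead of behind an as_op black box. A standalone toy run (our sketch, values made up) shows what it computes:

    import numpy as np
    import theano
    import theano.tensor as tt

    idx = np.arange(10)
    switchpoint = tt.lscalar('switchpoint')
    early_mean = tt.dscalar('early_mean')
    late_mean = tt.dscalar('late_mean')

    # early_mean where switchpoint >= idx, late_mean elsewhere
    rate = tt.switch(switchpoint >= idx, early_mean, late_mean)
    f = theano.function([switchpoint, early_mean, late_mean], rate)
    print(f(4, 2., 3.))  # [2. 2. 2. 2. 2. 3. 3. 3. 3. 3.]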
