
Update examples #2254


Merged: 6 commits, Jun 12, 2017
16 changes: 13 additions & 3 deletions pymc3/distributions/distribution.py
@@ -476,8 +476,17 @@ class Bound(object):

     Example
     -------
-    boundedNormal = pymc3.Bound(pymc3.Normal, lower=0.0)
-    par = boundedNormal(mu=0.0, sd=1.0, testval=1.0)
+    # Bounded distribution can be defined before the model context
+    PositiveNormal = pm.Bound(pm.Normal, lower=0.0)
+    with pm.Model():
+        par1 = PositiveNormal('par1', mu=0.0, sd=1.0, testval=1.0)
+        # or within the model context
+        NegativeNormal = pm.Bound(pm.Normal, upper=0.0)
+        par2 = NegativeNormal('par2', mu=0.0, sd=1.0, testval=1.0)
+
+        # or you can define it implicitly within the model context
+        par3 = pm.Bound(pm.Normal, lower=-1.0, upper=1.0)(
+            'par3', mu=0.0, sd=1.0, testval=1.0)
     """

@@ -490,7 +499,8 @@ def __call__(self, *args, **kwargs):
             raise ValueError('Observed Bound distributions are not allowed. '
                              'If you want to model truncated data '
                              'you can use a pm.Potential in combination '
-                             'with the cumulative probability function.')
+                             'with the cumulative probability function. See '
+                             'pymc3/examples/censored_data.py for an example.')
         first, args = args[0], args[1:]

         return Bounded(first, self.distribution, self.lower, self.upper,
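The new error message points to a Potential-based workaround for truncated data. The following is a minimal sketch of that technique; the model, priors, and bound value are illustrative and not part of this PR, but the numerically stable normal_lcdf helper is the one censored_data.py defines further down in this diff:

    import numpy as np
    import pymc3 as pm
    import theano.tensor as tt

    def normal_lcdf(mu, sigma, x):
        # log CDF of the Normal, stable in the left tail
        # (same helper as in pymc3/examples/censored_data.py)
        z = (x - mu) / sigma
        return tt.switch(
            tt.lt(z, -1.0),
            tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
            tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
        )

    # data truncated from above at a known bound
    bound = 1.0
    y = np.random.normal(size=1000)
    y = y[y < bound]

    with pm.Model():
        mu = pm.Normal('mu', mu=0., sd=10.)
        sigma = pm.HalfNormal('sigma', sd=1.)
        pm.Normal('obs', mu=mu, sd=sigma, observed=y)
        # each kept observation is conditioned on y < bound, so renormalize
        # the likelihood by subtracting n * log P(y < bound)
        pm.Potential('truncation', -len(y) * normal_lcdf(mu, sigma, bound))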
51 changes: 23 additions & 28 deletions pymc3/examples/LKJ_correlation.py
@@ -1,59 +1,54 @@
 import theano.tensor as tt
 import numpy as np
 from numpy.random import multivariate_normal

 import pymc3 as pm

 # Generate some multivariate normal data:
 n_obs = 1000

 # Mean values:
-mu = np.linspace(0, 2, num=4)
-n_var = len(mu)
+mu_r = np.linspace(0, 2, num=4)
+n_var = len(mu_r)

 # Standard deviations:
 stds = np.ones(4) / 2.0

 # Correlation matrix of 4 variables:
-corr = np.array([[1., 0.75, 0., 0.15],
-                 [0.75, 1., -0.06, 0.19],
-                 [0., -0.06, 1., -0.04],
-                 [0.15, 0.19, -0.04, 1.]])
-cov_matrix = np.diag(stds).dot(corr.dot(np.diag(stds)))
-
-dataset = multivariate_normal(mu, cov_matrix, size=n_obs)
-
+corr_r = np.array([[1., 0.75, 0., 0.15],
+                   [0.75, 1., -0.06, 0.19],
+                   [0., -0.06, 1., -0.04],
+                   [0.15, 0.19, -0.04, 1.]])
+cov_matrix = np.diag(stds).dot(corr_r.dot(np.diag(stds)))

-# In order to convert the upper triangular correlation values to a complete
-# correlation matrix, we need to construct an index matrix:
-n_elem = int(n_var * (n_var - 1) / 2)
-tri_index = np.zeros([n_var, n_var], dtype=int)
-tri_index[np.triu_indices(n_var, k=1)] = np.arange(n_elem)
-tri_index[np.triu_indices(n_var, k=1)[::-1]] = np.arange(n_elem)
+dataset = multivariate_normal(mu_r, cov_matrix, size=n_obs)

 with pm.Model() as model:

     mu = pm.Normal('mu', mu=0, sd=1, shape=n_var)

-    # We can specify separate priors for sigma and the correlation matrix:
-    sigma = pm.Uniform('sigma', shape=n_var)
-    corr_triangle = pm.LKJCorr('corr', n=1, p=n_var)
-    corr_matrix = corr_triangle[tri_index]
-    corr_matrix = tt.fill_diagonal(corr_matrix, 1)
+    # Note that we access the distribution for the standard
+    # deviations, and do not create a new random variable.
+    sd_dist = pm.HalfCauchy.dist(beta=2.5)
+    packed_chol = pm.LKJCholeskyCov('chol_cov', n=n_var, eta=1, sd_dist=sd_dist)
+    # compute the covariance matrix
+    chol = pm.expand_packed_triangular(n_var, packed_chol, lower=True)
+    cov = tt.dot(chol, chol.T)

-    cov_matrix = tt.diag(sigma).dot(corr_matrix.dot(tt.diag(sigma)))
+    # Extract the standard deviations etc
+    sd = pm.Deterministic('sd', tt.sqrt(tt.diag(cov)))
+    corr = tt.diag(sd**-1).dot(cov.dot(tt.diag(sd**-1)))
+    r = pm.Deterministic('r', corr[np.triu_indices(n_var, k=1)])

-    like = pm.MvNormal('likelihood', mu=mu, cov=cov_matrix, observed=dataset)
+    like = pm.MvNormal('likelihood', mu=mu, chol=chol, observed=dataset)


 def run(n=1000):
     if n == "short":
         n = 50
     with model:
-        start = pm.find_MAP()
-        step = pm.NUTS(scaling=start)
-        trace = pm.sample(n, step=step, start=start)
-    return trace
+        trace = pm.sample(n)
+    pm.traceplot(trace, varnames=['mu', 'r'],
+                 lines={'mu': mu_r, 'r': corr_r[np.triu_indices(n_var, k=1)]})

 if __name__ == '__main__':
     run()
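For reference, LKJCholeskyCov returns the Cholesky factor packed into a flat vector, and expand_packed_triangular rearranges it into the full lower-triangular matrix. A numpy-only sketch of the unpacking, assuming the row-major lower-triangle ordering (worth verifying against the pymc3 docs):

    import numpy as np

    n = 4
    n_elem = n * (n + 1) // 2          # 10 packed entries for n = 4
    packed = np.arange(1., n_elem + 1)

    chol = np.zeros((n, n))
    chol[np.tril_indices(n)] = packed  # fill the lower triangle row by row
    cov = chol.dot(chol.T)             # same as tt.dot(chol, chol.T) above

    sd = np.sqrt(np.diag(cov))         # standard deviations, as in the model
    corr = cov / np.outer(sd, sd)      # correlation matrix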
22 changes: 12 additions & 10 deletions pymc3/examples/arbitrary_stochastic.py
@@ -4,23 +4,25 @@


 def build_model():
+    # data
+    failure = np.array([0., 1.])
+    value = np.array([1., 0.])
+
+    # custom log-likelihood
+    def logp(failure, value):
+        return tt.sum(failure * tt.log(lam) - lam * value)
+
+    # model
     with pm.Model() as model:
-        lam = pm.Exponential('lam', 1)
-        failure = np.array([0, 1])
-        value = np.array([1, 0])
-
-        def logp(failure, value):
-            return tt.sum(failure * np.log(lam) - lam * value)
+        lam = pm.Exponential('lam', 1.)
         pm.DensityDist('x', logp, observed={'failure': failure, 'value': value})
     return model


 def run(n_samples=3000):
     model = build_model()
-    start = model.test_point
-    h = pm.find_hessian(start, model=model)
-    step = pm.Metropolis(model.vars, h, blocked=True, model=model)
-    trace = pm.sample(n_samples, step=step, start=start, model=model)
+    with model:
+        trace = pm.sample(n_samples)
     return trace

 if __name__ == "__main__":
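A usage sketch for the refactored example above (assumes the definitions from arbitrary_stochastic.py; evaluating the joint log-probability at the test point is a cheap sanity check that the custom logp builds):

    model = build_model()
    print(model.logp(model.test_point))  # finite -> the custom logp evaluates
    with model:
        trace = pm.sample(500)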
23 changes: 11 additions & 12 deletions pymc3/examples/arma_example.py
@@ -1,10 +1,10 @@
-from pymc3 import Normal, sample, Model, plots, Potential, variational, HalfCauchy
+import pymc3 as pm
 from theano import scan, shared

 import numpy as np
 """
 ARMA example
-It is interesting to note just how much more compact this is that the original STAN example
+It is interesting to note just how much more compact this is than the original STAN example

 The original implementation is in the STAN documentation by Gelman et al and is reproduced below

@@ -52,11 +52,11 @@

 def build_model():
     y = shared(np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float32))
-    with Model() as arma_model:
-        sigma = HalfCauchy('sigma', 5)
-        theta = Normal('theta', 0, sd=2)
-        phi = Normal('phi', 0, sd=2)
-        mu = Normal('mu', 0, sd=10)
+    with pm.Model() as arma_model:
+        sigma = pm.HalfCauchy('sigma', 5.)
+        theta = pm.Normal('theta', 0., sd=2.)
+        phi = pm.Normal('phi', 0., sd=2.)
+        mu = pm.Normal('mu', 0., sd=10.)

         err0 = y[0] - (mu + phi * mu)

@@ -69,19 +69,18 @@ def calc_next(last_y, this_y, err, mu, phi, theta):
                    outputs_info=[err0],
                    non_sequences=[mu, phi, theta])

-        Potential('like', Normal.dist(0, sd=sigma).logp(err))
-        variational.advi(n=2000)
+        pm.Potential('like', pm.Normal.dist(0, sd=sigma).logp(err))
     return arma_model


 def run(n_samples=1000):
     model = build_model()
     with model:
-        trace = sample(draws=n_samples)
+        trace = pm.sample(draws=n_samples)

     burn = n_samples // 10
-    plots.traceplot(trace[burn:])
-    plots.forestplot(trace[burn:])
+    pm.plots.traceplot(trace[burn:])
+    pm.plots.forestplot(trace[burn:])


 if __name__ == '__main__':
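For context, calc_next in the hunk header computes one-step ARMA(1,1) innovations inside theano.scan. A standalone sketch of that recursion, with the body reconstructed from the standard ARMA(1,1) mean equation (an assumption; compare with the full file):

    import numpy as np
    import theano
    import theano.tensor as tt
    from theano import scan, shared

    y = shared(np.array([15., 10., 16., 11., 9., 11., 10., 18.]))
    mu, phi, theta = tt.dscalars('mu', 'phi', 'theta')

    err0 = y[0] - (mu + phi * mu)

    def calc_next(last_y, this_y, err, mu, phi, theta):
        nu_t = mu + phi * last_y + theta * err   # ARMA(1,1) one-step mean
        return this_y - nu_t                     # innovation at time t

    # taps=[-1, 0] feeds (y[t-1], y[t]) to calc_next; err is carried through
    err, _ = scan(fn=calc_next,
                  sequences=dict(input=y, taps=[-1, 0]),
                  outputs_info=[err0],
                  non_sequences=[mu, phi, theta])

    errors = theano.function([mu, phi, theta], err)
    print(errors(10., 0.5, 0.2))   # 7 innovations for the 8 observations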
45 changes: 23 additions & 22 deletions pymc3/examples/baseball.py
@@ -5,33 +5,34 @@

 import pymc3 as pm
 import numpy as np
-import theano
-
-data = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'), delimiter="\t", skiprows=1, usecols=(2,3))
-
-atBats = data[:,0].astype(theano.config.floatX)
-hits = data[:,1].astype(theano.config.floatX)
-
-N = len( hits )
-
-model = pm.Model()
-
-# we want to bound the kappa below
-BoundedKappa = pm.Bound( pm.Pareto, lower=1.0 )
-
-with model:
-    phi = pm.Uniform( 'phi', lower=0.0, upper=1.0 )
-    kappa = BoundedKappa( 'kappa', alpha=1.0001, m=1.5 )
-    thetas = pm.Beta( 'thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=N )
-    ys = pm.Binomial( 'ys', n=atBats, p=thetas, observed=hits )
-
-def run( n=100000 ):
+
+def build_model():
+    data = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'), delimiter="\t",
+                      skiprows=1, usecols=(2,3))
+
+    atbats = pm.floatX(data[:,0])
+    hits = pm.floatX(data[:,1])
+
+    N = len(hits)
+
+    # we want to bound the kappa below
+    BoundedKappa = pm.Bound(pm.Pareto, lower=1.0)
+
+    with pm.Model() as model:
+        phi = pm.Uniform('phi', lower=0.0, upper=1.0)
+        kappa = BoundedKappa('kappa', alpha=1.0001, m=1.5)
+        thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=N)
+        ys = pm.Binomial('ys', n=atbats, p=thetas, observed=hits)
+    return model
+
+def run(n=2000):
+    model = build_model()
     with model:
-        # initialize NUTS() with ADVI under the hood
-        trace = pm.sample( n )
+        trace = pm.sample(n, nuts_kwargs={'target_accept':.99})
Review comment (Member), on the pm.sample line above:

    Wow, does it require this to converge?

Reply (Member, Author):

    it's fine without, but there is a divergent warning.

Reply (Member):

    No problem; just surprised.

-    # drop some first samples as burnin
-    pm.traceplot( trace[1000:] )
+    pm.traceplot(trace[1000:])

 if __name__ == '__main__':
     run()
4 changes: 2 additions & 2 deletions pymc3/examples/censored_data.py
@@ -38,7 +38,7 @@ def normal_lcdf(mu, sigma, x):
     z = (x - mu) / sigma
     return tt.switch(
         tt.lt(z, -1.0),
-        tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2,
+        tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
         tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
     )

@@ -47,7 +47,7 @@ def normal_lccdf(mu, sigma, x):
     z = (x - mu) / sigma
     return tt.switch(
         tt.gt(z, 1.0),
-        tt.log(tt.erfcx(z / tt.sqrt(2.)) / 2) - tt.sqr(z) / 2.,
+        tt.log(tt.erfcx(z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
         tt.log1p(-tt.erfc(-z / tt.sqrt(2.)) / 2.)
     )
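The two hunks above only make the float literals consistent, but the stabilized log-CDF is easy to sanity-check numerically. A quick comparison against scipy (tolerances are assumed):

    import numpy as np
    from scipy import stats
    import theano.tensor as tt

    def normal_lcdf(mu, sigma, x):   # as in the diff above
        z = (x - mu) / sigma
        return tt.switch(
            tt.lt(z, -1.0),
            tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
            tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
        )

    # both branches agree with the reference implementation
    for x in (-30., -2., 0., 2., 30.):
        ours = float(normal_lcdf(0., 1., x).eval())
        ref = stats.norm.logcdf(x)
        assert np.isclose(ours, ref), (x, ours, ref)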
25 changes: 14 additions & 11 deletions pymc3/examples/custom_dists.py
@@ -11,7 +11,7 @@
 import matplotlib.pyplot as plt
 import numpy as np

-import pymc3
+import pymc3 as pm
 import theano.tensor as tt

 np.random.seed(42)

@@ -24,20 +24,23 @@
 ydata = np.random.normal(ydata, 10)
 data = {'x': xdata, 'y': ydata}

-with pymc3.Model() as model:
-    alpha = pymc3.Uniform('intercept', -100, 100)
+# define loglikelihood outside of the model context, otherwise njobs won't work:
+# Lambdas defined in local namespace are not picklable (see issue #1995)
+def loglike1(value):
+    return -1.5 * tt.log(1 + value**2)
+def loglike2(value):
+    return -tt.log(tt.abs_(value))
+
+with pm.Model() as model:
+    alpha = pm.Normal('intercept', mu=0, sd=100)
     # Create custom densities
-    beta = pymc3.DensityDist('slope', lambda value: -
-                             1.5 * tt.log(1 + value**2), testval=0)
-    sigma = pymc3.DensityDist(
-        'sigma', lambda value: -tt.log(tt.abs_(value)), testval=1)
+    beta = pm.DensityDist('slope', loglike1, testval=0)
+    sigma = pm.DensityDist('sigma', loglike2, testval=1)
     # Create likelihood
-    like = pymc3.Normal('y_est', mu=alpha + beta *
+    like = pm.Normal('y_est', mu=alpha + beta *
                      xdata, sd=sigma, observed=ydata)

-    start = pymc3.find_MAP()
-    step = pymc3.NUTS(scaling=start)  # Instantiate sampler
-    trace = pymc3.sample(10000, step, start=start)
+    trace = pm.sample(2000, njobs=2)


 #################################################
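The comment about issue #1995 can be demonstrated directly. A small sketch of the pickling behavior that breaks njobs > 1 with lambdas (exception types vary slightly across Python versions):

    import pickle

    def module_level(value):
        return value ** 2

    pickle.dumps(module_level)        # fine: pickled by module + name

    try:
        pickle.dumps(lambda value: value ** 2)
    except (pickle.PicklingError, AttributeError) as e:
        print(e)                      # lambdas have no importable name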
34 changes: 8 additions & 26 deletions pymc3/examples/disaster_model_arbitrary_deterministic.py
@@ -7,8 +7,7 @@

 import pymc3 as pm
 import theano.tensor as tt
-from theano import as_op
-from numpy import arange, array, empty
+from numpy import arange, array

 __all__ = ['disasters_data', 'switchpoint', 'early_mean', 'late_mean', 'rate',
            'disasters']

@@ -23,18 +22,6 @@
                           0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
 years = len(disasters_data)

-# here is the trick
-
-
-@as_op(itypes=[tt.lscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
-def rateFunc(switchpoint, early_mean, late_mean):
-    ''' Concatenate Poisson means '''
-    out = empty(years)
-    out[:switchpoint] = early_mean
-    out[switchpoint:] = late_mean
-    return out
-
-
 with pm.Model() as model:

     # Prior for distribution of switchpoint location

@@ -46,23 +33,18 @@ def rateFunc(switchpoint, early_mean, late_mean):
     # Allocate appropriate Poisson rates to years before and after current
     # switchpoint location
     idx = arange(years)
-    # theano style:
-    # rate = switch(switchpoint >= idx, early_mean, late_mean)
-    # non-theano style
-    rate = rateFunc(switchpoint, early_mean, late_mean)
+
+    rate = tt.switch(switchpoint >= idx, early_mean, late_mean)

     # Data likelihood
     disasters = pm.Poisson('disasters', rate, observed=disasters_data)

-    # Initial values for stochastic nodes
-    start = {'early_mean': 2., 'late_mean': 3.}
-
     # Use slice sampler for means
     step1 = pm.Slice([early_mean, late_mean])
     # Use Metropolis for switchpoint, since it accommodates discrete variables
     step2 = pm.Metropolis([switchpoint])

-    # njobs>1 works only with most recent (mid August 2014) Theano version:
-    # https://github.com/Theano/Theano/pull/2021
-    tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], njobs=1)
+    # Initial values for stochastic nodes
+    start = {'early_mean': 2., 'late_mean': 3.}
+
+    tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], njobs=2)
     pm.traceplot(tr)
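The tt.switch expression that replaces the @as_op trick broadcasts a scalar comparison over the year index, so no custom Op is needed. A standalone check of that broadcasting:

    import numpy as np
    import theano.tensor as tt

    idx = np.arange(6)
    switchpoint, early_mean, late_mean = 2, 1.0, 3.0

    # scalar >= vector broadcasts to a boolean vector, picking a rate per year
    rate = tt.switch(switchpoint >= idx, early_mean, late_mean)
    print(rate.eval())   # [1. 1. 1. 3. 3. 3.]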