Skip to content

Allow arrays in pm.Bound #2277

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 29, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pymc3/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
from .discrete import Geometric
from .discrete import Categorical

from .distribution import Bound
from .distribution import DensityDist
from .distribution import Distribution
from .distribution import Continuous
Expand Down Expand Up @@ -76,6 +75,8 @@
from .transforms import log
from .transforms import sum_to_1

from .bound import Bound

__all__ = ['Uniform',
'Flat',
'Normal',
Expand Down Expand Up @@ -136,5 +137,6 @@
'Triangular',
'DiscreteWeibull',
'Gumbel',
'Interpolated',
'Bound',
]
232 changes: 232 additions & 0 deletions pymc3/distributions/bound.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
from numbers import Real

import numpy as np
import theano.tensor as tt
import theano

from pymc3.distributions.distribution import (
Distribution, Discrete, Continuous, draw_values, generate_samples)
from pymc3.distributions import transforms
from pymc3.distributions.dist_math import bound

__all__ = ['Bound']


class _Bounded(Distribution):
    """Base class for a distribution wrapped with optional lower/upper bounds.

    Delegates shape, dtype, testval, defaults and transform to the wrapped
    distribution object and masks the log-probability outside the bounds.
    A bound of ``None`` means "unbounded on that side".
    """

    def __init__(self, distribution, lower, upper, default, *args, **kwargs):
        # `distribution` is a Distribution *class*; `.dist(...)` builds the
        # unnamed distribution object that is being bounded.
        self.lower = lower
        self.upper = upper
        self._wrapped = distribution.dist(*args, **kwargs)

        if default is None:
            # No explicit default: mirror the wrapped distribution's default
            # attributes onto this object so the base class can find them.
            defaults = self._wrapped.defaults
            for name in defaults:
                setattr(self, name, getattr(self._wrapped, name))
        else:
            defaults = ('_default',)
            self._default = default

        super(_Bounded, self).__init__(
            shape=self._wrapped.shape,
            dtype=self._wrapped.dtype,
            testval=self._wrapped.testval,
            defaults=defaults,
            transform=self._wrapped.transform)

    def logp(self, value):
        """Return the wrapped logp, forced to -inf (via `bound`) wherever
        `value` violates a bound. Bound checks are elementwise, so
        array-valued bounds are supported here."""
        logp = self._wrapped.logp(value)
        bounds = []
        if self.lower is not None:
            bounds.append(value >= self.lower)
        if self.upper is not None:
            bounds.append(value <= self.upper)
        if len(bounds) > 0:
            return bound(logp, *bounds)
        else:
            return logp

    def _random(self, lower, upper, point=None, size=None):
        # Rejection sampling: repeatedly draw from the wrapped distribution
        # and keep only draws inside [lower, upper] until `size` samples
        # are collected.
        # NOTE(review): if the bounded region has (near-)zero probability
        # mass this loop may take very long or not terminate — there is no
        # iteration cap. Confirm this is acceptable for the intended use.
        lower = np.asarray(lower)
        upper = np.asarray(upper)
        if lower.size > 1 or upper.size > 1:
            raise ValueError('Drawing samples from distributions with '
                             'array-valued bounds is not supported.')
        samples = np.zeros(size, dtype=self.dtype).flatten()
        i, n = 0, len(samples)
        while i < len(samples):
            sample = self._wrapped.random(point=point, size=n)
            select = sample[np.logical_and(sample >= lower, sample <= upper)]
            samples[i:(i + len(select))] = select[:]
            i += len(select)
            n -= len(select)
        if size is not None:
            return np.reshape(samples, size)
        else:
            return samples

    def random(self, point=None, size=None, repeat=None):
        """Draw random samples, dispatching on which bounds are set.

        Unset bounds are passed to `_random` as +/-inf so the rejection
        test in `_random` accepts everything on that side.
        """
        if self.lower is None and self.upper is None:
            # Fully unbounded: sample directly from the wrapped distribution.
            return self._wrapped.random(point=point, size=size)
        elif self.lower is not None and self.upper is not None:
            lower, upper = draw_values([self.lower, self.upper], point=point)
            return generate_samples(self._random, lower, upper, point,
                                    dist_shape=self.shape,
                                    size=size)
        elif self.lower is not None:
            lower = draw_values([self.lower], point=point)
            return generate_samples(self._random, lower, np.inf, point,
                                    dist_shape=self.shape,
                                    size=size)
        else:
            upper = draw_values([self.upper], point=point)
            return generate_samples(self._random, -np.inf, upper, point,
                                    dist_shape=self.shape,
                                    size=size)


class _DiscreteBounded(_Bounded, Discrete):
    """Discrete distribution restricted to [lower, upper] (bounds inclusive).

    Discrete variables can not be transformed, so only ``transform='infer'``
    (normalized to ``None``) or ``transform=None`` is accepted.

    Raises
    ------
    ValueError
        If a non-None transform is requested.
    """

    def __init__(self, distribution, lower, upper,
                 transform='infer', *args, **kwargs):
        if transform == 'infer':
            transform = None
        if transform is not None:
            raise ValueError('Can not transform discrete variable.')

        # Pick a default test value inside the bounded region.
        # Fix: use a single if/elif chain — as previously written, the
        # trailing `if` statements clobbered the midpoint chosen for the
        # two-sided case with `upper - 1` and then `lower + 1`.
        if lower is None and upper is None:
            default = None
        elif lower is not None and upper is not None:
            default = (lower + upper) // 2
        elif upper is not None:
            default = upper - 1
        else:
            default = lower + 1

        super(_DiscreteBounded, self).__init__(
            distribution=distribution, lower=lower, upper=upper,
            default=default, *args, **kwargs)


class _ContinuousBounded(_Bounded, Continuous):
    R"""
    An upper, lower or upper+lower bounded distribution

    Parameters
    ----------
    distribution : pymc3 distribution
        Distribution to be transformed into a bounded distribution
    lower : float (optional)
        Lower bound of the distribution, set to -inf to disable.
    upper : float (optional)
        Upper bound of the distribution, set to inf to disable.
    transform : 'infer' or object
        If 'infer', infers the right transform to apply from the supplied bounds.
        If transform object, has to supply .forward() and .backward() methods.
        See pymc3.distributions.transforms for more information.
    """

    def __init__(self, distribution, lower, upper,
                 transform='infer', *args, **kwargs):
        # Bounds are cast to tensors of the target dtype so that
        # array-valued bounds work in logp and in the transforms.
        dtype = kwargs.get('dtype', theano.config.floatX)

        if lower is not None:
            lower = tt.as_tensor_variable(lower).astype(dtype)
        if upper is not None:
            upper = tt.as_tensor_variable(upper).astype(dtype)

        if transform == 'infer':
            # Choose the transform matching the bounded region, and a
            # default test value strictly inside that region.
            if lower is None and upper is None:
                transform = None
                default = None
            elif lower is not None and upper is not None:
                transform = transforms.interval(lower, upper)
                default = 0.5 * (lower + upper)
            elif upper is not None:
                transform = transforms.upperbound(upper)
                default = upper - 1
            else:
                transform = transforms.lowerbound(lower)
                default = lower + 1
        else:
            # Explicit transform supplied: fall back to the wrapped
            # distribution's own defaults (handled in _Bounded.__init__).
            default = None

        super(_ContinuousBounded, self).__init__(
            distribution=distribution, lower=lower, upper=upper,
            transform=transform, default=default, *args, **kwargs)


class Bound(object):
R"""
Create a new upper, lower or upper+lower bounded distribution.

The resulting distribution is not normalized anymore. This
is usually fine if the bounds are constants. If you need
truncated distributions, use `Bound` in combination with
a `pm.Potential` with the cumulative probability function.

The bounds are inclusive for discrete distributions.

Parameters
----------
distribution : pymc3 distribution
Distribution to be transformed into a bounded distribution.
lower : float or array like, optional
Lower bound of the distribution.
upper : float or array like, optional
Upper bound of the distribution.

Example
-------
# Bounded distribution can be defined before the model context
PositiveNormal = pm.Bound(pm.Normal, lower=0.0)
with pm.Model():
par1 = PositiveNormal('par1', mu=0.0, sd=1.0, testval=1.0)
# or within the model context
NegativeNormal = pm.Bound(pm.Normal, upper=0.0)
par2 = NegativeNormal('par2', mu=0.0, sd=1.0, testval=1.0)

# or you can define it implicitly within the model context
par3 = pm.Bound(pm.Normal, lower=-1.0, upper=1.0)(
'par3', mu=0.0, sd=1.0, testval=1.0)
"""

def __init__(self, distribution, lower=None, upper=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you feel about returning dynamicly created class that can be ofc pickled with __reduce__? I saw code above and that can be not so difficult and take little coding

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel worried about this extra Bound class that does nothing and is not Distribution itself but proposed to be the one

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather not make this any more complicated than it already is.
Sadly, we don't distinguish properly between random variables and distributions, but use the constructor of our distribution objects to build random variables (observed and otherwise). This leads to the somewhat strange behaviour

issubclass(pm.Normal, pm.Distribution)  # -> True
isinstance(pm.Normal('a'), pm.Distribution) # -> False
isinstance(pm.Normal('a'), pm.Normal) # -> False

As we really don't want to second to be true, I think the first actually shouldn't be true in the first place. We shouldn't think of our distribution classes as distributions, but as factory functions for random variables. The distributions are pm.Normal.dist.
So I kind of don't see the point of adding another non-standard thing on top of the metaclasses and overriden __new__ that's already in there, to make a subclass check true, that shouldn't be true in the first place. Writing it isn't the problem, it just makes the code ever harder to read.

We do have the following now, which I think is the important part:

BoundNormal = pm.Bound(pm.Normal)
isinstance(BoundNormal.dist(), pm.Continuous) # -> True

The original design of pm.Bound is I guess a bit unfortunate, we wouldn't have that problem in the first place if it took a distribution as parameter and returned a random variable:

dist = pm.Normal.dist(mu=5, sd=3)
pm.Bound("a", dist, lower=3)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds reasonable. Hope no one will meet the problem of that kind

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@aseyboldt for the thing you mention above:

dist = pm.Normal.dist(mu=5, sd=3)
pm.Bound("a", dist, lower=3)

Could we not use tt.clip to do the same thing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, clip would be more like censoring, as the values below lower are just set to lower. Bound mostly sets an appropriate transformation.

if isinstance(lower, Real) and lower == -np.inf:
lower = None
if isinstance(upper, Real) and upper == np.inf:
upper = None

if not issubclass(distribution, Distribution):
raise ValueError('"distribution" must be a Distribution subclass.')

self.distribution = distribution
self.lower = lower
self.upper = upper

def __call__(self, *args, **kwargs):
if 'observed' in kwargs:
raise ValueError('Observed Bound distributions are not supported. '
'If you want to model truncated data '
'you can use a pm.Potential in combination '
'with the cumulative probability function. See '
'pymc3/examples/censored_data.py for an example.')
first, args = args[0], args[1:]

if issubclass(self.distribution, Continuous):
return _ContinuousBounded(first, self.distribution,
self.lower, self.upper, *args, **kwargs)
elif issubclass(self.distribution, Discrete):
return _DiscreteBounded(first, self.distribution,
self.lower, self.upper, *args, **kwargs)
else:
raise ValueError('Distribution is neither continuous nor discrete.')

def dist(self, *args, **kwargs):
if issubclass(self.distribution, Continuous):
return _ContinuousBounded.dist(
self.distribution, self.lower, self.upper, *args, **kwargs)

elif issubclass(self.distribution, Discrete):
return _DiscreteBounded.dist(
self.distribution, self.lower, self.upper, *args, **kwargs)
else:
raise ValueError('Distribution is neither continuous nor discrete.')
3 changes: 2 additions & 1 deletion pymc3/distributions/continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
bound, logpow, gammaln, betaln, std_cdf, i0,
i1, alltrue_elemwise, SplineWrapper
)
from .distribution import Continuous, draw_values, generate_samples
from .bound import Bound

__all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace',
'StudentT', 'Cauchy', 'HalfCauchy', 'Gamma', 'Weibull',
Expand Down
Loading