
Commit e8c07ef

Remove sd optional kwarg from distributions (#5583)
1 parent a0cff37 commit e8c07ef

23 files changed: +112 -337 lines

RELEASE-NOTES.md (+1)

@@ -97,6 +97,7 @@ All of the above apply to:
 This includes API changes we did not warn about since at least `3.11.0` (2021-01).

 - Setting initial values through `pm.Distribution(testval=...)` is now `pm.Distribution(initval=...)`.
+- Alternative `sd` keyword argument has been removed from all distributions. `sigma` should be used instead (see [#5583](https://github.com/pymc-devs/pymc/pull/5583)).


 ### New features
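
For code that still used the removed alias, the migration is a pure rename. A minimal before/after sketch (hypothetical model, not taken from this commit):

    import pymc as pm

    with pm.Model():
        # Before this commit, the alias was accepted: pm.Normal("x", mu=0.0, sd=1.0)
        # Now the scale must be passed as sigma:
        x = pm.Normal("x", mu=0.0, sigma=1.0)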

benchmarks/benchmarks/benchmarks.py (+10 -10)

@@ -32,17 +32,17 @@ def glm_hierarchical_model(random_seed=123):

     n_counties = len(data.county.unique())
     with pm.Model() as model:
-        mu_a = pm.Normal("mu_a", mu=0.0, sd=100**2)
+        mu_a = pm.Normal("mu_a", mu=0.0, sigma=100**2)
         sigma_a = pm.HalfCauchy("sigma_a", 5)
-        mu_b = pm.Normal("mu_b", mu=0.0, sd=100**2)
+        mu_b = pm.Normal("mu_b", mu=0.0, sigma=100**2)
         sigma_b = pm.HalfCauchy("sigma_b", 5)
-        a = pm.Normal("a", mu=0, sd=1, shape=n_counties)
-        b = pm.Normal("b", mu=0, sd=1, shape=n_counties)
+        a = pm.Normal("a", mu=0, sigma=1, shape=n_counties)
+        b = pm.Normal("b", mu=0, sigma=1, shape=n_counties)
         a = mu_a + sigma_a * a
         b = mu_b + sigma_b * b
         eps = pm.HalfCauchy("eps", 5)
         radon_est = a[county_idx] + b[county_idx] * data.floor.values
-        pm.Normal("radon_like", mu=radon_est, sd=eps, observed=data.log_radon)
+        pm.Normal("radon_like", mu=radon_est, sigma=eps, observed=data.log_radon)
     return model


@@ -58,7 +58,7 @@ def mixture_model(random_seed=1234):

     with pm.Model() as model:
         w = pm.Dirichlet("w", a=np.ones_like(w_true))
-        mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape)
+        mu = pm.Normal("mu", mu=0.0, sigma=10.0, shape=w_true.shape)
         enforce_order = pm.Potential(
             "enforce_order",
             at.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf)
@@ -88,7 +88,7 @@ class OverheadSuite:
     def setup(self, step):
         self.n_steps = 10000
         with pm.Model() as self.model:
-            pm.Normal("x", mu=0, sd=1)
+            pm.Normal("x", mu=0, sigma=1)

     def time_overhead_sample(self, step):
         with self.model:
@@ -133,8 +133,8 @@ def time_drug_evaluation(self):
         sigma_low = 1
         sigma_high = 10
         with pm.Model():
-            group1_mean = pm.Normal("group1_mean", y_mean, sd=y_std)
-            group2_mean = pm.Normal("group2_mean", y_mean, sd=y_std)
+            group1_mean = pm.Normal("group1_mean", y_mean, sigma=y_std)
+            group2_mean = pm.Normal("group2_mean", y_mean, sigma=y_std)
             group1_std = pm.Uniform("group1_std", lower=sigma_low, upper=sigma_high)
             group2_std = pm.Uniform("group2_std", lower=sigma_low, upper=sigma_high)
             lambda_1 = group1_std**-2
@@ -301,7 +301,7 @@ def freefall(y, t, p):
             # If we know one of the parameter values, we can simply pass the value.
             ode_solution = ode_model(y0=[0], theta=[gamma, 9.8])
             # The ode_solution has a shape of (n_times, n_states)
-            Y = pm.Normal("Y", mu=ode_solution, sd=sigma, observed=y)
+            Y = pm.Normal("Y", mu=ode_solution, sigma=sigma, observed=y)

             t0 = time.time()
             idata = pm.sample(500, tune=1000, chains=2, cores=2, random_seed=0)

docs/source/PyMC_and_Aesara.rst (+7 -7)

@@ -188,8 +188,8 @@ example::

     with pm.Model() as model:
         mu = pm.Normal('mu', 0, 1)
-        sd = pm.HalfNormal('sd', 1)
-        y = pm.Normal('y', mu=mu, sigma=sd, observed=data)
+        sigma = pm.HalfNormal('sigma', 1)
+        y = pm.Normal('y', mu=mu, sigma=sigma, observed=data)

 is roughly equivalent to this::

@@ -203,10 +203,10 @@ is roughly equivalent to this::
     model.add_free_variable(sd_log__)
     model.add_logp_term(corrected_logp_half_normal(sd_log__))

-    sd = at.exp(sd_log__)
-    model.add_deterministic_variable(sd)
+    sigma = at.exp(sd_log__)
+    model.add_deterministic_variable(sigma)

-    model.add_logp_term(pm.Normal.dist(mu, sd).logp(data))
+    model.add_logp_term(pm.Normal.dist(mu, sigma).logp(data))

 The return values of the variable constructors are subclasses
 of Aesara variables, so when we define a variable we can use any
@@ -217,5 +217,5 @@ Aesara operation on them::
     # beta is a at.dvector
     beta = pm.Normal('beta', 0, 1, shape=len(design_matrix))
     predict = at.dot(design_matrix, beta)
-    sd = pm.HalfCauchy('sd', beta=2.5)
-    pm.Normal('y', mu=predict, sigma=sd, observed=data)
+    sigma = pm.HalfCauchy('sigma', beta=2.5)
+    pm.Normal('y', mu=predict, sigma=sigma, observed=data)

docs/source/contributing/developer_guide.rst (+2 -2)

@@ -888,8 +888,8 @@ others. The challenge and some summary of the solution could be found in Luciano

     with pm.Model() as m:
         mu = pm.Normal('mu', 0., 1., shape=(5, 1))
-        sd = pm.HalfNormal('sd', 5., shape=(1, 10))
-        pm.Normal('x', mu=mu, sigma=sd, observed=np.random.randn(2, 5, 10))
+        sigma = pm.HalfNormal('sigma', 5., shape=(1, 10))
+        pm.Normal('x', mu=mu, sigma=sigma, observed=np.random.randn(2, 5, 10))
         trace = pm.sample_prior_predictive(100)

     trace['x'].shape # ==> should be (100, 2, 5, 10)
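
The docs example above leans on standard broadcasting: the (5, 1) `mu` and (1, 10) `sigma` broadcast against the (2, 5, 10) observations, so 100 prior draws stack to (100, 2, 5, 10). A quick numpy check of the implied shape (illustrative only, not part of the diff):

    import numpy as np

    mu = np.zeros((5, 1))
    sigma = np.ones((1, 10))
    obs = np.random.randn(2, 5, 10)
    # mu and sigma broadcast to (5, 10), which aligns with the trailing dims of obs
    print(np.broadcast_shapes(mu.shape, sigma.shape, obs.shape))  # (2, 5, 10)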

docs/source/learn/examples/dimensionality.ipynb (+4 -185)

Large diffs are not rendered by default.

docs/source/learn/examples/posterior_predictive.ipynb (+5 -5)

@@ -151,9 +151,9 @@
 "    b = pm.Normal(\"b\", 0.0, 10.0)\n",
 "\n",
 "    mu = a + b * predictor_scaled\n",
-"    sd = pm.Exponential(\"sd\", 1.0)\n",
+"    sigma = pm.Exponential(\"sigma\", 1.0)\n",
 "\n",
-"    pm.Normal(\"obs\", mu=mu, sigma=sd, observed=outcome_scaled)\n",
+"    pm.Normal(\"obs\", mu=mu, sigma=sigma, observed=outcome_scaled)\n",
 "    idata = pm.sample_prior_predictive(samples=50)"
 ]
 },
@@ -212,9 +212,9 @@
 "    b = pm.Normal(\"b\", 0.0, 1.0)\n",
 "\n",
 "    mu = a + b * predictor_scaled\n",
-"    sd = pm.Exponential(\"sd\", 1.0)\n",
+"    sigma = pm.Exponential(\"sigma\", 1.0)\n",
 "\n",
-"    pm.Normal(\"obs\", mu=mu, sigma=sd, observed=outcome_scaled)\n",
+"    pm.Normal(\"obs\", mu=mu, sigma=sigma, observed=outcome_scaled)\n",
 "    idata = pm.sample_prior_predictive(samples=50)"
 ]
 },
@@ -328,7 +328,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Everything ran smoothly, but it's often difficult to understand what the parameters' values mean when analyzing a trace plot or table summary -- even more so here, as the parameters live in the standardized space. A useful thing to understand your models is... you guessed it: posterior predictive checks! We'll use PyMC's dedicated function to sample data from the posterior. This function will randomly draw 4000 samples of parameters from the trace. Then, for each sample, it will draw 100 random numbers from a normal distribution specified by the values of `mu` and `sd` in that sample:"
+"Everything ran smoothly, but it's often difficult to understand what the parameters' values mean when analyzing a trace plot or table summary -- even more so here, as the parameters live in the standardized space. A useful thing to understand your models is... you guessed it: posterior predictive checks! We'll use PyMC's dedicated function to sample data from the posterior. This function will randomly draw 4000 samples of parameters from the trace. Then, for each sample, it will draw 100 random numbers from a normal distribution specified by the values of `mu` and `sigma` in that sample:"
 ]
 },
 {
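
The markdown cell above describes what posterior predictive sampling does; for orientation, a minimal sketch of the call it refers to (assuming the `model` and `idata` names from the surrounding notebook cells):

    with model:
        # one simulated dataset per posterior draw of (mu, sigma)
        ppc = pm.sample_posterior_predictive(idata)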

pymc/data.py (+2 -2)

@@ -205,8 +205,8 @@ class Minibatch(TensorVariable):

     >>> with pm.Model() as model:
     ...     mu = pm.Flat('mu')
-    ...     sd = pm.HalfNormal('sd')
-    ...     lik = pm.Normal('lik', mu, sd, observed=x, total_size=(100, 100))
+    ...     sigma = pm.HalfNormal('sigma')
+    ...     lik = pm.Normal('lik', mu, sigma, observed=x, total_size=(100, 100))


 Then you can perform regular Variational Inference out of the box
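
The docstring's follow-up refers to variational inference; the usual next call looks like this (a sketch assuming the `model` above):

    with model:
        approx = pm.fit(n=10000)  # runs ADVI by default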

pymc/distributions/continuous.py (+13 -51)

@@ -546,13 +546,10 @@ class Normal(Continuous):
     rv_op = normal

     @classmethod
-    def dist(cls, mu=0, sigma=None, tau=None, sd=None, no_assert=False, **kwargs):
-        if sd is not None:
-            sigma = sd
+    def dist(cls, mu=0, sigma=None, tau=None, no_assert=False, **kwargs):
         tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
         sigma = at.as_tensor_variable(sigma)

-        # sd = sigma
         # tau = at.as_tensor_variable(tau)
         # mean = median = mode = mu = at.as_tensor_variable(floatX(mu))
         # variance = 1.0 / self.tau
@@ -710,13 +707,11 @@ def dist(
         mu: Optional[DIST_PARAMETER_TYPES] = None,
         sigma: Optional[DIST_PARAMETER_TYPES] = None,
         tau: Optional[DIST_PARAMETER_TYPES] = None,
-        sd: Optional[DIST_PARAMETER_TYPES] = None,
         lower: Optional[DIST_PARAMETER_TYPES] = None,
         upper: Optional[DIST_PARAMETER_TYPES] = None,
         *args,
         **kwargs,
     ) -> RandomVariable:
-        sigma = sd if sd is not None else sigma
         tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
         sigma = at.as_tensor_variable(sigma)
         tau = at.as_tensor_variable(tau)
@@ -866,10 +861,7 @@ class HalfNormal(PositiveContinuous):
     rv_op = halfnormal

     @classmethod
-    def dist(cls, sigma=None, tau=None, sd=None, *args, **kwargs):
-        if sd is not None:
-            sigma = sd
-
+    def dist(cls, sigma=None, tau=None, *args, **kwargs):
         tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)

         assert_negative_support(tau, "tau", "HalfNormal")
@@ -1226,10 +1218,7 @@ class Beta(UnitContinuous):
     rv_op = aesara.tensor.random.beta

     @classmethod
-    def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwargs):
-        if sd is not None:
-            sigma = sd
-
+    def dist(cls, alpha=None, beta=None, mu=None, sigma=None, *args, **kwargs):
         alpha, beta = cls.get_alpha_beta(alpha, beta, mu, sigma)
         alpha = at.as_tensor_variable(floatX(alpha))
         beta = at.as_tensor_variable(floatX(beta))
@@ -1785,10 +1774,7 @@ class LogNormal(PositiveContinuous):
     rv_op = lognormal

     @classmethod
-    def dist(cls, mu=0, sigma=None, tau=None, sd=None, *args, **kwargs):
-        if sd is not None:
-            sigma = sd
-
+    def dist(cls, mu=0, sigma=None, tau=None, *args, **kwargs):
         tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)

         mu = at.as_tensor_variable(floatX(mu))
@@ -1914,9 +1900,7 @@ class StudentT(Continuous):
     rv_op = studentt

     @classmethod
-    def dist(cls, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs):
-        if sd is not None:
-            sigma = sd
+    def dist(cls, nu, mu=0, lam=None, sigma=None, *args, **kwargs):
         nu = at.as_tensor_variable(floatX(nu))
         lam, sigma = get_tau_sigma(tau=lam, sigma=sigma)
         sigma = at.as_tensor_variable(sigma)
@@ -2306,10 +2290,7 @@ class Gamma(PositiveContinuous):
     rv_op = gamma

     @classmethod
-    def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, no_assert=False, **kwargs):
-        if sd is not None:
-            sigma = sd
-
+    def dist(cls, alpha=None, beta=None, mu=None, sigma=None, no_assert=False, **kwargs):
         alpha, beta = cls.get_alpha_beta(alpha, beta, mu, sigma)
         alpha = at.as_tensor_variable(floatX(alpha))
         beta = at.as_tensor_variable(floatX(beta))
@@ -2426,10 +2407,7 @@ class InverseGamma(PositiveContinuous):
     rv_op = invgamma

     @classmethod
-    def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwargs):
-        if sd is not None:
-            sigma = sd
-
+    def dist(cls, alpha=None, beta=None, mu=None, sigma=None, *args, **kwargs):
         alpha, beta = cls._get_alpha_beta(alpha, beta, mu, sigma)
         alpha = at.as_tensor_variable(floatX(alpha))
         beta = at.as_tensor_variable(floatX(beta))
@@ -2750,11 +2728,7 @@ class HalfStudentT(PositiveContinuous):
     rv_op = halfstudentt

     @classmethod
-    def dist(cls, nu=1, sigma=None, lam=None, sd=None, *args, **kwargs):
-
-        if sd is not None:
-            sigma = sd
-
+    def dist(cls, nu=1, sigma=None, lam=None, *args, **kwargs):
         nu = at.as_tensor_variable(floatX(nu))
         lam, sigma = get_tau_sigma(lam, sigma)
         sigma = at.as_tensor_variable(sigma)
@@ -2886,11 +2860,7 @@ class ExGaussian(Continuous):
     rv_op = exgaussian

     @classmethod
-    def dist(cls, mu=0.0, sigma=None, nu=None, sd=None, *args, **kwargs):
-
-        if sd is not None:
-            sigma = sd
-
+    def dist(cls, mu=0.0, sigma=None, nu=None, *args, **kwargs):
         mu = at.as_tensor_variable(floatX(mu))
         sigma = at.as_tensor_variable(floatX(sigma))
         nu = at.as_tensor_variable(floatX(nu))
@@ -3118,10 +3088,7 @@ class SkewNormal(Continuous):
     rv_op = skewnormal

     @classmethod
-    def dist(cls, alpha=1, mu=0.0, sigma=None, tau=None, sd=None, *args, **kwargs):
-        if sd is not None:
-            sigma = sd
-
+    def dist(cls, alpha=1, mu=0.0, sigma=None, tau=None, *args, **kwargs):
         tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
         alpha = at.as_tensor_variable(floatX(alpha))
         mu = at.as_tensor_variable(floatX(mu))
@@ -3445,10 +3412,7 @@ class Rice(PositiveContinuous):
     rv_op = rice

     @classmethod
-    def dist(cls, nu=None, sigma=None, b=None, sd=None, *args, **kwargs):
-        if sd is not None:
-            sigma = sd
-
+    def dist(cls, nu=None, sigma=None, b=None, *args, **kwargs):
         nu, b, sigma = cls.get_nu_b(nu, b, sigma)
         b = at.as_tensor_variable(floatX(b))
         sigma = at.as_tensor_variable(floatX(sigma))
@@ -3657,12 +3621,10 @@ class LogitNormal(UnitContinuous):
     rv_op = logit_normal

     @classmethod
-    def dist(cls, mu=0, sigma=None, tau=None, sd=None, **kwargs):
-        if sd is not None:
-            sigma = sd
+    def dist(cls, mu=0, sigma=None, tau=None, **kwargs):
         mu = at.as_tensor_variable(floatX(mu))
         tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
-        sigma = sd = at.as_tensor_variable(sigma)
+        sigma = at.as_tensor_variable(sigma)
         tau = at.as_tensor_variable(tau)
         assert_negative_support(sigma, "sigma", "LogitNormal")
         assert_negative_support(tau, "tau", "LogitNormal")
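
Note that every `dist` method above still funnels the scale through `get_tau_sigma`, which resolves whichever of `tau` or `sigma` was supplied (tau = 1/sigma**2), so the precision parametrization survives even though the `sd` alias is gone. A minimal sketch of the equivalence:

    import pymc as pm

    # Two parametrizations of the same distribution, since tau = 1 / sigma**2:
    x1 = pm.Normal.dist(mu=0.0, sigma=2.0)
    x2 = pm.Normal.dist(mu=0.0, tau=0.25)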

pymc/distributions/mixture.py (+2 -6)

@@ -514,17 +514,13 @@ class NormalMixture:
         y = pm.NormalMixture("y", w=weights, mu=μ, sigma=σ, observed=data)
     """

-    def __new__(cls, name, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), **kwargs):
-        if sd is not None:
-            sigma = sd
+    def __new__(cls, name, w, mu, sigma=None, tau=None, comp_shape=(), **kwargs):
         _, sigma = get_tau_sigma(tau=tau, sigma=sigma)

         return Mixture(name, w, Normal.dist(mu, sigma=sigma, size=comp_shape), **kwargs)

     @classmethod
-    def dist(cls, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), **kwargs):
-        if sd is not None:
-            sigma = sd
+    def dist(cls, w, mu, sigma=None, tau=None, comp_shape=(), **kwargs):
         _, sigma = get_tau_sigma(tau=tau, sigma=sigma)

         return Mixture.dist(w, Normal.dist(mu, sigma=sigma, size=comp_shape), **kwargs)

pymc/distributions/timeseries.py (+6 -13)

@@ -108,15 +108,10 @@ class AR(distribution.Continuous):
         distribution for initial values (Defaults to Flat())
     """

-    def __init__(
-        self, rho, sigma=None, tau=None, constant=False, init=None, sd=None, *args, **kwargs
-    ):
+    def __init__(self, rho, sigma=None, tau=None, constant=False, init=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        if sd is not None:
-            sigma = sd
-
         tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
-        self.sigma = self.sd = at.as_tensor_variable(sigma)
+        self.sigma = at.as_tensor_variable(sigma)
         self.tau = at.as_tensor_variable(tau)

         self.mean = at.as_tensor_variable(0.0)
@@ -201,17 +196,15 @@ class GaussianRandomWalk(distribution.Continuous):
         distribution for initial value (Defaults to Flat())
     """

-    def __init__(self, tau=None, init=None, sigma=None, mu=0.0, sd=None, *args, **kwargs):
+    def __init__(self, tau=None, init=None, sigma=None, mu=0.0, *args, **kwargs):
         kwargs.setdefault("shape", 1)
         super().__init__(*args, **kwargs)
         if sum(self.shape) == 0:
             raise TypeError("GaussianRandomWalk must be supplied a non-zero shape argument!")
-        if sd is not None:
-            sigma = sd
         tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
         self.tau = at.as_tensor_variable(tau)
         sigma = at.as_tensor_variable(sigma)
-        self.sigma = self.sd = sigma
+        self.sigma = sigma
         self.mu = at.as_tensor_variable(mu)
         self.init = init or Flat.dist()
         self.mean = at.as_tensor_variable(0.0)
@@ -400,8 +393,8 @@ def logp(self, x):
         xt = x[:-1]
         f, g = self.sde_fn(x[:-1], *self.sde_pars)
         mu = xt + self.dt * f
-        sd = at.sqrt(self.dt) * g
-        return at.sum(Normal.dist(mu=mu, sigma=sd).logp(x[1:]))
+        sigma = at.sqrt(self.dt) * g
+        return at.sum(Normal.dist(mu=mu, sigma=sigma).logp(x[1:]))

     def _distr_parameters_for_repr(self):
         return ["dt"]
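
For context on the last hunk: the renamed variable is the standard deviation of the Euler-Maruyama transition density that this SDE `logp` implements, a standard discretization stated here for reference:

    x_{t + \Delta t} \mid x_t \sim \mathcal{N}\!\left( x_t + f(x_t)\,\Delta t,\; g(x_t)^2\,\Delta t \right)

with drift `f` and diffusion `g` returned by `self.sde_fn` and step size `self.dt`; the diff's `sigma = at.sqrt(self.dt) * g` is exactly the square root of that variance.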

pymc/model.py (+1 -1)

@@ -482,7 +482,7 @@ def __init__(self, mean=0, sigma=1, name=''):
         Normal('v2', mu=mean, sigma=sd)

         # something more complex is allowed, too
-        half_cauchy = HalfCauchy('sd', beta=10, initval=1.)
+        half_cauchy = HalfCauchy('sigma', beta=10, initval=1.)
         Normal('v3', mu=mean, sigma=half_cauchy)

         # Deterministic variables can be used in usual way
