Commit 86b37ad

Refactor Bayesian neural network example (#670)

- Improve axis labels
- Remove `mutable=True` argument (will be deprecated)
- Fix shape warning
- Fix minibatch sampling issue (cf. #654)

Parent: 9003ee2

File tree

2 files changed (+139, -169)

examples/variational_inference/bayesian_neural_network_advi.ipynb (+120, -154; large diff not rendered)

examples/variational_inference/bayesian_neural_network_advi.myst.md (+19, -15)

@@ -69,7 +69,6 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pymc as pm
 import pytensor
-import pytensor.tensor as pt
 import seaborn as sns
 
 from sklearn.datasets import make_moons
@@ -103,7 +102,7 @@ ax.scatter(X[Y == 0, 0], X[Y == 0, 1], color="C0", label="Class 0")
 ax.scatter(X[Y == 1, 0], X[Y == 1, 1], color="C1", label="Class 1")
 sns.despine()
 ax.legend()
-ax.set(xlabel="X", ylabel="Y", title="Toy binary classification data set");
+ax.set(xlabel="X1", ylabel="X2", title="Toy binary classification data set");
 ```
 
 ### Model specification
@@ -127,11 +126,11 @@ def construct_nn(ann_input, ann_output):
         "hidden_layer_1": np.arange(n_hidden),
         "hidden_layer_2": np.arange(n_hidden),
         "train_cols": np.arange(X_train.shape[1]),
-        # "obs_id": np.arange(X_train.shape[0]),
+        "obs_id": np.arange(X_train.shape[0]),
     }
     with pm.Model(coords=coords) as neural_network:
-        ann_input = pm.Data("ann_input", X_train, mutable=True, dims=("obs_id", "train_cols"))
-        ann_output = pm.Data("ann_output", Y_train, mutable=True, dims="obs_id")
+        ann_input = pm.Data("ann_input", X_train, dims=("obs_id", "train_cols"))
+        ann_output = pm.Data("ann_output", Y_train, dims="obs_id")
 
         # Weights from input to hidden layer
         weights_in_1 = pm.Normal(
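
Note on the `mutable=True` removal: in current PyMC releases `pm.Data` containers are mutable by default, so the keyword is redundant (and, per the commit message, headed for deprecation); values can still be swapped out later with `pm.set_data`. A minimal standalone sketch of the resulting pattern, using toy data that is not part of this diff:

```python
import numpy as np
import pymc as pm

# Toy data, assumed purely for illustration.
X_train = np.random.default_rng(0).normal(size=(100, 2))

coords = {"obs_id": np.arange(100), "train_cols": np.arange(2)}
with pm.Model(coords=coords) as model:
    # No mutable=True needed: pm.Data is a shared variable by default.
    x = pm.Data("x", X_train, dims=("obs_id", "train_cols"))

# Later, swap in different data; coords resizes the obs_id dimension to match.
with model:
    pm.set_data({"x": np.zeros((50, 2))}, coords={"obs_id": np.arange(50)})
```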
@@ -215,14 +214,15 @@ pred = ppc.posterior_predictive["out"].mean(("chain", "draw")) > 0.5
 
 ```{code-cell} ipython3
 fig, ax = plt.subplots()
-ax.scatter(X_test[pred == 0, 0], X_test[pred == 0, 1], color="C0")
-ax.scatter(X_test[pred == 1, 0], X_test[pred == 1, 1], color="C1")
+ax.scatter(X_test[pred == 0, 0], X_test[pred == 0, 1], color="C0", label="Predicted 0")
+ax.scatter(X_test[pred == 1, 0], X_test[pred == 1, 1], color="C1", label="Predicted 1")
 sns.despine()
-ax.set(title="Predicted labels in testing set", xlabel="X", ylabel="Y");
+ax.legend()
+ax.set(title="Predicted labels in testing set", xlabel="X1", ylabel="X2");
 ```
 
 ```{code-cell} ipython3
-print(f"Accuracy = {(Y_test == pred.values).mean() * 100}%")
+print(f"Accuracy = {(Y_test == pred.values).mean() * 100:.2f}%")
 ```
 
 Hey, our neural network did all right!
@@ -240,16 +240,21 @@ jupyter:
 ---
 grid = pm.floatX(np.mgrid[-3:3:100j, -3:3:100j])
 grid_2d = grid.reshape(2, -1).T
-dummy_out = np.ones(grid.shape[1], dtype=np.int8)
+dummy_out = np.ones(grid_2d.shape[0], dtype=np.int8)
 ```
 
 ```{code-cell} ipython3
 ---
 jupyter:
   outputs_hidden: true
 ---
+coords_eval = {
+    "train_cols": np.arange(grid_2d.shape[1]),
+    "obs_id": np.arange(grid_2d.shape[0]),
+}
+
 with neural_network:
-    pm.set_data(new_data={"ann_input": grid_2d, "ann_output": dummy_out})
+    pm.set_data(new_data={"ann_input": grid_2d, "ann_output": dummy_out}, coords=coords_eval)
     ppc = pm.sample_posterior_predictive(trace)
 ```
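
Two fixes land in the hunk above. The shape warning: `grid` from `np.mgrid[-3:3:100j, -3:3:100j]` has shape `(2, 100, 100)`, so the old `grid.shape[1]` is only 100, while `grid_2d` has one row per grid point; the dummy output needs `grid_2d.shape[0]` entries. And because the evaluation grid has a different number of rows than `X_train`, the `obs_id` coordinate is re-declared through the `coords` argument of `pm.set_data`. A quick standalone check of the shapes involved:

```python
import numpy as np

grid = np.mgrid[-3:3:100j, -3:3:100j]  # shape (2, 100, 100)
grid_2d = grid.reshape(2, -1).T        # shape (10000, 2): one row per grid point

assert grid.shape[1] == 100            # old dummy_out length: too short
assert grid_2d.shape[0] == 100 * 100   # new dummy_out length: one label per point

dummy_out = np.ones(grid_2d.shape[0], dtype=np.int8)
assert dummy_out.shape == (10000,)
```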

@@ -268,7 +273,7 @@ contour = ax.contourf(
 ax.scatter(X_test[pred == 0, 0], X_test[pred == 0, 1], color="C0")
 ax.scatter(X_test[pred == 1, 0], X_test[pred == 1, 1], color="C1")
 cbar = plt.colorbar(contour, ax=ax)
-_ = ax.set(xlim=(-3, 3), ylim=(-3, 3), xlabel="X", ylabel="Y")
+_ = ax.set(xlim=(-3, 3), ylim=(-3, 3), xlabel="X1", ylabel="X2")
 cbar.ax.set_ylabel("Posterior predictive mean probability of class label = 0");
 ```

@@ -285,7 +290,7 @@ contour = ax.contourf(
 ax.scatter(X_test[pred == 0, 0], X_test[pred == 0, 1], color="C0")
 ax.scatter(X_test[pred == 1, 0], X_test[pred == 1, 1], color="C1")
 cbar = plt.colorbar(contour, ax=ax)
-_ = ax.set(xlim=(-3, 3), ylim=(-3, 3), xlabel="X", ylabel="Y")
+_ = ax.set(xlim=(-3, 3), ylim=(-3, 3), xlabel="X1", ylabel="X2")
 cbar.ax.set_ylabel("Uncertainty (posterior predictive standard deviation)");
 ```

@@ -300,8 +305,7 @@ So far, we have trained our model on all data at once. Obviously this won't scal
 Fortunately, ADVI can be run on mini-batches as well. It just requires some setting up:
 
 ```{code-cell} ipython3
-minibatch_x = pm.Minibatch(X_train, batch_size=50)
-minibatch_y = pm.Minibatch(Y_train, batch_size=50)
+minibatch_x, minibatch_y = pm.Minibatch(X_train, Y_train, batch_size=50)
 neural_network_minibatch = construct_nn(minibatch_x, minibatch_y)
 with neural_network_minibatch:
     approx = pm.fit(40000, method=pm.ADVI())
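
On the minibatch fix (cf. #654): two separate `pm.Minibatch` calls each draw their own random batch indices, so the sampled rows of `X_train` and `Y_train` stop referring to the same observations. A single call slices every passed array with one shared set of indices, keeping input/label pairs aligned. A sketch of the before/after, with toy data assumed for illustration:

```python
import numpy as np
import pymc as pm

# Toy data standing in for the example's train split.
rng = np.random.default_rng(1)
X_train = rng.normal(size=(500, 2))
Y_train = (X_train[:, 0] > 0).astype("int8")

# Broken pairing: each call samples an independent set of rows.
# mb_x = pm.Minibatch(X_train, batch_size=50)
# mb_y = pm.Minibatch(Y_train, batch_size=50)

# Fixed: both arrays are sliced with the same random indices.
minibatch_x, minibatch_y = pm.Minibatch(X_train, Y_train, batch_size=50)
```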
