
Commit 986465c

Author: suwarnarajput
Add files via upload
1 parent 8ad64f8 commit 986465c

12 files changed, +10,613 −2 lines

Chapter 1/01_Thinking_Probabilistically_a_Bayesian_Inference_Primer (3).ipynb

+347
Large diffs are not rendered by default.

Chapter 1/hpd (1).py

+59
@@ -0,0 +1,59 @@
from __future__ import division
import numpy as np
import scipy.stats.kde as kde


def hpd_grid(sample, alpha=0.05, roundto=2):
    """Calculate the highest posterior density (HPD) of an array for a given alpha.

    The HPD is the minimum-width Bayesian credible interval (BCI).
    The function works for multimodal distributions, returning more than one mode.

    Parameters
    ----------
    sample : NumPy array or Python list
        An array containing MCMC samples.
    alpha : float
        Desired probability of type I error (defaults to 0.05).
    roundto : integer
        Number of digits after the decimal point for the results.

    Returns
    -------
    hpd : list of tuples
        Lower and upper bounds of each HPD interval.
    x : NumPy array
        Grid over which the KDE was evaluated.
    y : NumPy array
        KDE density values on the grid.
    modes : list
        Mode(s) of the distribution, one per HPD interval.
    """
    sample = np.asarray(sample)
    sample = sample[~np.isnan(sample)]
    # get upper and lower bounds
    l = np.min(sample)
    u = np.max(sample)
    density = kde.gaussian_kde(sample)
    x = np.linspace(l, u, 2000)
    y = density.evaluate(x)
    # y = density.evaluate(x, l, u)  # waiting for PR to be accepted
    # sort the grid points by (normalized) density, highest first
    xy_zipped = zip(x, y/np.sum(y))
    xy = sorted(xy_zipped, key=lambda x: x[1], reverse=True)
    # accumulate the highest-density points until they cover 1-alpha of the mass
    xy_cum_sum = 0
    hdv = []
    for val in xy:
        xy_cum_sum += val[1]
        hdv.append(val[0])
        if xy_cum_sum >= (1-alpha):
            break
    hdv.sort()
    diff = (u-l)/20  # differences of 5%
    # split the selected points into contiguous intervals wherever there is a gap
    hpd = []
    hpd.append(round(min(hdv), roundto))
    for i in range(1, len(hdv)):
        if hdv[i]-hdv[i-1] >= diff:
            hpd.append(round(hdv[i-1], roundto))
            hpd.append(round(hdv[i], roundto))
    hpd.append(round(max(hdv), roundto))
    ite = iter(hpd)
    hpd = list(zip(ite, ite))
    # find the mode (highest-density point) within each interval
    modes = []
    for value in hpd:
        x_hpd = x[(x > value[0]) & (x < value[1])]
        y_hpd = y[(x > value[0]) & (x < value[1])]
        modes.append(round(x_hpd[np.argmax(y_hpd)], roundto))
    return hpd, x, y, modes
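
A minimal usage sketch of hpd_grid, assuming the file above is saved as hpd.py (plot_post.py imports it under that name); the bimodal sample below is synthetic and only for illustration:

```python
import numpy as np
from hpd import hpd_grid  # assumes the file above is saved as hpd.py

# Synthetic bimodal "posterior" sample, for illustration only
np.random.seed(42)
sample = np.concatenate([np.random.normal(-2.0, 0.5, 5000),
                         np.random.normal(3.0, 1.0, 5000)])

intervals, grid, density, modes = hpd_grid(sample, alpha=0.05)
print(intervals)  # expected: two (low, high) tuples, one per component
print(modes)      # expected: one mode near -2 and one near 3
```

Because the 95% highest-density grid points leave a gap around the low-density valley between the two components, the function should report two separate intervals rather than one wide one.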

Chapter 1/mauna_loa_CO2 (1).csv

+468
Large diffs are not rendered by default.

Chapter 1/plot_post (1).py

+98
@@ -0,0 +1,98 @@
from __future__ import division
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from hpd import hpd_grid


def plot_post(sample, alpha=0.05, show_mode=True, kde_plot=True, bins=50,
              ROPE=None, comp_val=None, roundto=2):
    """Plot the posterior and its HPD interval(s).

    Parameters
    ----------
    sample : NumPy array or Python list
        An array containing MCMC samples.
    alpha : float
        Desired probability of type I error (defaults to 0.05).
    show_mode : bool
        If True the legend will show the mode(s); if False the mean
        will be displayed.
    kde_plot : bool
        If True the posterior will be displayed using a kernel density estimate,
        otherwise a histogram will be used.
    bins : integer
        Number of bins used for the histogram; only used when kde_plot is False.
    ROPE : list or NumPy array
        Lower and upper values of the Region Of Practical Equivalence.
    comp_val : float
        Comparison value.
    roundto : integer
        Number of digits after the decimal point for the results.

    Returns
    -------
    post_summary : dictionary
        Dictionary containing several summary statistics.
    """
    post_summary = {'mean': 0, 'median': 0, 'mode': 0, 'alpha': 0, 'hpd_low': 0,
                    'hpd_high': 0, 'comp_val': 0, 'pc_gt_comp_val': 0, 'ROPE_low': 0,
                    'ROPE_high': 0, 'pc_in_ROPE': 0}

    post_summary['mean'] = round(np.mean(sample), roundto)
    post_summary['median'] = round(np.median(sample), roundto)
    post_summary['alpha'] = alpha

    # Compute the HPD, KDE and mode(s) for the posterior
    hpd, x, y, modes = hpd_grid(sample, alpha, roundto)
    post_summary['hpd'] = hpd
    post_summary['mode'] = modes

    ## Plot KDE.
    if kde_plot:
        plt.plot(x, y, color='k', lw=2)
    ## Plot histogram.
    else:
        # note: newer Matplotlib versions use density=True instead of normed=True
        plt.hist(sample, normed=True, bins=bins, facecolor='b',
                 edgecolor='w')

    ## Display mode or mean.
    if show_mode:
        string = '{:g} ' * len(post_summary['mode'])
        plt.plot(0, label='mode =' + string.format(*post_summary['mode']), alpha=0)
    else:
        plt.plot(0, label='mean = {:g}'.format(post_summary['mean']), alpha=0)

    ## Display the HPD.
    hpd_label = ''
    for value in hpd:
        plt.plot(value, [0, 0], linewidth=10, color='b')
        hpd_label = hpd_label + '{:g} {:g}\n'.format(round(value[0], roundto),
                                                     round(value[1], roundto))
    plt.plot(0, 0, linewidth=4, color='b',
             label='hpd {:g}%\n{}'.format((1-alpha)*100, hpd_label))
    ## Display the ROPE.
    if ROPE is not None:
        pc_in_ROPE = round(np.sum((sample > ROPE[0]) & (sample < ROPE[1]))/len(sample)*100, roundto)
        plt.plot(ROPE, [0, 0], linewidth=20, color='r', alpha=0.75)
        plt.plot(0, 0, linewidth=4, color='r', label='{:g}% in ROPE'.format(pc_in_ROPE))
        post_summary['ROPE_low'] = ROPE[0]
        post_summary['ROPE_high'] = ROPE[1]
        post_summary['pc_in_ROPE'] = pc_in_ROPE
    ## Display the comparison value.
    if comp_val is not None:
        pc_gt_comp_val = round(100 * np.sum(sample > comp_val)/len(sample), roundto)
        pc_lt_comp_val = round(100 - pc_gt_comp_val, roundto)
        plt.axvline(comp_val, ymax=.75, color='g', linewidth=4, alpha=0.75,
                    label='{:g}% < {:g} < {:g}%'.format(pc_lt_comp_val,
                                                        comp_val, pc_gt_comp_val))
        post_summary['comp_val'] = comp_val
        post_summary['pc_gt_comp_val'] = pc_gt_comp_val

    plt.legend(loc=0, framealpha=1)
    frame = plt.gca()
    frame.axes.get_yaxis().set_ticks([])
    return post_summary
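
A minimal usage sketch, assuming the two helper files are saved as plot_post.py and hpd.py next to the calling script; the beta-distributed sample below merely stands in for an MCMC trace, and the ROPE and comparison value are arbitrary illustration choices:

```python
import numpy as np
import matplotlib.pyplot as plt
from plot_post import plot_post  # assumes the file above is saved as plot_post.py

# Stand-in for an MCMC trace of a proportion parameter, for illustration only
np.random.seed(0)
trace = np.random.beta(5, 11, size=20000)

# ROPE and comparison value chosen only for the sake of the example
summary = plot_post(trace, alpha=0.05, ROPE=[0.45, 0.55], comp_val=0.5)
plt.show()
print(summary['mean'], summary['hpd'])
```

The returned dictionary can be inspected or logged instead of (or in addition to) the plot, which is convenient when summarizing many parameters.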

Chapter 2/02_Programming_probabilistically_a_PyMC3_primer (1).ipynb

+617
Large diffs are not rendered by default.

Chapter 3/03_Juggling with multiparametric and Hierarchical models.ipynb

+1,061
Large diffs are not rendered by default.

Chapter 4/04_Understanding_and_predicting_data_with_linear_regression_models (2).ipynb

+3,105
Large diffs are not rendered by default.

Chapter 5/05_Classifying_outcomes_with_logistic_regression (2).ipynb

+1,471
Large diffs are not rendered by default.

Chapter 6/06_Model_comparison.ipynb

+1,102
Large diffs are not rendered by default.

Chapter 7/07_Mixture_Models (1).ipynb

+1,329
Large diffs are not rendered by default.

Chapter 8/08_Gaussian_processes.ipynb

+901
Large diffs are not rendered by default.

README.md

+55-2
@@ -1,2 +1,55 @@
-# Bayesian-Analysis-with-Python
-Bayesian Analysis with Python by Packt

# Bayesian Analysis with Python

This is the code repository for [Bayesian Analysis with Python](https://www.packtpub.com/big-data-and-business-intelligence/bayesian-analysis-python?utm_source=github&utm_medium=repository&utm_campaign=9781785883804), published by Packt. It contains all the supporting project files necessary to work through the book from start to finish.

## Instructions and Navigation

All of the code is organized into folders, one per chapter; for example, Chapter 1.

The code will look like the following:

```
n_params = [1, 2, 4]
p_params = [0.25, 0.5, 0.75]
x = np.arange(0, max(n_params)+1)
f, ax = plt.subplots(len(n_params), len(p_params), sharex=True,
                     sharey=True)
for i in range(3):
    for j in range(3):
        n = n_params[i]
        p = p_params[j]
        y = stats.binom(n=n, p=p).pmf(x)
        ax[i,j].vlines(x, 0, y, colors='b', lw=5)
        ax[i,j].set_ylim(0, 1)
        ax[i,j].plot(0, 0, label="n = {:3.2f}\np = {:3.2f}".format(n, p), alpha=0)
        ax[i,j].legend(fontsize=12)
ax[2,1].set_xlabel('$\\theta$', fontsize=14)
ax[1,0].set_ylabel('$p(y|\\theta)$', fontsize=14)
ax[0,0].set_xticks(x)
```

This book is written for Python version >= 3.5, and it is recommended that you use the most recent version of Python 3 currently available, although most of the code examples may also run on older versions of Python, including Python 2.7, with minor adjustments.

Perhaps the easiest way to install Python and the required libraries is with Anaconda, a scientific computing distribution. You can read more about Anaconda and download it from https://www.continuum.io/downloads. Once Anaconda is installed, new Python packages can be added with the command conda install NamePackage.

We will use the following Python packages:

* IPython 5.0
* NumPy 1.11.1
* SciPy 0.18.1
* Pandas 0.18.1
* Matplotlib 1.5.3
* Seaborn 0.7.1
* PyMC3 3.0

## Related Products

* [Expert Python Programming](https://www.packtpub.com/application-development/expert-python-programming?utm_source=github&utm_medium=repository&utm_campaign=9781847194947)
* [Learning Bayesian Models with R](https://www.packtpub.com/big-data-and-business-intelligence/learning-bayesian-models-r?utm_source=github&utm_medium=repository&utm_campaign=9781783987603)
* [Mastering Probabilistic Graphical Models Using Python](https://www.packtpub.com/big-data-and-business-intelligence/mastering-probabilistic-graphical-models-using-python?utm_source=github&utm_medium=repository&utm_campaign=9781784394684)

### Suggestions and Feedback

[Click here](https://docs.google.com/forms/d/e/1FAIpQLSe5qwunkGf6PUvzPirPDtuy1Du5Rlzew23UBp2S-P3wB-GcwQ/viewform) if you have any feedback or suggestions.
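
Following up on the package list in the README above, a hedged sketch of one way such an environment could be created with Anaconda; the environment name bap, the exact version pinning, and installing PyMC3 from PyPI are assumptions, not instructions from the book:

```
# Hypothetical environment setup; adjust names and versions as needed
conda create -n bap python=3.5 ipython=5.0 numpy=1.11.1 scipy=0.18.1 \
    pandas=0.18.1 matplotlib=1.5.3 seaborn=0.7.1
source activate bap       # "conda activate bap" on newer conda releases
pip install pymc3==3.0    # PyMC3 3.0 is also published on PyPI
```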
