-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcbpb-bs-replicates.py
85 lines (62 loc) · 2.17 KB
/
cbpb-bs-replicates.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
sns.set()
# Read csv (1946 - 1995, outlier=1990)
bloom = pd.read_csv("./assets/data/peak-bloom-short2.csv")
df = pd.DataFrame(bloom, columns = ['year', 'date', 'estimated_temp'])
bloom_date = df['date']
def color_picker(shade, index):
"""Pick a color from cubehelix palette"""
num_shades = shade
color_list = sns.cubehelix_palette(num_shades)
colors = color_list.as_hex()
for color in colors:
color = colors[index]
return color
c = color_picker(30, 1)
def ecdf(data):
"""Compute ECDF for a one-dimensional array of measurements."""
# Number of data points
n = len(data)
# x-data for the ECDF
x = np.sort(data)
# y-data for the ECDF
y = np.arange(1, len(x)+1) / n
return x, y
def bootstrap_replicate_1d(data, func):
"""Generate bootstrap replicate of 1D data."""
bs_sample = np.random.choice(data, len(data))
return func(bs_sample)
bs_replicates = np.empty(10000)
def draw_bs_reps(data, func, size=1):
"""Draw bootstrap replicates."""
# Initialize array of replicates
bs_replicates = np.empty(size)
# Generate replicates
for i in range(size):
bs_replicates[i] = bootstrap_replicate_1d(data, func)
return bs_replicates
# Take 10,000 bootstrap replicates of the mean
bs_replicates = draw_bs_reps(bloom_date, np.mean, 10000)
# Compute and print SEM (Standard error of the mean)
sem = np.std(bloom_date) / np.sqrt(len(bloom_date))
print('sem:', sem)
# Compute and print standard deviation of bootstrap replicates
bs_std = np.std(bs_replicates)
print('std:', bs_std)
# Bootstrap confidence interval
conf_int = np.percentile(bs_replicates, [2.5, 97.5])
# Print the confidence interval
print('95% confidence interval =', conf_int)
# Make a histogram of the results
plt.hist(bs_replicates, bins=50, density=True, color=c)
plt.xlabel(r'$\tau$ (dates)').set_color('#2d1e3e')
plt.ylabel('PDF').set_color('#2d1e3e')
plt.xticks(color='#2d1e3e')
plt.yticks(color='#2d1e3e')
# Plot title
plt.title('Cherry blossom peak-bloom date in Kyoto, Japan (1946 - 1995)').set_color('#2d1e3e')
# Show the plot
plt.show()