-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcbpb-bs-samples.py
73 lines (54 loc) · 1.74 KB
/
cbpb-bs-samples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Apply seaborn's default theme to every matplotlib figure created below
sns.set()
# Read csv (1946 - 1995, outlier=1990)
# NOTE: the path is relative, so it resolves against the current working directory
bloom = pd.read_csv("./assets/data/peak-bloom-short2.csv")
# Build the working frame from the 'year', 'date' and 'estimated_temp' columns;
# only df['date'] is read by the rest of this script
df = pd.DataFrame(bloom, columns = ['year', 'date', 'estimated_temp'])
def color_picker(shade, index):
    """Pick a color from cubehelix palette.

    Args:
        shade: Number of shades to generate in the cubehelix palette.
        index: Position of the wanted shade within the generated palette
            (negative values count from the end, as with any sequence).

    Returns:
        The selected palette entry as a hex color string.

    Raises:
        IndexError: If ``index`` falls outside the generated palette
            (including the case where the palette is empty).
    """
    # Index the hex palette directly: no iteration is needed to fetch a
    # single entry, and an out-of-range request fails with a clear IndexError.
    return sns.cubehelix_palette(shade).as_hex()[index]
# Two entries from the same 20-shade cubehelix palette:
# `c` (index 18) colors the ECDF of the original data,
# `dot` (index 8) colors the markers of the bootstrap-sample ECDFs
c = color_picker(20, 18)
dot = color_picker(20, 8)
def ecdf(data):
    """Return the empirical CDF coordinates of one-dimensional measurements.

    The sorted measurements form the x-values; the matching y-values are the
    cumulative fractions 1/n, 2/n, ..., 1, where n is ``len(data)``.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays of equal length.
    """
    total = len(data)
    ordered = np.sort(data)
    # Rank of each sorted observation, counted from 1
    ranks = np.arange(1, len(ordered) + 1)
    return ordered, ranks / total
# Size of the resampled array
# (every bootstrap sample below is drawn with this same length)
print('Size:', len(df['date']))
# Draw 50 bootstrap samples of the 'date' column and overlay their ECDFs.
# NOTE(review): leading indentation is not visible in this copy of the file, so
# the extent of the loop body is unconfirmed - in particular whether the
# mean/median/std prints run once per sample or only for the last sample.
for _ in range(50):
# Generate bootstrap sample
# (np.random.choice draws with replacement by default; no seed is set here,
# so the samples differ from run to run)
bs_sample = np.random.choice(df['date'], size=len(df['date']))
# Compute and plot ECDF from bootstrap sample
# (markers only, nearly transparent, so overlapping samples build up density)
x, y = ecdf(bs_sample)
plt.plot(x, y, marker='.', linestyle='none', color=dot, alpha=0.1)
# Summary statistics of the bootstrap sample held in bs_sample
mean = np.mean(bs_sample)
print('mean:', mean)
median = np.median(bs_sample)
print('median:', median)
# np.std is called with its default arguments
std = np.std(bs_sample)
print('std:', std)
# Overlay the ECDF of the observed (non-resampled) dates, drawn with the
# default line style plus point markers
x, y = ecdf(df['date'])
plt.plot(x, y, color=c, marker='.')

# One shared tone for every piece of text on the figure
text_color = '#2d1e3e'

# Pad the data limits slightly, then label and tint both axes
plt.margins(0.02)
x_label = plt.xlabel('Cherry blossom peak-bloom date')
x_label.set_color(text_color)
y_label = plt.ylabel('ECDF')
y_label.set_color(text_color)
plt.xticks(color=text_color)
plt.yticks(color=text_color)

# Figure heading
heading = plt.title('Cherry blossom peak-bloom date in Kyoto, Japan (1946 - 1995)')
heading.set_color(text_color)

# Display the finished figure
plt.show()