-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplayground.py
70 lines (57 loc) · 2.71 KB
/
playground.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from scipy.stats import ttest_ind
from mmd import mmd
alpha = 0.05
n_1 = 10**3
n_2 = 10**3
num_bootstrap_replicates = 5
alt_thetas = np.linspace(-2.0, 2.0, 15)
figure = plt.figure(figsize=(10, 30))
# Simulate alternate means and compute power of a test between distributions
two_sample_t_test_power_for_alt_theta = []
mmd_test_power_for_alt_theta = []
for i in range(len(alt_thetas)):
# Sample from dist_1 and dist_2
dist_1 = np.random.normal(0, 1, n_1)
dist_2 = np.random.normal(alt_thetas[i], 1, n_2)
# dist_1 = np.random.chisquare(9, size=n_1)
# dist_2 = np.random.exponential(scale=alt_thetas[i], size=n_2)
# Plot samples
dist_ax = plt.subplot(len(alt_thetas) + 1, 1, i + 1)
dist_ax.hist(dist_1, fc=(0, 0, 1, 0.5))
dist_ax.hist(dist_2, fc=(0.5, 0.5, 0.5, 0.5))
# Use boostrap technique to estimate the distribution of the
# p-value statistic
two_sample_t_test_p_value_dist = []
mmd_test_stat_dist = []
for br in range(num_bootstrap_replicates):
dist_1_replicate = np.random.choice(dist_1, size=n_1, replace=True)
dist_2_replicate = np.random.choice(dist_2, size=n_2, replace=True)
# Classical two sample t test
two_sample_t_test = ttest_ind(dist_1_replicate, dist_2_replicate, equal_var=True)
two_sample_t_test_p_value_dist.append(two_sample_t_test.pvalue)
# MMD statistic
dist_1_replicate = np.expand_dims(dist_1_replicate, 1)
dist_2_replicate = np.expand_dims(dist_2_replicate, 1)
mmd_stat = mmd(dist_1_replicate, dist_2_replicate)[1]
mmd_test_stat_dist.append(mmd_stat)
# Use monte carlo to estimate the power of a test with significance level 0.05
# => average number of p values less than alpha
# => average number of MMD statistics greater than alpha
two_sample_t_test_power = np.mean([p < alpha for p in two_sample_t_test_p_value_dist])
two_sample_t_test_power_for_alt_theta.append(two_sample_t_test_power)
mmd_test_power = np.mean([mmd_stat > alpha for mmd_stat in mmd_test_stat_dist])
mmd_test_power_for_alt_theta.append(mmd_test_power)
# Print % complete
print("PERCENT COMPLETE: {0:.2f}%\r".format((i + 1) * 100 / len(alt_thetas)), end='')
# Plot power of classical two sample t test as a function of alternative mean
power_ax = plt.subplot(len(alt_thetas) + 1, 1, len(alt_thetas) + 1)
power_ax.plot(alt_thetas, two_sample_t_test_power_for_alt_theta)
power_ax.plot(alt_thetas, mmd_test_power_for_alt_theta)
power_ax.set_xlabel('Alternative Means')
power_ax.set_ylabel('Power of Tests')
power_ax.set_ylim([-0.1, 1.1])
power_ax.legend("TM", loc="upper right")
figure.savefig('OUTPUT/test.png')