LUCB_test.py
# NOTE: this is a Python 2 script (execfile, print statements, tuple-unpacking
# lambdas). core.py is expected to define the bandit / LUCB machinery used below.
execfile("core.py")
import random
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
random.seed(datetime.now())  # seeds only Python's random module; np.random stays unseeded
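# Sketch of the interface this script assumes core.py provides, inferred from
# the calls below rather than from core.py itself (treat it as an assumption):
#   BernoulliArm(mu)                      -- a Bernoulli arm with mean mu
#   LUCB(arms, m, eps, delta, beta, cb)   -- (KL-)LUCB runner exposing
#        .initialize(), .run(), .set_checkpoints(points, true_best)
#        and attributes .N, .checkpoints, .checkerrors
#   beta_lucb, Hoeffding, Chernoff        -- exploration rate and the two
#        confidence-bound choices (plain LUCB vs. KL-LUCB)
# Cheap sanity check that execfile() actually brought those names in:
for _name in ('BernoulliArm', 'LUCB', 'beta_lucb', 'Hoeffding', 'Chernoff'):
    assert _name in dir(), 'core.py is expected to define %s' % _name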
def test1_LUCB(num_sims, Klist):
    """Test 1: mean sample complexity of LUCB (Hoeffding) vs. KL-LUCB (Chernoff)
    on random Bernoulli instances, as the number of arms K varies."""
    horizons_mean = np.zeros((len(Klist), 2))
    for kid, k in enumerate(Klist):
        print kid
        m = k/5          # select the top m = K/5 arms
        eps = 0.1
        delta = 0.1
        horizons = np.zeros((num_sims, 2), dtype=int)
        for sim in range(num_sims):
            # draw a fresh instance with uniformly random arm means
            means = np.random.random(k)
            arms = map(lambda (mu): BernoulliArm(mu), means)
            caseH = LUCB(arms, m, eps, delta, beta_lucb, Hoeffding)
            caseH.run()
            caseC = LUCB(arms, m, eps, delta, beta_lucb, Chernoff)
            caseC.run()
            horizons[sim, 0] = caseH.N   # samples used by LUCB
            horizons[sim, 1] = caseC.N   # samples used by KL-LUCB
            print sim
        horizons_mean[kid] = np.mean(horizons, axis=0)
    return horizons_mean
def test23_LUCB(algo, num_sims, check_points, true_bestarms):
    """Tests 2/3: run a configured (KL-)LUCB instance num_sims times, recording
    the stopping time N together with the per-run checkpoint data
    (algo.checkpoints, algo.checkerrors)."""
    num_points = len(check_points)
    horizons = np.zeros(num_sims)
    checkpoints = np.zeros((num_sims, num_points), dtype=int)
    checkerrors = np.zeros((num_sims, num_points), dtype=bool)
    for sim in range(num_sims):
        print sim
        algo.initialize()
        algo.set_checkpoints(check_points, true_bestarms)
        algo.run()
        horizons[sim] = algo.N
        checkpoints[sim] = algo.checkpoints
        checkerrors[sim] = algo.checkerrors
    return horizons, checkpoints, checkerrors
## test 1: mean sample complexity as the number of arms K grows
print "test1_LUCB"
num_sims = 100
Klist = range(10,61,10)
horizons_mean = test1_LUCB(num_sims, Klist)
# plot test 1
fig = plt.figure()
plt.plot(Klist, horizons_mean[:,0]/10000, 'ko-', label='LUCB')
plt.plot(Klist, horizons_mean[:,1]/10000, 'bo-', label='KL-LUCB')
plt.legend(loc='best')
plt.title('Expected sample complexity / 10000')
plt.xlabel('K')
plt.savefig('figure/test1_LUCB.png', bbox_inches='tight')
plt.close(fig)
## tests 2 and 3: stopping-time distribution and mistake probability on fixed instances
print "test 23"
num_sims = 100
# bandit instance B1: K = 15 Bernoulli arms with means
# 0.5, 0.45, 0.425, 0.4, ..., 0.125 (the top-3 arms are 0, 1, 2)
K = 15
means = np.array([0.5] + map(lambda (a): 0.5-a/40., range(2,K+1)))
n_arms = len(means)
arms = map(lambda (mu): BernoulliArm(mu), means)
m = 3
eps = 0.04
delta = 0.1
caseH = LUCB(arms, m, eps, delta, beta_lucb, Hoeffding)
caseC = LUCB(arms, m, eps, delta, beta_lucb, Chernoff)
checkpoints = np.arange(1000, 7001, 1000)
true_best_arms = [0,1,2]
horizonsH1, checkpointsH1, checkerrorsH1 = test23_LUCB(caseH, num_sims, checkpoints, true_best_arms)
horizonsC1, checkpointsC1, checkerrorsC1 = test23_LUCB(caseC, num_sims, checkpoints, true_best_arms)
errorrateH1 = np.sum(checkerrorsH1, axis=0)/float(num_sims)
errorrateC1 = np.sum(checkerrorsC1, axis=0)/float(num_sims)
# bandit instance B2: B1 with all means halved (smaller gaps, harder problem)
means /= 2
arms = map(lambda (mu): BernoulliArm(mu), means)
m = 3
eps = 0.02
delta = 0.1
caseH = LUCB(arms, m, eps, delta, beta_lucb, Hoeffding)
caseC = LUCB(arms, m, eps, delta, beta_lucb, Chernoff)
horizonsH2, checkpointsH2, checkerrorsH2 = test23_LUCB(caseH, num_sims, checkpoints, true_best_arms)
horizonsC2, checkpointsC2, checkerrorsC2 = test23_LUCB(caseC, num_sims, checkpoints, true_best_arms)
errorrateH2 = np.sum(checkerrorsH2, axis = 0)/float(num_sims)
errorrateC2 = np.sum(checkerrorsC2, axis = 0)/float(num_sims)
# plot test 2: histograms of the stopping time on B1 (top) and B2 (bottom)
fig = plt.figure()
frac = np.ones(num_sims)/float(num_sims)  # weights so each bar is the fraction of runs in that bin
plt.subplot(2, 1, 1)
plt.hist(horizonsH1/10000, bins=10, range=[2.,22.], weights=frac, facecolor='red', align='mid', label='LUCB')
plt.hist(horizonsC1/10000, bins=10, range=[2.,22.], weights=frac, facecolor='green', align='mid', label='KL-LUCB')
plt.legend(loc='best')
plt.title('Fraction of runs (bins of width 20000 samples)')
plt.subplot(2, 1, 2)
plt.hist(horizonsH2/10000, bins=10, range=[2.,22.], weights=frac, facecolor='red', align='mid', label='LUCB')
plt.hist(horizonsC2/10000, bins=10, range=[2.,22.], weights=frac, facecolor='green', align='mid', label='KL-LUCB')
plt.legend(loc='best')
plt.xlabel('Samples / 10000')
plt.savefig('figure/test2_LUCB.png', bbox_inches='tight')
plt.close(fig)
# plot test 3: empirical mistake probability at each checkpoint (B1 instance)
fig = plt.figure()
plt.plot(checkpoints/1000, errorrateH1, 'ko-', label='LUCB')
plt.plot(checkpoints/1000, errorrateC1, 'bo-', label='KL-LUCB')
plt.legend(loc='best')
plt.title('Empirical mistake probability during run')
plt.xlabel('Samples / 1000')
plt.savefig('figure/test3_LUCB.png', bbox_inches='tight')
plt.close(fig)
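# (Hypothetical) way to run this script, assuming a Python 2 interpreter with
# numpy/matplotlib installed, core.py in the working directory, and an existing
# figure/ directory for the saved plots:
#   mkdir -p figure && python2 LUCB_test.py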