Commit 2b783e4

Author: User (committed)
Message: update

1 parent 1e5c869 commit 2b783e4

File tree

10 files changed: +753 -16 lines changed

ab_testing/bayesian_bandit.py

Lines changed: 20 additions & 16 deletions

@@ -12,15 +12,17 @@
 from scipy.stats import beta
 
 
+# np.random.seed(2)
 NUM_TRIALS = 2000
 BANDIT_PROBABILITIES = [0.2, 0.5, 0.75]
 
 
-class Bandit(object):
+class Bandit:
   def __init__(self, p):
     self.p = p
     self.a = 1
     self.b = 1
+    self.N = 0 # for information only
 
   def pull(self):
     return np.random.random() < self.p
@@ -31,14 +33,15 @@ def sample(self):
   def update(self, x):
     self.a += x
     self.b += 1 - x
+    self.N += 1
 
 
 def plot(bandits, trial):
   x = np.linspace(0, 1, 200)
   for b in bandits:
     y = beta.pdf(x, b.a, b.b)
-    plt.plot(x, y, label="real p: %.4f" % b.p)
-  plt.title("Bandit distributions after %s trials" % trial)
+    plt.plot(x, y, label=f"real p: {b.p:.4f}, win rate = {b.a - 1}/{b.N}")
+  plt.title(f"Bandit distributions after {trial} trials")
   plt.legend()
   plt.show()
 
@@ -47,27 +50,28 @@ def experiment():
   bandits = [Bandit(p) for p in BANDIT_PROBABILITIES]
 
   sample_points = [5,10,20,50,100,200,500,1000,1500,1999]
+  rewards = np.zeros(NUM_TRIALS)
   for i in range(NUM_TRIALS):
+    # Thompson sampling
+    j = np.argmax([b.sample() for b in bandits])
 
-    # take a sample from each bandit
-    bestb = None
-    maxsample = -1
-    allsamples = [] # let's collect these just to print for debugging
-    for b in bandits:
-      sample = b.sample()
-      allsamples.append("%.4f" % sample)
-      if sample > maxsample:
-        maxsample = sample
-        bestb = b
+    # plot the posteriors
     if i in sample_points:
-      print("current samples: %s" % allsamples)
       plot(bandits, i)
 
     # pull the arm for the bandit with the largest sample
-    x = bestb.pull()
+    x = bandits[j].pull()
+
+    # update rewards
+    rewards[i] = x
 
     # update the distribution for the bandit whose arm we just pulled
-    bestb.update(x)
+    bandits[j].update(x)
+
+  # print total reward
+  print("total reward earned:", rewards.sum())
+  print("overall win rate:", rewards.sum() / NUM_TRIALS)
+  print("num times selected each bandit:", [b.N for b in bandits])
 
 
 if __name__ == "__main__":

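Note on the diff above: the commit replaces the explicit best-sample bookkeeping loop with a single argmax over one posterior draw per arm. A minimal standalone sketch of that Thompson-sampling selection step (the posterior parameters below are illustrative, not from the commit):

import numpy as np

# Hypothetical Beta posterior parameters for three arms: (a, b) = (wins + 1, losses + 1)
posterior_params = [(12, 40), (55, 48), (160, 52)]

# Thompson sampling: draw one sample from each arm's Beta posterior,
# then pull the arm whose sample is largest.
samples = [np.random.beta(a, b) for a, b in posterior_params]
j = int(np.argmax(samples))
print("samples:", np.round(samples, 4), "-> chosen arm:", j)
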
ab_testing/bayesian_normal.py

Lines changed: 86 additions & 0 deletions (new file)

# https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python
# https://www.udemy.com/artificial-intelligence-reinforcement-learning-in-python
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm


np.random.seed(1)
NUM_TRIALS = 2000
BANDIT_MEANS = [1, 2, 3]


class Bandit:
  def __init__(self, true_mean):
    self.true_mean = true_mean
    # parameters for mu - prior is N(0,1)
    self.m = 0
    self.lambda_ = 1
    self.sum_x = 0 # for convenience
    self.tau = 1
    self.N = 0

  def pull(self):
    return np.random.randn() / np.sqrt(self.tau) + self.true_mean

  def sample(self):
    return np.random.randn() / np.sqrt(self.lambda_) + self.m

  def update(self, x):
    self.lambda_ += self.tau
    self.sum_x += x
    self.m = self.tau*self.sum_x / self.lambda_
    self.N += 1


def plot(bandits, trial):
  x = np.linspace(-3, 6, 200)
  for b in bandits:
    y = norm.pdf(x, b.m, np.sqrt(1. / b.lambda_))
    plt.plot(x, y, label=f"real mean: {b.true_mean:.4f}, num plays: {b.N}")
  plt.title(f"Bandit distributions after {trial} trials")
  plt.legend()
  plt.show()


def run_experiment():
  bandits = [Bandit(m) for m in BANDIT_MEANS]

  sample_points = [5,10,20,50,100,200,500,1000,1500,1999]
  rewards = np.empty(NUM_TRIALS)
  for i in range(NUM_TRIALS):
    # Thompson sampling
    j = np.argmax([b.sample() for b in bandits])

    # plot the posteriors
    if i in sample_points:
      plot(bandits, i)

    # pull the arm for the bandit with the largest sample
    x = bandits[j].pull()

    # update the distribution for the bandit whose arm we just pulled
    bandits[j].update(x)

    # update rewards
    rewards[i] = x

  cumulative_average = np.cumsum(rewards) / (np.arange(NUM_TRIALS) + 1)

  # plot moving average ctr
  plt.plot(cumulative_average)
  for m in BANDIT_MEANS:
    plt.plot(np.ones(NUM_TRIALS)*m)
  plt.show()

  return cumulative_average

if __name__ == '__main__':
  run_experiment()

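For reference, Bandit.update() in bayesian_normal.py is the standard conjugate update for a Gaussian likelihood with known precision tau and a N(0, 1) prior on the mean: the posterior precision grows by tau per observation, and the posterior mean is tau * sum(x) / lambda. A small sketch (assumed data, not part of the commit) checking the incremental form against the closed-form posterior:

import numpy as np

tau = 1.0                            # known likelihood precision, as in the Bandit class
m, lambda_, sum_x = 0.0, 1.0, 0.0    # prior on the mean: N(0, 1)

x_data = np.random.randn(50) + 3.0   # hypothetical observations

# incremental update, same arithmetic as Bandit.update()
for x in x_data:
  lambda_ += tau
  sum_x += x
  m = tau * sum_x / lambda_

# closed form: lambda_N = lambda_0 + N*tau, m_N = tau*sum(x) / lambda_N (prior mean 0)
lambda_closed = 1.0 + len(x_data) * tau
m_closed = tau * x_data.sum() / lambda_closed
print(np.isclose(lambda_, lambda_closed), np.isclose(m, m_closed))
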
ab_testing/bayesian_starter.py

Lines changed: 78 additions & 0 deletions (new file)

# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import beta


# np.random.seed(2)
NUM_TRIALS = 2000
BANDIT_PROBABILITIES = [0.2, 0.5, 0.75]


class Bandit:
  def __init__(self, p):
    self.p = p
    self.a = # TODO
    self.b = # TODO
    self.N = 0 # for information only

  def pull(self):
    return np.random.random() < self.p

  def sample(self):
    return # TODO - draw a sample from Beta(a, b)

  def update(self, x):
    self.a = # TODO
    self.b = # TODO
    self.N += 1


def plot(bandits, trial):
  x = np.linspace(0, 1, 200)
  for b in bandits:
    y = beta.pdf(x, b.a, b.b)
    plt.plot(x, y, label=f"real p: {b.p:.4f}, win rate = {b.a - 1}/{b.N}")
  plt.title(f"Bandit distributions after {trial} trials")
  plt.legend()
  plt.show()


def experiment():
  bandits = [Bandit(p) for p in BANDIT_PROBABILITIES]

  sample_points = [5,10,20,50,100,200,500,1000,1500,1999]
  rewards = np.zeros(NUM_TRIALS)
  for i in range(NUM_TRIALS):
    # Thompson sampling
    j = # TODO

    # plot the posteriors
    if i in sample_points:
      plot(bandits, i)

    # pull the arm for the bandit with the largest sample
    x = bandits[j].pull()

    # update rewards
    rewards[i] = x

    # update the distribution for the bandit whose arm we just pulled
    bandits[j].update(x)

  # print total reward
  print("total reward earned:", rewards.sum())
  print("overall win rate:", rewards.sum() / NUM_TRIALS)
  print("num times selected each bandit:", [b.N for b in bandits])


if __name__ == "__main__":
  experiment()

ab_testing/comparing_epsilons.py

Lines changed: 89 additions & 0 deletions (new file)

# https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python
# https://www.udemy.com/artificial-intelligence-reinforcement-learning-in-python
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import matplotlib.pyplot as plt


class BanditArm:
  def __init__(self, m):
    self.m = m
    self.m_estimate = 0
    self.N = 0

  def pull(self):
    return np.random.randn() + self.m

  def update(self, x):
    self.N += 1
    self.m_estimate = (1 - 1.0/self.N)*self.m_estimate + 1.0/self.N*x


def run_experiment(m1, m2, m3, eps, N):
  bandits = [BanditArm(m1), BanditArm(m2), BanditArm(m3)]

  # count number of suboptimal choices
  means = np.array([m1, m2, m3])
  true_best = np.argmax(means)
  count_suboptimal = 0

  data = np.empty(N)

  for i in range(N):
    # epsilon greedy
    p = np.random.random()
    if p < eps:
      j = np.random.choice(len(bandits))
    else:
      j = np.argmax([b.m_estimate for b in bandits])
    x = bandits[j].pull()
    bandits[j].update(x)

    if j != true_best:
      count_suboptimal += 1

    # for the plot
    data[i] = x
  cumulative_average = np.cumsum(data) / (np.arange(N) + 1)

  # plot moving average ctr
  plt.plot(cumulative_average)
  plt.plot(np.ones(N)*m1)
  plt.plot(np.ones(N)*m2)
  plt.plot(np.ones(N)*m3)
  plt.xscale('log')
  plt.show()

  for b in bandits:
    print(b.m_estimate)

  print("percent suboptimal for epsilon = %s:" % eps, float(count_suboptimal) / N)

  return cumulative_average

if __name__ == '__main__':
  m1, m2, m3 = 1.5, 2.5, 3.5
  c_1 = run_experiment(m1, m2, m3, 0.1, 100000)
  c_05 = run_experiment(m1, m2, m3, 0.05, 100000)
  c_01 = run_experiment(m1, m2, m3, 0.01, 100000)

  # log scale plot
  plt.plot(c_1, label='eps = 0.1')
  plt.plot(c_05, label='eps = 0.05')
  plt.plot(c_01, label='eps = 0.01')
  plt.legend()
  plt.xscale('log')
  plt.show()

  # linear plot
  plt.plot(c_1, label='eps = 0.1')
  plt.plot(c_05, label='eps = 0.05')
  plt.plot(c_01, label='eps = 0.01')
  plt.legend()
  plt.show()

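The m_estimate update in BanditArm.update() is the incremental form of the sample mean, which avoids storing every reward. A quick standalone check (illustrative values, not part of the commit):

import numpy as np

x_data = np.random.randn(1000) + 2.5   # hypothetical rewards from one arm

m_estimate, N = 0.0, 0
for x in x_data:
  N += 1
  m_estimate = (1 - 1.0/N) * m_estimate + (1.0/N) * x   # same rule as BanditArm.update()

# the incremental estimate equals the plain sample mean
print(np.isclose(m_estimate, x_data.mean()))
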