Commit 2b783e4

Author: User (committed)
Message: update

1 parent 1e5c869 commit 2b783e4

File tree

10 files changed: +753 -16 lines changed

ab_testing/bayesian_bandit.py

Lines changed: 20 additions & 16 deletions

@@ -12,15 +12,17 @@
 from scipy.stats import beta
 
 
+# np.random.seed(2)
 NUM_TRIALS = 2000
 BANDIT_PROBABILITIES = [0.2, 0.5, 0.75]
 
 
-class Bandit(object):
+class Bandit:
   def __init__(self, p):
     self.p = p
     self.a = 1
     self.b = 1
+    self.N = 0 # for information only
 
   def pull(self):
     return np.random.random() < self.p
@@ -31,14 +33,15 @@ def sample(self):
   def update(self, x):
     self.a += x
     self.b += 1 - x
+    self.N += 1
 
 
 def plot(bandits, trial):
   x = np.linspace(0, 1, 200)
   for b in bandits:
     y = beta.pdf(x, b.a, b.b)
-    plt.plot(x, y, label="real p: %.4f" % b.p)
-  plt.title("Bandit distributions after %s trials" % trial)
+    plt.plot(x, y, label=f"real p: {b.p:.4f}, win rate = {b.a - 1}/{b.N}")
+  plt.title(f"Bandit distributions after {trial} trials")
   plt.legend()
   plt.show()
 
@@ -47,27 +50,28 @@ def experiment():
   bandits = [Bandit(p) for p in BANDIT_PROBABILITIES]
 
   sample_points = [5,10,20,50,100,200,500,1000,1500,1999]
+  rewards = np.zeros(NUM_TRIALS)
   for i in range(NUM_TRIALS):
+    # Thompson sampling
+    j = np.argmax([b.sample() for b in bandits])
 
-    # take a sample from each bandit
-    bestb = None
-    maxsample = -1
-    allsamples = [] # let's collect these just to print for debugging
-    for b in bandits:
-      sample = b.sample()
-      allsamples.append("%.4f" % sample)
-      if sample > maxsample:
-        maxsample = sample
-        bestb = b
+    # plot the posteriors
     if i in sample_points:
-      print("current samples: %s" % allsamples)
       plot(bandits, i)
 
     # pull the arm for the bandit with the largest sample
-    x = bestb.pull()
+    x = bandits[j].pull()
+
+    # update rewards
+    rewards[i] = x
 
     # update the distribution for the bandit whose arm we just pulled
-    bestb.update(x)
+    bandits[j].update(x)
+
+  # print total reward
+  print("total reward earned:", rewards.sum())
+  print("overall win rate:", rewards.sum() / NUM_TRIALS)
+  print("num times selected each bandit:", [b.N for b in bandits])
 
 
 if __name__ == "__main__":

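Note on the diff above: the commit replaces the explicit best-sample bookkeeping loop with a single argmax over one posterior draw per arm. A minimal standalone sketch of that Thompson-sampling selection step (the posterior parameters below are illustrative, not from the commit):

import numpy as np

# Hypothetical Beta posterior parameters for three arms: (a, b) = (wins + 1, losses + 1)
posterior_params = [(12, 40), (55, 48), (160, 52)]

# Thompson sampling: draw one sample from each arm's Beta posterior,
# then pull the arm whose sample is largest.
samples = [np.random.beta(a, b) for a, b in posterior_params]
j = int(np.argmax(samples))
print("samples:", np.round(samples, 4), "-> chosen arm:", j)
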
ab_testing/bayesian_normal.py

Lines changed: 86 additions & 0 deletions (new file)

# https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python
# https://www.udemy.com/artificial-intelligence-reinforcement-learning-in-python
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm


np.random.seed(1)
NUM_TRIALS = 2000
BANDIT_MEANS = [1, 2, 3]


class Bandit:
  def __init__(self, true_mean):
    self.true_mean = true_mean
    # parameters for mu - prior is N(0,1)
    self.m = 0
    self.lambda_ = 1
    self.sum_x = 0 # for convenience
    self.tau = 1
    self.N = 0

  def pull(self):
    return np.random.randn() / np.sqrt(self.tau) + self.true_mean

  def sample(self):
    return np.random.randn() / np.sqrt(self.lambda_) + self.m

  def update(self, x):
    self.lambda_ += self.tau
    self.sum_x += x
    self.m = self.tau*self.sum_x / self.lambda_
    self.N += 1


def plot(bandits, trial):
  x = np.linspace(-3, 6, 200)
  for b in bandits:
    y = norm.pdf(x, b.m, np.sqrt(1. / b.lambda_))
    plt.plot(x, y, label=f"real mean: {b.true_mean:.4f}, num plays: {b.N}")
  plt.title(f"Bandit distributions after {trial} trials")
  plt.legend()
  plt.show()


def run_experiment():
  bandits = [Bandit(m) for m in BANDIT_MEANS]

  sample_points = [5,10,20,50,100,200,500,1000,1500,1999]
  rewards = np.empty(NUM_TRIALS)
  for i in range(NUM_TRIALS):
    # Thompson sampling
    j = np.argmax([b.sample() for b in bandits])

    # plot the posteriors
    if i in sample_points:
      plot(bandits, i)

    # pull the arm for the bandit with the largest sample
    x = bandits[j].pull()

    # update the distribution for the bandit whose arm we just pulled
    bandits[j].update(x)

    # update rewards
    rewards[i] = x

  cumulative_average = np.cumsum(rewards) / (np.arange(NUM_TRIALS) + 1)

  # plot moving average ctr
  plt.plot(cumulative_average)
  for m in BANDIT_MEANS:
    plt.plot(np.ones(NUM_TRIALS)*m)
  plt.show()

  return cumulative_average

if __name__ == '__main__':
  run_experiment()

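For reference, Bandit.update() in bayesian_normal.py is the standard conjugate update for a Gaussian likelihood with known precision tau and a N(0, 1) prior on the mean: the posterior precision grows by tau per observation, and the posterior mean is tau * sum(x) / lambda. A small sketch (assumed data, not part of the commit) checking the incremental form against the closed-form posterior:

import numpy as np

tau = 1.0                            # known likelihood precision, as in the Bandit class
m, lambda_, sum_x = 0.0, 1.0, 0.0    # prior on the mean: N(0, 1)

x_data = np.random.randn(50) + 3.0   # hypothetical observations

# incremental update, same arithmetic as Bandit.update()
for x in x_data:
  lambda_ += tau
  sum_x += x
  m = tau * sum_x / lambda_

# closed form: lambda_N = lambda_0 + N*tau, m_N = tau*sum(x) / lambda_N (prior mean 0)
lambda_closed = 1.0 + len(x_data) * tau
m_closed = tau * x_data.sum() / lambda_closed
print(np.isclose(lambda_, lambda_closed), np.isclose(m, m_closed))
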
ab_testing/bayesian_starter.py

Lines changed: 78 additions & 0 deletions (new file)

# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import beta


# np.random.seed(2)
NUM_TRIALS = 2000
BANDIT_PROBABILITIES = [0.2, 0.5, 0.75]


class Bandit:
  def __init__(self, p):
    self.p = p
    self.a = # TODO
    self.b = # TODO
    self.N = 0 # for information only

  def pull(self):
    return np.random.random() < self.p

  def sample(self):
    return # TODO - draw a sample from Beta(a, b)

  def update(self, x):
    self.a = # TODO
    self.b = # TODO
    self.N += 1


def plot(bandits, trial):
  x = np.linspace(0, 1, 200)
  for b in bandits:
    y = beta.pdf(x, b.a, b.b)
    plt.plot(x, y, label=f"real p: {b.p:.4f}, win rate = {b.a - 1}/{b.N}")
  plt.title(f"Bandit distributions after {trial} trials")
  plt.legend()
  plt.show()


def experiment():
  bandits = [Bandit(p) for p in BANDIT_PROBABILITIES]

  sample_points = [5,10,20,50,100,200,500,1000,1500,1999]
  rewards = np.zeros(NUM_TRIALS)
  for i in range(NUM_TRIALS):
    # Thompson sampling
    j = # TODO

    # plot the posteriors
    if i in sample_points:
      plot(bandits, i)

    # pull the arm for the bandit with the largest sample
    x = bandits[j].pull()

    # update rewards
    rewards[i] = x

    # update the distribution for the bandit whose arm we just pulled
    bandits[j].update(x)

  # print total reward
  print("total reward earned:", rewards.sum())
  print("overall win rate:", rewards.sum() / NUM_TRIALS)
  print("num times selected each bandit:", [b.N for b in bandits])


if __name__ == "__main__":
  experiment()

ab_testing/comparing_epsilons.py

Lines changed: 89 additions & 0 deletions (new file)

# https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python
# https://www.udemy.com/artificial-intelligence-reinforcement-learning-in-python
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import matplotlib.pyplot as plt


class BanditArm:
  def __init__(self, m):
    self.m = m
    self.m_estimate = 0
    self.N = 0

  def pull(self):
    return np.random.randn() + self.m

  def update(self, x):
    self.N += 1
    self.m_estimate = (1 - 1.0/self.N)*self.m_estimate + 1.0/self.N*x


def run_experiment(m1, m2, m3, eps, N):
  bandits = [BanditArm(m1), BanditArm(m2), BanditArm(m3)]

  # count number of suboptimal choices
  means = np.array([m1, m2, m3])
  true_best = np.argmax(means)
  count_suboptimal = 0

  data = np.empty(N)

  for i in range(N):
    # epsilon greedy
    p = np.random.random()
    if p < eps:
      j = np.random.choice(len(bandits))
    else:
      j = np.argmax([b.m_estimate for b in bandits])
    x = bandits[j].pull()
    bandits[j].update(x)

    if j != true_best:
      count_suboptimal += 1

    # for the plot
    data[i] = x
  cumulative_average = np.cumsum(data) / (np.arange(N) + 1)

  # plot moving average ctr
  plt.plot(cumulative_average)
  plt.plot(np.ones(N)*m1)
  plt.plot(np.ones(N)*m2)
  plt.plot(np.ones(N)*m3)
  plt.xscale('log')
  plt.show()

  for b in bandits:
    print(b.m_estimate)

  print("percent suboptimal for epsilon = %s:" % eps, float(count_suboptimal) / N)

  return cumulative_average

if __name__ == '__main__':
  m1, m2, m3 = 1.5, 2.5, 3.5
  c_1 = run_experiment(m1, m2, m3, 0.1, 100000)
  c_05 = run_experiment(m1, m2, m3, 0.05, 100000)
  c_01 = run_experiment(m1, m2, m3, 0.01, 100000)

  # log scale plot
  plt.plot(c_1, label='eps = 0.1')
  plt.plot(c_05, label='eps = 0.05')
  plt.plot(c_01, label='eps = 0.01')
  plt.legend()
  plt.xscale('log')
  plt.show()

  # linear plot
  plt.plot(c_1, label='eps = 0.1')
  plt.plot(c_05, label='eps = 0.05')
  plt.plot(c_01, label='eps = 0.01')
  plt.legend()
  plt.show()

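The m_estimate update in BanditArm.update() is the incremental form of the sample mean, which avoids storing every reward. A quick standalone check (illustrative values, not part of the commit):

import numpy as np

x_data = np.random.randn(1000) + 2.5   # hypothetical rewards from one arm

m_estimate, N = 0.0, 0
for x in x_data:
  N += 1
  m_estimate = (1 - 1.0/N) * m_estimate + (1.0/N) * x   # same rule as BanditArm.update()

# the incremental estimate equals the plain sample mean
print(np.isclose(m_estimate, x_data.mean()))
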