Commit aba93a3
Commit message: update
1 parent: f8347a3

21 files changed, +132 −19 lines

Diff for: rl2/a3c/main.py (+8)

@@ -13,6 +13,14 @@
 from worker import Worker


+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+
+if tf.__version__.startswith('2'):
+  exit("Please install Tensorflow 1.x")
+
+
 ENV_NAME = "Breakout-v0"
 MAX_GLOBAL_STEPS = 5e6
 STEPS_PER_UPDATE = 5
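The same version guard is pasted near the top of most files in this commit. A minimal standalone sketch of it, assuming only that gym and tensorflow are importable (this restates the guard from the diff rather than adding new repo code):

  import gym
  import tensorflow as tf

  # parse the minor version out of a string like "0.18.0"
  gym_minor_version = int(gym.__version__.split('.')[1])

  # as written, the guard also rejects 0.19.x itself, despite the message text
  if gym_minor_version >= 19:
    exit("Please install OpenAI Gym 0.19.0 or earlier")

  # the scripts target the TF1 graph API, so refuse to run under TF2
  if tf.__version__.startswith('2'):
    exit("Please install Tensorflow 1.x")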

Diff for: rl2/atari/dqn_tf.py (mode 100644 → 100755, +10 −1)

@@ -19,7 +19,12 @@



+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")

+if tf.__version__.startswith('2'):
+  exit("Please install Tensorflow 1.x")

 ##### testing only
 # MAX_EXPERIENCES = 10000

@@ -141,7 +146,11 @@ def get_minibatch(self):
       self.states[i] = self._get_state(idx - 1)
       self.new_states[i] = self._get_state(idx)

-    return np.transpose(self.states, axes=(0, 2, 3, 1)), self.actions[self.indices], self.rewards[self.indices], np.transpose(self.new_states, axes=(0, 2, 3, 1)), self.terminal_flags[self.indices]
+    return np.transpose(self.states, axes=(0, 2, 3, 1)), \
+           self.actions[self.indices], \
+           self.rewards[self.indices], \
+           np.transpose(self.new_states, axes=(0, 2, 3, 1)), \
+           self.terminal_flags[self.indices]


 class DQN:
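Context for the reformatted return above: the replay buffer stores each sample channels-first, and np.transpose(..., axes=(0, 2, 3, 1)) reorders it to channels-last before it is fed to the TensorFlow graph. A tiny sketch with assumed shapes (the 32/4/84 numbers are illustrative, not taken from the diff):

  import numpy as np

  # hypothetical minibatch: 32 samples of 4 stacked 84x84 frames, channels first
  states = np.zeros((32, 4, 84, 84), dtype=np.uint8)

  # reorder to (batch, height, width, frames) for a channels-last conv net
  states_nhwc = np.transpose(states, axes=(0, 2, 3, 1))
  print(states_nhwc.shape)  # (32, 84, 84, 4)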

Diff for: rl2/atari/dqn_theano.py (mode 100644 → 100755, +5 −1)

@@ -140,7 +140,11 @@ def get_minibatch(self):
       self.states[i] = self._get_state(idx - 1)
       self.new_states[i] = self._get_state(idx)

-    return self.states, self.actions[self.indices], self.rewards[self.indices], self.new_states, self.terminal_flags[self.indices]
+    return self.states, \
+           self.actions[self.indices], \
+           self.rewards[self.indices], \
+           self.new_states, \
+           self.terminal_flags[self.indices]


 def init_filter(shape):

Diff for: rl2/cartpole/dqn_tf.py (+7)

@@ -15,6 +15,13 @@
 from datetime import datetime
 from q_learning_bins import plot_running_avg

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+
+if tf.__version__.startswith('2'):
+  exit("Please install Tensorflow 1.x")
+

 # global counter
 global_iters = 0

Diff for: rl2/cartpole/dqn_theano.py (+4)

@@ -16,6 +16,10 @@
 from datetime import datetime
 from q_learning_bins import plot_running_avg

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+

 # global counter
 global_iters = 0

Diff for: rl2/cartpole/pg_tf.py (+7)

@@ -16,6 +16,13 @@
 from datetime import datetime
 from q_learning_bins import plot_running_avg

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+
+if tf.__version__.startswith('2'):
+  exit("Please install Tensorflow 1.x")
+

 # so you can test different architectures
 class HiddenLayer:

Diff for: rl2/cartpole/pg_theano.py (+4)

@@ -17,6 +17,10 @@
 from datetime import datetime
 from q_learning_bins import plot_running_avg

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+

 # so you can test different architectures
 class HiddenLayer:

Diff for: rl2/cartpole/q_learning.py (+4)

@@ -20,6 +20,10 @@
 from sklearn.kernel_approximation import RBFSampler
 from q_learning_bins import plot_running_avg

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+

 class SGDRegressor:
   def __init__(self, D):

Diff for: rl2/cartpole/q_learning_bins.py (+4)

@@ -15,6 +15,10 @@
 from gym import wrappers
 from datetime import datetime

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+

 # turns list of integers into an int
 # Ex.

Diff for: rl2/cartpole/random_search.py (+4)

@@ -9,6 +9,10 @@
 import numpy as np
 import matplotlib.pyplot as plt

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+

 def get_action(s, w):
   return 1 if s.dot(w) > 0 else 0

Diff for: rl2/cartpole/save_a_video.py (+5 −1)

@@ -10,6 +10,11 @@
 import numpy as np
 import matplotlib.pyplot as plt

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+
+

 def get_action(s, w):
   return 1 if s.dot(w) > 0 else 0

@@ -63,6 +68,5 @@ def random_search(env):
   plt.show()

   # play a final set of episodes
-  # env = wrappers.Monitor(env, 'my_awesome_dir')
   env = wrappers.RecordVideo(env, 'my_awesome_dir')
   print("***Final run with final weights***:", play_one_episode(env, params))

Diff for: rl2/cartpole/td_lambda.py (+5)

@@ -15,6 +15,11 @@
 from q_learning import FeatureTransformer
 from q_learning_bins import plot_running_avg

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+
+

 class SGDRegressor:
   def __init__(self, D):

Diff for: rl2/cartpole/tf_warmup.py (+3)

@@ -7,6 +7,9 @@
 import tensorflow as tf
 import q_learning

+if tf.__version__.startswith('2'):
+  exit("Please install Tensorflow 1.x")
+

 class SGDRegressor:
   def __init__(self, D):

Diff for: rl2/gym_tutorial.py (+5)

@@ -6,6 +6,11 @@
 # Environment page:
 # https://gym.openai.com/envs/CartPole-v0

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+
+

 # get the environment
 env = gym.make('CartPole-v0')

Diff for: rl2/mountaincar/n_step.py (+4)

@@ -24,6 +24,10 @@
 import q_learning
 from q_learning import plot_cost_to_go, FeatureTransformer, Model, plot_running_avg

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+

 class SGDRegressor:
   def __init__(self, **kwargs):

Diff for: rl2/mountaincar/pg_tf.py (mode 100644 → 100755, +13 −2)

@@ -15,6 +15,13 @@
 from datetime import datetime
 from q_learning import plot_running_avg, FeatureTransformer, plot_cost_to_go

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+
+if tf.__version__.startswith('2'):
+  exit("Please install Tensorflow 1.x")
+

 # so you can test different architectures
 class HiddenLayer:

@@ -177,8 +184,12 @@ def play_one_td(env, pmodel, vmodel, gamma):
     totalreward += reward

     # update the models
-    V_next = vmodel.predict(observation)
-    G = reward + gamma*V_next
+    if done:
+      G = reward
+    else:
+      V_next = vmodel.predict(observation)
+      G = reward + gamma*V_next
+
     advantage = G - vmodel.predict(prev_observation)
     pmodel.partial_fit(prev_observation, action, advantage)
     vmodel.partial_fit(prev_observation, G)
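The substantive change in the second hunk is the bootstrap at episode end: when done is true there is no successor state, so the target is just the reward rather than reward + gamma*V(s'). A small self-contained restatement of that rule (the helper name is made up for illustration):

  def td_target(reward, done, v_next, gamma=0.99):
    # terminal transition: nothing to bootstrap from
    if done:
      return reward
    # non-terminal transition: one-step bootstrapped target
    return reward + gamma * v_next

  # e.g. a terminal step with reward -1 gives a target of -1,
  # while a non-terminal step with V(s') = 10 gives -1 + 0.99*10 = 8.9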

Diff for: rl2/mountaincar/pg_tf_random.py (+7)

@@ -15,6 +15,13 @@
 from datetime import datetime
 from q_learning import plot_running_avg, FeatureTransformer

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+
+if tf.__version__.startswith('2'):
+  exit("Please install Tensorflow 1.x")
+

 # so you can test different architectures
 class HiddenLayer:

Diff for: rl2/mountaincar/pg_theano.py (mode 100644 → 100755, +8 −5)

@@ -208,7 +208,7 @@ def predict(self, X):
     return self.predict_op(X)


-def play_one_td(env, pmodel, vmodel, gamma, train=True):
+def play_one_td(env, pmodel, vmodel, gamma):
   observation = env.reset()
   done = False
   totalreward = 0

@@ -224,12 +224,15 @@ def play_one_td(env, pmodel, vmodel, gamma, train=True):
     totalreward += reward

     # update the models
-    if train:
+    if done:
+      G = reward
+    else:
       V_next = vmodel.predict(observation)
       G = reward + gamma*V_next
-      advantage = G - vmodel.predict(prev_observation)
-      pmodel.partial_fit(prev_observation, action, advantage)
-      vmodel.partial_fit(prev_observation, G)
+
+    advantage = G - vmodel.predict(prev_observation)
+    pmodel.partial_fit(prev_observation, action, advantage)
+    vmodel.partial_fit(prev_observation, G)

     iters += 1

Diff for: rl2/mountaincar/pg_theano_random.py (+4)

@@ -16,6 +16,10 @@
 from datetime import datetime
 from q_learning import plot_running_avg, FeatureTransformer

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+


 # so you can test different architectures

Diff for: rl2/mountaincar/q_learning.py (mode 100644 → 100755, +14 −6)

@@ -27,6 +27,10 @@
 from sklearn.kernel_approximation import RBFSampler
 from sklearn.linear_model import SGDRegressor

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+

 # SGDRegressor defaults:
 # loss='squared_loss', penalty='l2', alpha=0.0001,

@@ -109,9 +113,13 @@ def play_one(model, env, eps, gamma):
     observation, reward, done, info = env.step(action)

     # update the model
-    next = model.predict(observation)
-    # assert(next.shape == (1, env.action_space.n))
-    G = reward + gamma*np.max(next[0])
+    if done:
+      G = reward
+    else:
+      Qnext = model.predict(observation)
+      # assert(next.shape == (1, env.action_space.n))
+      G = reward + gamma*np.max(Qnext[0])
+
     model.update(prev_observation, action, G)

     totalreward += reward

@@ -165,14 +173,14 @@ def main(show_plots=True):
   N = 300
   totalrewards = np.empty(N)
   for n in range(N):
-    # eps = 1.0/(0.1*n+1)
-    eps = 0.1*(0.97**n)
+    eps = 1.0/(0.1*n+1)
+    # eps = 0.1*(0.97**n)
     if n == 199:
       print("eps:", eps)
     # eps = 1.0/np.sqrt(n+1)
     totalreward = play_one(model, env, eps, gamma)
     totalrewards[n] = totalreward
-    if (n + 1) % 100 == 0:
+    if (n + 1) % 10 == 0:
       print("episode:", n, "total reward:", totalreward)
   print("avg reward for last 100 episodes:", totalrewards[-100:].mean())
   print("total steps:", -totalrewards.sum())

Diff for: rl2/mountaincar/td_lambda.py (mode 100644 → 100755, +7 −3)

@@ -23,6 +23,10 @@
 # code we already wrote
 from q_learning import plot_cost_to_go, FeatureTransformer, plot_running_avg

+gym_minor_version = int(gym.__version__.split('.')[1])
+if gym_minor_version >= 19:
+  exit("Please install OpenAI Gym 0.19.0 or earlier")
+

 class BaseModel:
   def __init__(self, D):

@@ -83,9 +87,9 @@ def play_one(model, env, eps, gamma, lambda_):
     observation, reward, done, info = env.step(action)

     # update the model
-    next = model.predict(observation)
-    assert(next.shape == (1, env.action_space.n))
-    G = reward + gamma*np.max(next[0])
+    Qnext = model.predict(observation)
+    assert(Qnext.shape == (1, env.action_space.n))
+    G = reward + gamma*np.max(Qnext[0])
     model.update(prev_observation, action, G, gamma, lambda_)

     totalreward += reward
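The next → Qnext rename here (and in mountaincar/q_learning.py above) avoids shadowing Python's built-in next(): once the old name is assigned, any later call to next() in the same scope would fail. A two-line illustration of the problem being avoided:

  next = [1, 2, 3]   # rebinds the name, hiding the builtin
  next(iter(next))   # TypeError: 'list' object is not callable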
