
Commit bf9388a

Commit message: update

1 parent 3aaa09b commit bf9388a

2 files changed: +9, -14 lines


Diff for: rl2/mountaincar/n_step.py

+6, -7

@@ -24,10 +24,6 @@
 import q_learning
 from q_learning import plot_cost_to_go, FeatureTransformer, Model, plot_running_avg
 
-gym_minor_version = int(gym.__version__.split('.')[1])
-if gym_minor_version >= 19:
-  exit("Please install OpenAI Gym 0.19.0 or earlier")
-
 
 class SGDRegressor:
   def __init__(self, **kwargs):
@@ -58,7 +54,7 @@ def predict(self, X):
 
 # returns a list of states_and_rewards, and the total reward
 def play_one(model, eps, gamma, n=5):
-  observation = env.reset()
+  observation = env.reset()[0]
   done = False
   totalreward = 0
   rewards = []
@@ -77,15 +73,18 @@ def play_one(model, eps, gamma, n=5):
     actions.append(action)
 
     prev_observation = observation
-    observation, reward, done, info = env.step(action)
+    observation, reward, done, truncated, info = env.step(action)
 
     rewards.append(reward)
 
     # update the model
     if len(rewards) >= n:
       # return_up_to_prediction = calculate_return_before_prediction(rewards, gamma)
       return_up_to_prediction = multiplier.dot(rewards[-n:])
-      G = return_up_to_prediction + (gamma**n)*np.max(model.predict(observation)[0])
+      action_values = model.predict(observation)[0]
+      # print("action_values.shape:", action_values.shape)
+      G = return_up_to_prediction + (gamma**n)*np.max(action_values)
+      # print("G:", G)
       model.update(states[-n], actions[-n], G)
 
     # if len(rewards) > n:
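
Both hunks track the newer Gym API: env.reset() now returns an (observation, info) tuple, and env.step() returns five values, with time-limit truncation reported through a separate truncated flag. The following is a minimal sketch of that calling convention, assuming gym >= 0.26 and the MountainCar-v0 environment this script uses; the function name is illustrative and not part of the repo.

# Minimal sketch of the gym >= 0.26 reset/step convention (assumption:
# gym >= 0.26 installed, MountainCar-v0 available); not part of the repo.
import gym

def run_random_episode():
  env = gym.make("MountainCar-v0")
  observation, info = env.reset()   # reset() returns (observation, info)
  done = truncated = False
  totalreward = 0
  while not (done or truncated):
    action = env.action_space.sample()
    # step() returns five values: obs, reward, terminated, truncated, info
    observation, reward, done, truncated, info = env.step(action)
    totalreward += reward
  return totalreward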

Diff for: rl2/mountaincar/q_learning.py

+3, -7

@@ -27,10 +27,6 @@
 from sklearn.kernel_approximation import RBFSampler
 from sklearn.linear_model import SGDRegressor
 
-gym_minor_version = int(gym.__version__.split('.')[1])
-if gym_minor_version >= 19:
-  exit("Please install OpenAI Gym 0.19.0 or earlier")
-
 
 # SGDRegressor defaults:
 # loss='squared_loss', penalty='l2', alpha=0.0001,
@@ -74,7 +70,7 @@ def __init__(self, env, feature_transformer, learning_rate):
     self.feature_transformer = feature_transformer
     for i in range(env.action_space.n):
       model = SGDRegressor(learning_rate=learning_rate)
-      model.partial_fit(feature_transformer.transform( [env.reset()] ), [0])
+      model.partial_fit(feature_transformer.transform( [env.reset()[0]] ), [0])
       self.models.append(model)
 
   def predict(self, s):
@@ -103,14 +99,14 @@ def sample_action(self, s, eps):
 
 # returns a list of states_and_rewards, and the total reward
 def play_one(model, env, eps, gamma):
-  observation = env.reset()
+  observation = env.reset()[0]
   done = False
   totalreward = 0
   iters = 0
   while not done and iters < 10000:
    action = model.sample_action(observation, eps)
    prev_observation = observation
-    observation, reward, done, info = env.step(action)
+    observation, reward, done, truncated, info = env.step(action)
 
    # update the model
    if done:
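
The partial_fit change follows the same reset convention: only the observation part of env.reset() is featurized when seeding each per-action regressor. Below is a minimal, self-contained sketch of that initialization pattern, assuming gym >= 0.26 and scikit-learn; the RBFSampler stands in for the repo's FeatureTransformer, and the variable names are illustrative.

# Minimal sketch of the per-action regressor initialization (assumptions:
# gym >= 0.26 and scikit-learn installed; RBFSampler used in place of the
# repo's FeatureTransformer).
import gym
import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDRegressor

env = gym.make("MountainCar-v0")
featurizer = RBFSampler(gamma=1.0, n_components=500)
featurizer.fit(np.array([env.observation_space.sample() for _ in range(1000)]))

models = []
for _ in range(env.action_space.n):
  m = SGDRegressor(learning_rate="constant")
  # env.reset() returns (observation, info); [0] keeps only the observation.
  # One partial_fit call with a dummy target of 0 initializes the weights so
  # predict() can be called before any real updates.
  m.partial_fit(featurizer.transform([env.reset()[0]]), [0])
  models.append(m)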
