 from sklearn.kernel_approximation import RBFSampler
 from sklearn.linear_model import SGDRegressor

-gym_minor_version = int(gym.__version__.split('.')[1])
-if gym_minor_version >= 19:
-  exit("Please install OpenAI Gym 0.19.0 or earlier")
-

 # SGDRegressor defaults:
 # loss='squared_loss', penalty='l2', alpha=0.0001,
@@ -74,7 +70,7 @@ def __init__(self, env, feature_transformer, learning_rate): |
     self.feature_transformer = feature_transformer
     for i in range(env.action_space.n):
       model = SGDRegressor(learning_rate=learning_rate)
-      model.partial_fit(feature_transformer.transform( [env.reset()] ), [0])
+      model.partial_fit(feature_transformer.transform( [env.reset()[0]] ), [0])
       self.models.append(model)

   def predict(self, s):
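The `env.reset()[0]` change above follows the newer Gym API: from gym 0.26 onward (and in Gymnasium), `env.reset()` returns an `(observation, info)` tuple rather than the bare observation, so indexing `[0]` recovers the state. A minimal sketch of that difference, with an illustrative environment name (the actual environment is created elsewhere in the script):

import gym  # assumes gym >= 0.26; `import gymnasium as gym` behaves the same way

env = gym.make("MountainCar-v0")   # illustrative only, not necessarily the env used in this file
observation, info = env.reset()    # new API: a 2-tuple of (observation, info)
observation = env.reset()[0]       # equivalent to the indexing used in the diff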
@@ -103,14 +99,14 @@ def sample_action(self, s, eps): |

 # returns a list of states_and_rewards, and the total reward
 def play_one(model, env, eps, gamma):
-  observation = env.reset()
+  observation = env.reset()[0]
   done = False
   totalreward = 0
   iters = 0
   while not done and iters < 10000:
     action = model.sample_action(observation, eps)
     prev_observation = observation
-    observation, reward, done, info = env.step(action)
+    observation, reward, done, truncated, info = env.step(action)

     # update the model
     if done:
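Likewise, `env.step()` in those releases returns a 5-tuple `(observation, reward, terminated, truncated, info)` instead of the old 4-tuple, which is what the extra `truncated` variable in the last hunk unpacks. Continuing the sketch above under the same assumptions:

action = env.action_space.sample()  # illustrative action
observation, reward, terminated, truncated, info = env.step(action)  # new 5-tuple API
done = terminated or truncated      # common way to recover the old single done flag

Folding `truncated` into `done` like this is the usual way to keep the old "episode ends the loop" behaviour when an environment finishes via a time limit rather than by reaching a terminal state.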