Skip to content

Commit eacee8c

Browse files
update
1 parent 3cddb9b commit eacee8c

File tree

5 files changed

+16
-10
lines changed

5 files changed

+16
-10
lines changed

examples/reinforcement_learning/tutorial_C51.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
 import gym

 parser = argparse.ArgumentParser()
-parser.add_argument('--train', dest='train', action='store_true', default=False)
+parser.add_argument('--train', dest='train', action='store_true', default=True)
 parser.add_argument('--test', dest='test', action='store_true', default=True)
 parser.add_argument(
     '--save_path', default=None, help='folder to save if mode == train else model path,'

examples/reinforcement_learning/tutorial_DQN.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@

 # add arguments in command --train/test
 parser = argparse.ArgumentParser(description='Train or test neural net motor controller.')
-parser.add_argument('--train', dest='train', action='store_true', default=False)
+parser.add_argument('--train', dest='train', action='store_true', default=True)
 parser.add_argument('--test', dest='test', action='store_true', default=True)
 args = parser.parse_args()

@@ -105,7 +105,6 @@ def load_ckpt(model):  # load trained weights
 all_episode_reward = []
 for i in range(num_episodes):
     ## Reset environment and get first new observation
-    # episode_time = time.time()
     s = env.reset()  # observation is state, integer 0 ~ 15
     rAll = 0
     if render: env.render()
@@ -164,7 +163,6 @@ def load_ckpt(model):  # load trained weights
 load_ckpt(qnetwork)  # load model
 for i in range(num_episodes):
     ## Reset environment and get first new observation
-    episode_time = time.time()
     s = env.reset()  # observation is state, integer 0 ~ 15
     rAll = 0
     if render: env.render()

examples/reinforcement_learning/tutorial_DQN_variants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
 import gym

 parser = argparse.ArgumentParser()
-parser.add_argument('--train', dest='train', action='store_true', default=False)
+parser.add_argument('--train', dest='train', action='store_true', default=True)
 parser.add_argument('--test', dest='test', action='store_true', default=True)
 parser.add_argument(
     '--save_path', default=None, help='folder to save if mode == train else model path,'
@@ -381,6 +381,7 @@ def load(self, path):
     # note that `_` tail in var name means next
     o_, r, done, info = env.step(a)
     buffer.add(o, a, r, o_, done)
+    episode_reward += r

     if i >= warm_start:
         transitions = buffer.sample(batch_size)

examples/reinforcement_learning/tutorial_Qlearning.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
 import matplotlib.pyplot as plt

 parser = argparse.ArgumentParser()
-parser.add_argument('--train', dest='train', action='store_true', default=False)
+parser.add_argument('--train', dest='train', action='store_true', default=True)
 parser.add_argument('--test', dest='test', action='store_true', default=True)

 parser.add_argument(
@@ -62,8 +62,11 @@
         s = s1
         if d is True:
             break
-    print("Episode [%d/%d] sum reward: %f took: %.5fs " % (i, num_episodes, rAll, time.time() - t0))
-
+    print(
+        'Training | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
+            i + 1, num_episodes, rAll, time.time() - t0
+        )
+    )
     if i == 0:
         all_episode_reward.append(rAll)
     else:
@@ -100,4 +103,8 @@
         s = s1
         if d is True:
             break
-    print("Episode [%d/%d] sum reward: %f took: %.5fs " % (i, num_episodes, rAll, time.time() - t0))
+    print(
+        'Testing | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
+            i + 1, num_episodes, rAll, time.time() - t0
+        )
+    )

examples/reinforcement_learning/tutorial_prioritized_replay.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737

3838
parser = argparse.ArgumentParser()
3939
# add arguments in command --train/test
40-
parser.add_argument('--train', dest='train', action='store_true', default=False)
40+
parser.add_argument('--train', dest='train', action='store_true', default=True)
4141
parser.add_argument('--test', dest='test', action='store_true', default=True)
4242
parser.add_argument(
4343
'--save_path', default=None, help='folder to save if mode == train else model path,'

0 commit comments

Comments
 (0)