
Commit d4cff0d

make reward_every_n_steps True by default; change how end-of-episode rewards are handed out
1 parent dfd023b · commit d4cff0d

2 files changed: +2 −2 lines

mdp_playground/envs/gym_env_wrapper.py

Lines changed: 1 addition & 1 deletion
@@ -381,7 +381,7 @@ def step(self, action):
         if done:
             # if episode is finished return the rewards that were delayed and not
             # handed out before ##TODO add test case for this
-            # reward += np.sum(self.reward_buffer * self.reward_scale + self.reward_shift)
+            reward += np.sum(self.reward_buffer * self.reward_scale + self.reward_shift)
             reward += (
                 self.term_state_reward * self.reward_scale
             )  # Scale before or after?
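
Effect of this change: the previously commented-out line meant rewards still sitting in the delay buffer were silently dropped when an episode ended; the commit now flushes them, scaled and shifted, into the final step's reward. A minimal sketch of that flush, with hypothetical buffer contents and scale/shift values (names mirror the wrapper's attributes; the surrounding logic is simplified):

import numpy as np

# Hypothetical values; names mirror the wrapper's attributes.
reward_buffer = np.array([0.5, 1.0])   # rewards withheld by the delay
reward_scale, reward_shift = 2.0, 0.1
term_state_reward = 10.0

reward = 0.0
done = True
if done:
    # After this commit: delayed rewards are scaled, shifted, and
    # handed out at episode end instead of being dropped.
    reward += np.sum(reward_buffer * reward_scale + reward_shift)
    reward += term_state_reward * reward_scale  # scale before or after?

print(reward)  # ~23.2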

mdp_playground/envs/rl_toy_env.py

Lines changed: 1 addition & 1 deletion
@@ -525,7 +525,7 @@ def __init__(self, **config):
         self.action_loss_weight = config["action_loss_weight"]
 
         if "reward_every_n_steps" not in config:
-            self.reward_every_n_steps = False
+            self.reward_every_n_steps = True
         else:
             self.reward_every_n_steps = config["reward_every_n_steps"]
 
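
The other half of the commit flips the default: a config that omits "reward_every_n_steps" is now treated as True. A minimal sketch of the new behavior, using a hypothetical config dict; the diff's if/else is equivalent to dict.get with a True default:

config = {}  # hypothetical user config that omits the key
reward_every_n_steps = config.get("reward_every_n_steps", True)
print(reward_every_n_steps)  # True (was False before this commit)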
