Skip to content

Commit 33b7979

Browse files
authored
Merge pull request #33 from HumanCompatibleAI/display_right_rewards
Fix reward display
2 parents 68d8cc7 + 2ac66c6 commit 33b7979

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

src/reward_preprocessing/interpret.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -309,7 +309,8 @@ def param_f():
309 | 309 |   # Note that since actions is only used to choose which head to use, there are no
310 | 310 |   # gradients from the reward to the action. Consequently, acts in opt_latent is
311 | 311 |   # meaningless.
312 |     | - actions = th.tensor(list(range(num_features))).to(device)
    | 312 | + action_nums = th.tensor(list(range(num_features))).to(device)
    | 313 | + actions = th.nn.functional.one_hot(action_nums, num_classes=num_features)
313 | 314 |   assert len(actions) == len(obs)
314 | 315 |   rews = rew_net(obs.to(device), actions, next_obs.to(device), done=None)
315 | 316 |   custom_logger.log(f"Rewards: {rews}")

0 commit comments

Comments
 (0)