
Commit 0fc373c

Added reward_scale and reward_shift to the complex env wrappers and corrected the toy env so that reward_scale is applied after reward noise, matching the wrapper
1 parent 6ffcd6f commit 0fc373c

File tree

7 files changed: +118 -1 lines changed

4 files renamed without changes.

mdp_playground/envs/gym_env_wrapper.py

Lines changed: 15 additions & 0 deletions
@@ -20,6 +20,8 @@ class GymEnvWrapper(gym.Env):
     transition noise (for discrete environments)
     reward delay
     reward noise
+    reward scale
+    reward shift
     image_transforms

     The wrapper is pretty general and can be applied to any Gym Environment. The environment should be instantiated and passed as the 1st argument to the __init__ method of this class. If using this wrapper with Atari, additional keys may be added specifying either atari_preprocessing = True or wrap_deepmind_ray = True. These would use the AtariPreprocessing wrapper from OpenAI Gym or wrap_deepmind() wrapper from Ray Rllib.
@@ -92,6 +94,17 @@ def __init__(self, env, **config):
         else:
             self.reward_noise = None

+        if "reward_scale" not in config:
+            self.reward_scale = 1.0
+        else:
+            self.reward_scale = config["reward_scale"]
+
+        if "reward_shift" not in config:
+            self.reward_shift = 0.0
+        else:
+            self.reward_shift = config["reward_shift"]
+
+
         if "image_transforms" not in config:
             self.image_transforms = False
         else:
@@ -360,6 +373,8 @@ def step(self, action):
         self.total_abs_noise_in_reward_episode += np.abs(noise_in_reward)
         self.total_reward_episode += reward
         reward += noise_in_reward
+        reward *= self.reward_scale
+        reward += self.reward_shift

         return next_state, reward, done, info
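
As a usage sketch (not part of this commit's diff): the new keys are plain config entries read in __init__ above. The import path for GymEnvWrapper is assumed from the file location in this commit, and the Atari settings are taken from the tests below; the game choice is incidental.

from gym.envs.atari import AtariEnv
from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper  # assumed import path

# Hedged sketch: wrap an Atari env and apply an affine reward transform.
# Per step() above, each (possibly noisy) reward becomes
# (reward + noise) * reward_scale + reward_shift.
config = {
    "reward_scale": 2.0,  # multiplies the reward after noise is added
    "reward_shift": 1.0,  # then adds a constant offset
    "atari_preprocessing": True,
    "frame_skip": 4,
    "grayscale_obs": False,
    "state_space_type": "discrete",
    "action_space_type": "discrete",
    "seed": 0,
}

ae = AtariEnv(**{"game": "beam_rider", "obs_type": "image", "frameskip": 1})
aew = GymEnvWrapper(ae, **config)
obs = aew.reset()
next_state, reward, done, info = aew.step(aew.action_space.sample())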

mdp_playground/envs/rl_toy_env.py

Lines changed: 1 addition & 1 deletion
@@ -1877,12 +1877,12 @@ def reward_function(self, state, action):
         if list(new_relevant_state) == self.target_point:
             reward += 1.0

-        reward *= self.reward_scale
         noise_in_reward = self.reward_noise(self.np_random) if self.reward_noise else 0
         # #random ###TODO Would be better to parameterise this in terms of state, action and time_step as well. Would need to change implementation to have a queue for the rewards achieved and then pick the reward that was generated delay timesteps ago.
         self.total_abs_noise_in_reward_episode += np.abs(noise_in_reward)
         self.total_reward_episode += reward
         reward += noise_in_reward
+        reward *= self.reward_scale
         reward += self.reward_shift
         return reward

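The toy-env change above only moves reward *= self.reward_scale so that, as in the wrapper, scaling is applied after reward noise is added. A minimal sketch of the shared post-processing, with a hypothetical helper name and raw-reward values inferred from the tests below rather than stated in the commit:

def postprocess_reward(reward, noise_in_reward=0.0, reward_scale=1.0, reward_shift=0.0):
    # Order now used by both rl_toy_env.py and gym_env_wrapper.py:
    # add noise first, then scale, then shift.
    reward += noise_in_reward
    reward *= reward_scale
    reward += reward_shift
    return reward

# With a raw reward of 44.0 and no noise (values inferred from the test assertions):
assert postprocess_reward(44.0, reward_shift=1.0) == 45.0  # matches test_r_shift
assert postprocess_reward(44.0, reward_scale=2.0) == 88.0  # matches test_r_scale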

tests/test_gym_env_wrapper.py

Lines changed: 102 additions & 0 deletions
@@ -68,6 +68,108 @@ def test_r_delay(self):
         print("total_reward:", total_reward)
         aew.reset()

+    def test_r_shift(self):
+        """ """
+        print("\033[32;1;4mTEST_REWARD_SHIFT\033[0m")
+        config = {
+            "AtariEnv": {
+                "game": "beam_rider",  # "breakout",
+                "obs_type": "image",
+                "frameskip": 1,
+            },
+            "reward_shift": 1,
+            # "GymEnvWrapper": {
+            "atari_preprocessing": True,
+            "frame_skip": 4,
+            "grayscale_obs": False,
+            "state_space_type": "discrete",
+            "action_space_type": "discrete",
+            "seed": 0,
+            # },
+            # 'seed': 0, #seed
+        }
+
+        # config["log_filename"] = log_filename
+
+        from gym.envs.atari import AtariEnv
+
+        ae = AtariEnv(**{"game": "beam_rider", "obs_type": "image", "frameskip": 1})
+        aew = GymEnvWrapper(ae, **config)
+        ob = aew.reset()
+        print("observation_space.shape:", ob.shape)
+        # print(ob)
+        total_reward = 0.0
+        for i in range(200):
+            act = aew.action_space.sample()
+            next_state, reward, done, info = aew.step(act)
+            print("step, reward, done, act:", i, reward, done, act)
+            if i == 153 or i == 158:
+                assert reward == 45.0, (
+                    "Shifted reward in step: "
+                    + str(i)
+                    + " should have been 45.0."
+                )
+            if i == 154 or i == 160:
+                assert reward == 1.0, (
+                    "Shifted reward in step: "
+                    + str(i)
+                    + " should have been 1.0."
+                )
+            total_reward += reward
+        print("total_reward:", total_reward)
+        aew.reset()
+
+    def test_r_scale(self):
+        """ """
+        print("\033[32;1;4mTEST_REWARD_SCALE\033[0m")
+        config = {
+            "AtariEnv": {
+                "game": "beam_rider",  # "breakout",
+                "obs_type": "image",
+                "frameskip": 1,
+            },
+            "reward_scale": 2,
+            # "GymEnvWrapper": {
+            "atari_preprocessing": True,
+            "frame_skip": 4,
+            "grayscale_obs": False,
+            "state_space_type": "discrete",
+            "action_space_type": "discrete",
+            "seed": 0,
+            # },
+            # 'seed': 0, #seed
+        }
+
+        # config["log_filename"] = log_filename
+
+        from gym.envs.atari import AtariEnv
+
+        ae = AtariEnv(**{"game": "beam_rider", "obs_type": "image", "frameskip": 1})
+        aew = GymEnvWrapper(ae, **config)
+        ob = aew.reset()
+        print("observation_space.shape:", ob.shape)
+        # print(ob)
+        total_reward = 0.0
+        for i in range(200):
+            act = aew.action_space.sample()
+            next_state, reward, done, info = aew.step(act)
+            print("step, reward, done, act:", i, reward, done, act)
+            if i == 153 or i == 158:
+                assert reward == 88.0, (
+                    "Scaled reward in step: "
+                    + str(i)
+                    + " should have been 88.0."
+                )
+            if i == 154 or i == 160:
+                assert reward == 0.0, (
+                    "Scaled reward in step: "
+                    + str(i)
+                    + " should have been 0.0."
+                )
+            total_reward += reward
+        print("total_reward:", total_reward)
+        aew.reset()
+
     def test_r_delay_ray_frame_stack(self):
         """
         Uses wrap_deepmind_ray to frame stack Atari
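
To exercise just the two new tests, a hedged example from Python, assuming the repository's tests are run with pytest (the runner is not shown in this commit) and that the Atari dependencies for gym.envs.atari are installed:

import pytest

# Select only the new reward_shift / reward_scale tests by name; -s keeps the prints visible.
pytest.main(["tests/test_gym_env_wrapper.py", "-k", "test_r_shift or test_r_scale", "-s"])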
