Commit 4ee76c1

MAJOR: Version bumped to 1.0.0 with breaking API changes: switch from gym to gymnasium, with the associated changes to step() and reset() return values, etc.; upgrade numpy and random number generation. Tests still need to be updated, and the minigrid example in example.py is failing.
1 parent 2502532 commit 4ee76c1

25 files changed, +340 −387 lines
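The core breaking change is the move to Gymnasium's stepping API: reset() now returns an (observation, info) pair and step() returns a 5-tuple that splits the old done flag into terminated and truncated. A minimal before/after sketch, assuming Gymnasium 0.26+ conventions; the environment id is illustrative, not part of this commit:

# Old gym API (pre-0.26 style), for contrast:
#   obs = env.reset()
#   next_obs, reward, done, info = env.step(action)

# New Gymnasium API, as adopted in this commit:
import gymnasium as gym

env = gym.make("CartPole-v1")  # illustrative env; any registered id works
obs, info = env.reset(seed=0)  # reset() now returns (obs, info)
action = env.action_space.sample()
next_obs, reward, terminated, truncated, info = env.step(action)  # 5-tuple
done = terminated or truncated  # old-style `done` is the disjunction
env.close()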

docs/_build/html/_modules/mdp_playground/envs/gym_env_wrapper.html (+6 −6)

@@ -602,7 +602,7 @@
         # print("Setting Mujoco self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight to", self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight, "corresponding to time_unit in config.")

     def step(self, action):
-        # next_state, reward, done, info = super(GymEnvWrapper, self).step(action)
+        # next_state, reward, done, trunc, info = super(GymEnvWrapper, self).step(action)
         self.total_transitions_episode += 1

         if self.config["state_space_type"] == "discrete" and self.transition_noise > 0.0:

@@ -689,7 +689,7 @@
        int
            The seed returned by Gym
        """
-        # If seed is None, you get a randomly generated seed from gym.utils...
+        # If seed is None, you get a randomly generated seed from gymnasium.utils...
        self.np_random, self.seed_ = gym.utils.seeding.np_random(seed)  # random
        print("Env SEED set to: " + str(seed) + ". Returned seed from Gym: " + str(self.seed_))

@@ -701,8 +701,8 @@


 # from mdp_playground.envs.gym_env_wrapper import get_gym_wrapper
-# from gym.envs.atari import AtariEnv
-# from gym.wrappers import AtariPreprocessing
+# from gymnasium.envs.atari import AtariEnv
+# from gymnasium.wrappers import AtariPreprocessing
 # AtariPreprocessing()
 # AtariEnvWrapper = get_gym_wrapper(AtariEnv)
 # from ray.tune.registry import register_env

@@ -711,7 +711,7 @@
 # ob = aew.reset()

 # from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-# from gym.envs.atari import AtariEnv
+# from gymnasium.envs.atari import AtariEnv
 # ae = AtariEnv(**{'game': 'beam_rider', 'obs_type': 'image', 'frameskip': 1})
 # aew = GymEnvWrapper(ae, **{'reward_noise': lambda a: a.normal(0, 0.1), 'transition_noise': 0.1, 'delay': 1, 'frame_skip': 4, "atari_preprocessing": True, "state_space_type": "discrete", 'seed': 0})
 # ob = aew.reset()

@@ -720,7 +720,7 @@
 # total_reward = 0.0
 # for i in range(200):
 #     act = aew.action_space.sample()
-#     next_state, reward, done, info = aew.step(act)
+#     next_state, reward, done, trunc, info = aew.step(act)
 #     print(reward, done, act)
 #     if reward > 10:
 #         print("reward in step:", i, reward)
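The commented-out usage in the last hunks translates to the following runnable sketch under Gymnasium. It is illustrative only: it assumes the Atari extras (ale-py and ROMs) are installed, and the env id and reset()[0] indexing are my assumptions; the GymEnvWrapper config keys mirror the comment in the diff.

from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
import gymnasium as gym

ae = gym.make("BeamRiderNoFrameskip-v4")  # assumed id; the comment constructs AtariEnv directly
aew = GymEnvWrapper(ae, **{'reward_noise': lambda a: a.normal(0, 0.1),
                           'transition_noise': 0.1, 'delay': 1, 'frame_skip': 4,
                           "atari_preprocessing": True,
                           "state_space_type": "discrete", 'seed': 0})
ob = aew.reset()[0]
total_reward = 0.0
for i in range(200):
    act = aew.action_space.sample()
    next_state, reward, done, trunc, info = aew.step(act)  # new 5-tuple contract
    total_reward += reward
print("total reward over 200 steps:", total_reward)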

docs/_build/html/_modules/mdp_playground/envs/mujoco_env_wrapper.html (+2 −2)

@@ -438,7 +438,7 @@
 [built-page HTML: "Source code for mdp_playground.envs.mujoco_env_wrapper" header]
-# from gym.envs.mujoco.mujoco_env import MujocoEnv
+# from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
 from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
 from gym.envs.mujoco.pusher import PusherEnv
 from gym.envs.mujoco.reacher import ReacherEnv

@@ -516,7 +516,7 @@

 # from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper  # hack
 #
-# from gym.envs.mujoco.reacher import ReacherEnv
+# from gymnasium.envs.mujoco.reacher import ReacherEnv
 # ReacherWrapperV2 = get_mujoco_wrapper(ReacherEnv)
 # config = {"time_unit": 0.2}
 # rw2 = ReacherWrapperV2(**config)
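Un-commented, that usage would look like the sketch below. Treat it as illustrative: it assumes a working MuJoCo installation and that get_mujoco_wrapper is re-exported from mdp_playground.envs, as in example.py further down.

from mdp_playground.envs import get_mujoco_wrapper
from gymnasium.envs.mujoco.reacher import ReacherEnv

ReacherWrapperV2 = get_mujoco_wrapper(ReacherEnv)
config = {"time_unit": 0.2}  # scales frame_skip and the reward weights, per the wrapper's print above
rw2 = ReacherWrapperV2(**config)
obs = rw2.reset()[0]  # Gymnasium-style reset returns (obs, info)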

docs/_build/html/_modules/mdp_playground/envs/rl_toy_env.html (+1 −1)

@@ -1967,7 +1967,7 @@
        int
            The seed returned by Gym
        """
-        # If seed is None, you get a randomly generated seed from gym.utils...
+        # If seed is None, you get a randomly generated seed from gymnasium.utils...
        self.np_random, self.seed_ = gym.utils.seeding.np_random(seed)  # random
        print("Env SEED set to: " + str(seed) + ". Returned seed from Gym: " + str(self.seed_))
        return self.seed_
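Both wrappers seed through the same helper. A minimal sketch of that call, assuming Gymnasium's gymnasium.utils.seeding.np_random, which returns a numpy Generator together with the seed actually used:

import gymnasium as gym

np_random, seed_ = gym.utils.seeding.np_random(0)  # pass None to get a random seed back
print("Returned seed from Gym:", seed_)
print(np_random.integers(10))  # subsequent draws come from the numpy Generator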

docs/_build/html/_modules/mdp_playground/spaces/test_image_multi_discrete.html (+2 −2)

@@ -442,8 +442,8 @@
 import numpy as np
 from mdp_playground.spaces.image_multi_discrete import ImageMultiDiscrete
 from gym.spaces import Discrete, MultiDiscrete
-# import gym
-# from gym.spaces import MultiDiscrete
+# import gymnasium as gym
+# from gymnasium.spaces import MultiDiscrete
 # # from .space import Space
 # import PIL.ImageDraw as ImageDraw
 # import PIL.Image as Image
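For reference, the two space types imported above behave the same under Gymnasium as under gym; a small sketch:

from gymnasium.spaces import Discrete, MultiDiscrete

d = Discrete(5)               # integer values 0..4
md = MultiDiscrete([3, 4])    # a pair (i, j) with i < 3 and j < 4
print(d.sample(), md.sample())
print(md.contains(md.sample()))  # True: samples always lie in the space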

example.py (+29 −29)

@@ -59,7 +59,7 @@ def discrete_environment_example():
     config["repeats_in_sequences"] = False

     config["generate_random_mdp"] = True
-    env = RLToyEnv(**config)  # Calls env.reset() automatically. So, in general,
+    env = RLToyEnv(**config)  # Calls env.reset()[0] automatically. So, in general,
     # there is no need to call it after this.

     # The environment maintains an augmented state which contains the underlying

@@ -73,7 +73,7 @@ def discrete_environment_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)

     env.close()

@@ -113,7 +113,7 @@ def discrete_environment_image_representations_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.

@@ -159,7 +159,7 @@ def discrete_environment_diameter_image_representations_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.

@@ -192,14 +192,14 @@ def continuous_environment_example_move_to_a_point():
     config["reward_function"] = "move_to_a_point"

     env = RLToyEnv(**config)
-    state = env.reset().copy()
+    state = env.reset()[0].copy()

     print(
         "Taking a step in the environment with a random action and printing "
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)

     env.close()

@@ -231,7 +231,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
     config["relevant_indices"] = [0, 1]

     env = RLToyEnv(**config)
-    state = env.reset()
+    state = env.reset()[0]
     augmented_state_dict = env.get_augmented_state()
     state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
     # the current continuous state.

@@ -241,7 +241,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
     # the current continuous state.

@@ -274,14 +274,14 @@ def continuous_environment_example_move_along_a_line():
     config["reward_function"] = "move_along_a_line"

     env = RLToyEnv(**config)
-    state = env.reset().copy()
+    state = env.reset()[0].copy()

     print(
         "Taking a step in the environment with a random action and printing "
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)

     env.close()

@@ -305,12 +305,12 @@ def grid_environment_example():

     for i in range(len(actions)):
         action = actions[i]
-        next_obs, reward, done, info = env.step(action)
+        next_obs, reward, done, trunc, info = env.step(action)
         next_state = env.get_augmented_state()["augmented_state"][-1]
         print("sars', done =", state, action, reward, next_state, done)
         state = next_state

-    env.reset()
+    env.reset()[0]
     env.close()

@@ -334,12 +334,12 @@ def grid_environment_image_representations_example():

     for i in range(len(actions)):
         action = actions[i]
-        next_obs, reward, done, info = env.step(action)
+        next_obs, reward, done, trunc, info = env.step(action)
         next_state = env.get_augmented_state()["augmented_state"][-1]
         print("sars', done =", state, action, reward, next_state, done)
         state = next_state

-    env.reset()
+    env.reset()[0]
     env.close()

     display_image(next_obs)

@@ -356,18 +356,18 @@ def atari_wrapper_example():
     }

     from mdp_playground.envs import GymEnvWrapper
-    import gym
+    import gymnasium as gym

     ae = gym.make("QbertNoFrameskip-v4")
     env = GymEnvWrapper(ae, **config)
-    state = env.reset()
+    state = env.reset()[0]

     print(
         "Taking 10 steps in the environment with a random action and printing the transition:"
     )
     for i in range(10):
         action = env.action_space.sample()
-        next_state, reward, done, info = env.step(action)
+        next_state, reward, done, trunc, info = env.step(action)
         print(
             "s.shape a r s'.shape, done =",
             state.shape,

@@ -403,18 +403,18 @@ def mujoco_wrapper_example():
     # of the Mujoco base_class.
     try:
         from mdp_playground.envs import get_mujoco_wrapper
-        from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+        from gymnasium.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv

         wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)

         env = wrapped_mujoco_env(**config)
-        state = env.reset()
+        state = env.reset()[0]

         print(
             "Taking a step in the environment with a random action and printing the transition:"
         )
         action = env.action_space.sample()
-        next_state, reward, done, info = env.step(action)
+        next_state, reward, done, trunc, info = env.step(action)
         print("sars', done =", state, action, reward, next_state, done)

         env.close()

@@ -440,22 +440,22 @@ def minigrid_wrapper_example():
     }

     from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-    import gym
+    import gymnasium as gym

-    from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper
+    from minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper

     env = gym.make("MiniGrid-Empty-8x8-v0")
     env = RGBImgPartialObsWrapper(env)  # Get pixel observations
     env = ImgObsWrapper(env)  # Get rid of the 'mission' field

     env = GymEnvWrapper(env, **config)
-    obs = env.reset()  # This now produces an RGB tensor only
+    obs = env.reset()[0]  # This now produces an RGB tensor only

     print(
         "Taking a step in the environment with a random action and printing the transition:"
     )
     action = env.action_space.sample()
-    next_obs, reward, done, info = env.step(action)
+    next_obs, reward, done, trunc, info = env.step(action)
     print(
         "s.shape ar s'.shape, done =",
         obs.shape,

@@ -481,17 +481,17 @@ def procgen_wrapper_example():
     }

     from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-    import gym
+    import gymnasium as gym

     env = gym.make("procgen:procgen-coinrun-v0")
     env = GymEnvWrapper(env, **config)
-    obs = env.reset()
+    obs = env.reset()[0]

     print(
         "Taking a step in the environment with a random action and printing the transition:"
     )
     action = env.action_space.sample()
-    next_obs, reward, done, info = env.step(action)
+    next_obs, reward, done, trunc, info = env.step(action)
     print(
         "s.shape ar s'.shape, done =",
         obs.shape,

@@ -577,7 +577,7 @@

 # Using gym.make() example 1
 import mdp_playground
-import gym
+import gymnasium as gym

 gym.make("RLToy-v0")

@@ -591,6 +591,6 @@
         "maximally_connected": True,
     }
 )
-env.reset()
+env.reset()[0]
 for i in range(10):
     print(env.step(env.action_space.sample()))

mdp_playground/__init__.py (+2 −2)

@@ -1,4 +1,4 @@
-from gym.envs.registration import register
+from gymnasium.envs.registration import register

 register(
     id="RLToy-v0",

@@ -11,4 +11,4 @@
     max_episode_steps=100,
 )

-__version__ = "0.0.2"
+__version__ = "1.0.0"
