88 one for grid environments with image representations
99 one for wrapping Atari env qbert
1010 one for wrapping Mujoco env HalfCheetah
11- one for wrapping Minigrid env
11+ one for wrapping MiniGrid env
12+ one for wrapping ProcGen env
1213 two examples at the end showing how to create toy envs using gym.make()
1314
1415Many further examples can be found in test_mdp_playground.py.
2223import numpy as np
2324
2425
def display_image(obs, mode="RGB"):
    """Display an image observation using Pillow's ``Image.show()``.

    Parameters
    ----------
    obs : numpy.ndarray
        3-D image observation. Its first two axes are swapped before the
        image is built (see the comment on the transpose below).
    mode : str
        Image mode forwarded to ``PIL.Image.fromarray``. The callers in this
        file pass "L" for black and white images and rely on the default
        "RGB" otherwise.
    """
    # Imported inside the function, so Pillow is only required when an image
    # is actually displayed.
    from PIL import Image

    # Because numpy is row-major and Image is column major, need to transpose
    pixels = obs.transpose(1, 0, 2)

    # The image is 3-D because frameworks like Ray expect the image to be 3-D.
    # Only a singleton *channel* axis is dropped: a bare np.squeeze() would
    # also collapse a height or width of 1 and corrupt the image's shape.
    if pixels.shape[2] == 1:
        pixels = np.squeeze(pixels, axis=2)

    Image.fromarray(pixels, mode).show()
36+
2537def discrete_environment_example ():
2638
2739 config = {}
@@ -101,18 +113,10 @@ def discrete_environment_image_representations_example():
101113 # the current discrete state.
102114 print ("sars', done =" , state , action , reward , next_state , done )
103115
104- # Display the image observation associated with the next state
105- from PIL import Image
106-
107- # Because numpy is row-major and Image is column major, need to transpose
108- next_state_image = next_state_image .transpose (1 , 0 , 2 )
109- img1 = Image .fromarray (np .squeeze (next_state_image ), "L" ) # 'L' is used for
110- # black and white. squeeze() is used because the image is 3-D because
111- # frameworks like Ray expect the image to be 3-D.
112- img1 .show ()
113-
114116 env .close ()
115117
118+ display_image (next_state_image , mode = "L" )
119+
116120
117121def continuous_environment_example_move_along_a_line ():
118122
@@ -236,15 +240,8 @@ def grid_environment_image_representations_example():
236240 env .reset ()
237241 env .close ()
238242
239- # Display the image observation associated with the next state
240- from PIL import Image
243+ display_image (next_obs )
241244
242- # Because numpy is row-major and Image is column major, need to transpose
243- next_obs = next_obs .transpose (1 , 0 , 2 )
244- img1 = Image .fromarray (np .squeeze (next_obs ), "RGB" ) # squeeze() is
245- # used because the image is 3-D because frameworks like Ray expect the image
246- # to be 3-D.
247- img1 .show ()
248245
249246
250247def atari_wrapper_example ():
@@ -265,21 +262,24 @@ def atari_wrapper_example():
265262 state = env .reset ()
266263
267264 print (
268- "Taking a step in the environment with a random action and printing the transition:"
269- )
270- action = env .action_space .sample ()
271- next_state , reward , done , info = env .step (action )
272- print (
273- "s.shape ar s'.shape, done =" ,
274- state .shape ,
275- action ,
276- reward ,
277- next_state .shape ,
278- done ,
265+ "Taking 10 steps in the environment with a random action and printing the transition:"
279266 )
267+ for i in range (10 ):
268+ action = env .action_space .sample ()
269+ next_state , reward , done , info = env .step (action )
270+ print (
271+ "s.shape ar s'.shape, done =" ,
272+ state .shape ,
273+ action ,
274+ reward ,
275+ next_state .shape ,
276+ done ,
277+ )
280278
281279 env .close ()
282280
281+ display_image (next_state )
282+
283283
284284def mujoco_wrapper_example ():
285285
@@ -302,23 +302,23 @@ def mujoco_wrapper_example():
302302 try :
303303 from mdp_playground .envs import get_mujoco_wrapper
304304 from gym .envs .mujoco .half_cheetah_v3 import HalfCheetahEnv
305- except Exception as e :
306- print ("Exception:" , e , "caught. You may need to install mujoco-py. NOT running mujoco_wrapper_example." )
307- return
305+ wrapped_mujoco_env = get_mujoco_wrapper (HalfCheetahEnv )
308306
309- wrapped_mujoco_env = get_mujoco_wrapper (HalfCheetahEnv )
307+ env = wrapped_mujoco_env (** config )
308+ state = env .reset ()
310309
311- env = wrapped_mujoco_env (** config )
312- state = env .reset ()
310+ print (
311+ "Taking a step in the environment with a random action and printing the transition:"
312+ )
313+ action = env .action_space .sample ()
314+ next_state , reward , done , info = env .step (action )
315+ print ("sars', done =" , state , action , reward , next_state , done )
313316
314- print (
315- "Taking a step in the environment with a random action and printing the transition:"
316- )
317- action = env .action_space .sample ()
318- next_state , reward , done , info = env .step (action )
319- print ("sars', done =" , state , action , reward , next_state , done )
317+ env .close ()
320318
321- env .close ()
319+ except ImportError as e :
320+ print ("Exception:" , type (e ), e , "caught. You may need to install mujoco-py. NOT running mujoco_wrapper_example." )
321+ return
322322
323323
324324def minigrid_wrapper_example ():
@@ -358,6 +358,44 @@ def minigrid_wrapper_example():
358358
359359 env .close ()
360360
361+ display_image (next_obs )
362+
363+
def procgen_wrapper_example():
    """Wrap the ProcGen CoinRun env in GymEnvWrapper and take one random step.

    Prints the observation shapes, action, reward and done flag of the
    transition, closes the env, and then displays the resulting observation
    as an image.
    """
    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
    import gym

    # Settings forwarded as keyword arguments to GymEnvWrapper.
    wrapper_kwargs = dict(
        seed=0,
        delay=1,
        transition_noise=0.25,
        reward_noise=lambda a: a.normal(0, 0.1),
        state_space_type="discrete",
    )

    base_env = gym.make("procgen:procgen-coinrun-v0")
    env = GymEnvWrapper(base_env, **wrapper_kwargs)
    obs = env.reset()

    print(
        "Taking a step in the environment with a random action and printing the transition:"
    )
    action = env.action_space.sample()
    next_obs, reward, done, info = env.step(action)
    transition = (obs.shape, action, reward, next_obs.shape, done)
    print("s.shape ar s'.shape, done =", *transition)

    env.close()

    display_image(next_obs)
398+
361399
362400if __name__ == "__main__" :
363401
@@ -404,6 +442,9 @@ def minigrid_wrapper_example():
404442 print (set_ansi_escape + "\n Running MiniGrid wrapper example:\n " + reset_ansi_escape )
405443 minigrid_wrapper_example ()
406444
445+ # print(set_ansi_escape + "\nRunning ProcGen wrapper example:\n" + reset_ansi_escape)
446+ # procgen_wrapper_example()
447+
407448 # Using gym.make() example 1
408449 import mdp_playground
409450 import gym
0 commit comments