8
8
one for grid environments with image representations
9
9
one for wrapping Atari env qbert
10
10
one for wrapping Mujoco env HalfCheetah
11
- one for wrapping Minigrid env
11
+ one for wrapping MiniGrid env
12
+ one for wrapping ProcGen env
12
13
two examples at the end showing how to create toy envs using gym.make()
13
14
14
15
Many further examples can be found in test_mdp_playground.py.
22
23
import numpy as np
23
24
24
25
26
def display_image(obs, mode="RGB"):
    """Display an image observation in an external image viewer.

    Args:
        obs: Array-like image, either 2-D or 3-D with the channel axis last.
            A trailing channel axis of length 1 is dropped before the image
            is built.
        mode: PIL mode string handed to ``Image.fromarray`` (default
            ``"RGB"``; ``"L"`` is what callers in this file use for
            black-and-white images).
    """
    # Imported here, as in the original code, so PIL is only required when an
    # image is actually displayed.
    from PIL import Image

    # Because numpy is row-major and Image is column major, need to transpose
    # the first two axes. swapaxes(0, 1) is identical to transpose(1, 0, 2)
    # for 3-D input and additionally accepts 2-D input.
    obs = np.swapaxes(obs, 0, 1)

    # The image is 3-D because frameworks like Ray expect the image to be
    # 3-D. Drop only a singleton channel axis: a bare squeeze() would also
    # collapse a spatial axis of length 1 and corrupt the image shape.
    if obs.ndim == 3 and obs.shape[2] == 1:
        obs = obs[:, :, 0]

    img = Image.fromarray(obs, mode)
    img.show()
36
+
25
37
def discrete_environment_example ():
26
38
27
39
config = {}
@@ -101,18 +113,10 @@ def discrete_environment_image_representations_example():
101
113
# the current discrete state.
102
114
print ("sars', done =" , state , action , reward , next_state , done )
103
115
104
- # Display the image observation associated with the next state
105
- from PIL import Image
106
-
107
- # Because numpy is row-major and Image is column major, need to transpose
108
- next_state_image = next_state_image .transpose (1 , 0 , 2 )
109
- img1 = Image .fromarray (np .squeeze (next_state_image ), "L" ) # 'L' is used for
110
- # black and white. squeeze() is used because the image is 3-D because
111
- # frameworks like Ray expect the image to be 3-D.
112
- img1 .show ()
113
-
114
116
env .close ()
115
117
118
+ display_image (next_state_image , mode = "L" )
119
+
116
120
117
121
def continuous_environment_example_move_along_a_line ():
118
122
@@ -236,15 +240,8 @@ def grid_environment_image_representations_example():
236
240
env .reset ()
237
241
env .close ()
238
242
239
- # Display the image observation associated with the next state
240
- from PIL import Image
243
+ display_image (next_obs )
241
244
242
- # Because numpy is row-major and Image is column major, need to transpose
243
- next_obs = next_obs .transpose (1 , 0 , 2 )
244
- img1 = Image .fromarray (np .squeeze (next_obs ), "RGB" ) # squeeze() is
245
- # used because the image is 3-D because frameworks like Ray expect the image
246
- # to be 3-D.
247
- img1 .show ()
248
245
249
246
250
247
def atari_wrapper_example ():
@@ -265,21 +262,24 @@ def atari_wrapper_example():
265
262
state = env .reset ()
266
263
267
264
print (
268
- "Taking a step in the environment with a random action and printing the transition:"
269
- )
270
- action = env .action_space .sample ()
271
- next_state , reward , done , info = env .step (action )
272
- print (
273
- "s.shape ar s'.shape, done =" ,
274
- state .shape ,
275
- action ,
276
- reward ,
277
- next_state .shape ,
278
- done ,
265
+ "Taking 10 steps in the environment with a random action and printing the transition:"
279
266
)
267
+ for i in range (10 ):
268
+ action = env .action_space .sample ()
269
+ next_state , reward , done , info = env .step (action )
270
+ print (
271
+ "s.shape ar s'.shape, done =" ,
272
+ state .shape ,
273
+ action ,
274
+ reward ,
275
+ next_state .shape ,
276
+ done ,
277
+ )
280
278
281
279
env .close ()
282
280
281
+ display_image (next_state )
282
+
283
283
284
284
def mujoco_wrapper_example ():
285
285
@@ -302,23 +302,23 @@ def mujoco_wrapper_example():
302
302
try :
303
303
from mdp_playground .envs import get_mujoco_wrapper
304
304
from gym .envs .mujoco .half_cheetah_v3 import HalfCheetahEnv
305
- except Exception as e :
306
- print ("Exception:" , e , "caught. You may need to install mujoco-py. NOT running mujoco_wrapper_example." )
307
- return
305
+ wrapped_mujoco_env = get_mujoco_wrapper (HalfCheetahEnv )
308
306
309
- wrapped_mujoco_env = get_mujoco_wrapper (HalfCheetahEnv )
307
+ env = wrapped_mujoco_env (** config )
308
+ state = env .reset ()
310
309
311
- env = wrapped_mujoco_env (** config )
312
- state = env .reset ()
310
+ print (
311
+ "Taking a step in the environment with a random action and printing the transition:"
312
+ )
313
+ action = env .action_space .sample ()
314
+ next_state , reward , done , info = env .step (action )
315
+ print ("sars', done =" , state , action , reward , next_state , done )
313
316
314
- print (
315
- "Taking a step in the environment with a random action and printing the transition:"
316
- )
317
- action = env .action_space .sample ()
318
- next_state , reward , done , info = env .step (action )
319
- print ("sars', done =" , state , action , reward , next_state , done )
317
+ env .close ()
320
318
321
- env .close ()
319
+ except ImportError as e :
320
+ print ("Exception:" , type (e ), e , "caught. You may need to install mujoco-py. NOT running mujoco_wrapper_example." )
321
+ return
322
322
323
323
324
324
def minigrid_wrapper_example ():
@@ -358,6 +358,44 @@ def minigrid_wrapper_example():
358
358
359
359
env .close ()
360
360
361
+ display_image (next_obs )
362
+
363
+
364
def procgen_wrapper_example():
    """Wrap the ProcGen CoinRun env, take one random step and show the result.

    Creates ``procgen:procgen-coinrun-v0`` through ``gym.make()``, wraps it
    in ``GymEnvWrapper`` with the config below, prints the shapes of the
    observations around a single randomly sampled action and finally
    displays the next observation via ``display_image``.
    """
    config = {
        "seed": 0,
        "delay": 1,
        "transition_noise": 0.25,
        # Called with one argument that must offer .normal(); presumably a
        # numpy random generator supplied by the wrapper -- TODO confirm.
        "reward_noise": lambda a: a.normal(0, 0.1),
        "state_space_type": "discrete",
    }

    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
    import gym

    env = gym.make("procgen:procgen-coinrun-v0")
    # try/finally guarantees the environment is closed even if wrapping,
    # reset() or step() raises; close() applies to whichever env object is
    # currently bound (raw env or wrapper).
    try:
        env = GymEnvWrapper(env, **config)
        obs = env.reset()

        print(
            "Taking a step in the environment with a random action and printing the transition:"
        )
        action = env.action_space.sample()
        next_obs, reward, done, info = env.step(action)
        print(
            "s.shape ar s'.shape, done =",
            obs.shape,
            action,
            reward,
            next_obs.shape,
            done,
        )
    finally:
        env.close()

    # Shown after the env has been closed, as in the other wrapper examples.
    display_image(next_obs)
398
+
361
399
362
400
if __name__ == "__main__" :
363
401
@@ -404,6 +442,9 @@ def minigrid_wrapper_example():
404
442
print (set_ansi_escape + "\n Running MiniGrid wrapper example:\n " + reset_ansi_escape )
405
443
minigrid_wrapper_example ()
406
444
445
+ # print(set_ansi_escape + "\nRunning ProcGen wrapper example:\n" + reset_ansi_escape)
446
+ # procgen_wrapper_example()
447
+
407
448
# Using gym.make() example 1
408
449
import mdp_playground
409
450
import gym
0 commit comments