@@ -59,7 +59,7 @@ def discrete_environment_example():
     config["repeats_in_sequences"] = False
 
     config["generate_random_mdp"] = True
-    env = RLToyEnv(**config)  # Calls env.reset() automatically. So, in general,
+    env = RLToyEnv(**config)  # Calls env.reset()[0] automatically. So, in general,
     # there is no need to call it after this.
 
     # The environment maintains an augmented state which contains the underlying
@@ -73,7 +73,7 @@ def discrete_environment_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)
 
     env.close()
@@ -113,7 +113,7 @@ def discrete_environment_image_representations_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.
@@ -159,7 +159,7 @@ def discrete_environment_diameter_image_representations_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.
@@ -192,14 +192,14 @@ def continuous_environment_example_move_to_a_point():
     config["reward_function"] = "move_to_a_point"
 
     env = RLToyEnv(**config)
-    state = env.reset().copy()
+    state = env.reset()[0].copy()
 
     print(
         "Taking a step in the environment with a random action and printing "
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)
 
     env.close()
@@ -231,7 +231,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
     config["relevant_indices"] = [0, 1]
 
     env = RLToyEnv(**config)
-    state = env.reset()
+    state = env.reset()[0]
     augmented_state_dict = env.get_augmented_state()
     state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
     # the current continuous state.
@@ -241,7 +241,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
     # the current continuous state.
@@ -274,14 +274,14 @@ def continuous_environment_example_move_along_a_line():
     config["reward_function"] = "move_along_a_line"
 
     env = RLToyEnv(**config)
-    state = env.reset().copy()
+    state = env.reset()[0].copy()
 
     print(
         "Taking a step in the environment with a random action and printing "
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)
 
     env.close()
@@ -305,12 +305,12 @@ def grid_environment_example():
 
     for i in range(len(actions)):
         action = actions[i]
-        next_obs, reward, done, info = env.step(action)
+        next_obs, reward, done, trunc, info = env.step(action)
         next_state = env.get_augmented_state()["augmented_state"][-1]
         print("sars', done =", state, action, reward, next_state, done)
         state = next_state
 
-    env.reset()
+    env.reset()[0]
     env.close()
 
 
@@ -334,12 +334,12 @@ def grid_environment_image_representations_example():
 
     for i in range(len(actions)):
         action = actions[i]
-        next_obs, reward, done, info = env.step(action)
+        next_obs, reward, done, trunc, info = env.step(action)
         next_state = env.get_augmented_state()["augmented_state"][-1]
         print("sars', done =", state, action, reward, next_state, done)
         state = next_state
 
-    env.reset()
+    env.reset()[0]
     env.close()
 
     display_image(next_obs)
@@ -356,18 +356,18 @@ def atari_wrapper_example():
     }
 
     from mdp_playground.envs import GymEnvWrapper
-    import gym
+    import gymnasium as gym
 
     ae = gym.make("QbertNoFrameskip-v4")
     env = GymEnvWrapper(ae, **config)
-    state = env.reset()
+    state = env.reset()[0]
 
     print(
         "Taking 10 steps in the environment with a random action and printing the transition:"
     )
     for i in range(10):
         action = env.action_space.sample()
-        next_state, reward, done, info = env.step(action)
+        next_state, reward, done, trunc, info = env.step(action)
         print(
             "s.shape a r s'.shape, done =",
             state.shape,
@@ -403,18 +403,18 @@ def mujoco_wrapper_example():
     # of the Mujoco base_class.
     try:
         from mdp_playground.envs import get_mujoco_wrapper
-        from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+        from gymnasium.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
 
         wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)
 
         env = wrapped_mujoco_env(**config)
-        state = env.reset()
+        state = env.reset()[0]
 
         print(
             "Taking a step in the environment with a random action and printing the transition:"
         )
         action = env.action_space.sample()
-        next_state, reward, done, info = env.step(action)
+        next_state, reward, done, trunc, info = env.step(action)
         print("sars', done =", state, action, reward, next_state, done)
 
         env.close()
@@ -440,22 +440,22 @@ def minigrid_wrapper_example():
     }
 
     from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-    import gym
+    import gymnasium as gym
 
-    from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper
+    from minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper
 
     env = gym.make("MiniGrid-Empty-8x8-v0")
     env = RGBImgPartialObsWrapper(env)  # Get pixel observations
     env = ImgObsWrapper(env)  # Get rid of the 'mission' field
 
     env = GymEnvWrapper(env, **config)
-    obs = env.reset()  # This now produces an RGB tensor only
+    obs = env.reset()[0]  # This now produces an RGB tensor only
 
     print(
         "Taking a step in the environment with a random action and printing the transition:"
     )
     action = env.action_space.sample()
-    next_obs, reward, done, info = env.step(action)
+    next_obs, reward, done, trunc, info = env.step(action)
     print(
         "s.shape ar s'.shape, done =",
         obs.shape,
@@ -481,17 +481,17 @@ def procgen_wrapper_example():
     }
 
     from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-    import gym
+    import gymnasium as gym
 
     env = gym.make("procgen:procgen-coinrun-v0")
    env = GymEnvWrapper(env, **config)
-    obs = env.reset()
+    obs = env.reset()[0]
 
     print(
         "Taking a step in the environment with a random action and printing the transition:"
     )
     action = env.action_space.sample()
-    next_obs, reward, done, info = env.step(action)
+    next_obs, reward, done, trunc, info = env.step(action)
     print(
         "s.shape ar s'.shape, done =",
         obs.shape,
@@ -577,7 +577,7 @@ def procgen_wrapper_example():
 
     # Using gym.make() example 1
     import mdp_playground
-    import gym
+    import gymnasium as gym
 
     gym.make("RLToy-v0")
 
@@ -591,6 +591,6 @@ def procgen_wrapper_example():
             "maximally_connected": True,
         }
     )
-    env.reset()
+    env.reset()[0]
     for i in range(10):
         print(env.step(env.action_space.sample()))
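All of the hunks above apply the same old-Gym to Gymnasium API migration: `reset()` now returns an `(observation, info)` tuple, so the observation is taken with `[0]`, and `step()` returns five values, with the old single `done` flag split into `terminated` and `truncated` (named `done` and `trunc` in this diff). A minimal sketch of the pattern, assuming `gymnasium` is installed; the `"CartPole-v1"` environment id is used purely for illustration and is not part of this repository:

```python
# Sketch of the old-Gym -> Gymnasium API change applied throughout this diff.
import gymnasium as gym

env = gym.make("CartPole-v1")

# Old Gym: obs = env.reset()
# Gymnasium: reset() returns (observation, info), so index [0] yields the observation.
obs, info = env.reset()

action = env.action_space.sample()

# Old Gym: obs, reward, done, info = env.step(action)
# Gymnasium: step() returns five values; `done` is split into `terminated` and `truncated`.
obs, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated  # recover the old single `done` flag if needed

env.close()
```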