Improved example.py by adding MiniGrid and ProcGen examples

RaghuSpaceRajan · RaghuSpaceRajan · commit adae364b9fd9 · 2021-09-24T20:18:23.000+02:00
diff --git a/example.py b/example.py
@@ -8,7 +8,8 @@
     one for grid environments with image representations
     one for wrapping Atari env qbert
     one for wrapping Mujoco env HalfCheetah
-    one for wrapping Minigrid env
+    one for wrapping MiniGrid env
+    one for wrapping ProcGen env
     two examples at the end showing how to create toy envs using gym.make()
 
 Many further examples can be found in test_mdp_playground.py.
@@ -22,6 +23,17 @@
 import numpy as np
 
 
+def display_image(obs, mode="RGB"):
+    # Display the image observation `obs` with PIL; `mode` is the PIL image mode.
+    from PIL import Image
+
+    # Because numpy is row-major and Image is column-major, swap the first two axes
+    obs = obs.transpose(1, 0, 2)
+    img1 = Image.fromarray(np.squeeze(obs), mode)  # squeeze() removes
+    # length-1 axes; the observation is 3-D because frameworks like Ray expect
+    # images to be 3-D.
+    img1.show()
+
 def discrete_environment_example():
 
     config = {}
@@ -101,18 +113,10 @@ def discrete_environment_image_representations_example():
     # the current discrete state.
     print("sars', done =", state, action, reward, next_state, done)
 
-    # Display the image observation associated with the next state
-    from PIL import Image
-
-    # Because numpy is row-major and Image is column major, need to transpose
-    next_state_image = next_state_image.transpose(1, 0, 2)
-    img1 = Image.fromarray(np.squeeze(next_state_image), "L")  # 'L' is used for
-    # black and white. squeeze() is used because the image is 3-D because
-    # frameworks like Ray expect the image to be 3-D.
-    img1.show()
-
     env.close()
 
+    display_image(next_state_image, mode="L")
+
 
 def continuous_environment_example_move_along_a_line():
 
@@ -236,15 +240,8 @@ def grid_environment_image_representations_example():
     env.reset()
     env.close()
 
-    # Display the image observation associated with the next state
-    from PIL import Image
+    display_image(next_obs)
 
-    # Because numpy is row-major and Image is column major, need to transpose
-    next_obs = next_obs.transpose(1, 0, 2)
-    img1 = Image.fromarray(np.squeeze(next_obs), "RGB")  # squeeze() is
-    # used because the image is 3-D because frameworks like Ray expect the image
-    # to be 3-D.
-    img1.show()
 
 
 def atari_wrapper_example():
@@ -265,21 +262,24 @@ def atari_wrapper_example():
     state = env.reset()
 
     print(
-        "Taking a step in the environment with a random action and printing the transition:"
-    )
-    action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
-    print(
-        "s.shape ar s'.shape, done =",
-        state.shape,
-        action,
-        reward,
-        next_state.shape,
-        done,
+        "Taking 10 steps in the environment with a random action and printing the transition:"
     )
+    for i in range(10):
+        action = env.action_space.sample()
+        next_state, reward, done, info = env.step(action)
+        print(
+            "s.shape ar s'.shape, done =",
+            state.shape,
+            action,
+            reward,
+            next_state.shape,
+            done,
+        )
 
     env.close()
 
+    display_image(next_state)
+
 
 def mujoco_wrapper_example():
 
@@ -302,23 +302,23 @@ def mujoco_wrapper_example():
     try:
         from mdp_playground.envs import get_mujoco_wrapper
         from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
-    except Exception as e:
-        print("Exception:", e, "caught. You may need to install mujoco-py. NOT running mujoco_wrapper_example.")
-        return
+        wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)
 
-    wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)
+        env = wrapped_mujoco_env(**config)
+        state = env.reset()
 
-    env = wrapped_mujoco_env(**config)
-    state = env.reset()
+        print(
+            "Taking a step in the environment with a random action and printing the transition:"
+        )
+        action = env.action_space.sample()
+        next_state, reward, done, info = env.step(action)
+        print("sars', done =", state, action, reward, next_state, done)
 
-    print(
-        "Taking a step in the environment with a random action and printing the transition:"
-    )
-    action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
-    print("sars', done =", state, action, reward, next_state, done)
+        env.close()
 
-    env.close()
+    except ImportError as e:
+        print("Exception:", type(e), e, "caught. You may need to install mujoco-py. NOT running mujoco_wrapper_example.")
+        return
 
 
 def minigrid_wrapper_example():
@@ -358,6 +358,44 @@ def minigrid_wrapper_example():
 
     env.close()
 
+    display_image(next_obs)
+
+
+def procgen_wrapper_example():
+    # Example: wrap the ProcGen coinrun env in GymEnvWrapper and take one random step.
+    config = {  # keyword options forwarded to GymEnvWrapper below
+        "seed": 0,
+        "delay": 1,
+        "transition_noise": 0.25,
+        "reward_noise": lambda a: a.normal(0, 0.1),  # presumably called with an RNG exposing .normal() - confirm in GymEnvWrapper
+        "state_space_type": "discrete",
+    }
+
+    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
+    import gym
+
+    env = gym.make("procgen:procgen-coinrun-v0")  # NOTE(review): the "procgen:" prefix presumably makes gym import procgen first - confirm
+    env = GymEnvWrapper(env, **config)
+    obs = env.reset()
+
+    print(
+        "Taking a step in the environment with a random action and printing the transition:"
+    )
+    action = env.action_space.sample()
+    next_obs, reward, done, info = env.step(action)
+    print(
+        "s.shape ar s'.shape, done =",
+        obs.shape,
+        action,
+        reward,
+        next_obs.shape,
+        done,
+    )
+
+    env.close()
+
+    display_image(next_obs)  # uses display_image's default mode="RGB"
+
 
 if __name__ == "__main__":
 
@@ -404,6 +442,9 @@ def minigrid_wrapper_example():
     print(set_ansi_escape + "\nRunning MiniGrid wrapper example:\n" + reset_ansi_escape)
     minigrid_wrapper_example()
 
+    # print(set_ansi_escape + "\nRunning ProcGen wrapper example:\n" + reset_ansi_escape)
+    # procgen_wrapper_example()
+
     # Using gym.make() example 1
     import mdp_playground
     import gym
diff --git a/mdp_playground/envs/__init__.py b/mdp_playground/envs/__init__.py
@@ -1,7 +1,8 @@
 from mdp_playground.envs.rl_toy_env import RLToyEnv
+from gym import error
 
 try:
     from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
     from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper
-except Exception as e:
-    print("Exception:", e, "caught. You may need to install Ray or mujoco-py.")
+except error.DependencyNotInstalled as e:
+    print("Exception:", type(e), e, "caught. You may need to install Ray or mujoco-py.")
diff --git a/mdp_playground/envs/gym_env_wrapper.py b/mdp_playground/envs/gym_env_wrapper.py
@@ -4,7 +4,6 @@
 import sys
 from gym.spaces import Box, Tuple
 from gym.wrappers import AtariPreprocessing
-from ray.rllib.env.atari_wrappers import wrap_deepmind, is_atari
 from mdp_playground.envs.rl_toy_env import RLToyEnv
 import warnings
 import PIL.ImageDraw as ImageDraw
@@ -151,6 +150,7 @@ def __init__(self, env, **config):
         if (
             "wrap_deepmind_ray" in config and config["wrap_deepmind_ray"]
         ):  # hack ##TODO remove?
+            from ray.rllib.env.atari_wrappers import wrap_deepmind, is_atari
             self.env = wrap_deepmind(self.env, dim=42, framestack=True)
         elif "atari_preprocessing" in config and config["atari_preprocessing"]:
             self.frame_skip = 4  # default for AtariPreprocessing
diff --git a/setup.py b/setup.py
@@ -12,10 +12,9 @@
 package_data = {"": ["*"]}
 
 extras_require = [
-    "ray[default,rllib,debug]==1.3.0",
+    "ray[default,rllib]==1.3.0",
     "tensorflow==2.2.0",
     "pillow>=6.1.0",
-    "pandas==0.25.0",
     "requests==2.22.0",
     "configspace==0.4.10",
     "scipy>=1.3.0",
@@ -27,7 +26,6 @@
     "ray[rllib,debug]==0.7.3",
     "tensorflow==1.13.0rc1",
     "pillow>=6.1.0",
-    "pandas==0.25.0",
     "requests==2.22.0",
     "configspace==0.4.10",
     "scipy==1.3.0",
@@ -42,7 +40,6 @@
     # 'ray[rllib,debug]==0.9.0',
     "tensorflow==2.2.0",
     "tensorflow-probability==0.9.0",
-    "pandas==0.25.0",
     "requests==2.22.0",
     "mujoco-py==2.0.2.13",  # with mujoco 2.0
     "configspace>=0.4.10",