Merge branch 'new_expts' of git+ssh://github.com/automl/mdp-playground into new_expts

RaghuSpaceRajan · RaghuSpaceRajan · commit e029d8a0937b · 2021-09-24T17:10:54.000+02:00
diff --git a/codecov.yml b/codecov.yml
@@ -0,0 +1,11 @@
+coverage:
+  range: 68..100
+  round: down
+  precision: 2
+  status:
+    project:
+      default:
+        # basic
+        target: 68%
+        threshold: 5%
+        base: auto 
diff --git a/example.py b/example.py
@@ -257,7 +257,7 @@ def atari_wrapper_example():
         "state_space_type": "discrete",
     }
 
-    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
+    from mdp_playground.envs import GymEnvWrapper
     import gym
 
     ae = gym.make("QbertNoFrameskip-v4")
@@ -299,8 +299,12 @@ def mujoco_wrapper_example():
     # This actually makes a subclass and not a wrapper. Because, some
     # frameworks might need an instance of this class to also be an instance
     # of the Mujoco base_class.
-    from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper
-    from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+    try:
+        from mdp_playground.envs import get_mujoco_wrapper
+        from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+    except Exception as e:
+        print("Exception:", e, "caught. You may need to install mujoco-py. NOT running mujoco_wrapper_example.")
+        return
 
     wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)
 
@@ -413,7 +417,6 @@ def minigrid_wrapper_example():
             "action_space_size": 8,
             "state_space_type": "discrete",
             "action_space_type": "discrete",
-            "terminal_state_density": 0.25,
             "maximally_connected": True,
         }
     )
diff --git a/mdp_playground/envs/__init__.py b/mdp_playground/envs/__init__.py
@@ -1 +1,7 @@
 from mdp_playground.envs.rl_toy_env import RLToyEnv
+
+try:
+    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
+    from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper
+except Exception as e:
+    print("Exception:", e, "caught. You may need to install Ray or mujoco-py.")
diff --git a/mdp_playground/envs/rl_toy_env.py b/mdp_playground/envs/rl_toy_env.py
@@ -53,7 +53,7 @@ class RLToyEnv(gym.Env):
         diameter : int > 0
             For discrete environments, if diameter = d, the set of states is set to be a d-partite graph (and NOT a complete d-partite graph), where, if we order the d sets as 1, 2, .., d, states from set 1 will have actions leading to states in set 2 and so on, with the final set d having actions leading to states in set 1. Number of actions for each state will, thus, be = (number of states) / (d). Default value: 1 for discrete environments. For continuous environments, this dimension is set automatically based on the state_space_max value.
         terminal_state_density : float in range [0, 1]
-            For discrete environments, the fraction of states that are terminal; the terminal states are fixed to the "last" states when we consider them to be ordered by their numerical value. This is w.l.o.g. because discrete states are categorical. For continuous environments, please see terminal_states and term_state_edge for how to control terminal states.
+            For discrete environments, the fraction of states that are terminal; the terminal states are fixed to the "last" states when we consider them to be ordered by their numerical value. This is w.l.o.g. because discrete states are categorical. For continuous environments, please see terminal_states and term_state_edge for how to control terminal states. Default value: 0.25.
         term_state_reward : float
             Adds this to the reward if a terminal state was reached at the current time step. Default value: 0.
         image_representations : boolean
@@ -217,6 +217,16 @@ def __init__(self, **config):
 
         print("Passed config:", config, "\n")
 
+        if config == {}:
+            config = {
+                "state_space_size": 8,
+                "action_space_size": 8,
+                "state_space_type": "discrete",
+                "action_space_type": "discrete",
+                "terminal_state_density": 0.25,
+                "maximally_connected": True,
+            }
+
         # Print initial "banner"
         screen_output_width = 132  # #hardcoded #TODO get from system
         repeat_equal_sign = (screen_output_width - 20) // 2
@@ -329,6 +339,11 @@ def __init__(self, **config):
                 # if config["state_space_type"] == "discrete":
                 #     assert "init_state_dist" in config
 
+        if "terminal_state_density" not in config:
+            self.terminal_state_density = 0.25
+        else:
+            self.terminal_state_density = config["terminal_state_density"]
+
         if not self.use_custom_mdp:
             if "generate_random_mdp" not in config:
                 self.generate_random_mdp = True
@@ -786,7 +801,7 @@ def init_terminal_states(self):
         """Initialises terminal state set to be the 'last' states for discrete environments. For continuous environments, terminal states will be in a hypercube centred around config['terminal_states'] with the edge of the hypercube of length config['term_state_edge']."""
         if self.config["state_space_type"] == "discrete":
             if (
-                self.use_custom_mdp and "terminal_state_density" not in self.config
+                self.use_custom_mdp and "terminal_states" in self.config
             ):  # custom/user-defined terminal states
                 self.is_terminal_state = (
                     self.config["terminal_states"]
@@ -796,7 +811,7 @@ def init_terminal_states(self):
             else:
                 # Define the no. of terminal states per independent set of the state space
                 self.num_terminal_states = int(
-                    self.config["terminal_state_density"] * self.action_space_size[0]
+                    self.terminal_state_density * self.action_space_size[0]
                 )  # #hardcoded ####IMP Using action_space_size
                 # since it contains state_space_size // diameter
                 # if self.num_terminal_states == 0: # Have at least 1 terminal state?