Skip to content

Commit 3b7d32e

Browse files
Fixed failing tests. Added run_experiments.py to root dir
1 parent 061b6ec commit 3b7d32e

File tree

3 files changed

+30
-10
lines changed

3 files changed

+30
-10
lines changed

mdp_playground/scripts/run_experiments.py

+11-4
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,8 @@
2222

2323

2424
def main(args):
25-
#TODO Different seeds for Ray Trainer (TF, numpy, Python; Torch, Env), Environment (it has multiple sources of randomness too), Ray Evaluator
25+
# #TODO Different seeds for Ray Trainer (TF, numpy, Python; Torch, Env),
26+
# Environment (it has multiple sources of randomness too), Ray Evaluator
2627
# docstring at beginning of the file is stored in __doc__
2728
parser = argparse.ArgumentParser(description=__doc__)
2829
parser.add_argument('-c', '--config-file', dest='config_file',
@@ -138,14 +139,20 @@ def main(args):
138139

139140
if args.config_num is not None:
140141
stats_file_name += '_' + str(args.config_num)
141-
# elif args.agent_config_num is not None: ###IMP Commented out! If we append both these nums then, that can lead to 1M small files for 1000x1000 configs which doesn't play well with our Nemo cluster.
142+
# elif args.agent_config_num is not None: ###IMP Commented out! If we append
143+
# both these nums then, that can lead to 1M small files for 1000x1000 configs
144+
# which doesn't play well with our Nemo cluster.
142145
# stats_file_name += '_' + str(args.agent_config_num)
143146

144147
print("Stats file being written to:", stats_file_name)
145148

146-
config, final_configs = config_processor.process_configs(config_file, stats_file_prefix=stats_file_name, framework=args.framework, config_num=args.config_num, log_level=log_level_, framework_dir=args.framework_dir)
149+
config, final_configs = config_processor.process_configs(config_file,\
150+
stats_file_prefix=stats_file_name, framework=args.framework,\
151+
config_num=args.config_num, log_level=log_level_,\
152+
framework_dir=args.framework_dir)
147153

148-
print("Configuration number(s) that will be run:", "all" if args.config_num is None else args.config_num)
154+
print("Configuration number(s) that will be run:", "all" if args.config_num is\
155+
None else args.config_num)
149156

150157

151158
# import default_config

run_experiments.py

+4
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
import sys
2+
import mdp_playground.scripts.run_experiments as run_experiments
3+
4+
run_experiments.main(sys.argv[1:])

tests/test_mdp_playground.py

+15-6
Original file line number | Diff line number | Diff line change
@@ -754,7 +754,7 @@ def test_grid_env(self):
754754
env = RLToyEnv(**config)
755755

756756
state = env.get_augmented_state()['augmented_state'][-1]
757-
actions = [[0, 1], [-1, 1], [-1, 0], [1, -1], [0.5, -0.5], [1, 2], [1, 1], [0, 1]]
757+
actions = [[0, -1], [-1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [0, 1], [0, 1], [-1, 0]]
758758

759759
tot_rew = 0
760760
for i in range(len(actions)):
@@ -765,29 +765,38 @@ def test_grid_env(self):
765765
state = next_state.copy()
766766
tot_rew += reward
767767

768-
assert tot_rew == 7.5, str(tot_rew)
768+
assert tot_rew == 8.25, str(tot_rew)
769769

770770
env.reset()
771771
env.close()
772772

773773

774774
# Test 2: Almost the same as 1, but with irrelevant features
775775
config["irrelevant_features"] = True
776+
config["term_state_reward"] = 0.
776777

777778
env = RLToyEnv(**config)
778779
state = env.get_augmented_state()['augmented_state'][-1]
779-
actions = [[0, 1], [-1, 1], [-1, 0], [1, -1], [0.5, -0.5], [1, 2], [1, 1], [0, 1]]
780+
actions = [[0, -1], [-1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [0, 1], [0, 1], [-1, 0]]
780781

781782
tot_rew = 0
782783
for i in range(len(actions)):
783-
action = actions[i] + [-1, 0]
784+
action = actions[i] + [0, 0]
785+
next_obs, reward, done, info = env.step(action)
786+
next_state = env.get_augmented_state()['augmented_state'][-1]
787+
print("sars', done =", state, action, reward, next_state, done)
788+
state = next_state.copy()
789+
tot_rew += reward
790+
791+
for i in range(len(actions)):
792+
action = [0, 0] + actions[i]
784793
next_obs, reward, done, info = env.step(action)
785794
next_state = env.get_augmented_state()['augmented_state'][-1]
786-
print("sars'o', done =", state, action, reward, next_state, next_obs, done)
795+
print("sars', done =", state, action, reward, next_state, done)
787796
state = next_state.copy()
788797
tot_rew += reward
789798

790-
assert tot_rew == 7.5, str(tot_rew)
799+
assert tot_rew == 9, str(tot_rew)
791800

792801
env.reset()
793802
env.close()

0 commit comments

Comments (0)