Update flagrun config and run_saved.py environment setup for the distance-reward experiment

sash-a · sash-a · commit cc4c7f54357b · 2021-08-01T15:57:41.000+02:00
diff --git a/configs/flagrun.json b/configs/flagrun.json
@@ -2,9 +2,9 @@
   "env": {
     "name": "AntFlagrunBulletEnv-v0",
     "max_steps": 500,
-    "ant_env_rew_weight": 1,
+    "ant_env_rew_weight": 0,
     "path_rew_weight": 0,
-    "dist_rew_weight": 0,
+    "dist_rew_weight": 1,
     "goal_reach_rew": 500,
     "kwargs": {
       "enclosed": true,
@@ -13,7 +13,7 @@
       "max_target_dist": 4,
       "max_targets": 0,
       "tolerance": 1,
-      "switch_flag_on_collision": true,
+      "switch_flag_on_collision": false,
       "use_sensor": true
     }
   },
@@ -25,8 +25,10 @@
   },
   "policy": {
     "layer_sizes": [
+      128,
       256,
-      256
+      256,
+      128
     ],
     "ac_std": 0.01,
     "ac_std_decay": 1,
@@ -46,10 +48,10 @@
     "use_pos": false
   },
   "general": {
-    "name": "flagrun-local-stupid-sensor",
-    "gens": 200,
-    "policies_per_gen": 960,
-    "eps_per_policy": 100,
+    "name": "flagrun-sensor-distrew-100timeout",
+    "gens": 201,
+    "policies_per_gen": 1200,
+    "eps_per_policy": 10,
     "n_policies": 1,
     "batch_size": 500,
     "seed": null,
diff --git a/run_saved.py b/run_saved.py
@@ -43,13 +43,31 @@ def run_saved(model: torch.nn.Module, env, steps):
     timeout = 200
     world_size = 10
     enclosed = True
+    tolerance = 1
+    steps = 1000
+    max_target_dist = 4
+    max_targets = 0
+    switch_flag_on_collision = False
+    use_sensor = False
+
+    e = gym.make(args.env,
+                 enclosed=enclosed,
+                 timeout=timeout,
+                 size=world_size,
+                 tolerance=tolerance,
+                 max_target_dist=max_target_dist,
+                 max_targets=max_targets,
+                 switch_flag_on_collision=switch_flag_on_collision,
+                 use_sensor=use_sensor,
+                 debug=False).unwrapped
 
-    e = gym.make(args.env, enclosed=enclosed, timeout=timeout, size=world_size).unwrapped
     e.mpi_common_rand = np.random.RandomState()
+    AntGatherBulletEnv.ant_env_rew_weight = 1
+    AntGatherBulletEnv.path_rew_weight = 0
+    AntGatherBulletEnv.dist_rew_weight = 0
 
     e.render('human')
     e.reset()
-    steps = 1000
     if args.record:
         e.scene._p.startStateLogging(e.scene._p.STATE_LOGGING_VIDEO_MP4, '~/Documents/es/testvid.mp4')