Added config option for the goal-reached reward

Sasha · Sasha · commit 94bcff44c9ba · 2021-05-23T20:25:38.000+02:00
diff --git a/configs/flagrun.json b/configs/flagrun.json
@@ -2,8 +2,9 @@
   "env": {
     "name": "AntFlagrunBulletEnv-v0",
     "max_steps": 1000,
-    "ant_env_rew_weight": 1,
+    "ant_env_rew_weight": 0,
     "path_rew_weight": 1,
+    "goal_reach_rew": 10,
     "kwargs": {
       "enclosed": true,
       "timeout": 100,
@@ -40,10 +41,10 @@
     "use_pos": false
   },
   "general": {
-    "name": "flagrun-10eps_100timeout_10size-env_and_path_rew",
+    "name": "flagrun-pathrewfixedagain",
     "gens": 500,
     "policies_per_gen": 2400,
-    "eps_per_policy": 10,
+    "eps_per_policy": 20,
     "n_policies": 1,
     "batch_size": 500,
     "seed": null,
diff --git a/flagrun.py b/flagrun.py
@@ -4,9 +4,9 @@
 import gym
 import numpy as np
 # noinspection PyUnresolvedReferences
-import pybullet_envs
+import hrl_pybullet_envs
 # noinspection PyUnresolvedReferences
-import pybulletgym
+import pybullet_envs
 import torch
 from mpi4py import MPI
 from torch import Tensor, clamp, cat, nn
@@ -162,6 +162,7 @@ def run_model(model: PrimFF,
     env: gym.Env = gym.make(cfg.env.name, enclosed=True, timeout=-1)
     env.ant_env_rew_weight = cfg.env.ant_env_rew_weight
     env.path_rew_weight = cfg.env.path_rew_weight
+    env.goal_reach_rew = cfg.env.goal_reach_rew
     # seeding; this must be done before creating the neural network so that params are deterministic across processes
     rs, my_seed, global_seed = utils.seed(comm, cfg.general.seed, env)
     all_seeds = comm.alltoall([my_seed] * comm.size)  # simply for saving/viewing the seeds used on each proc