@@ -125,9 +125,10 @@ def run_model(model: PrimFF,
         if render:
             env.render('human')
             # time.sleep(1 / 100)
-            env.stadium_scene._p.addUserDebugLine([*pos, 0.5], [*(pos + ob[:2]), 0.5], lifeTime=0.1)
+            # env.stadium_scene._p.addUserDebugLine([*pos, 0.5], [*(pos + ob[:2]), 0.5], lifeTime=0.1)
             # robot to goal
-            # env.stadium_scene._p.addUserDebugLine(pos, [env.walk_target_x, env.walk_target_y, pos[2]], lifeTime=0.1)
+            env.stadium_scene._p.addUserDebugLine([*pos, 0.5], [env.walk_target_x, env.walk_target_y, 0.5],
+                                                  lifeTime=0.1)
             # robot dir
             # point = [10, m * 10 + c, pos[2]]
             # env.stadium_scene._p.addUserDebugLine([x, y, pos[2]], point, lifeTime=0.1, lineColorRGB=[0, 1, 0])
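
The line enabled in this hunk draws a debug line from the robot's position to its walk target; with lifeTime=0.1 the line expires almost immediately, so redrawing it every step produces a continuously updated marker. A minimal standalone sketch of the same PyBullet call, with hypothetical positions in place of the repo's stadium scene:

import pybullet as p

p.connect(p.GUI)  # debug lines are only visible in GUI mode

robot_pos = [0.0, 0.0, 0.5]  # hypothetical robot position (x, y, z)
target = [3.0, 2.0, 0.5]     # hypothetical walk target at the same height

# Draw a red line from robot to goal that expires after 0.1 s;
# calling this once per simulation step keeps it tracking the robot.
p.addUserDebugLine(robot_pos, target, lineColorRGB=[1, 0, 0], lifeTime=0.1)
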
@@ -178,6 +179,7 @@ def r_fn(model: PrimFF, use_ac_noise=True) -> TrainingResult:
                          rs if use_ac_noise else None, False)
         return RewardResult(rews, behv, obs if save_obs else np.array([np.zeros(env.observation_space.shape)]), steps)
 
+
     assert cfg.general.policies_per_gen % comm.size == 0 and (cfg.general.policies_per_gen / comm.size) % 2 == 0
     eps_per_proc = int((cfg.general.policies_per_gen / comm.size) / 2)
     for gen in range(cfg.general.gens):  # main loop
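
Reading the divisibility check in the context lines of this hunk: the generation's policy count must split evenly across MPI ranks, and each rank's share must itself be even. The final halving presumably pairs each episode with a mirrored (antithetic) noise sample, though that is an inference from the /2, not something this hunk confirms. A worked example with hypothetical values:

policies_per_gen = 48  # hypothetical cfg.general.policies_per_gen
world_size = 4         # hypothetical comm.size

# 48 % 4 == 0 and (48 / 4) % 2 == 0, so the assertion passes.
assert policies_per_gen % world_size == 0 and (policies_per_gen / world_size) % 2 == 0
eps_per_proc = int((policies_per_gen / world_size) / 2)
print(eps_per_proc)  # 6 episodes per rank, i.e. 12 policies if each episode covers a +/- pair
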