-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
executable file
·65 lines (56 loc) · 1.63 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#encoding: utf-8
from __future__ import print_function, division
# 第三方
import os
import sys
import pyglet
import tensorflow as tf
import gym,roboschool
import random
import numpy as np
sys.path.append('env/')
from monitor import *
from subproc_vec_env import SubprocVecEnv
from vec_normalize import VecNormalize,VecNormalizeTest
from collections import deque
from hyperparameter import *
from agent import *
from hyperparameter import *
def run(agent,env):
    """Train or evaluate the agent inside its TensorFlow session.

    Mode is selected by the module-level ``TRAIN`` flag (star-imported from
    ``hyperparameter``):

    * ``TRAIN`` truthy  -- runs ``agent.agent_run(i)`` for ``NUM_EPISODES``
      episodes on the vectorized ``env`` passed in.
    * ``TRAIN`` falsy   -- builds a fresh single ``gym`` env (the ``env``
      argument is shadowed and unused in this branch), optionally wraps it
      with ``VecNormalizeTest`` using the saved running mean/var, and rolls
      out the deterministic policy with rendering.

    Parameters
    ----------
    agent : project agent object; must expose ``sess`` (tf.Session),
        ``pi`` (policy output op), ``s`` (observation placeholder),
        ``obs`` (current observation buffer, mutated in place) and
        ``dones``.
    env : vectorized training environment (only used in train mode).
    """
    with agent.sess as session:
        # Initialize all TF variables before optionally restoring weights.
        tf.global_variables_initializer().run()
        # Load network weights from disk if configured.
        if LOAD_NETWORK:
            agent.load_network()
        ### Training
        if TRAIN: # Train mode
            print('Start Training')
            for i in range(NUM_EPISODES):
                agent.agent_run(i)
        else: # Test mode
            # Imported here so rendering works headlessly-installed setups
            # only pay the OpenGL cost in test mode.
            from OpenGL import GLU
            print('Start Test')
            r=0
            # NOTE(review): this shadows the `env` parameter with a fresh,
            # non-vectorized gym env; the passed-in env is ignored from here on.
            env=gym.make(ENV_NAME)
            if ENV_NORMALIZE:
                # Restore the observation-normalization statistics saved at
                # train time so test observations are scaled consistently.
                running_mean = np.load('{}/mean.npy'.format(SAVE_NETWORK_PATH))
                running_var = np.load('{}/var.npy'.format(SAVE_NETWORK_PATH))
                env = VecNormalizeTest(env, running_mean, running_var)
            #env.render()
            # NOTE(review): reset() return value is discarded; agent.obs is
            # presumably initialized elsewhere (e.g. by the agent) -- confirm.
            env.reset()
            # Roll out NUM_EPISODES_AT_TEST episodes of (assumed) fixed length
            # EP_LEN. NOTE(review): `dones` is stored but never used to reset,
            # so this assumes episodes never terminate early -- verify.
            for _ in range(NUM_EPISODES_AT_TEST*EP_LEN):
                # Feed the current observation and take the greedy/policy action.
                actions= agent.sess.run(agent.pi, {agent.s:agent.obs})
                agent.obs[:], rewards, agent.dones, infos = env.step(actions[0])
                env.render()
                r+=rewards
            # Average total reward per test episode (valid only under the
            # fixed-episode-length assumption above).
            print("all_reward=",r/NUM_EPISODES_AT_TEST)
if __name__ == '__main__':
    # In test mode force a single worker and disable early resets so the
    # rollout in run() sees one continuous environment.
    # NOTE(review): these rebind module-level names; code star-importing this
    # module's config may not see the change -- confirm intent.
    if not TRAIN:
        NWORK=1
        EARLY_RESET=False
    # Spawn NWORK subprocess environments, each with its own monitor wrapper.
    env = SubprocVecEnv([make_env(ENV_NAME,i,log_monitor=LOG_MONITOR) for i in range(NWORK)])
    if ENV_NORMALIZE:
        # Normalize observations/rewards with running statistics during training.
        env = VecNormalize(env)
    Agent = agent(env,optimizeMethod='adam')
    run(Agent,env)
# After the run finishes, launch in a terminal: tensorboard --logdir Graphview