main.py
import argparse
import csv
import os
import random

import gym
import matplotlib.pyplot as plt
import numpy as np
import yaml
from stable_baselines3 import PPO


def parse_args():
    ap = argparse.ArgumentParser()
    ap.add_argument('--config_path', type=str, required=True, help='Path to the config yaml')
    return ap.parse_args()
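
# Example invocation (the config path is illustrative):
#   python main.py --config_path configs/highway_multiagent.yaml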


if __name__ == '__main__':
    args = parse_args()
    # load config
    with open(args.config_path, 'r') as f:
        config = yaml.full_load(f)
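    # Illustrative config layout (values are placeholders; the keys are the
    # ones this script actually reads):
    #   env_name: highway-v0
    #   ego_collision_weight: 1.0
    #   model_path: highway_multiagent/model
    #   ttc_threshold: 3.0
    #   save_result_csv: results.csv
    #   num_episodes: 100
    #   num_steps: 100
    #   save_pic: pics/
    #   reward_file: rewards.npy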
    n_cpu = 6
    env = gym.make(config['env_name'])
    env.configure({
        "controlled_vehicles": random.choice(range(1, 39)) + 1,  # 2..39 controlled vehicles
        "vehicles_count": random.choice(range(10)) + 1,  # 1..10 additional IDM vehicles if needed
        "observation": {
            "type": "MultiAgentObservation",
            "observation_config": {
                "type": "Kinematics",
            }
        },
        "action": {
            "type": "MultiAgentAction",
            "action_config": {
                "type": "DiscreteMetaAction",
            },
        },
        "create_vehicles_ego_idm": True,  # True to create the ego vehicle as IDM
        "ego_collision_weight": config['ego_collision_weight'],
    })
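    # With MultiAgentObservation/MultiAgentAction, highway-env returns one
    # Kinematics observation per controlled vehicle and expects a tuple of
    # discrete actions of the same length; hence the tuple(...) conversion
    # in the inference loop below.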
    env.reset()
    model = PPO('MultiInputPolicy', env,
                policy_kwargs=dict(net_arch=[dict(pi=[256, 256], vf=[256, 256])]),
                learning_rate=5e-4,
                batch_size=32,
                gamma=0.8,
                n_steps=32 * 12 // n_cpu,
                verbose=2,
                n_epochs=100,
                tensorboard_log="highway_multiagent/")
    env.set_dqn_model(model)
    model.learn(int(2e4))
    model.save(config['model_path'])
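    # The trained policy can be restored later with the standard SB3 API,
    # e.g. (sketch): model = PPO.load(config['model_path'], env=env)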
    print('start inference')
    # inference
    ttc_threshold = config['ttc_threshold']  # read from config (not used below)
    save_result_csv = config['save_result_csv']
    # number of episodes
    T = config['num_episodes']
    num_steps = config['num_steps']
    eps = 0
    episode_reward = 0
    rewards = []
    while eps < T:
        done = False
        obs = env.reset()
        previous_info = False  # True while the most recent step was already a crash
        prev_info = None
        for i in range(num_steps):
            action, _ = model.predict(obs)
            # convert the per-agent actions to a tuple, as MultiAgentAction expects
            action = tuple(action)
            next_obs, reward, done, info = env.step(action)
            episode_reward += reward
            rewards.append(episode_reward)
            current_crashed = info['crashed']
            # log each crash only once, on the step where it first occurs
            if current_crashed and not previous_info:
                if prev_info is None:
                    prev_info = info
                ego_speed = info['speed']
                ego_acceleration = info['ego_action']['acceleration']
                ego_ast_action = info['ego_ast_action']
                lane_index_ = info['current_lane_index'][2]
                # get crashed vehicle info
                crashed_veh_info = info['crashed_veh_info']
                crashed_speed = crashed_veh_info['crashed_veh_speed']
                crashed_lane_index = crashed_veh_info['crashed_veh_lane_index'][2]
                crashed_front = crashed_veh_info['front']
                crashed_distance = crashed_veh_info['crashed_distance']
                crashed_acceleration = crashed_veh_info['action']['acceleration']
                crashed_ast_action = crashed_veh_info['ast_action']
                with open(save_result_csv, 'a', newline='') as file:
                    writer = csv.writer(file)
                    # append the ego and crashed-vehicle state for this crash
                    writer.writerow([
                        eps, i, ego_speed, ego_acceleration, ego_ast_action, lane_index_,
                        crashed_speed, crashed_acceleration, crashed_lane_index, crashed_ast_action, crashed_front, crashed_distance,
                    ])
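                # NOTE: no header row is written; if one is wanted, write it once
                # before the episode loop, e.g. (sketch):
                #   writer.writerow(['episode', 'step', 'ego_speed', 'ego_acceleration',
                #                    'ego_ast_action', 'ego_lane', 'crashed_speed',
                #                    'crashed_acceleration', 'crashed_lane', 'crashed_ast_action',
                #                    'crashed_front', 'crashed_distance'])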
                # save picture
                os.makedirs(config['save_pic'], exist_ok=True)
                img_file = os.path.join(config['save_pic'], 'episode_{}_step_{}.png'.format(eps, i))
                img = env.render(mode='rgb_array')
                plt.imsave(img_file, img)
                previous_info = True
            if not current_crashed:
                previous_info = False
            prev_obs = next_obs
            prev_reward = reward
            prev_info = info
            prev_done = done
            obs = next_obs
            env.render()
            if done:
                # reset inside the step loop; stepping continues in the same loop
                episode_reward = 0.0
                obs = env.reset()
                eps = eps + 1
    # save rewards
    with open(config['reward_file'], 'wb') as f:
        np.save(f, rewards)
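
    # The saved trace can be reloaded for inspection, e.g. (sketch):
    #   rewards = np.load(config['reward_file'])
    #   plt.plot(rewards)
    #   plt.show()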