run.py
# import gym
from chess_env import ChessEnv
import ppo
import numpy as np

np.random.seed(42)

# hyperparameters
learning_rate = 1e-3
gamma = 0.99          # discount factor
lambd = 0.95          # GAE lambda
epsilon = 0.1         # PPO clipping range
num_epochs = 3        # training epochs per iteration
num_actors = 4        # trajectories collected per iteration
T_horizon = 51        # rollout horizon (steps per trajectory)
batch_size = 64
num_iters = 1000
print_interval = 20

if __name__ == "__main__":
    env = ChessEnv(r"/home/keerthan/Softwares/stockfish-10-linux/Linux/stockfish_10_x64")
    model = ppo.PPO(env.state_dimension, env.action_dimension, learning_rate=learning_rate,
                    gamma=gamma, lambd=lambd, epsilon=epsilon, num_epochs=num_epochs,
                    batch_size=batch_size)

    # copy global variables to local
    iters = num_iters
    _print_interval = print_interval
    horizon = T_horizon
    score = 0
    render = False
    renormalize = True

    # run the algorithm
    for i in range(1, iters + 1):
        start_state = env.reset()
        avg_score = 0
        # collect data with N actors (N = num_actors)
        for actor in range(num_actors):
            actor_history, actor_score, start_state, done = ppo.generate_trajectory(
                env, model, start_state, horizon, render, renormalize)
            model.add_advantage_targets(actor_history)
            avg_score += actor_score
            if done:
                start_state = env.reset()
        score += avg_score / num_actors
        # now train for K epochs (K = num_epochs)
        model.train()
        # print results
        if i % _print_interval == 0:
            score /= _print_interval
            print("# of iters: {}, avg_score: {}".format(i, score))
            render = score > -20 or score == -50
            score = 0
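
# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original script): the gamma/lambd
# hyperparameters above suggest that ppo.add_advantage_targets computes
# Generalized Advantage Estimation (GAE). That is an assumption about the
# ppo module; compute_gae below is only a minimal reference implementation
# of the technique, not the project's actual code.
def compute_gae(rewards, values, last_value, dones, gamma=0.99, lambd=0.95):
    """Return GAE advantages and value targets for a single trajectory."""
    advantages = np.zeros(len(rewards), dtype=np.float32)
    gae = 0.0
    next_value = last_value
    for t in reversed(range(len(rewards))):
        # TD error: delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t)
        delta = rewards[t] + gamma * next_value * (1 - dones[t]) - values[t]
        # recursion: A_t = delta_t + gamma * lambda * (1 - done_t) * A_{t+1}
        gae = delta + gamma * lambd * (1 - dones[t]) * gae
        advantages[t] = gae
        next_value = values[t]
    # value-function targets are advantages plus the baseline estimates
    targets = advantages + np.asarray(values, dtype=np.float32)
    return advantages, targets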