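"""Self-play training utilities for the AlphaZero-style Othello agent:
MCTS-driven episode generation, symmetry-based data augmentation, and
network-vs-network and network-vs-baseline evaluation."""
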
import random
import logging

import numpy as np

from Net.NNet import NeuralNets
from othelo_mcts import OthelloMCTS
from agents import NeuralNetworkOthelloAgent, duel_between_agents
from Othello import OthelloGame, OthelloPlayer, BoardView


def training_example_symmetries(board, policy):
    """Generate the eight symmetries (four rotations, each with and without a
    horizontal flip) of a board/policy pair for data augmentation."""
    symmetric_examples = []
    for rotation in range(1, 5):
        for flip in [True, False]:
            new_board_example = np.rot90(board, k=rotation)
            new_policy_example = np.rot90(policy, k=rotation)
            if flip:
                new_board_example = np.fliplr(new_board_example)
                new_policy_example = np.fliplr(new_policy_example)
            symmetric_examples.append((new_board_example, new_policy_example))
    return symmetric_examples


def execute_episode(board_size, neural_network, degree_exploration,
                    num_simulations, policy_temperature, e_greedy):
    """Play one self-play game and return (board, policy, outcome) training
    examples, with outcome +1 for the winner's positions and -1 otherwise."""
    examples = []

    game = OthelloGame(board_size)
    mcts = OthelloMCTS(board_size, neural_network, degree_exploration)

    # Each network type consumes a different board encoding
    if neural_network.network_type == NeuralNets.ONN:
        board_view_type = BoardView.TWO_CHANNELS
    elif neural_network.network_type == NeuralNets.BNN:
        board_view_type = BoardView.ONE_CHANNEL

    while not game.has_finished():
        state = game.board(BoardView.TWO_CHANNELS)

        for _ in range(num_simulations):
            mcts.simulate(state, game.current_player)

        # MCTS statistics are stored from BLACK's perspective, so invert
        # the board when it is WHITE's turn
        if game.current_player == OthelloPlayer.WHITE:
            state = OthelloGame.invert_board(state)

        policy = mcts.get_policy_action_probabilities(state, policy_temperature)

        # e-greedy: exploit the most probable action with probability
        # e_greedy, otherwise pick a random valid action
        if random.random() <= e_greedy:
            action = np.argwhere(policy == policy.max())[0]
        else:
            valid_actions = mcts.get_state_actions(state)
            action = valid_actions[np.random.choice(len(valid_actions))]

        action_chosen = np.zeros((board_size, board_size))
        action_chosen[action[0]][action[1]] = 1

        # Augment the dataset with all eight symmetries of this position
        for board_example, policy_example in training_example_symmetries(
                game.board(board_view_type), action_chosen):
            examples.append((board_example, policy_example, game.current_player))

        game.play(*action)

    winner, winner_points = game.get_winning_player()
    logging.info(f'Episode finished: The winner obtained {winner_points} points.')

    return [(state, policy, 1 if winner == player else -1)
            for state, policy, player in examples]
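

# Sketch of how self-play episodes might feed network training (illustrative
# comments only; `network` stands in for a Net.NNet model whose construction
# lives outside this file, `network.train` is a hypothetical training call,
# and the hyperparameter values below are placeholders, not project settings):
#
#   training_examples = []
#   for _ in range(num_episodes):
#       training_examples += execute_episode(board_size=8,
#                                            neural_network=network,
#                                            degree_exploration=1,
#                                            num_simulations=25,
#                                            policy_temperature=1,
#                                            e_greedy=0.9)
#   network.train(training_examples)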


def duel_between_neural_networks(board_size, neural_network_1, neural_network_2,
                                 degree_exploration, num_simulations):
    """Pit two networks against each other; return 0 if neural_network_1
    wins, 1 otherwise."""
    game = OthelloGame(board_size)

    nn_1_agent = NeuralNetworkOthelloAgent(game, neural_network_1, num_simulations, degree_exploration)
    nn_2_agent = NeuralNetworkOthelloAgent(game, neural_network_2, num_simulations, degree_exploration)

    # Map agents back to their underlying networks to identify the winner
    agents = {
        nn_1_agent: neural_network_1,
        nn_2_agent: neural_network_2,
    }

    agent_winner = duel_between_agents(game, nn_1_agent, nn_2_agent)

    return 0 if agents[agent_winner] is neural_network_1 else 1


def evaluate_neural_network(board_size, total_iterations, neural_network, num_simulations,
                            degree_exploration, agent_class, agent_arguments):
    """Play total_iterations games between the network and a baseline agent;
    return how many of them the network won."""
    net_wins = 0

    logging.info('Neural Network Evaluation: Started')

    for _ in range(1, total_iterations + 1):
        game = OthelloGame(board_size)

        nn_agent = NeuralNetworkOthelloAgent(game, neural_network, num_simulations, degree_exploration)
        evaluation_agent = agent_class(game, *agent_arguments)

        # Shuffle the playing order so neither side always moves first
        agents = [evaluation_agent, nn_agent]
        random.shuffle(agents)

        agent_winner = duel_between_agents(game, *agents)

        if agent_winner is nn_agent:
            net_wins += 1
            logging.info('Neural Network Evaluation: Network won')
        else:
            logging.info('Neural Network Evaluation: Network lost')

    return net_wins
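

# A minimal sanity check for the symmetry augmentation above. This is an
# illustrative sketch, not part of the training pipeline: it assumes only
# numpy and training_example_symmetries as defined in this module. A lone
# marked square should reappear in eight distinct board/policy orientations.
if __name__ == '__main__':
    demo_board = np.zeros((6, 6))
    demo_policy = np.zeros((6, 6))
    demo_board[0][1] = 1   # single piece placed off the diagonals
    demo_policy[0][1] = 1  # matching one-hot "policy" target
    symmetries = training_example_symmetries(demo_board, demo_policy)
    # Four rotations x (flipped, unflipped) -> 8 examples
    print(f'{len(symmetries)} symmetric examples generated')
    distinct_boards = {tuple(map(tuple, b)) for b, _ in symmetries}
    print(f'{len(distinct_boards)} distinct board orientations')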