train.py
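"""Train a DQNAgent over a synthetic dataset for EPISODES episodes, replaying
experience in minibatches, and save the model once the episode score exceeds
MINIMUM_SCORE."""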
from lib.dqn import DQNAgent
from lib.utils import *
import random
import numpy as np  # np is used below and may not be exported by lib.utils
import pandas as pd
# === GLOBAL CONSTANTS ===
EPISODES = 1
TIME = 15
TRAINING_THR = 0.01    # loss threshold: below this, switch to learning unseen samples
MINIMUM_SCORE = 87.00  # minimum episode score required before the model is saved
# === SETTING ENVIRONMENT
actions = [0, 1, 2, 3, 4]
dataset = fakeDataset(Nsamples=TIME)
state_size = dataset.shape[1]
action_size = len(actions)
agent = DQNAgent(state_size, action_size)
train, test = agent.data.train_test_split(dataset)
y_train = train[:, 4]
train_sc, scalar = standardScalar(train)
train_sc[:, 4] = y_train
print(f"train shape: {train.shape}")
done = False
batch_size = 32
stopCondition = False
initStep = 1
isMinimumLoss = False
pd.DataFrame(data=test).to_csv('test/test_SeLuDL2.csv', index=False)
print('=== TEST loaded. ===')
for e in range(EPISODES):
    state = train_sc[0]
    state = np.reshape(state, [1, agent.state_size])
    agent.total_rewards = 0
    # Reset epsilon for this episode: randomly either 0 or epsilon_decay.
    agent.epsilon = 1 * random.choice([0, 1]) * agent.epsilon_decay
    if stopCondition:
        # Loss dropped below TRAINING_THR, so try to learn unseen samples.
        initStep = int(TIME * 0.8 * 0.8) + 1
        state = train_sc[initStep]
        state = np.reshape(state, [1, agent.state_size])
    for time in range(TIME):
        score = (agent.total_rewards / (time + 1)) * 100
        agent.data.measures['score'][e] = score
        print(f'*** TIME: {time} *** score rewards %: {score}')

        action = agent.act(state)  # index of the action with the maximum Q-value
        print(f"Given behaviour: {state[0, agent.state_size - 1]}, chosen action: {action}")
        reward, dist = agent.step(action, state)
        agent.data.measures['totalRewards'][e].append(agent.total_rewards)
        print(f"Got reward: {reward}, given dist: {dist}")

        # The episode ends once the 80% training cut of the data has been consumed.
        done = (time + initStep == int(train.shape[0] * 0.8))
        if not done:
            next_state = np.reshape(train_sc[initStep + time], [1, agent.state_size])
            agent.memorize(state, action, reward, next_state, done)
            state = next_state
        else:
            # Copy the online network's weights into the target network.
            agent.update_target_model()
            print("=== Sharing weights between models. ===")
            print("episode: {}/{}, score: {}, epsilon: {:.2}"
                  .format(e, EPISODES, score, agent.epsilon))
            break
        # Experience replay once enough transitions have been memorized.
        if len(agent.memory) > batch_size:
            agent.replay(batch_size=batch_size, episod=e)
            nMiniBatches = len(agent.data.measures['loss'][e])
            if nMiniBatches >= batch_size * 20 and nMiniBatches % batch_size == 0:
                print(f"Plotting metrics at minibatch {nMiniBatches}")
                agent.plotMetrics(episod=e, nBathc=nMiniBatches)
                agent.plotLoss(episod=e)
                agent.plotRewards(episod=e)

        # Stop early when the latest minibatch loss drops below the training threshold.
        if len(agent.data.measures['loss'][e]) > 0 and agent.data.measures['loss'][e][-1] <= TRAINING_THR:
            print(f"Minimum loss: {agent.data.measures['loss'][e]}")
            stopCondition = True
            isMinimumLoss = True
            break
    # if e == 5:
    #     """try to learn new samples"""
    #     stopCondition = True

    # Save the model
    if agent.data.measures['score'][e] > MINIMUM_SCORE:
        print(f'=== Minimum score {MINIMUM_SCORE} reached. Model saved.')
        agent.save(name='models/SeLuDL2.h5')
print("=== Hyperparameters ===")
print(f"LR: {agent.learning_rate}; Gamma: {agent.gamma}, Eps: {agent.epsilon}, clip: {agent.clipDelta}")
print(f"Final Scores (totRewards/steps) for episodes: {agent.data.measures['score']}")