#!/usr/bin/env python3
##########################################################################################
# Author: Jared L. Ostmeyer
# Date Started: 2016-07-26
# License: See LICENSE
# Purpose: Train statistical classifier on repertoire data
##########################################################################################
##########################################################################################
# Libraries
##########################################################################################
import lib_paths
import dataplumbing as dp
import numpy as np
import tensorflow as tf
from model import *
##########################################################################################
# Data
##########################################################################################
# Load data
#
path_dir = 'PATH_TO_YOUR_TRAINING_DATA'
samples = dp.load_repertoires(path_dir)
xs, cs, ys = dp.process_repertoires(samples, snip_size=6)
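
# Based on how these arrays feed the placeholders below, xs presumably holds
# padded snippet features [samples, snippets, features], cs the per-snippet
# counts [samples, snippets], and ys the binary labels [samples].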
##########################################################################################
# Operators
##########################################################################################
# Model settings
#
batch_size = xs.shape[0]
max_instances = xs.shape[1]
num_features = xs.shape[2]
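
# The batch spans every sample, so each optimization step below is full-batch
# (deterministic) gradient descent rather than stochastic mini-batching.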
# Training settings
#
learning_rate = 0.01 # Step size
num_iterations = 2500 # Optimization steps
num_replicas = 10000 # Number of attempts to find the best fit
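
# Each replica is an independently initialized copy of the model trained in
# parallel; only the replica achieving the lowest training cost is kept
# (selected via index_bestfit below).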
# Inputs
#
features = tf.placeholder(tf.float32, [batch_size, max_instances, num_features])
counts = tf.placeholder(tf.float32, [batch_size, max_instances])
labels = tf.placeholder(tf.float32, [batch_size])
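
# TF1-style graph construction: these placeholders are filled through
# feed_dict each time session.run is called.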
# Repertoire model
#
models = MaxSnippetModel(num_features, num_replicas=num_replicas)
logits = models(features, counts)
costs = models.costs(logits, labels)
accuracies = models.accuracies(logits, labels)
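
# logits, costs, and accuracies each carry one entry per replica, which is
# why they are indexed by the best-fit replica below.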
# Select replica with best fit to the training data
#
index_bestfit = tf.argmin(costs, 0)
# Create operator to optimize the model
#
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(costs)
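
# minimize accepts the vector of per-replica costs by summing its elements;
# because each replica has its own variables, this optimizes every replica
# independently in a single step.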
# Create operator to initialize session
#
initializer = tf.global_variables_initializer()
##########################################################################################
# Session
##########################################################################################
# Open session
#
with tf.Session() as session:

    # Initialize variables
    #
    session.run(initializer)

    # Grab a batch of training data (here the batch is the full dataset)
    #
    feed = {features: xs, counts: cs, labels: ys}

    # Each iteration optimizes the model over the entire batch
    #
    for iteration in range(num_iterations):

        # Optimize model
        #
        out = session.run(
            (
                index_bestfit,
                costs,
                accuracies,
                optimizer
            ),
            feed_dict=feed
        )

        # Report the best replica's cost (converted from nats to bits) and
        # its accuracy (as a percentage)
        #
        print(
            'Iteration:', iteration,
            'Cost:', '%4.3f'%(out[1][out[0]]/np.log(2.0)),
            'Accuracy:', '%4.3f'%(100.0*out[2][out[0]])
        )

    # Save weights and bias term of the best-fitting replica
    #
    out = session.run(
        (
            index_bestfit,
            models.weights,
            models.biases
        ),
        feed_dict=feed
    )
    weights_bestfit = out[1][:,out[0]]
    bias_bestfit = out[2][out[0]]
##########################################################################################
# Save parameters
##########################################################################################
# Save parameters of trained model
#
np.savetxt('weights.txt', weights_bestfit)
np.savetxt('bias.txt', [bias_bestfit])
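
# The saved parameters can be reloaded with np.loadtxt by whatever script
# scores new repertoires (assumption: the downstream consumer is not part of
# this file).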