forked from Azure/MachineLearningNotebooks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchainer_mnist_hd.py
134 lines (101 loc) · 4.59 KB
/
chainer_mnist_hd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import argparse
import numpy as np
import chainer
from chainer import backend
from chainer import backends
from chainer.backends import cuda
from chainer import Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions
from chainer.dataset import concat_examples
from chainer.backends.cuda import to_cpu
from azureml.core.run import Run
run = Run.get_context()
class MyNetwork(Chain):
def __init__(self, n_mid_units=100, n_out=10):
super(MyNetwork, self).__init__()
with self.init_scope():
self.l1 = L.Linear(None, n_mid_units)
self.l2 = L.Linear(n_mid_units, n_mid_units)
self.l3 = L.Linear(n_mid_units, n_out)
def forward(self, x):
h = F.relu(self.l1(x))
h = F.relu(self.l2(h))
return self.l3(h)
def main():
parser = argparse.ArgumentParser(description='Chainer example: MNIST')
parser.add_argument('--batchsize', '-b', type=int, default=100,
help='Number of images in each mini-batch')
parser.add_argument('--epochs', '-e', type=int, default=20,
help='Number of sweeps over the dataset to train')
parser.add_argument('--output_dir', '-o', default='./outputs',
help='Directory to output the result')
args = parser.parse_args()
# Download the MNIST data if you haven't downloaded it yet
train, test = datasets.mnist.get_mnist(withlabel=True, ndim=1)
batchsize = args.batchsize
epochs = args.epochs
run.log('Batch size', np.int(batchsize))
run.log('Epochs', np.int(epochs))
train_iter = iterators.SerialIterator(train, batchsize)
test_iter = iterators.SerialIterator(test, batchsize,
repeat=False, shuffle=False)
model = MyNetwork()
gpu_id = -1 # Set to -1 if you use CPU
if gpu_id >= 0:
# Make a specified GPU current
chainer.backends.cuda.get_device_from_id(0).use()
model.to_gpu() # Copy the model to the GPU
# Choose an optimizer algorithm
optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
# Give the optimizer a reference to the model so that it
# can locate the model's parameters.
optimizer.setup(model)
while train_iter.epoch < epochs:
# ---------- One iteration of the training loop ----------
train_batch = train_iter.next()
image_train, target_train = concat_examples(train_batch, gpu_id)
# Calculate the prediction of the network
prediction_train = model(image_train)
# Calculate the loss with softmax_cross_entropy
loss = F.softmax_cross_entropy(prediction_train, target_train)
# Calculate the gradients in the network
model.cleargrads()
loss.backward()
# Update all the trainable parameters
optimizer.update()
# --------------------- until here ---------------------
# Check the validation accuracy of prediction after every epoch
if train_iter.is_new_epoch: # If this iteration is the final iteration of the current epoch
# Display the training loss
print('epoch:{:02d} train_loss:{:.04f} '.format(
train_iter.epoch, float(to_cpu(loss.array))), end='')
test_losses = []
test_accuracies = []
while True:
test_batch = test_iter.next()
image_test, target_test = concat_examples(test_batch, gpu_id)
# Forward the test data
prediction_test = model(image_test)
# Calculate the loss
loss_test = F.softmax_cross_entropy(prediction_test, target_test)
test_losses.append(to_cpu(loss_test.array))
# Calculate the accuracy
accuracy = F.accuracy(prediction_test, target_test)
accuracy.to_cpu()
test_accuracies.append(accuracy.array)
if test_iter.is_new_epoch:
test_iter.epoch = 0
test_iter.current_position = 0
test_iter.is_new_epoch = False
test_iter._pushed_position = None
break
val_accuracy = np.mean(test_accuracies)
print('val_loss:{:.04f} val_accuracy:{:.04f}'.format(
np.mean(test_losses), val_accuracy))
run.log("Accuracy", np.float(val_accuracy))
if __name__ == '__main__':
main()