from keras.preprocessing import sequence

from utils4e import sigmoid, dotproduct, softmax1D, conv1D, GaussianKernel, element_wise_product, \
-    vector_add, random_weights, scalar_vector_product, matrix_multiplication, map_vector, mse_loss
+    vector_add, random_weights, scalar_vector_product, matrix_multiplication, map_vector, mse_loss


# DEEP NEURAL NETWORKS. (Chapter 19)
@@ -20,7 +20,7 @@

class Node:
    """
-    A node in computational graph, It contains the pointer to all its parents.
+    A node in a computational graph. Contains the pointer to all its parents.
    :param val: value of current node.
    :param parents: a container of all parents of current node.
    """
@@ -35,7 +35,7 @@ def __repr__(self):

class NNUnit(Node):
    """
-    A single unit of a Layer in a Neural Network
+    A single unit of a layer in a Neural Network
    :param weights: weights between parent nodes and current node
    :param value: value of current node
    """
@@ -47,7 +47,7 @@ def __init__(self, weights=None, value=None):

class Layer:
    """
-    A layer in a neural network based on computational graph.
+    A layer in a neural network based on a computational graph.
    :param size: number of units in the current layer
    """

@@ -207,16 +207,14 @@ def gradient_descent(dataset, net, loss, epochs=1000, l_rate=0.01, batch_size=1,
    gradient descent algorithm to update the learnable parameters of a network.
    :return: the updated network.
    """
-    # init data
-    examples = dataset.examples
+    examples = dataset.examples  # init data

    for e in range(epochs):
        total_loss = 0
        random.shuffle(examples)
        weights = [[node.weights for node in layer.nodes] for layer in net]

        for batch in get_batch(examples, batch_size):
-
            inputs, targets = init_examples(batch, dataset.inputs, dataset.target, len(net[-1].nodes))
            # compute gradients of weights
            gs, batch_loss = BackPropagation(inputs, targets, weights, net, loss)
@@ -231,6 +229,7 @@ def gradient_descent(dataset, net, loss, epochs=1000, l_rate=0.01, batch_size=1,

        if verbose and (e + 1) % verbose == 0:
            print("epoch:{}, total_loss:{}".format(e + 1, total_loss))
+
    return net

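A minimal usage sketch of the updated gradient_descent, for orientation only and not part of the diff: DataSet and its iris attributes come from the surrounding repository and are assumptions here, while InputLayer, DenseLayer and mse_loss appear elsewhere in this file.

# Hypothetical driver, assuming the repo's DataSet helper is importable from learning4e.
from learning4e import DataSet

iris = DataSet(name='iris')                      # assumed sample dataset (4 attributes, 3 classes)
iris.classes_to_numbers()                        # targets must be numeric for mse_loss
raw_net = [InputLayer(4), DenseLayer(4, 3)]      # 4 inputs, 3 output units
net = gradient_descent(iris, raw_net, mse_loss, epochs=100,
                       l_rate=0.01, batch_size=1, verbose=10)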
@@ -261,21 +260,26 @@ def adam_optimizer(dataset, net, loss, epochs=1000, rho=(0.9, 0.999), delta=1 /
        for batch in get_batch(examples, batch_size):
            t += 1
            inputs, targets = init_examples(batch, dataset.inputs, dataset.target, len(net[-1].nodes))
+
            # compute gradients of weights
            gs, batch_loss = BackPropagation(inputs, targets, weights, net, loss)
+
            # update s, r, s_hat and r_hat
            s = vector_add(scalar_vector_product(rho[0], s),
                           scalar_vector_product((1 - rho[0]), gs))
            r = vector_add(scalar_vector_product(rho[1], r),
                           scalar_vector_product((1 - rho[1]), element_wise_product(gs, gs)))
            s_hat = scalar_vector_product(1 / (1 - rho[0] ** t), s)
            r_hat = scalar_vector_product(1 / (1 - rho[1] ** t), r)
+
            # rescale r_hat
            r_hat = map_vector(lambda x: 1 / (math.sqrt(x) + delta), r_hat)
+
            # delta weights
            delta_theta = scalar_vector_product(-l_rate, element_wise_product(s_hat, r_hat))
            weights = vector_add(weights, delta_theta)
            total_loss += batch_loss
+
            # update the weights of network each batch
            for i in range(len(net)):
                if weights[i]:
@@ -284,6 +288,7 @@ def adam_optimizer(dataset, net, loss, epochs=1000, rho=(0.9, 0.999), delta=1 /

        if verbose and (e + 1) % verbose == 0:
            print("epoch:{}, total_loss:{}".format(e + 1, total_loss))
+
    return net

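Side note for review, not part of the change: the vector helpers above (vector_add, scalar_vector_product, element_wise_product, map_vector) implement the standard Adam update over the nested weight lists. A per-weight sketch of the same arithmetic with plain floats, where the name adam_step is made up for illustration:

import math

def adam_step(w, g, s, r, t, rho=(0.9, 0.999), delta=1e-8, l_rate=0.001):
    # Mirrors the s/r/s_hat/r_hat updates in adam_optimizer, one weight at a time.
    s = rho[0] * s + (1 - rho[0]) * g          # biased first-moment estimate
    r = rho[1] * r + (1 - rho[1]) * g * g      # biased second-moment estimate
    s_hat = s / (1 - rho[0] ** t)              # bias-corrected moments
    r_hat = r / (1 - rho[1] ** t)
    w = w - l_rate * s_hat / (math.sqrt(r_hat) + delta)
    return w, s, r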
@@ -327,6 +332,7 @@ def BackPropagation(inputs, targets, theta, net, loss):

        previous = [layer_out[i] - t_val[i] for i in range(o_units)]
        h_layers = n_layers - 1
+
        # Backward pass
        for i in range(h_layers, 0, -1):
            layer = net[i]
@@ -426,6 +432,7 @@ def perceptron_learner(dataset, learning_rate=0.01, epochs=100, verbose=None):

    # initialize the network, add dense layer
    raw_net = [InputLayer(input_size), DenseLayer(input_size, output_size)]
+
    # update the network
    learned_net = gradient_descent(dataset, raw_net, mse_loss, epochs, l_rate=learning_rate, verbose=verbose)

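Illustration only, not part of the diff: raw_net is a plain list of Layer objects, so its weights can be pulled out exactly the way gradient_descent does above. The node and weight counts in the comments are what DenseLayer(input_size, output_size) is expected to produce, not something this hunk shows.

raw_net = [InputLayer(4), DenseLayer(4, 3)]
weights = [[node.weights for node in layer.nodes] for layer in raw_net]
print(len(weights[1]))      # expected: 3 nodes in the dense (output) layer
print(len(weights[1][0]))   # expected: 4 weights per node, one per input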
@@ -497,6 +504,7 @@ def auto_encoder_learner(inputs, encoding_size, epochs=200):
    model.add(Dense(encoding_size, input_dim=input_size, activation='relu', kernel_initializer='random_uniform',
                    bias_initializer='ones'))
    model.add(Dense(input_size, activation='relu', kernel_initializer='random_uniform', bias_initializer='ones'))
+
    # update model with sgd
    sgd = optimizers.SGD(lr=0.01)
    model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy'])
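For completeness, a self-contained sketch of the same Keras encoder/decoder construction plus the fit call an autoencoder needs (targets equal inputs); the array shape, encoding size, and epoch count below are assumptions, not values taken from this file.

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers

inputs = np.random.rand(100, 16)   # assumed: 100 examples with 16 features
input_size = inputs.shape[1]
encoding_size = 4                  # assumed bottleneck width

model = Sequential()
model.add(Dense(encoding_size, input_dim=input_size, activation='relu'))
model.add(Dense(input_size, activation='relu'))

sgd = optimizers.SGD(lr=0.01)
model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy'])
model.fit(inputs, inputs, epochs=200, verbose=0)   # autoencoder: reconstruct the inputs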