@@ -109,8 +109,7 @@ using tied weights in this tutorial, :math:`\mathbf{W}^T` will be used for
:math:`\mathbf{W'}`):

.. literalinclude:: ../code/dA.py
- :start-after: start-snippet-1
- :end-before: end-snippet-1
+ :pyobject: dA.__init__

Note that we pass the symbolic ``input`` to the autoencoder as a parameter.
This is so that we can concatenate layers of autoencoders to form a deep
@@ -212,23 +211,8 @@ corruption mechanism of randomly masking entries of the input by making
them zero. The code below
does just that :

- .. code-block:: python
-
- from theano.tensor.shared_randomstreams import RandomStreams
-
- def get_corrupted_input(self, input, corruption_level):
- """ This function keeps ``1-corruption_level`` entries of the inputs the same
- and zero-out randomly selected subset of size ``coruption_level``
- Note : first argument of theano.rng.binomial is the shape(size) of
- random numbers that it should produce
- second argument is the number of trials
- third argument is the probability of success of any trial
-
- this will produce an array of 0s and 1s where 1 has a probability of
- 1 - ``corruption_level`` and 0 with ``corruption_level``
- """
- return self.theano_rng.binomial(size=input.shape, n=1, p=1 - corruption_level) * input
-
+ .. literalinclude:: ../code/dA.py
+ :pyobject: dA.get_corrupted_input


In the stacked autoencoder class (:ref:`stacked_autoencoders`) the weights of
@@ -239,172 +223,8 @@ new ones will be constructed.

The final denoising autoencoder class becomes :

- .. code-block:: python
-
- class dA(object):
- """Denoising Auto-Encoder class (dA)
-
- A denoising autoencoders tries to reconstruct the input from a corrupted
- version of it by projecting it first in a latent space and reprojecting
- it afterwards back in the input space. Please refer to Vincent et al.,2008
- for more details. If x is the input then equation (1) computes a partially
- destroyed version of x by means of a stochastic mapping q_D. Equation (2)
- computes the projection of the input into the latent space. Equation (3)
- computes the reconstruction of the input, while equation (4) computes the
- reconstruction error.
-
- .. math::
-
- \tilde{x} ~ q_D(\tilde{x}|x) (1)
-
- y = s(W \tilde{x} + b) (2)
-
- x = s(W' y + b') (3)
-
- L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)] (4)
-
- """
-
- def __init__(self, numpy_rng, theano_rng=None, input=None, n_visible=784, n_hidden=500,
- W=None, bhid=None, bvis=None):
- """
- Initialize the dA class by specifying the number of visible units (the
- dimension d of the input ), the number of hidden units ( the dimension
- d' of the latent or hidden space ) and the corruption level. The
- constructor also receives symbolic variables for the input, weights and
- bias. Such a symbolic variables are useful when, for example the input is
- the result of some computations, or when weights are shared between the
- dA and an MLP layer. When dealing with SdAs this always happens,
- the dA on layer 2 gets as input the output of the dA on layer 1,
- and the weights of the dA are used in the second stage of training
- to construct an MLP.
-
- :type numpy_rng: numpy.random.RandomState
- :param numpy_rng: number random generator used to generate weights
-
- :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
- :param theano_rng: Theano random generator; if None is given one is generated
- based on a seed drawn from `rng`
-
- :type input: theano.tensor.TensorType
- :paran input: a symbolic description of the input or None for standalone
- dA
-
- :type n_visible: int
- :param n_visible: number of visible units
-
- :type n_hidden: int
- :param n_hidden: number of hidden units
-
- :type W: theano.tensor.TensorType
- :param W: Theano variable pointing to a set of weights that should be
- shared belong the dA and another architecture; if dA should
- be standalone set this to None
-
- :type bhid: theano.tensor.TensorType
- :param bhid: Theano variable pointing to a set of biases values (for
- hidden units) that should be shared belong dA and another
- architecture; if dA should be standalone set this to None
-
- :type bvis: theano.tensor.TensorType
- :param bvis: Theano variable pointing to a set of biases values (for
- visible units) that should be shared belong dA and another
- architecture; if dA should be standalone set this to None
-
-
- """
- self.n_visible = n_visible
- self.n_hidden = n_hidden
-
- # create a Theano random generator that gives symbolic random values
- if not theano_rng :
- theano_rng = RandomStreams(rng.randint(2 ** 30))
-
- # note : W' was written as `W_prime` and b' as `b_prime`
- if not W:
- # W is initialized with `initial_W` which is uniformely sampled
- # from -4.*sqrt(6./(n_visible+n_hidden)) and 4.*sqrt(6./(n_hidden+n_visible))
- # the output of uniform if converted using asarray to dtype
- # theano.config.floatX so that the code is runable on GPU
- initial_W = numpy.asarray(numpy_rng.uniform(
- low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
- high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
- size=(n_visible, n_hidden)), dtype=theano.config.floatX)
- W = theano.shared(value=initial_W, name='W')
-
- if not bvis:
- bvis = theano.shared(value = numpy.zeros(n_visible,
- dtype=theano.config.floatX), name='bvis')
-
- if not bhid:
- bhid = theano.shared(value=numpy.zeros(n_hidden,
- dtype=theano.config.floatX), name='bhid')
-
- self.W = W
- # b corresponds to the bias of the hidden
- self.b = bhid
- # b_prime corresponds to the bias of the visible
- self.b_prime = bvis
- # tied weights, therefore W_prime is W transpose
- self.W_prime = self.W.T
- self.theano_rng = theano_rng
- # if no input is given, generate a variable representing the input
- if input == None:
- # we use a matrix because we expect a minibatch of several examples,
- # each example being a row
- self.x = T.dmatrix(name='input')
- else:
- self.x = input
-
- self.params = [self.W, self.b, self.b_prime]
-
- def get_corrupted_input(self, input, corruption_level):
- """ This function keeps ``1-corruption_level`` entries of the inputs the same
- and zero-out randomly selected subset of size ``coruption_level``
- Note : first argument of theano.rng.binomial is the shape(size) of
- random numbers that it should produce
- second argument is the number of trials
- third argument is the probability of success of any trial
-
- this will produce an array of 0s and 1s where 1 has a probability of
- 1 - ``corruption_level`` and 0 with ``corruption_level``
- """
- return self.theano_rng.binomial(size=input.shape, n=1, p=1 - corruption_level) * input
-
-
- def get_hidden_values(self, input):
- """ Computes the values of the hidden layer """
- return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
-
- def get_reconstructed_input(self, hidden ):
- """ Computes the reconstructed input given the values of the hidden layer """
- return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
-
- def get_cost_updates(self, corruption_level, learning_rate):
- """ This function computes the cost and the updates for one trainng
- step of the dA """
-
- tilde_x = self.get_corrupted_input(self.x, corruption_level)
- y = self.get_hidden_values( tilde_x)
- z = self.get_reconstructed_input(y)
- # note : we sum over the size of a datapoint; if we are using minibatches,
- # L will be a vector, with one entry per example in minibatch
- L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1 )
- # note : L is now a vector, where each element is the cross-entropy cost
- # of the reconstruction of the corresponding example of the
- # minibatch. We need to compute the average of all these to get
- # the cost of the minibatch
- cost = T.mean(L)
-
- # compute the gradients of the cost of the `dA` with respect
- # to its parameters
- gparams = T.grad(cost, self.params)
- # generate the list of updates
- updates = []
- for param, gparam in zip(self.params, gparams):
- updates.append((param, param - learning_rate * gparam))
-
- return (cost, updates)
+ .. literalinclude:: ../code/dA.py
+ :pyobject: dA


@@ -415,49 +235,15 @@ Putting it All Together
It is easy now to construct an instance of our ``dA`` class and train
it.

- .. code-block:: python
-
- # allocate symbolic variables for the data
- index = T.lscalar() # index to a [mini]batch
- x = T.matrix('x') # the data is presented as rasterized images
-
- ######################
- # BUILDING THE MODEL #
- ######################
-
- rng = numpy.random.RandomState(123)
- theano_rng = RandomStreams(rng.randint(2 ** 30))
-
- da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
- n_visible=28 * 28, n_hidden=500)
-
- cost, updates = da.get_cost_updates(corruption_level=0.2,
- learning_rate=learning_rate)
-
-
- train_da = theano.function([index], cost, updates=updates,
- givens = {x: train_set_x[index * batch_size: (index + 1) * batch_size]})
-
- start_time = time.clock()
-
- ############
- # TRAINING #
- ############
-
- # go through training epochs
- for epoch in xrange(training_epochs):
- # go through trainng set
- c = []
- for batch_index in xrange(n_train_batches):
- c.append(train_da(batch_index))
-
- print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
-
- end_time = time.clock
+ .. literalinclude:: ../code/dA.py
+ :language: python
+ :start-after: start-snippet-2
+ :end-before: end-snippet-2

- training_time = (end_time - start_time)
+ .. literalinclude:: ../code/dA.py
+ :start-after: start-snippet-3
+ :end-before: end-snippet-3

- print ('Training took %f minutes' % (pretraining_time / 60.))

In order to get a feeling of what the network learned we are going to
plot the filters (defined by the weight matrix). Bear in mind, however,
@@ -470,12 +256,9 @@ To plot our filters we will need the help of ``tile_raster_images`` (see
using the help of the Python Image Library, the following lines of code will
save the filters as an image :

- .. code-block:: python
-
- image = Image.fromarray(tile_raster_images(X=da.W.get_value(borrow=True).T,
- img_shape=(28, 28), tile_shape=(10, 10),
- tile_spacing=(1, 1)))
- image.save('filters_corruption_30.png')
+ .. literalinclude:: ../code/dA.py
+ :start-after: start-snippet-4
+ :end-before: end-snippet-4


Running the Code