
Commit f9108d3

Fixes and deduplication in the denoising autoencoder tutorial
1 parent d6443e5 commit f9108d3

File tree

  code/dA.py
  doc/dA.txt

2 files changed: +22 -234 lines

Diff for: code/dA.py

+7 -2

@@ -49,7 +49,6 @@
 import Image
 
 
-# start-snippet-1
 class dA(object):
     """Denoising Auto-Encoder class (dA)
 
@@ -191,7 +190,6 @@ def __init__(
             self.x = input
 
         self.params = [self.W, self.b, self.b_prime]
-        # end-snippet-1
 
     def get_corrupted_input(self, input, corruption_level):
         """This function keeps ``1-corruption_level`` entries of the inputs the
@@ -284,13 +282,16 @@ def test_dA(learning_rate=0.1, training_epochs=15,
     # compute number of minibatches for training, validation and testing
     n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
 
+    # start-snippet-2
     # allocate symbolic variables for the data
     index = T.lscalar()    # index to a [mini]batch
     x = T.matrix('x')  # the data is presented as rasterized images
+    # end-snippet-2
 
     if not os.path.isdir(output_folder):
         os.makedirs(output_folder)
     os.chdir(output_folder)
+
     ####################################
     # BUILDING THE MODEL NO CORRUPTION #
     ####################################
@@ -348,6 +349,7 @@ def test_dA(learning_rate=0.1, training_epochs=15,
                            tile_spacing=(1, 1)))
     image.save('filters_corruption_0.png')
 
+    # start-snippet-3
     #####################################
     # BUILDING THE MODEL CORRUPTION 30% #
     #####################################
@@ -399,12 +401,15 @@ def test_dA(learning_rate=0.1, training_epochs=15,
     print >> sys.stderr, ('The 30% corruption code for file ' +
                           os.path.split(__file__)[1] +
                           ' ran for %.2fm' % (training_time / 60.))
+    # end-snippet-3
 
+    # start-snippet-4
    image = Image.fromarray(tile_raster_images(
        X=da.W.get_value(borrow=True).T,
        img_shape=(28, 28), tile_shape=(10, 10),
        tile_spacing=(1, 1)))
    image.save('filters_corruption_30.png')
+    # end-snippet-4
 
     os.chdir('../')
 
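The `start-snippet-N` / `end-snippet-N` comments added above are markers for Sphinx's `literalinclude` directive: its `:start-after:` and `:end-before:` options copy the lines strictly between the first occurrence of each marker string. A minimal sketch of that extraction behaviour (a hypothetical helper, not part of the tutorial code):

    def extract_snippet(path, start_marker, end_marker):
        """Return the lines strictly between the first occurrence of
        ``start_marker`` and the next occurrence of ``end_marker``,
        mimicking literalinclude's :start-after:/:end-before: options."""
        out, inside = [], False
        for line in open(path):
            if not inside:
                inside = start_marker in line
            elif end_marker in line:
                break
            else:
                out.append(line)
        return ''.join(out)

    # e.g. extract_snippet('code/dA.py', 'start-snippet-2', 'end-snippet-2')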
Diff for: doc/dA.txt

+15 -232

@@ -109,8 +109,7 @@ using tied weights in this tutorial, :math:`\mathbf{W}^T` will be used for
 :math:`\mathbf{W'}`):
 
 .. literalinclude:: ../code/dA.py
-  :start-after: start-snippet-1
-  :end-before: end-snippet-1
+  :pyobject: dA.__init__
 
 Note that we pass the symbolic ``input`` to the autoencoder as a parameter.
 This is so that we can concatenate layers of autoencoders to form a deep
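The new `:pyobject:` option instead asks Sphinx to locate the named class or method inside ../code/dA.py and include just its source, so whole objects no longer need snippet markers. The effect is roughly what `inspect.getsource` returns; an illustrative sketch (assumes code/dA.py is importable as the module `dA`):

    import inspect
    from dA import dA  # assumption: code/dA.py is on sys.path

    # Approximately the text that ``:pyobject: dA.__init__`` includes:
    print(inspect.getsource(dA.__init__))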
@@ -212,23 +211,8 @@ corruption mechanism of randomly masking entries of the input by making
 them zero. The code below
 does just that :
 
-.. code-block:: python
-
-    from theano.tensor.shared_randomstreams import RandomStreams
-
-    def get_corrupted_input(self, input, corruption_level):
-        """ This function keeps ``1-corruption_level`` entries of the inputs the same
-        and zero-out randomly selected subset of size ``coruption_level``
-        Note : first argument of theano.rng.binomial is the shape(size) of
-               random numbers that it should produce
-               second argument is the number of trials
-               third argument is the probability of success of any trial
-
-               this will produce an array of 0s and 1s where 1 has a probability of
-               1 - ``corruption_level`` and 0 with ``corruption_level``
-        """
-        return self.theano_rng.binomial(size=input.shape, n=1, p=1 - corruption_level) * input
-
+.. literalinclude:: ../code/dA.py
+  :pyobject: dA.get_corrupted_input
 
 
 In the stacked autoencoder class (:ref:`stacked_autoencoders`) the weights of
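For readers who want the masking noise in isolation: `theano_rng.binomial(size=input.shape, n=1, p=1 - corruption_level)` draws a 0/1 mask, and multiplying by it zeroes a `corruption_level` fraction of the entries. A plain NumPy sketch of the same idea (not the symbolic Theano version):

    import numpy

    def corrupt_input(rng, x, corruption_level):
        """Keep each entry of x with probability 1 - corruption_level;
        zero it out otherwise."""
        mask = rng.binomial(n=1, p=1 - corruption_level, size=x.shape)
        return mask * x

    rng = numpy.random.RandomState(123)
    x = rng.rand(20, 784)                 # a minibatch of 20 "images"
    x_tilde = corrupt_input(rng, x, 0.3)  # ~30% of entries zeroed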
@@ -239,172 +223,8 @@ new ones will be constructed.
 
 The final denoising autoencoder class becomes :
 
-.. code-block:: python
-
-    class dA(object):
-        """Denoising Auto-Encoder class (dA)
-
-        A denoising autoencoders tries to reconstruct the input from a corrupted
-        version of it by projecting it first in a latent space and reprojecting
-        it afterwards back in the input space. Please refer to Vincent et al.,2008
-        for more details. If x is the input then equation (1) computes a partially
-        destroyed version of x by means of a stochastic mapping q_D. Equation (2)
-        computes the projection of the input into the latent space. Equation (3)
-        computes the reconstruction of the input, while equation (4) computes the
-        reconstruction error.
-
-        .. math::
-
-            \tilde{x} ~ q_D(\tilde{x}|x)                                     (1)
-
-            y = s(W \tilde{x} + b)                                           (2)
-
-            x = s(W' y + b')                                                 (3)
-
-            L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)]      (4)
-
-        """
-
-        def __init__(self, numpy_rng, theano_rng=None, input=None, n_visible=784, n_hidden=500,
-                     W=None, bhid=None, bvis=None):
-            """
-            Initialize the dA class by specifying the number of visible units (the
-            dimension d of the input ), the number of hidden units ( the dimension
-            d' of the latent or hidden space ) and the corruption level. The
-            constructor also receives symbolic variables for the input, weights and
-            bias. Such a symbolic variables are useful when, for example the input is
-            the result of some computations, or when weights are shared between the
-            dA and an MLP layer. When dealing with SdAs this always happens,
-            the dA on layer 2 gets as input the output of the dA on layer 1,
-            and the weights of the dA are used in the second stage of training
-            to construct an MLP.
-
-            :type numpy_rng: numpy.random.RandomState
-            :param numpy_rng: number random generator used to generate weights
-
-            :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
-            :param theano_rng: Theano random generator; if None is given one is generated
-                               based on a seed drawn from `rng`
-
-            :type input: theano.tensor.TensorType
-            :paran input: a symbolic description of the input or None for standalone
-                          dA
-
-            :type n_visible: int
-            :param n_visible: number of visible units
-
-            :type n_hidden: int
-            :param n_hidden: number of hidden units
-
-            :type W: theano.tensor.TensorType
-            :param W: Theano variable pointing to a set of weights that should be
-                      shared belong the dA and another architecture; if dA should
-                      be standalone set this to None
-
-            :type bhid: theano.tensor.TensorType
-            :param bhid: Theano variable pointing to a set of biases values (for
-                         hidden units) that should be shared belong dA and another
-                         architecture; if dA should be standalone set this to None
-
-            :type bvis: theano.tensor.TensorType
-            :param bvis: Theano variable pointing to a set of biases values (for
-                         visible units) that should be shared belong dA and another
-                         architecture; if dA should be standalone set this to None
-
-
-            """
-            self.n_visible = n_visible
-            self.n_hidden = n_hidden
-
-            # create a Theano random generator that gives symbolic random values
-            if not theano_rng :
-                theano_rng = RandomStreams(rng.randint(2 ** 30))
-
-            # note : W' was written as `W_prime` and b' as `b_prime`
-            if not W:
-                # W is initialized with `initial_W` which is uniformely sampled
-                # from -4.*sqrt(6./(n_visible+n_hidden)) and 4.*sqrt(6./(n_hidden+n_visible))
-                # the output of uniform if converted using asarray to dtype
-                # theano.config.floatX so that the code is runable on GPU
-                initial_W = numpy.asarray(numpy_rng.uniform(
-                    low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
-                    high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
-                    size=(n_visible, n_hidden)), dtype=theano.config.floatX)
-                W = theano.shared(value=initial_W, name='W')
-
-            if not bvis:
-                bvis = theano.shared(value = numpy.zeros(n_visible,
-                                     dtype=theano.config.floatX), name='bvis')
-
-            if not bhid:
-                bhid = theano.shared(value=numpy.zeros(n_hidden,
-                                     dtype=theano.config.floatX), name='bhid')
-
-            self.W = W
-            # b corresponds to the bias of the hidden
-            self.b = bhid
-            # b_prime corresponds to the bias of the visible
-            self.b_prime = bvis
-            # tied weights, therefore W_prime is W transpose
-            self.W_prime = self.W.T
-            self.theano_rng = theano_rng
-            # if no input is given, generate a variable representing the input
-            if input == None:
-                # we use a matrix because we expect a minibatch of several examples,
-                # each example being a row
-                self.x = T.dmatrix(name='input')
-            else:
-                self.x = input
-
-            self.params = [self.W, self.b, self.b_prime]
-
-        def get_corrupted_input(self, input, corruption_level):
-            """ This function keeps ``1-corruption_level`` entries of the inputs the same
-            and zero-out randomly selected subset of size ``coruption_level``
-            Note : first argument of theano.rng.binomial is the shape(size) of
-                   random numbers that it should produce
-                   second argument is the number of trials
-                   third argument is the probability of success of any trial
-
-                   this will produce an array of 0s and 1s where 1 has a probability of
-                   1 - ``corruption_level`` and 0 with ``corruption_level``
-            """
-            return self.theano_rng.binomial(size=input.shape, n=1, p=1 - corruption_level) * input
-
-
-        def get_hidden_values(self, input):
-            """ Computes the values of the hidden layer """
-            return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
-
-        def get_reconstructed_input(self, hidden ):
-            """ Computes the reconstructed input given the values of the hidden layer """
-            return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
-
-        def get_cost_updates(self, corruption_level, learning_rate):
-            """ This function computes the cost and the updates for one trainng
-            step of the dA """
-
-            tilde_x = self.get_corrupted_input(self.x, corruption_level)
-            y = self.get_hidden_values( tilde_x)
-            z = self.get_reconstructed_input(y)
-            # note : we sum over the size of a datapoint; if we are using minibatches,
-            #        L will be a vector, with one entry per example in minibatch
-            L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1 )
-            # note : L is now a vector, where each element is the cross-entropy cost
-            #        of the reconstruction of the corresponding example of the
-            #        minibatch. We need to compute the average of all these to get
-            #        the cost of the minibatch
-            cost = T.mean(L)
-
-            # compute the gradients of the cost of the `dA` with respect
-            # to its parameters
-            gparams = T.grad(cost, self.params)
-            # generate the list of updates
-            updates = []
-            for param, gparam in zip(self.params, gparams):
-                updates.append((param, param - learning_rate * gparam))
-
-            return (cost, updates)
+.. literalinclude:: ../code/dA.py
+  :pyobject: dA
 
 
 
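As a plain-NumPy cross-check of equations (2)-(4) implemented by the class (a sketch under the tied-weights assumption, not the tutorial's symbolic Theano graph):

    import numpy

    def sigmoid(a):
        return 1. / (1. + numpy.exp(-a))

    def da_cost(x, x_tilde, W, b, b_prime):
        """Average cross-entropy reconstruction cost of a dA with
        tied weights, following equations (2)-(4)."""
        y = sigmoid(numpy.dot(x_tilde, W) + b)    # hidden code, eq. (2)
        z = sigmoid(numpy.dot(y, W.T) + b_prime)  # reconstruction, eq. (3)
        L = -numpy.sum(x * numpy.log(z) +
                       (1 - x) * numpy.log(1 - z), axis=1)  # eq. (4)
        return numpy.mean(L)                      # minibatch average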
@@ -415,49 +235,15 @@ Putting it All Together
 It is easy now to construct an instance of our ``dA`` class and train
 it.
 
-.. code-block:: python
-
-    # allocate symbolic variables for the data
-    index = T.lscalar()    # index to a [mini]batch
-    x = T.matrix('x')  # the data is presented as rasterized images
-
-    ######################
-    # BUILDING THE MODEL #
-    ######################
-
-    rng = numpy.random.RandomState(123)
-    theano_rng = RandomStreams(rng.randint(2 ** 30))
-
-    da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
-            n_visible=28 * 28, n_hidden=500)
-
-    cost, updates = da.get_cost_updates(corruption_level=0.2,
-                                        learning_rate=learning_rate)
-
-
-    train_da = theano.function([index], cost, updates=updates,
-        givens = {x: train_set_x[index * batch_size: (index + 1) * batch_size]})
-
-    start_time = time.clock()
-
-    ############
-    # TRAINING #
-    ############
-
-    # go through training epochs
-    for epoch in xrange(training_epochs):
-        # go through trainng set
-        c = []
-        for batch_index in xrange(n_train_batches):
-            c.append(train_da(batch_index))
-
-        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
-
-    end_time = time.clock
+.. literalinclude:: ../code/dA.py
+  :language: python
+  :start-after: start-snippet-2
+  :end-before: end-snippet-2
 
-    training_time = (end_time - start_time)
+.. literalinclude:: ../code/dA.py
+  :start-after: start-snippet-3
+  :end-before: end-snippet-3
 
-    print ('Training took %f minutes' % (pretraining_time / 60.))
 
 In order to get a feeling of what the network learned we are going to
 plot the filters (defined by the weight matrix). Bear in mind, however,
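Incidentally, the removed copy shows why this deduplication matters: it had drifted out of sync with code/dA.py. `end_time = time.clock` is missing the call parentheses (it binds the function object, not a timestamp), and the final print references an undefined `pretraining_time`. For reference, a corrected timing sketch (Python 2, as in the tutorial):

    import time

    start_time = time.clock()
    # ... run the training epochs here ...
    end_time = time.clock()  # note the (): time.clock alone is the function object

    training_time = end_time - start_time
    print('Training took %f minutes' % (training_time / 60.))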
@@ -470,12 +256,9 @@ To plot our filters we will need the help of ``tile_raster_images`` (see
 using the help of the Python Image Library, the following lines of code will
 save the filters as an image :
 
-.. code-block:: python
-
-    image = Image.fromarray(tile_raster_images(X=da.W.get_value(borrow=True).T,
-                img_shape=(28, 28), tile_shape=(10, 10),
-                tile_spacing=(1, 1)))
-    image.save('filters_corruption_30.png')
+.. literalinclude:: ../code/dA.py
+  :start-after: start-snippet-4
+  :end-before: end-snippet-4
 
 
 Running the Code

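`tile_raster_images` lays each 784-dimensional weight row out as a 28x28 tile on a grid. A rough NumPy/PIL sketch of the same idea (illustrative only; the tutorial's helper also handles normalization and tile spacing differently):

    import numpy
    from PIL import Image

    def tile_filters(W_T, img_shape=(28, 28), tile_shape=(10, 10)):
        """Pack the first rows*cols rows of W_T (one filter per row)
        into a (rows*h) x (cols*w) grayscale grid."""
        h, w = img_shape
        rows, cols = tile_shape
        out = numpy.zeros((rows * h, cols * w), dtype='uint8')
        for k in range(rows * cols):
            f = W_T[k].reshape(img_shape)
            f = (f - f.min()) / (f.max() - f.min() + 1e-8)  # rescale to [0, 1]
            r, c = divmod(k, cols)
            out[r * h:(r + 1) * h, c * w:(c + 1) * w] = (255 * f).astype('uint8')
        return out

    # e.g. Image.fromarray(tile_filters(da.W.get_value(borrow=True).T)).save('filters.png')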