@@ -109,8 +109,7 @@ using tied weights in this tutorial, :math:`\mathbf{W}^T` will be used for
:math:`\mathbf{W'}`):

.. literalinclude:: ../code/dA.py
- :start-after: start-snippet-1
- :end-before: end-snippet-1
+ :pyobject: dA.__init__

Note that we pass the symbolic ``input`` to the autoencoder as a parameter.
This is so that we can concatenate layers of autoencoders to form a deep
@@ -212,23 +211,8 @@ corruption mechanism of randomly masking entries of the input by making
them zero. The code below
does just that :

- .. code-block:: python
-
- from theano.tensor.shared_randomstreams import RandomStreams
-
- def get_corrupted_input(self, input, corruption_level):
- """ This function keeps ``1-corruption_level`` entries of the inputs the same
- and zero-out randomly selected subset of size ``coruption_level``
- Note : first argument of theano.rng.binomial is the shape(size) of
- random numbers that it should produce
- second argument is the number of trials
- third argument is the probability of success of any trial
-
- this will produce an array of 0s and 1s where 1 has a probability of
- 1 - ``corruption_level`` and 0 with ``corruption_level``
- """
- return self.theano_rng.binomial(size=input.shape, n=1, p=1 - corruption_level) * input
-
+ .. literalinclude:: ../code/dA.py
+ :pyobject: dA.get_corrupted_input


In the stacked autoencoder class (:ref:`stacked_autoencoders`) the weights of
@@ -239,172 +223,8 @@ new ones will be constructed.

The final denoising autoencoder class becomes :

- .. code-block:: python
-
- class dA(object):
- """Denoising Auto-Encoder class (dA)
-
- A denoising autoencoders tries to reconstruct the input from a corrupted
- version of it by projecting it first in a latent space and reprojecting
- it afterwards back in the input space. Please refer to Vincent et al.,2008
- for more details. If x is the input then equation (1) computes a partially
- destroyed version of x by means of a stochastic mapping q_D. Equation (2)
- computes the projection of the input into the latent space. Equation (3)
- computes the reconstruction of the input, while equation (4) computes the
- reconstruction error.
-
- .. math::
-
- \tilde{x} ~ q_D(\tilde{x}|x) (1)
-
- y = s(W \tilde{x} + b) (2)
-
- x = s(W' y + b') (3)
-
- L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)] (4)
-
- """
-
- def __init__(self, numpy_rng, theano_rng=None, input=None, n_visible=784, n_hidden=500,
- W=None, bhid=None, bvis=None):
- """
- Initialize the dA class by specifying the number of visible units (the
- dimension d of the input ), the number of hidden units ( the dimension
- d' of the latent or hidden space ) and the corruption level. The
- constructor also receives symbolic variables for the input, weights and
- bias. Such a symbolic variables are useful when, for example the input is
- the result of some computations, or when weights are shared between the
- dA and an MLP layer. When dealing with SdAs this always happens,
- the dA on layer 2 gets as input the output of the dA on layer 1,
- and the weights of the dA are used in the second stage of training
- to construct an MLP.
-
- :type numpy_rng: numpy.random.RandomState
- :param numpy_rng: number random generator used to generate weights
-
- :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
- :param theano_rng: Theano random generator; if None is given one is generated
- based on a seed drawn from `rng`
-
- :type input: theano.tensor.TensorType
- :paran input: a symbolic description of the input or None for standalone
- dA
-
- :type n_visible: int
- :param n_visible: number of visible units
-
- :type n_hidden: int
- :param n_hidden: number of hidden units
-
- :type W: theano.tensor.TensorType
- :param W: Theano variable pointing to a set of weights that should be
- shared belong the dA and another architecture; if dA should
- be standalone set this to None
-
- :type bhid: theano.tensor.TensorType
- :param bhid: Theano variable pointing to a set of biases values (for
- hidden units) that should be shared belong dA and another
- architecture; if dA should be standalone set this to None
-
- :type bvis: theano.tensor.TensorType
- :param bvis: Theano variable pointing to a set of biases values (for
- visible units) that should be shared belong dA and another
- architecture; if dA should be standalone set this to None
-
-
- """
- self.n_visible = n_visible
- self.n_hidden = n_hidden
-
- # create a Theano random generator that gives symbolic random values
- if not theano_rng :
- theano_rng = RandomStreams(rng.randint(2 ** 30))
-
- # note : W' was written as `W_prime` and b' as `b_prime`
- if not W:
- # W is initialized with `initial_W` which is uniformely sampled
- # from -4.*sqrt(6./(n_visible+n_hidden)) and 4.*sqrt(6./(n_hidden+n_visible))
- # the output of uniform if converted using asarray to dtype
- # theano.config.floatX so that the code is runable on GPU
- initial_W = numpy.asarray(numpy_rng.uniform(
- low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
- high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
- size=(n_visible, n_hidden)), dtype=theano.config.floatX)
- W = theano.shared(value=initial_W, name='W')
-
- if not bvis:
- bvis = theano.shared(value = numpy.zeros(n_visible,
- dtype=theano.config.floatX), name='bvis')
-
- if not bhid:
- bhid = theano.shared(value=numpy.zeros(n_hidden,
- dtype=theano.config.floatX), name='bhid')
-
- self.W = W
- # b corresponds to the bias of the hidden
- self.b = bhid
- # b_prime corresponds to the bias of the visible
- self.b_prime = bvis
- # tied weights, therefore W_prime is W transpose
- self.W_prime = self.W.T
- self.theano_rng = theano_rng
- # if no input is given, generate a variable representing the input
- if input == None:
- # we use a matrix because we expect a minibatch of several examples,
- # each example being a row
- self.x = T.dmatrix(name='input')
- else:
- self.x = input
-
- self.params = [self.W, self.b, self.b_prime]
-
- def get_corrupted_input(self, input, corruption_level):
- """ This function keeps ``1-corruption_level`` entries of the inputs the same
- and zero-out randomly selected subset of size ``coruption_level``
- Note : first argument of theano.rng.binomial is the shape(size) of
- random numbers that it should produce
- second argument is the number of trials
- third argument is the probability of success of any trial
-
- this will produce an array of 0s and 1s where 1 has a probability of
- 1 - ``corruption_level`` and 0 with ``corruption_level``
- """
- return self.theano_rng.binomial(size=input.shape, n=1, p=1 - corruption_level) * input
-
-
- def get_hidden_values(self, input):
- """ Computes the values of the hidden layer """
- return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
-
- def get_reconstructed_input(self, hidden ):
- """ Computes the reconstructed input given the values of the hidden layer """
- return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
-
- def get_cost_updates(self, corruption_level, learning_rate):
- """ This function computes the cost and the updates for one trainng
- step of the dA """
-
- tilde_x = self.get_corrupted_input(self.x, corruption_level)
- y = self.get_hidden_values( tilde_x)
- z = self.get_reconstructed_input(y)
- # note : we sum over the size of a datapoint; if we are using minibatches,
- # L will be a vector, with one entry per example in minibatch
- L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1 )
- # note : L is now a vector, where each element is the cross-entropy cost
- # of the reconstruction of the corresponding example of the
- # minibatch. We need to compute the average of all these to get
- # the cost of the minibatch
- cost = T.mean(L)
-
- # compute the gradients of the cost of the `dA` with respect
- # to its parameters
- gparams = T.grad(cost, self.params)
- # generate the list of updates
- updates = []
- for param, gparam in zip(self.params, gparams):
- updates.append((param, param - learning_rate * gparam))
-
- return (cost, updates)
+ .. literalinclude:: ../code/dA.py
+ :pyobject: dA


@@ -415,49 +235,15 @@ Putting it All Together
It is easy now to construct an instance of our ``dA`` class and train
it.

- .. code-block:: python
-
- # allocate symbolic variables for the data
- index = T.lscalar() # index to a [mini]batch
- x = T.matrix('x') # the data is presented as rasterized images
-
- ######################
- # BUILDING THE MODEL #
- ######################
-
- rng = numpy.random.RandomState(123)
- theano_rng = RandomStreams(rng.randint(2 ** 30))
-
- da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
- n_visible=28 * 28, n_hidden=500)
-
- cost, updates = da.get_cost_updates(corruption_level=0.2,
- learning_rate=learning_rate)
-
-
- train_da = theano.function([index], cost, updates=updates,
- givens = {x: train_set_x[index * batch_size: (index + 1) * batch_size]})
-
- start_time = time.clock()
-
- ############
- # TRAINING #
- ############
-
- # go through training epochs
- for epoch in xrange(training_epochs):
- # go through trainng set
- c = []
- for batch_index in xrange(n_train_batches):
- c.append(train_da(batch_index))
-
- print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
-
- end_time = time.clock
+ .. literalinclude:: ../code/dA.py
+ :language: python
+ :start-after: start-snippet-2
+ :end-before: end-snippet-2

- training_time = (end_time - start_time)
+ .. literalinclude:: ../code/dA.py
+ :start-after: start-snippet-3
+ :end-before: end-snippet-3

- print ('Training took %f minutes' % (pretraining_time / 60.))

In order to get a feeling of what the network learned we are going to
plot the filters (defined by the weight matrix). Bear in mind, however,
@@ -470,12 +256,9 @@ To plot our filters we will need the help of ``tile_raster_images`` (see
using the help of the Python Image Library, the following lines of code will
save the filters as an image :

- .. code-block:: python
-
- image = Image.fromarray(tile_raster_images(X=da.W.get_value(borrow=True).T,
- img_shape=(28, 28), tile_shape=(10, 10),
- tile_spacing=(1, 1)))
- image.save('filters_corruption_30.png')
+ .. literalinclude:: ../code/dA.py
+ :start-after: start-snippet-4
+ :end-before: end-snippet-4


Running the Code