@@ -109,8 +109,7 @@ using tied weights in this tutorial, :math:`\mathbf{W}^T` will be used for
 :math:`\mathbf{W'}`):

 .. literalinclude:: ../code/dA.py
-  :start-after: start-snippet-1
-  :end-before: end-snippet-1
+  :pyobject: dA.__init__

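As an aside, the tied-weights convention mentioned above (:math:`\mathbf{W'} = \mathbf{W}^T`) is easy to see in a plain NumPy sketch of the encode/decode pair. The shapes, the toy minibatch and the ``sigmoid`` helper below are illustrative only; the tutorial builds the same expressions symbolically with Theano.

.. code-block:: python

    import numpy as np

    def sigmoid(a):
        return 1.0 / (1.0 + np.exp(-a))

    rng = np.random.RandomState(123)
    n_visible, n_hidden = 784, 500

    # illustrative initialisation, mirroring the uniform range used in dA.__init__
    bound = 4.0 * np.sqrt(6.0 / (n_hidden + n_visible))
    W = rng.uniform(-bound, bound, size=(n_visible, n_hidden))
    b = np.zeros(n_hidden)         # hidden bias (bhid)
    b_prime = np.zeros(n_visible)  # visible bias (bvis)

    x = rng.rand(20, n_visible)        # toy minibatch, one example per row
    y = sigmoid(x.dot(W) + b)          # code:           y = s(W x + b)
    z = sigmoid(y.dot(W.T) + b_prime)  # reconstruction: z = s(W' y + b'), with W' = W.T
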
 Note that we pass the symbolic ``input`` to the autoencoder as a parameter.
 This is so that we can concatenate layers of autoencoders to form a deep
@@ -212,23 +211,8 @@ corruption mechanism of randomly masking entries of the input by making
 them zero. The code below
 does just that :

-.. code-block:: python
-
-    from theano.tensor.shared_randomstreams import RandomStreams
-
-    def get_corrupted_input(self, input, corruption_level):
-        """ This function keeps ``1-corruption_level`` entries of the inputs the same
-        and zero-out randomly selected subset of size ``coruption_level``
-        Note : first argument of theano.rng.binomial is the shape(size) of
-               random numbers that it should produce
-               second argument is the number of trials
-               third argument is the probability of success of any trial
-
-        this will produce an array of 0s and 1s where 1 has a probability of
-        1 - ``corruption_level`` and 0 with ``corruption_level``
-        """
-        return self.theano_rng.binomial(size=input.shape, n=1, p=1 - corruption_level) * input
-
+.. literalinclude:: ../code/dA.py
+  :pyobject: dA.get_corrupted_input


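The same masking noise is straightforward to reproduce outside Theano. Below is a minimal NumPy sketch that keeps each entry with probability ``1 - corruption_level`` and zeroes it otherwise; the function name and the toy minibatch are illustrative, not part of the tutorial code.

.. code-block:: python

    import numpy as np

    def corrupt_input(rng, x, corruption_level):
        # keep each entry with probability 1 - corruption_level, zero it otherwise,
        # mirroring the binomial mask used by get_corrupted_input
        mask = rng.binomial(n=1, p=1.0 - corruption_level, size=x.shape)
        return mask * x

    rng = np.random.RandomState(0)
    x = rng.rand(5, 784)                  # toy minibatch, one example per row
    tilde_x = corrupt_input(rng, x, 0.3)  # roughly 30% of the entries become 0
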
 In the stacked autoencoder class (:ref:`stacked_autoencoders`) the weights of
@@ -239,172 +223,8 @@ new ones will be constructed.

 The final denoising autoencoder class becomes :

-.. code-block:: python
-
-    class dA(object):
-        """Denoising Auto-Encoder class (dA)
-
-        A denoising autoencoders tries to reconstruct the input from a corrupted
-        version of it by projecting it first in a latent space and reprojecting
-        it afterwards back in the input space. Please refer to Vincent et al.,2008
-        for more details. If x is the input then equation (1) computes a partially
-        destroyed version of x by means of a stochastic mapping q_D. Equation (2)
-        computes the projection of the input into the latent space. Equation (3)
-        computes the reconstruction of the input, while equation (4) computes the
-        reconstruction error.
-
-        .. math::
-
-            \tilde{x} ~ q_D(\tilde{x}|x)                                  (1)
-
-            y = s(W \tilde{x} + b)                                        (2)
-
-            x = s(W' y + b')                                              (3)
-
-            L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)]   (4)
-
-        """
-
-        def __init__(self, numpy_rng, theano_rng=None, input=None, n_visible=784, n_hidden=500,
-                     W=None, bhid=None, bvis=None):
-            """
-            Initialize the dA class by specifying the number of visible units (the
-            dimension d of the input ), the number of hidden units ( the dimension
-            d' of the latent or hidden space ) and the corruption level. The
-            constructor also receives symbolic variables for the input, weights and
-            bias. Such a symbolic variables are useful when, for example the input is
-            the result of some computations, or when weights are shared between the
-            dA and an MLP layer. When dealing with SdAs this always happens,
-            the dA on layer 2 gets as input the output of the dA on layer 1,
-            and the weights of the dA are used in the second stage of training
-            to construct an MLP.
-
-            :type numpy_rng: numpy.random.RandomState
-            :param numpy_rng: number random generator used to generate weights
-
-            :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
-            :param theano_rng: Theano random generator; if None is given one is generated
-                               based on a seed drawn from `rng`
-
-            :type input: theano.tensor.TensorType
-            :paran input: a symbolic description of the input or None for standalone
-                          dA
-
-            :type n_visible: int
-            :param n_visible: number of visible units
-
-            :type n_hidden: int
-            :param n_hidden: number of hidden units
-
-            :type W: theano.tensor.TensorType
-            :param W: Theano variable pointing to a set of weights that should be
-                      shared belong the dA and another architecture; if dA should
-                      be standalone set this to None
-
-            :type bhid: theano.tensor.TensorType
-            :param bhid: Theano variable pointing to a set of biases values (for
-                         hidden units) that should be shared belong dA and another
-                         architecture; if dA should be standalone set this to None
-
-            :type bvis: theano.tensor.TensorType
-            :param bvis: Theano variable pointing to a set of biases values (for
-                         visible units) that should be shared belong dA and another
-                         architecture; if dA should be standalone set this to None
-
-
-            """
-            self.n_visible = n_visible
-            self.n_hidden = n_hidden
-
-            # create a Theano random generator that gives symbolic random values
-            if not theano_rng :
-                theano_rng = RandomStreams(rng.randint(2 ** 30))
-
-            # note : W' was written as `W_prime` and b' as `b_prime`
-            if not W:
-                # W is initialized with `initial_W` which is uniformely sampled
-                # from -4.*sqrt(6./(n_visible+n_hidden)) and 4.*sqrt(6./(n_hidden+n_visible))
-                # the output of uniform if converted using asarray to dtype
-                # theano.config.floatX so that the code is runable on GPU
-                initial_W = numpy.asarray(numpy_rng.uniform(
-                    low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
-                    high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
-                    size=(n_visible, n_hidden)), dtype=theano.config.floatX)
-                W = theano.shared(value=initial_W, name='W')
-
-            if not bvis:
-                bvis = theano.shared(value = numpy.zeros(n_visible,
-                                     dtype=theano.config.floatX), name='bvis')
-
-            if not bhid:
-                bhid = theano.shared(value=numpy.zeros(n_hidden,
-                                     dtype=theano.config.floatX), name='bhid')
-
-            self.W = W
-            # b corresponds to the bias of the hidden
-            self.b = bhid
-            # b_prime corresponds to the bias of the visible
-            self.b_prime = bvis
-            # tied weights, therefore W_prime is W transpose
-            self.W_prime = self.W.T
-            self.theano_rng = theano_rng
-            # if no input is given, generate a variable representing the input
-            if input == None:
-                # we use a matrix because we expect a minibatch of several examples,
-                # each example being a row
-                self.x = T.dmatrix(name='input')
-            else:
-                self.x = input
-
-            self.params = [self.W, self.b, self.b_prime]
-
-        def get_corrupted_input(self, input, corruption_level):
-            """ This function keeps ``1-corruption_level`` entries of the inputs the same
-            and zero-out randomly selected subset of size ``coruption_level``
-            Note : first argument of theano.rng.binomial is the shape(size) of
-                   random numbers that it should produce
-                   second argument is the number of trials
-                   third argument is the probability of success of any trial
-
-            this will produce an array of 0s and 1s where 1 has a probability of
-            1 - ``corruption_level`` and 0 with ``corruption_level``
-            """
-            return self.theano_rng.binomial(size=input.shape, n=1, p=1 - corruption_level) * input
-
-
-        def get_hidden_values(self, input):
-            """ Computes the values of the hidden layer """
-            return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
-
-        def get_reconstructed_input(self, hidden ):
-            """ Computes the reconstructed input given the values of the hidden layer """
-            return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
-
-        def get_cost_updates(self, corruption_level, learning_rate):
-            """ This function computes the cost and the updates for one trainng
-            step of the dA """
-
-            tilde_x = self.get_corrupted_input(self.x, corruption_level)
-            y = self.get_hidden_values( tilde_x)
-            z = self.get_reconstructed_input(y)
-            # note : we sum over the size of a datapoint; if we are using minibatches,
-            #        L will be a vector, with one entry per example in minibatch
-            L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1 )
-            # note : L is now a vector, where each element is the cross-entropy cost
-            #        of the reconstruction of the corresponding example of the
-            #        minibatch. We need to compute the average of all these to get
-            #        the cost of the minibatch
-            cost = T.mean(L)
-
-            # compute the gradients of the cost of the `dA` with respect
-            # to its parameters
-            gparams = T.grad(cost, self.params)
-            # generate the list of updates
-            updates = []
-            for param, gparam in zip(self.params, gparams):
-                updates.append((param, param - learning_rate * gparam))
-
-            return (cost, updates)
+.. literalinclude:: ../code/dA.py
+  :pyobject: dA


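As a quick numerical illustration of the reconstruction cost of equation (4), the snippet below evaluates it on toy arrays standing in for a minibatch ``x`` and its reconstruction ``z``; this is a NumPy sketch, not the symbolic expression built in ``get_cost_updates``.

.. code-block:: python

    import numpy as np

    def cross_entropy(x, z):
        # equation (4): sum over the visible dimensions, one entry per minibatch example
        return -np.sum(x * np.log(z) + (1.0 - x) * np.log(1.0 - z), axis=1)

    x = np.clip(np.random.RandomState(0).rand(4, 6), 0.05, 0.95)  # toy "data"
    z = np.full_like(x, 0.5)                                      # toy "reconstruction"
    print(cross_entropy(x, z))         # four per-example costs
    print(cross_entropy(x, z).mean())  # their mean is the minibatch cost
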
@@ -415,49 +235,15 @@ Putting it All Together
 It is easy now to construct an instance of our ``dA`` class and train
 it.

-.. code-block:: python
-
-    # allocate symbolic variables for the data
-    index = T.lscalar()    # index to a [mini]batch
-    x = T.matrix('x')  # the data is presented as rasterized images
-
-    ######################
-    # BUILDING THE MODEL #
-    ######################
-
-    rng = numpy.random.RandomState(123)
-    theano_rng = RandomStreams(rng.randint(2 ** 30))
-
-    da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
-            n_visible=28 * 28, n_hidden=500)
-
-    cost, updates = da.get_cost_updates(corruption_level=0.2,
-                                        learning_rate=learning_rate)
-
-
-    train_da = theano.function([index], cost, updates=updates,
-        givens = {x: train_set_x[index * batch_size: (index + 1) * batch_size]})
-
-    start_time = time.clock()
-
-    ############
-    # TRAINING #
-    ############
-
-    # go through training epochs
-    for epoch in xrange(training_epochs):
-        # go through trainng set
-        c = []
-        for batch_index in xrange(n_train_batches):
-            c.append(train_da(batch_index))
-
-        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
-
-    end_time = time.clock
+.. literalinclude:: ../code/dA.py
+  :language: python
+  :start-after: start-snippet-2
+  :end-before: end-snippet-2

-    training_time = (end_time - start_time)
+.. literalinclude:: ../code/dA.py
+  :start-after: start-snippet-3
+  :end-before: end-snippet-3

-    print ('Training took %f minutes' % (pretraining_time / 60.))

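For readers who want the whole procedure in one self-contained place, the following is a NumPy-only sketch of the same training loop on random toy data. The sizes and hyper-parameters are illustrative, and the gradients are written out by hand here, whereas the tutorial obtains them from ``T.grad``.

.. code-block:: python

    import numpy as np

    def sigmoid(a):
        return 1.0 / (1.0 + np.exp(-a))

    rng = np.random.RandomState(123)
    n_visible, n_hidden, batch_size = 64, 30, 20
    learning_rate, corruption_level, training_epochs = 0.1, 0.3, 5

    data = rng.rand(200, n_visible)  # toy data in [0, 1]; the tutorial uses MNIST
    n_train_batches = data.shape[0] // batch_size

    bound = 4.0 * np.sqrt(6.0 / (n_hidden + n_visible))
    W = rng.uniform(-bound, bound, size=(n_visible, n_hidden))
    b = np.zeros(n_hidden)
    b_prime = np.zeros(n_visible)

    for epoch in range(training_epochs):
        costs = []
        for index in range(n_train_batches):
            x = data[index * batch_size:(index + 1) * batch_size]
            tilde_x = rng.binomial(1, 1.0 - corruption_level, size=x.shape) * x
            y = sigmoid(tilde_x.dot(W) + b)    # hidden code
            z = sigmoid(y.dot(W.T) + b_prime)  # reconstruction
            costs.append(np.mean(
                -np.sum(x * np.log(z) + (1 - x) * np.log(1 - z), axis=1)))

            # hand-written gradients of the mean cross-entropy cost
            delta_z = (z - x) / batch_size            # d cost / d pre-activation of z
            delta_y = delta_z.dot(W) * y * (1.0 - y)  # d cost / d pre-activation of y
            grad_W = tilde_x.T.dot(delta_y) + delta_z.T.dot(y)  # encoder + tied decoder
            W -= learning_rate * grad_W
            b -= learning_rate * delta_y.sum(axis=0)
            b_prime -= learning_rate * delta_z.sum(axis=0)
        print('Training epoch %d, cost %f' % (epoch, np.mean(costs)))
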
 In order to get a feeling of what the network learned we are going to
 plot the filters (defined by the weight matrix). Bear in mind, however,
@@ -470,12 +256,9 @@ To plot our filters we will need the help of ``tile_raster_images`` (see
 using the help of the Python Image Library, the following lines of code will
 save the filters as an image :

-.. code-block:: python
-
-    image = Image.fromarray(tile_raster_images(X=da.W.get_value(borrow=True).T,
-                            img_shape=(28, 28), tile_shape=(10, 10),
-                            tile_spacing=(1, 1)))
-    image.save('filters_corruption_30.png')
+.. literalinclude:: ../code/dA.py
+  :start-after: start-snippet-4
+  :end-before: end-snippet-4

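If the tutorial's ``tile_raster_images`` helper is not at hand, a rough stand-in can be written with NumPy and PIL alone. The helper below is a simplified, hypothetical replacement that tiles the first hundred filters into one grayscale image; it is not the tutorial's utility and skips most of its options.

.. code-block:: python

    import numpy as np
    from PIL import Image

    def tile_filters(W, img_shape=(28, 28), tile_shape=(10, 10)):
        # arrange the first tile_shape[0] * tile_shape[1] columns of W as a grid of
        # img_shape images, each rescaled independently to the 0-255 range
        h, w = img_shape
        rows, cols = tile_shape
        out = np.zeros((rows * (h + 1) - 1, cols * (w + 1) - 1), dtype='uint8')
        for k in range(rows * cols):
            f = W[:, k].reshape(img_shape)
            f = (255.0 * (f - f.min()) / (f.max() - f.min() + 1e-8)).astype('uint8')
            r, c = divmod(k, cols)
            out[r * (h + 1):r * (h + 1) + h, c * (w + 1):c * (w + 1) + w] = f
        return out

    # hypothetical usage with a trained dA instance from the tutorial:
    #   Image.fromarray(tile_filters(da.W.get_value(borrow=True))).save('filters_corruption_30.png')
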

 Running the Code