@@ -253,7 +253,7 @@ The final denoising autoencoder class becomes :
253253
254254 class dA(object):
255255
256- def __init__(self, n_visible= 784, n_hidden= 500, input= None):
256+ def __init__(self, n_visible= 784, n_hidden= 500, input= None, corruption_level = 0.1 ):
257257
258258 self.n_visible = n_visible
259259 self.n_hidden = n_hidden
@@ -289,8 +289,16 @@ The final denoising autoencoder class becomes :
289289 self.x = T.dmatrix(name = 'input')
290290 else:
291291 self.x = input
292-
293- self.tilde_x = theano_rng.binomial( self.x.shape, 1, 0.9) * self.x
292+ # keep 90% of the inputs the same and zero-out randomly selected subset of 10% of the inputs
293+ # note : first argument of theano.rng.binomial is the shape(size) of
294+ # random numbers that it should produce
295+ # second argument is the number of trials
296+ # third argument is the probability of success of any trial
297+ #
298+ # this will produce an array of 0s and 1s where 1 has a
299+ # probability of 1 - ``corruption_level`` and 0 with
300+ # ``corruption_level``
301+ self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
294302 self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b)
295303 self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
296304 self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
@@ -341,7 +349,7 @@ before for a denoising autoencoder :
341349
342350 class StackedAutoencoder():
343351
344- def __init__(self, input, n_ins, hidden_layers_sizes, n_outs):
352+ def __init__(self, input, n_ins, hidden_layers_sizes, n_outs, corruption_levels ):
345353 """ This class is made to support a variable number of layers.
346354
347355 :param input: symbolic variable describing the input of the SdA
@@ -352,6 +360,10 @@ before for a denoising autoencoder :
352360 at least one value
353361
354362 :param n_outs: dimension of the output of the network
363+
364+ :param corruption_levels: amount of corruption to use for each
365+ layer
366+
355367 """
356368
357369Next step, we create a denoising autoencoder for each layer and link them
@@ -365,7 +377,8 @@ together:
365377            raise Exception(' You must have at least one hidden layer ')
366378
367379 # add first layer:
368- layer = dA(n_ins, hidden_layers_sizes[0], input = input)
380+ layer = dA(n_ins, hidden_layers_sizes[0], input = input, \
381+ corruption_level = corruption_levels[0])
369382 self.layers += [layer]
370383 # add all intermediate layers
371384 for i in xrange( 1, len(hidden_layers_sizes) ):
@@ -374,7 +387,8 @@ together:
374387 # of layers `self.layers`
375388 layer = dA( hidden_layers_sizes[i-1], \
376389 hidden_layers_sizes[i], \
377- input = self.layers[-1].hidden_values )
390+ input = self.layers[-1].hidden_values,\
391+ corruption_level = corruption_levels[i])
378392 self.layers += [layer]
379393
380394
@@ -433,7 +447,7 @@ autoencoder :
433447
434448 # construct the logistic regression class
435449 classifier = SdA( input=x, n_ins=28*28, \
436- hidden_layers_sizes = [500, 500, 500 ], n_outs=10)
450+ hidden_layers_sizes = [1000, 1000, 1o00 ], n_outs=10)
437451
438452
439453There are two stages in training this network, a layer wise pre-training and
@@ -532,11 +546,18 @@ early stopping loop and we are done.
532546Running the Code
533547++++++++++++++++
534548
535- pretraining duration : 79.4971 minutes
549+ The user can run the code by calling:
550+
551+ .. code-block:: bash
552+
553+ python code/SdA.py
536554
555+ By default the code runs 15 pre-training epochs for each layer, with
556+ a corruption level of 0.1 and a learning rate of 0.1. Pre-training takes
557+ 79.4971 minutes. Fine-tuning is completed after 144 epochs in 282.195
558+ minutes and results in a validation score of 1.36 %, with a test
559	 performance of 1.6 %.
537560
538- 282.195 min , 144epochs 1.36 validation , 1.6 test
539- TODO
540561
541562References
542563++++++++++
0 commit comments