@@ -253,7 +253,7 @@ The final denoising autoencoder class becomes :
253253
254254 class dA(object):
255255
256- def __init__(self, n_visible= 784, n_hidden= 500, input= None):
256+ def __init__(self, n_visible= 784, n_hidden= 500, input= None, corruption_level = 0.1 ):
257257
258258 self.n_visible = n_visible
259259 self.n_hidden = n_hidden
@@ -289,8 +289,16 @@ The final denoising autoencoder class becomes :
289289 self.x = T.dmatrix(name = 'input')
290290 else:
291291 self.x = input
292-
293- self.tilde_x = theano_rng.binomial( self.x.shape, 1, 0.9) * self.x
292+ # keep 90% of the inputs the same and zero-out randomly selected subset of 10% of the inputs
293+ # note : first argument of theano.rng.binomial is the shape(size) of
294+ # random numbers that it should produce
295+ # second argument is the number of trials
296+ # third argument is the probability of success of any trial
297+ #
298+ # this will produce an array of 0s and 1s where 1 has a
299+ # probability of 1 - ``corruption_level`` and 0 with
300+ # ``corruption_level``
301+ self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
294302 self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b)
295303 self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
296304 self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
@@ -341,7 +349,7 @@ before for a denoising autoencoder :
341349
342350 class StackedAutoencoder():
343351
344- def __init__(self, input, n_ins, hidden_layers_sizes, n_outs):
352+ def __init__(self, input, n_ins, hidden_layers_sizes, n_outs, corruption_levels ):
345353 """ This class is made to support a variable number of layers.
346354
347355 :param input: symbolic variable describing the input of the SdA
@@ -352,6 +360,10 @@ before for a denoising autoencoder :
352360 at least one value
353361
354362 :param n_outs: dimension of the output of the network
363+
364+ :param corruption_levels: amount of corruption to use for each
365+ layer
366+
355367 """
356368
357369Next step, we create a denoising autoencoder for each layer and link them
@@ -365,7 +377,8 @@ together:
365377            raise Exception(' You must have at least one hidden layer ')
366378
367379 # add first layer:
368- layer = dA(n_ins, hidden_layers_sizes[0], input = input)
380+ layer = dA(n_ins, hidden_layers_sizes[0], input = input, \
381+ corruption_level = corruption_levels[0])
369382 self.layers += [layer]
370383 # add all intermediate layers
371384 for i in xrange( 1, len(hidden_layers_sizes) ):
@@ -374,7 +387,8 @@ together:
374387 # of layers `self.layers`
375388 layer = dA( hidden_layers_sizes[i-1], \
376389 hidden_layers_sizes[i], \
377- input = self.layers[-1].hidden_values )
390+ input = self.layers[-1].hidden_values,\
391+ corruption_level = corruption_levels[i])
378392 self.layers += [layer]
379393
380394
@@ -433,7 +447,7 @@ autoencoder :
433447
434448 # construct the logistic regression class
435449 classifier = SdA( input=x, n_ins=28*28, \
436- hidden_layers_sizes = [500, 500, 500 ], n_outs=10)
450+ hidden_layers_sizes = [1000, 1000, 1o00 ], n_outs=10)
437451
438452
439453There are two stages in training this network, a layer wise pre-training and
@@ -532,11 +546,18 @@ early stopping loop and we are done.
532546Running the Code
533547++++++++++++++++
534548
535- pretraining duration : 79.4971 minutes
549+ The user can run the code by calling:
550+
551+ .. code-block:: bash
552+
553+ python code/SdA.py
536554
555+ By default the code runs 15 pre-training epochs for each layer, with
556+ a corruption level of 0.1 and a learning rate of 0.1. Pre-training takes
557+ 79.4971 minutes. Fine-tuning is completed after 144 epochs in 282.195
558+ minutes and results in a validation score of 1.36 %, with a test
559	 performance of 1.6 %.
537560
538- 282.195 min , 144epochs 1.36 validation , 1.6 test
539- TODO
540561
541562References
542563++++++++++
0 commit comments