
Commit 69c8ee9

Merge branch 'master' of git@github.com:lisa-lab/DeepLearningTutorials
2 parents: f84b517 + e495a6c

2 files changed

Lines changed: 7 additions & 15 deletions


code/SdA.py

Lines changed: 5 additions & 9 deletions
@@ -207,6 +207,8 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         # Equation (3)
         self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
+        # note : we sum over the size of a datapoint; if we are using minibatches,
+        # L will be a vector, with one entry per example in minibatch
         self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
         # note : L is now a vector, where each element is the cross-entropy cost
         # of the reconstruction of the corresponding example of the
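
The comment added here can be checked numerically: for a minibatch of shape (batch size, n_visible), summing the cross-entropy term over axis=1 collapses the 784 components of each datapoint and leaves one cost per example. A minimal NumPy sketch of that shape behaviour (toy data, not part of the tutorial code):

    import numpy

    rng = numpy.random.RandomState(0)
    x = rng.uniform(size=(20, 784))                        # a minibatch of 20 examples
    z = rng.uniform(low=0.01, high=0.99, size=(20, 784))   # a fake sigmoid reconstruction in (0, 1)

    # cross-entropy of each reconstruction, summed over the 784 components of a datapoint
    L = -numpy.sum(x * numpy.log(z) + (1 - x) * numpy.log(1 - z), axis=1)

    print(L.shape)         # (20,): one cross-entropy value per example in the minibatch
    print(numpy.mean(L))   # the scalar minibatch cost is typically the mean of L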
@@ -260,17 +262,13 @@ def __init__(self, input, n_ins, hidden_layers_sizes, n_outs):
             # input size is that of the previous layer
             # input is the output of the last layer inserted in our list
             # of layers `self.layers`
-            print i
-            print theano.pp(self.layers[-1].hidden_values)
             layer = dA( hidden_layers_sizes[i-1], \
                         hidden_layers_sizes[i], \
                         input = self.layers[-1].hidden_values )
             self.layers += [layer]
 
 
         self.n_layers = len(self.layers)
-        print '------------------------------------------'
-        print theano.pp(self.layers[-1].hidden_values)
         # now we need to use same weights and biases to define an MLP
         # We can simply use the `hidden_values` of the top layer, which
         # computes the input that we would normally feed to the logistic
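
The code around the removed debug prints builds the stack greedily: each new dA receives the previous layer's symbolic hidden_values as its input, so the layers compose into a single Theano graph. A rough sketch of that wiring with a toy one-layer class (hypothetical MiniDA; the real dA class with tied weights lives in code/SdA.py):

    import numpy
    import theano
    import theano.tensor as T

    class MiniDA(object):
        """Toy stand-in for dA: a single sigmoid hidden layer (hypothetical)."""
        def __init__(self, n_in, n_hid, input):
            rng = numpy.random.RandomState(1234)
            W_init = numpy.asarray(rng.uniform(-0.1, 0.1, (n_in, n_hid)),
                                   dtype=theano.config.floatX)
            self.W = theano.shared(W_init, name='W')
            self.b = theano.shared(numpy.zeros(n_hid, dtype=theano.config.floatX), name='b')
            self.hidden_values = T.nnet.sigmoid(T.dot(input, self.W) + self.b)

    x = T.matrix('x')
    hidden_layers_sizes = [1000, 1000, 1000]
    layers = [MiniDA(28 * 28, hidden_layers_sizes[0], input=x)]
    for i in xrange(1, len(hidden_layers_sizes)):
        # each new layer reads the hidden representation of the layer below it
        layers += [MiniDA(hidden_layers_sizes[i - 1], hidden_layers_sizes[i],
                          input=layers[-1].hidden_values)]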
@@ -302,7 +300,7 @@ def errors(self, y):
 
 
 
-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 10, \
+def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 15, \
                         pretraining_lr = 0.1, training_epochs = 1000, dataset='mnist.pkl.gz'):
     """
     Demonstrate stochastic gradient descent optimization for a multilayer
@@ -357,7 +355,7 @@ def shared_dataset(data_xy):
 
     # construct the logistic regression class
     classifier = SdA( input=x, n_ins=28*28, \
-                      hidden_layers_sizes = [700, 700, 700], n_outs=10)
+                      hidden_layers_sizes = [1000, 1000, 1000], n_outs=10)
 
     ## Pre-train layer-wise
     for i in xrange(classifier.n_layers):
@@ -383,7 +381,7 @@ def shared_dataset(data_xy):
         # go through the training set
         for batch_index in xrange(n_train_batches):
             c = layer_update(batch_index)
-        print 'Pre-training layer %i, epoch %d'%(i,epoch),c
+        print 'Pre-training layer %i, epoch %d'%(i,epoch),c[0]
 
 
 
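The switch from c to c[0] reflects that a Theano function compiled with a list of outputs returns a Python list, so the pre-training cost has to be indexed out of it before printing. A small sketch of that behaviour (toy function, not the tutorial's layer_update):

    import theano
    import theano.tensor as T

    a = T.dscalar('a')
    f = theano.function([a], [a * 2])   # outputs declared as a list with one element
    c = f(3.0)
    print(c)      # [array(6.0)]: a one-element list
    print(c[0])   # 6.0: the scalar value we actually want to report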

@@ -458,10 +456,8 @@ def shared_dataset(data_xy):
             iter = epoch * n_train_batches + minibatch_index
 
             if (iter+1) % validation_frequency == 0:
-                print cost_ij
                 cost_ij = []
                 validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
-                print validation_losses
                 this_validation_loss = numpy.mean(validation_losses)
                 print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                       (epoch, minibatch_index+1, n_train_batches, \
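
What survives this hunk is the standard periodic-validation pattern: every validation_frequency minibatches, evaluate the model on every validation batch and report the mean error rate. A minimal sketch of that schedule with made-up numbers and a hypothetical stand-in for validate_model:

    import numpy

    n_train_batches = 100
    n_valid_batches = 20
    validation_frequency = n_train_batches          # validate roughly once per epoch

    def validate_model(batch_index):
        """Hypothetical stand-in returning a per-batch zero-one error rate."""
        return 0.05 + 0.001 * batch_index

    for epoch in xrange(2):
        for minibatch_index in xrange(n_train_batches):
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))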

doc/SdA.txt

Lines changed: 2 additions & 6 deletions
@@ -433,11 +433,7 @@ TODO
 References
 ++++++++++
 
-.. [Vincent08] Vincent, P., Larochelle H., Bengio Y. and Manzagol P.A.
-   (2008). Extracting and Composing Robust Features with Denoising
-   Autoencoders. ICML'08, pp. 1096 - 1103
+.. [Vincent08] Vincent, P., Larochelle H., Bengio Y. and Manzagol P.A. (2008). Extracting and Composing Robust Features with Denoising Autoencoders. ICML'08, pp. 1096-1103
 
-.. [Bengio07] Bengio Y., Lamblin P., Popovici D. and Larochelle H.
-   (2007). Greedy Layer-Wise Training of Deep Networks. NIPS'06, pp
-   153-160
+.. [Bengio07] Bengio Y., Lamblin P., Popovici D. and Larochelle H. (2007). Greedy Layer-Wise Training of Deep Networks. NIPS'06, pp. 153-160
 