@@ -137,11 +137,11 @@ class dA(object):
 
     def __init__(self, n_visible=784, n_hidden=500, input=None):
         """
-        Initialize the DAE class by specifying the number of visible units (the
+        Initialize the dA class by specifying the number of visible units (the
         dimension d of the input), the number of hidden units (the dimension
         d' of the latent or hidden space) and by giving a symbolic variable
         for the input. Such a symbolic variable is useful when the input is
-        the result of some computations. For example when dealing with SDAEs,
+        the result of some computations. For example, when dealing with SdAs,
         the dA on layer 2 gets as input the output of the dA on layer 1.
         This output can be written as a function of the input to the entire
         model, and as such can be computed by Theano whenever needed.
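
To make the stacking idea in this docstring concrete, here is a minimal sketch of chaining two dAs through the constructor above. The layer sizes are made up, and it assumes the full class (only partly shown in this diff) defines `hidden_values` as it does below:

    # Hypothetical two-layer chain: `second` consumes the symbolic
    # `hidden_values` of `first`, so evaluating second's outputs
    # implicitly evaluates first's encoder as well.
    first = dA(n_visible=784, n_hidden=500)
    second = dA(n_visible=500, n_hidden=250, input=first.hidden_values)
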
@@ -188,9 +188,9 @@ def __init__(self, n_visible=784, n_hidden=500, input=None):
         if input == None:
             # we use a matrix because we expect a minibatch of several examples,
             # each example being a row
-            x = T.dmatrix(name='input')
+            self.x = T.dmatrix(name='input')
         else:
-            x = input
+            self.x = input
         # Equation (1)
         # note: the first argument of theano_rng.binomial is the shape (size) of
         #       the random numbers it should produce
@@ -199,15 +199,15 @@ def __init__(self, n_visible=784, n_hidden=500, input=None):
         #
         # this will produce an array of 0s and 1s, where 1 has a
         # probability of 0.9 and 0 has a probability of 0.1
-        tilde_x = theano_rng.binomial(x.shape, 1, 0.9) * x
+        self.tilde_x = theano_rng.binomial(self.x.shape, 1, 0.9) * self.x
         # Equation (2)
         # note: y is stored as an attribute of the class so that it can be
         #       used later when stacking dAs.
-        self.y = T.nnet.sigmoid(T.dot(tilde_x, self.W) + self.b)
+        self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W) + self.b)
         # Equation (3)
-        z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+        self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
-        self.L = -T.sum(x * T.log(z) + (1 - x) * T.log(1 - z), axis=1)
+        self.L = -T.sum(self.x * T.log(self.z) + (1 - self.x) * T.log(1 - self.z), axis=1)
         # note: L is now a vector, where each element is the cross-entropy cost
         #       of the reconstruction of the corresponding example of the
         #       minibatch. We need to compute the average of all these to get
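
For reference, the four equations these comments point to are the standard denoising autoencoder equations, reconstructed here to match the code above (s is the logistic sigmoid; the masking corruption keeps each input component with probability 0.9):

    \tilde{x} \sim q_D(\tilde{x} \mid x)                                       \quad (1)
    y = s(W \tilde{x} + b)                                                     \quad (2)
    z = s(W' y + b')                                                           \quad (3)
    L_H(x, z) = -\sum_{k=1}^{d} \left[ x_k \log z_k + (1 - x_k) \log(1 - z_k) \right] \quad (4)
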
@@ -217,7 +217,7 @@ def __init__(self, n_visible=784, n_hidden=500, input=None):
         # we will need the hidden layer obtained from the uncorrupted
         # input when, for example, we pass it as input to the layer
         # above
-        self.hidden_values = T.nnet.sigmoid(T.dot(x, self.W) + self.b)
+        self.hidden_values = T.nnet.sigmoid(T.dot(self.x, self.W) + self.b)
 
 
 
@@ -262,13 +262,17 @@ def __init__(self, input, n_ins, hidden_layers_sizes, n_outs):
             # input size is that of the previous layer
             # input is the output of the last layer inserted in our list
             # of layers `self.layers`
+            print i
+            print theano.pp(self.layers[-1].hidden_values)
             layer = dA(hidden_layers_sizes[i - 1],
                        hidden_layers_sizes[i],
                        input=self.layers[-1].hidden_values)
             self.layers += [layer]
 
 
         self.n_layers = len(self.layers)
+        print '------------------------------------------'
+        print theano.pp(self.layers[-1].hidden_values)
         # now we need to use the same weights and biases to define an MLP
         # We can simply use the `hidden_values` of the top layer, which
         # computes the input that we would normally feed to the logistic
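
The debug prints added above use theano.pp, which pretty-prints a symbolic expression graph; because each layer's `hidden_values` is defined in terms of the previous layer's, the top layer's printed expression nests every sigmoid below it. A self-contained toy illustration (all names here are made up):

    import numpy
    import theano
    import theano.tensor as T

    v = T.dmatrix('v')
    W1 = theano.shared(numpy.zeros((3, 2)), name='W1')  # toy weights
    b1 = theano.shared(numpy.zeros(2), name='b1')
    h1 = T.nnet.sigmoid(T.dot(v, W1) + b1)              # first layer
    W2 = theano.shared(numpy.zeros((2, 2)), name='W2')
    b2 = theano.shared(numpy.zeros(2), name='b2')
    h2 = T.nnet.sigmoid(T.dot(h1, W2) + b2)             # built on top of h1
    print theano.pp(h2)  # the printed graph contains h1's graph inside it
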
@@ -300,8 +304,8 @@ def errors(self, y):
 
 
 
-def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs=20,
-        pretraining_lr=0.1, n_iter=1000, dataset='mnist.pkl.gz'):
+def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs=5,
+        pretraining_lr=0.1, training_epochs=1000, dataset='mnist.pkl.gz'):
     """
     Demonstrate stochastic gradient descent optimization for a multilayer
     perceptron
@@ -345,17 +349,17 @@ def shared_dataset(data_xy):
     n_test_batches = test_set_x.value.shape[0] / batch_size
 
     # allocate symbolic variables for the data
-    minibatch_offset = T.lscalar()  # offset to the start of a [mini]batch
-    x = T.matrix('x')   # the data is presented as rasterized images
-    y = T.ivector('y')  # the labels are presented as a 1D vector of
-                        # [int] labels
+    index = T.lscalar()  # index to a [mini]batch
+    x = T.matrix('x')    # the data is presented as rasterized images
+    y = T.ivector('y')   # the labels are presented as a 1D vector of
+                         # [int] labels
 
 
 
 
     # construct the stacked denoising autoencoder class
     classifier = SdA(input=x, n_ins=28 * 28,
-                     hidden_layers_sizes=[700, 700, 700], n_outs=10)
+                     hidden_layers_sizes=[500, 500, 500], n_outs=10)
 
     ## Pre-train layer-wise
     for i in xrange(classifier.n_layers):
@@ -369,19 +373,35 @@ def shared_dataset(data_xy):
         new_b = classifier.layers[i].b - gb * pretraining_lr
         new_b_prime = classifier.layers[i].b_prime - gb_prime * pretraining_lr
         cost = classifier.layers[i].cost
-        layer_update = theano.function([minibatch_offset], cost,
+        print '---------------------------------------------------'
+        print 'Layer:', i
+        print 'x:', theano.pp(classifier.layers[i].x)
+        print ''
+        print 'tilde_x:', theano.pp(classifier.layers[i].tilde_x)
+        print ''
+        print 'y:', theano.pp(classifier.layers[i].y)
+        print ''
+        print 'z:', theano.pp(classifier.layers[i].z)
+        print ''
+        print 'L:', theano.pp(classifier.layers[i].L)
+        print ''
+        print 'cost:', theano.pp(classifier.layers[i].cost)
+        print ''
+        print 'hid:', theano.pp(classifier.layers[i].hidden_values)
+        print '================================================='
+        layer_update = theano.function([index], [cost, classifier.layers[i].x, classifier.layers[i].z],
                 updates={
                     classifier.layers[i].W: new_W,
                     classifier.layers[i].b: new_b,
                     classifier.layers[i].b_prime: new_b_prime},
                 givens={
-                    x: test_set_x[minibatch_offset:minibatch_offset + batch_size]})
+                    x: train_set_x[index * batch_size:(index + 1) * batch_size]})
         # go through pretraining epochs
         for epoch in xrange(pretraining_epochs):
             # go through the training set
-            for batch_offset in xrange(n_train_batches):
-                layer_update(i * batch_size)
-            print 'Pre-training layer %i, epoch %d' % (i, epoch)
+            for batch_index in xrange(n_train_batches):
+                c = layer_update(batch_index)
+                print 'Pre-training layer %i, epoch %d' % (i, epoch), c, batch_index
 
 
 
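
Two idioms in this pretraining function deserve a note. `layer_update` takes only a minibatch index: `givens` makes Theano substitute the matching slice of the shared training matrix for the symbolic `x`, while `updates` overwrites the layer's parameters with the hand-derived SGD step (new_W = W - gW * pretraining_lr). A minimal self-contained sketch of the same pattern, with made-up names and a toy quadratic cost:

    import numpy
    import theano
    import theano.tensor as T

    batch_size = 2
    data = theano.shared(numpy.arange(12.).reshape(6, 2))  # toy dataset
    w = theano.shared(numpy.ones(2))                       # toy parameter

    idx = T.lscalar('idx')
    xs = T.dmatrix('xs')
    cost = T.sum(T.dot(xs, w) ** 2)                        # toy cost
    gw = T.grad(cost, w)

    step = theano.function([idx], cost,
            updates={w: w - 0.01 * gw},                    # manual SGD step
            givens={xs: data[idx * batch_size:(idx + 1) * batch_size]})

    print step(0)  # consumes rows 0-1 of `data` and updates w in place
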
@@ -393,14 +413,14 @@ def shared_dataset(data_xy):
     # compiling a theano function that computes the mistakes that are made
     # by the model on a minibatch
-    test_model = theano.function([minibatch_offset], cost,
+    test_model = theano.function([index], classifier.errors(y),
             givens={
-                x: test_set_x[minibatch_offset:minibatch_offset + batch_size],
-                y: test_set_y[minibatch_offset:minibatch_offset + batch_size]})
+                x: test_set_x[index * batch_size:(index + 1) * batch_size],
+                y: test_set_y[index * batch_size:(index + 1) * batch_size]})
 
-    validate_model = theano.function([minibatch_offset], cost,
+    validate_model = theano.function([index], classifier.errors(y),
             givens={
-                x: valid_set_x[minibatch_offset:minibatch_offset + batch_size],
-                y: valid_set_y[minibatch_offset:minibatch_offset + batch_size]})
+                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
+                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
 
 
     # compute the gradients of the cost with respect to theta and add them to the
@@ -425,10 +445,10 @@ def shared_dataset(data_xy):
     # compiling a theano function `train_model` that returns the cost and, at
     # the same time, updates the parameters of the model based on the rules
     # defined in `updates`
-    train_model = theano.function([minibatch_offset], cost, updates=updates,
+    train_model = theano.function([index], cost, updates=updates,
             givens={
-                x: train_set_x[minibatch_offset:minibatch_offset + batch_size],
-                y: train_set_y[minibatch_offset:minibatch_offset + batch_size]})
+                x: train_set_x[index * batch_size:(index + 1) * batch_size],
+                y: train_set_y[index * batch_size:(index + 1) * batch_size]})
 
     # early-stopping parameters
     patience = 10000  # look at this many examples regardless
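
The `updates` dictionary consumed by `train_model` is built earlier (outside this diff) by pairing each fine-tuning parameter with a gradient descent step. A hedged sketch of that construction, assuming the SdA collects its parameters in a list `classifier.params` (a name not shown here):

    # hypothetical: one SGD update rule per fine-tuning parameter
    gparams = T.grad(cost, classifier.params)
    updates = {}
    for param, gparam in zip(classifier.params, gparams):
        updates[param] = param - learning_rate * gparam
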
@@ -447,18 +467,18 @@ def shared_dataset(data_xy):
     best_validation_loss = float('inf')
     test_score = 0.
     start_time = time.clock()
-    # have a maximum of `n_iter` iterations through the entire dataset
-    for iter in xrange(n_iter * n_train_batches):
+    cost_ij = []
+    for epoch in xrange(training_epochs):
+        for minibatch_index in xrange(n_train_batches):
 
-        # get epoch and minibatch index
-        epoch = iter / n_train_batches
-        minibatch_index = iter % n_train_batches
-        minibatch_offset = minibatch_index * batch_size
-
-        cost_ij = train_model(minibatch_offset)
+            cost_ij += [train_model(minibatch_index)]
+            iter = epoch * n_train_batches + minibatch_index
 
             if (iter + 1) % validation_frequency == 0:
-                validation_losses = [validate_model(i * batch_size) for i in xrange(n_valid_batches)]
+                print cost_ij
+                cost_ij = []
+                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
+                print validation_losses
                 this_validation_loss = numpy.mean(validation_losses)
                 print('epoch %i, minibatch %i/%i, validation error %f %%' %
                     (epoch, minibatch_index + 1, n_train_batches,
@@ -478,7 +498,7 @@ def shared_dataset(data_xy):
                     best_iter = iter
 
                     # test it on the test set
-                    test_losses = [test_model(i * batch_size) for i in xrange(n_test_batches)]
+                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                     test_score = numpy.mean(test_losses)
                     print(('     epoch %i, minibatch %i/%i, test error of best '
                            'model %f %%') %
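
For context, the patience logic that surrounds these hunks follows the usual recipe of these tutorials: a sufficiently improved validation loss extends the patience window, and training stops once `iter` outruns it. A hedged sketch, where `patience_increase` and `improvement_threshold` are assumed names not shown in this diff:

    # inside the validation check:
    if this_validation_loss < best_validation_loss * improvement_threshold:
        patience = max(patience, iter * patience_increase)  # extend window
    if patience <= iter:
        break  # give up once patience is exhausted
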