@@ -137,11 +137,11 @@ class dA(object):
 
     def __init__(self, n_visible=784, n_hidden=500, input=None):
         """
-        Initialize the DAE class by specifying the number of visible units (the
+        Initialize the dA class by specifying the number of visible units (the
         dimension d of the input), the number of hidden units (the dimension
         d' of the latent or hidden space) and by giving a symbolic variable
         for the input. Such a symbolic variable is useful when the input is
-        the result of some computations. For example when dealing with SDAEs,
+        the result of some computations. For example when dealing with SdAs,
         the dA on layer 2 gets as input the output of the DAE on layer 1.
         This output can be written as a function of the input to the entire
         model, and as such can be computed by theano whenever needed.
@@ -170,8 +170,8 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         # the output of uniform is converted using asarray to dtype
         # theano.config.floatX so that the code is runnable on GPU
         initial_W = numpy.asarray(numpy.random.uniform( \
-              low  = -numpy.sqrt(6. / (n_visible + n_hidden)), \
-              high =  numpy.sqrt(6. / (n_visible + n_hidden)), \
+              low  = -numpy.sqrt(6. / (n_hidden + n_visible)), \
+              high =  numpy.sqrt(6. / (n_hidden + n_visible)), \
               size = (n_visible, n_hidden)), dtype = theano.config.floatX)
         initial_b = numpy.zeros(n_hidden)
         initial_b_prime = numpy.zeros(n_visible)
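
Editor's note: addition is commutative, so swapping the operands inside sqrt(6. / (n_hidden + n_visible)) changes nothing numerically; the bound is the usual uniform-initialization heuristic for sigmoid layers. A minimal standalone sketch of the same initialization, assuming a seeded RandomState and float32 in place of theano.config.floatX:

    import numpy

    n_visible, n_hidden = 784, 500
    rng = numpy.random.RandomState(1234)
    bound = numpy.sqrt(6. / (n_hidden + n_visible))    # ~0.0683 for 784 + 500
    # draw W uniformly in [-bound, bound]; cast so the code can run on GPU
    initial_W = numpy.asarray(rng.uniform(low=-bound, high=bound,
                                          size=(n_visible, n_hidden)),
                              dtype='float32')
    initial_b = numpy.zeros(n_hidden, dtype='float32')
    initial_b_prime = numpy.zeros(n_visible, dtype='float32')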
@@ -188,9 +188,9 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         if input == None:
             # we use a matrix because we expect a minibatch of several examples,
             # each example being a row
-            x = T.dmatrix(name='input')
+            self.x = T.dmatrix(name='input')
         else:
-            x = input
+            self.x = input
         # Equation (1)
         # note : first argument of theano.rng.binomial is the shape (size) of
         #        random numbers that it should produce
@@ -199,15 +199,15 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         #
         # this will produce an array of 0s and 1s where 1 has a
         # probability of 0.9 and 0 a probability of 0.1
-        tilde_x = theano_rng.binomial(x.shape, 1, 0.9) * x
+        self.tilde_x = theano_rng.binomial(self.x.shape, 1, 0.9) * self.x
         # Equation (2)
         # note : y is stored as an attribute of the class so that it can be
         #        used later when stacking dAs.
-        self.y = T.nnet.sigmoid(T.dot(tilde_x, self.W) + self.b)
+        self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W) + self.b)
         # Equation (3)
-        z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+        self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
-        self.L = -T.sum(x * T.log(z) + (1 - x) * T.log(1 - z), axis=1)
+        self.L = -T.sum(self.x * T.log(self.z) + (1 - self.x) * T.log(1 - self.z), axis=1)
         # note : L is now a vector, where each element is the cross-entropy cost
         #        of the reconstruction of the corresponding example of the
         #        minibatch. We need to compute the average of all these to get
@@ -217,7 +217,7 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         # we will need the hidden layer obtained from the uncorrupted
         # input when for example we will pass this as input to the layer
         # above
-        self.hidden_values = T.nnet.sigmoid(T.dot(x, self.W) + self.b)
+        self.hidden_values = T.nnet.sigmoid(T.dot(self.x, self.W) + self.b)
 
 
 
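Editor's note: for readers following Equations (1)-(4), here is a numpy-only sketch of the forward pass. It assumes the arrays initial_W, initial_b, initial_b_prime from the initialization above and tied weights (W_prime = W.T, as suggested by the absence of a separate initial_W_prime); sigmoid is the logistic function.

    import numpy

    def sigmoid(v):
        return 1. / (1. + numpy.exp(-v))

    x = numpy.random.rand(20, 784)                   # a minibatch; each row is one example
    mask = numpy.random.binomial(1, 0.9, x.shape)    # Eq. (1): keep each input w.p. 0.9
    tilde_x = mask * x                               # corrupted input
    y = sigmoid(numpy.dot(tilde_x, initial_W) + initial_b)         # Eq. (2): encode
    z = sigmoid(numpy.dot(y, initial_W.T) + initial_b_prime)       # Eq. (3): decode
    L = -numpy.sum(x * numpy.log(z) + (1 - x) * numpy.log(1 - z),
                   axis=1)                           # Eq. (4): per-example cross-entropy
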
@@ -260,13 +260,17 @@ def __init__(self, input, n_ins, hidden_layers_sizes, n_outs):
             # input size is that of the previous layer
             # input is the output of the last layer inserted in our list
             # of layers `self.layers`
+            print i
+            print theano.pp(self.layers[-1].hidden_values)
             layer = dA(hidden_layers_sizes[i-1], \
                        hidden_layers_sizes[i], \
                        input = self.layers[-1].hidden_values)
             self.layers += [layer]
 
 
         self.n_layers = len(self.layers)
+        print '------------------------------------------'
+        print theano.pp(self.layers[-1].hidden_values)
         # now we need to use same weights and biases to define an MLP
         # We can simply use the `hidden_values` of the top layer, which
         # computes the input that we would normally feed to the logistic
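
Editor's note: the added print statements are debugging output. theano.pp pretty-prints the symbolic expression of the newest layer's hidden_values, which should grow with depth, since each dA receives the previous layer's output as its symbolic input. A hypothetical two-layer sketch of that chaining, assuming the dA class defined above:

    import theano
    import theano.tensor as T

    x = T.dmatrix('x')
    layer1 = dA(784, 500, input=x)                       # first dA reads the raw input
    layer2 = dA(500, 500, input=layer1.hidden_values)    # second dA reads layer1's code
    # layer2's graph is an expression in x, so it can be compiled whenever needed
    print theano.pp(layer2.hidden_values)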
@@ -298,8 +302,8 @@ def errors(self, y):
 
 
 
-def sgd_optimization_mnist(learning_rate=0.01, pretraining_epochs=10, \
-                           pretraining_lr=0.1, n_iter=1000, dataset='mnist.pkl.gz'):
+def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs=10, \
+                           pretraining_lr=0.1, training_epochs=1000, dataset='mnist.pkl.gz'):
     """
     Demonstrate stochastic gradient descent optimization for a multilayer
     perceptron
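
Editor's note: with n_iter renamed to training_epochs, the outer loop below counts passes over the dataset rather than epochs times batches. A hedged usage sketch, mirroring how these tutorial scripts are usually invoked:

    if __name__ == '__main__':
        sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs=10,
                               pretraining_lr=0.1, training_epochs=1000)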
@@ -335,51 +339,52 @@ def shared_dataset(data_xy):
     valid_set_x, valid_set_y = shared_dataset(valid_set)
     train_set_x, train_set_y = shared_dataset(train_set)
 
-    batch_size = 500    # size of the minibatch
+    batch_size = 20     # size of the minibatch
 
     # compute number of minibatches for training, validation and testing
     n_train_batches = train_set_x.value.shape[0] / batch_size
     n_valid_batches = valid_set_x.value.shape[0] / batch_size
     n_test_batches  = test_set_x.value.shape[0]  / batch_size
 
     # allocate symbolic variables for the data
-    minibatch_offset = T.lscalar()   # offset to the start of a [mini]batch
-    x = T.matrix('x')    # the data is presented as rasterized images
-    y = T.ivector('y')   # the labels are presented as 1D vector of
-                         # [int] labels
+    index = T.lscalar()  # index to a [mini]batch
+    x = T.matrix('x')    # the data is presented as rasterized images
+    y = T.ivector('y')   # the labels are presented as 1D vector of
+                         # [int] labels
 
 
 
 
     # construct the logistic regression class
     classifier = SdA(input=x, n_ins=28*28, \
-                     hidden_layers_sizes=[500, 500, 500], n_outs=10)
+                     hidden_layers_sizes=[700, 700, 700], n_outs=10)
 
     ## Pre-train layer-wise
     for i in xrange(classifier.n_layers):
+        cost = classifier.layers[i].cost
         # compute gradients of layer parameters
-        gW = T.grad(classifier.layers[i].cost, classifier.layers[i].W)
-        gb = T.grad(classifier.layers[i].cost, classifier.layers[i].b)
-        gb_prime = T.grad(classifier.layers[i].cost, \
-                          classifier.layers[i].b_prime)
+        gW = T.grad(cost, classifier.layers[i].W)
+        gb = T.grad(cost, classifier.layers[i].b)
+        gb_prime = T.grad(cost, classifier.layers[i].b_prime)
         # updated value of parameters after each step
         new_W = classifier.layers[i].W - gW * pretraining_lr
         new_b = classifier.layers[i].b - gb * pretraining_lr
         new_b_prime = classifier.layers[i].b_prime - gb_prime * pretraining_lr
-        cost = classifier.layers[i].cost
-        layer_update = theano.function([minibatch_offset], cost, \
+
+        layer_update = theano.function([index], [cost], \
             updates = {
                 classifier.layers[i].W       : new_W \
               , classifier.layers[i].b       : new_b \
               , classifier.layers[i].b_prime : new_b_prime },
             givens = {
-                x: test_set_x[minibatch_offset: minibatch_offset + batch_size]})
+                x: train_set_x[index * batch_size: (index + 1) * batch_size]})
         # go through pretraining epochs
         for epoch in xrange(pretraining_epochs):
             # go through the training set
-            for batch_offset in xrange(n_train_batches):
-                layer_update(i * batch_size)
-            print 'Pre-training layer %i, epoch %d' % (i, epoch)
+            for batch_index in xrange(n_train_batches):
+                c = layer_update(batch_index)
+            print 'Pre-training layer %i, epoch %d' % (i, epoch), c
+
 
 
 
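Editor's note: two fixes land in this hunk. Pretraining now reads train_set_x instead of test_set_x, and the minibatch is selected by index rather than a raw offset, so layer_update(batch_index) visits every batch instead of the single slice at i * batch_size. The givens mechanism substitutes a slice of the shared variable for x at call time, keeping the data on the device. A minimal sketch of the pattern, assuming a shared train_set_x and some symbolic cost expression in x:

    index = T.lscalar('index')
    fn = theano.function([index], cost,
            givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]})
    c = fn(0)    # evaluates cost on rows 0 .. batch_size - 1, no host-device copy
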
@@ -391,15 +396,15 @@ def shared_dataset(data_xy):
     # compiling a theano function that computes the mistakes that are made
     # by the model on a minibatch
     # create a function to compute the mistakes that are made by the model
-    test_model = theano.function([minibatch_offset], cost,
+    test_model = theano.function([index], classifier.errors(y),
             givens = {
-                x: test_set_x[minibatch_offset: minibatch_offset + batch_size],
-                y: test_set_y[minibatch_offset: minibatch_offset + batch_size]})
+                x: test_set_x[index * batch_size: (index + 1) * batch_size],
+                y: test_set_y[index * batch_size: (index + 1) * batch_size]})
 
-    validate_model = theano.function([minibatch_offset], cost,
+    validate_model = theano.function([index], classifier.errors(y),
             givens = {
-                x: valid_set_x[minibatch_offset: minibatch_offset + batch_size],
-                y: valid_set_y[minibatch_offset: minibatch_offset + batch_size]})
+                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
+                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})
 
 
     # compute the gradient of cost with respect to theta and add them to the
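
Editor's note: test_model and validate_model now report classifier.errors(y), the misclassification rate, instead of the pretraining cost that the old code reused from the loop above. For reference, the errors method in these tutorials is typically along these lines (a sketch; its actual body falls outside this diff, and y_pred is assumed to be the class prediction of the logistic layer):

    def errors(self, y):
        # fraction of minibatch examples whose predicted label differs from y
        return T.mean(T.neq(self.y_pred, y))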
@@ -423,10 +428,10 @@ def shared_dataset(data_xy):
     # compiling a theano function `train_model` that returns the cost, but
     # at the same time updates the parameters of the model based on the rules
     # defined in `updates`
-    train_model = theano.function([minibatch_offset], cost, updates = updates,
+    train_model = theano.function([index], cost, updates = updates,
             givens = {
-                x: train_set_x[minibatch_offset: minibatch_offset + batch_size],
-                y: train_set_y[minibatch_offset: minibatch_offset + batch_size]})
+                x: train_set_x[index * batch_size: (index + 1) * batch_size],
+                y: train_set_y[index * batch_size: (index + 1) * batch_size]})
 
     # early-stopping parameters
     patience = 10000    # look at this many examples regardless
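
Editor's note: the patience counter drives early stopping further down in the script. The conventional update, shown here as a hedged sketch since the relevant lines (and the names improvement_threshold and patience_increase) fall outside this diff, extends patience whenever validation improves significantly and aborts training once it runs out:

    # inside the validation block:
    if this_validation_loss < best_validation_loss * improvement_threshold:
        patience = max(patience, iter * patience_increase)

    # at the bottom of the training loop:
    if patience <= iter:
        break    # no significant improvement for a while; stop training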
@@ -445,18 +450,18 @@ def shared_dataset(data_xy):
     best_validation_loss = float('inf')
     test_score = 0.
     start_time = time.clock()
-    # have a maximum of `n_iter` iterations through the entire dataset
-    for iter in xrange(n_iter * n_train_batches):
-
-        # get epoch and minibatch index
-        epoch = iter / n_train_batches
-        minibatch_index = iter % n_train_batches
-        minibatch_offset = minibatch_index * batch_size
+    cost_ij = []
+    for epoch in xrange(training_epochs):
+        for minibatch_index in xrange(n_train_batches):
 
-        cost_ij = train_model(minibatch_offset)
+            cost_ij += [train_model(minibatch_index)]
+            iter = epoch * n_train_batches + minibatch_index
 
         if (iter + 1) % validation_frequency == 0:
-            validation_losses = [validate_model(i * batch_size) for i in xrange(n_valid_batches)]
+            print cost_ij
+            cost_ij = []
+            validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
+            print validation_losses
             this_validation_loss = numpy.mean(validation_losses)
             print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                   (epoch, minibatch_index + 1, n_train_batches, \
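
Editor's note: cost_ij, formerly a scalar, now accumulates one training cost per minibatch and is printed and reset at each validation check; iter = epoch * n_train_batches + minibatch_index recovers the old single counter, so validation_frequency behaves as before. If the raw list proves too noisy to read, printing its mean is a reasonable variant (a sketch, not what this commit does):

    print 'mean training cost since last validation:', numpy.mean(cost_ij)
    cost_ij = []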
@@ -476,7 +481,7 @@ def shared_dataset(data_xy):
                     best_iter = iter
 
                 # test it on the test set
-                test_losses = [test_model(i * batch_size) for i in xrange(n_test_batches)]
+                test_losses = [test_model(i) for i in xrange(n_test_batches)]
                 test_score = numpy.mean(test_losses)
                 print(('     epoch %i, minibatch %i/%i, test error of best '
                        'model %f %%') %