@@ -170,8 +170,8 @@ def __init__(self, n_visible=784, n_hidden=500, input=None):
         # the output of uniform is converted using asarray to dtype
         # theano.config.floatX so that the code is runnable on GPU
         initial_W = numpy.asarray(numpy.random.uniform( \
-              low  = -numpy.sqrt(1. / n_visible), \
-              high =  numpy.sqrt(1. / n_visible), \
+              low  = -numpy.sqrt(6. / (n_hidden + n_visible)), \
+              high =  numpy.sqrt(6. / (n_hidden + n_visible)), \
               size = (n_visible, n_hidden)), dtype=theano.config.floatX)
         initial_b       = numpy.zeros(n_hidden)
         initial_b_prime = numpy.zeros(n_visible)
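Context for the hunk above: the `+` lines replace the old ±sqrt(1/n_visible) range with the symmetric ±sqrt(6/(fan_in + fan_out)) uniform range, the usual heuristic for sigmoid/tanh layers in these tutorials. A minimal standalone sketch of the new bound, assuming plain NumPy; the helper name and the float32 cast are illustrative, not part of the patch:

    import numpy

    def init_uniform_W(n_visible, n_hidden):
        # draw W uniformly from [-sqrt(6/(fan_in+fan_out)), +sqrt(6/(fan_in+fan_out))]
        bound = numpy.sqrt(6. / (n_hidden + n_visible))
        return numpy.asarray(numpy.random.uniform(low=-bound, high=bound,
                             size=(n_visible, n_hidden)), dtype='float32')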
@@ -304,7 +304,7 @@ def errors(self, y):
 
 
 
-def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs=5, \
+def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs=10, \
            pretraining_lr=0.1, training_epochs=1000, dataset='mnist.pkl.gz'):
     """
     Demonstrate stochastic gradient descent optimization for a multilayer
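The only change in this hunk doubles the default number of pre-training sweeps per layer from 5 to 10. A hedged usage sketch of the entry point with its new defaults; the __main__ guard is illustrative and not shown in the patch:

    if __name__ == '__main__':
        # 10 pre-training epochs per layer, then up to 1000 fine-tuning epochs
        sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs=10,
                               pretraining_lr=0.1, training_epochs=1000)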
@@ -359,51 +359,34 @@ def shared_dataset(data_xy):
 
     # construct the stacked denoising autoencoder (SdA) classifier
     classifier = SdA(input=x, n_ins=28 * 28, \
-                     hidden_layers_sizes=[500, 500, 500], n_outs=10)
+                     hidden_layers_sizes=[700, 700, 700], n_outs=10)
 
     ## Pre-train layer-wise
     for i in xrange(classifier.n_layers):
+        cost = classifier.layers[i].cost
         # compute gradients of layer parameters
-        gW       = T.grad(classifier.layers[i].cost, classifier.layers[i].W)
-        gb       = T.grad(classifier.layers[i].cost, classifier.layers[i].b)
-        gb_prime = T.grad(classifier.layers[i].cost, \
-                          classifier.layers[i].b_prime)
+        gW       = T.grad(cost, classifier.layers[i].W)
+        gb       = T.grad(cost, classifier.layers[i].b)
+        gb_prime = T.grad(cost, classifier.layers[i].b_prime)
         # updated value of parameters after each step
         new_W       = classifier.layers[i].W       - gW       * pretraining_lr
         new_b       = classifier.layers[i].b       - gb       * pretraining_lr
         new_b_prime = classifier.layers[i].b_prime - gb_prime * pretraining_lr
-        cost = classifier.layers[i].cost
-        print '---------------------------------------------------'
-        print ' Layer : ', i
-        print ' x : ', theano.pp(classifier.layers[i].x)
-        print ' '
-        print ' tilde_x: ', theano.pp(classifier.layers[i].tilde_x)
-        print ' '
-        print 'y :', theano.pp(classifier.layers[i].y)
-        print ' '
-        print 'z: ', theano.pp(classifier.layers[i].z)
-        print ' '
-        print 'L:', theano.pp(classifier.layers[i].L)
-        print ' '
-        print 'cost: ', theano.pp(classifier.layers[i].cost)
-        print ' '
-        print 'hid: ', theano.pp(classifier.layers[i].hidden_values)
-        print ' '
-        print '================================================='
-        layer_update = theano.function([index], [cost, classifier.layers[i].x, new_W, new_b, new_b_prime], \
+
+        layer_update = theano.function([index], [cost], \
             updates = {
                 classifier.layers[i].W       : new_W \
               , classifier.layers[i].b       : new_b \
               , classifier.layers[i].b_prime : new_b_prime },
             givens = {
-                x: train_set_x[index * batch_size:(index + 1) * batch_size]})
+                x: train_set_x[index * batch_size:(index + 1) * batch_size - 1]})
         # go through pretraining epochs
         for epoch in xrange(pretraining_epochs):
             # go through the training set
             for batch_index in xrange(n_train_batches):
                 c = layer_update(batch_index)
-            print 'Pre-training layer %i, epoch %d' % (i, epoch), c, batch_index
-
+            print 'Pre-training layer %i, epoch %d' % (i, epoch), c
+
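The hunk above hoists `cost` out of the repeated `T.grad` calls, drops the `theano.pp` debug prints, returns only the cost from the compiled function, and widens the hidden layers to 700 units. One caveat: the new `givens` slice ends at `(index + 1) * batch_size - 1`, and Python slices are end-exclusive, so each minibatch loses its last row. A minimal sketch of the same per-layer update as a reusable helper, assuming a `layer` object with the attributes used in the patch (`cost`, `W`, `b`, `b_prime`) and the symbolic input `x`; the helper name and argument list are assumptions, and the slice below keeps the full batch:

    import theano
    import theano.tensor as T

    def compile_layer_update(layer, x, train_set_x, batch_size, pretraining_lr):
        # compile one SGD step over the parameters of a single denoising autoencoder
        index = T.lscalar()    # minibatch index
        cost = layer.cost
        updates = dict((param, param - T.grad(cost, param) * pretraining_lr)
                       for param in (layer.W, layer.b, layer.b_prime))
        return theano.function([index], cost, updates=updates,
            givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

Calling this once per layer and then looping `layer_update(batch_index)` over epochs reproduces the pre-training loop in the hunk.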