
Commit d6c0574

Author: Razvan Pascanu

A more readable, almost complete version of the SdA tutorial; still working on a possible bug in SdA.py.

Parent: d2f54e0

3 files changed: 241 additions, 151 deletions

File: code/SdA.py (13 additions, 30 deletions)
@@ -170,8 +170,8 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         # the output of uniform if converted using asarray to dtype
         # theano.config.floatX so that the code is runable on GPU
         initial_W = numpy.asarray( numpy.random.uniform( \
-              low = -numpy.sqrt(1./(n_visible)), \
-              high = numpy.sqrt(1./(n_visible)), \
+              low = -numpy.sqrt(6./(n_hidden+n_visible)), \
+              high = numpy.sqrt(6./(n_hidden+n_visible)), \
              size = (n_visible, n_hidden)), dtype = theano.config.floatX)
         initial_b = numpy.zeros(n_hidden)
         initial_b_prime= numpy.zeros(n_visible)
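
For context on this change: the old bound, +/- sqrt(1/n_visible), depended only on the layer's fan-in, while the new bound, +/- sqrt(6/(n_hidden+n_visible)), scales with both fan-in and fan-out. A minimal numpy sketch of the new rule (the helper name and the dtype argument are illustrative, not part of SdA.py):

    import numpy

    def init_weights(n_visible, n_hidden, dtype='float32'):
        # uniform in +/- sqrt(6/(fan_in + fan_out)); the cast mirrors the
        # theano.config.floatX cast above so the result can live on a GPU
        bound = numpy.sqrt(6. / (n_hidden + n_visible))
        W = numpy.random.uniform(low=-bound, high=bound,
                                 size=(n_visible, n_hidden))
        return numpy.asarray(W, dtype=dtype)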
@@ -304,7 +304,7 @@ def errors(self, y):



-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 5, \
+def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 10, \
         pretraining_lr = 0.1, training_epochs = 1000, dataset='mnist.pkl.gz'):
     """
     Demonstrate stochastic gradient descent optimization for a multilayer
@@ -359,51 +359,34 @@ def shared_dataset(data_xy):

     # construct the logistic regression class
     classifier = SdA( input=x, n_ins=28*28, \
-                      hidden_layers_sizes = [500, 500, 500], n_outs=10)
+                      hidden_layers_sizes = [700, 700, 700], n_outs=10)

     ## Pre-train layer-wise
     for i in xrange(classifier.n_layers):
+        cost = classifier.layers[i].cost
         # compute gradients of layer parameters
-        gW = T.grad(classifier.layers[i].cost, classifier.layers[i].W)
-        gb = T.grad(classifier.layers[i].cost, classifier.layers[i].b)
-        gb_prime = T.grad(classifier.layers[i].cost, \
-                          classifier.layers[i].b_prime)
+        gW = T.grad(cost, classifier.layers[i].W)
+        gb = T.grad(cost, classifier.layers[i].b)
+        gb_prime = T.grad(cost, classifier.layers[i].b_prime)
         # updated value of parameters after each step
         new_W = classifier.layers[i].W - gW * pretraining_lr
         new_b = classifier.layers[i].b - gb * pretraining_lr
         new_b_prime = classifier.layers[i].b_prime- gb_prime* pretraining_lr
-        cost = classifier.layers[i].cost
-        print '---------------------------------------------------'
-        print ' Layer : ',i
-        print ' x : ', theano.pp(classifier.layers[i].x)
-        print ' '
-        print ' tilde_x: ', theano.pp(classifier.layers[i].tilde_x)
-        print ' '
-        print 'y :', theano.pp(classifier.layers[i].y)
-        print ' '
-        print 'z: ', theano.pp(classifier.layers[i].z)
-        print ' '
-        print 'L:', theano.pp(classifier.layers[i].L)
-        print ' '
-        print 'cost: ', theano.pp(classifier.layers[i].cost)
-        print ' '
-        print 'hid: ', theano.pp(classifier.layers[i].hidden_values)
-        print ' '
-        print '================================================='
-        layer_update = theano.function([index], [cost, classifier.layers[i].x, new_W, new_b, new_b_prime], \
+
+        layer_update = theano.function([index], [cost], \
             updates = {
                 classifier.layers[i].W : new_W \
                 , classifier.layers[i].b : new_b \
                 , classifier.layers[i].b_prime : new_b_prime },
             givens = {
-                x :train_set_x[index*batch_size:(index+1)*batch_size]})
+                x :train_set_x[index*batch_size:(index+1)*batch_size-1]})
         # go through pretraining epochs
         for epoch in xrange(pretraining_epochs):
             # go through the training set
             for batch_index in xrange(n_train_batches):
                 c = layer_update(batch_index)
-                print 'Pre-training layer %i, epoch %d'%(i,epoch),c, batch_index
-
+            print 'Pre-training layer %i, epoch %d'%(i,epoch),c
+


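The bulk of the last hunk is a cleanup of the pre-training loop: cost is hoisted into a local name, the three gradients are taken against it, the block of theano.pp debug prints is removed, and layer_update now returns only the cost while still applying the parameter updates through updates and reading the minibatch through givens. A condensed sketch of that pattern, assuming the Theano API of that era; layer, x, train_set_x, batch_size, and pretraining_lr stand in for the tutorial's variables:

    import theano
    import theano.tensor as T

    def make_layer_update(layer, x, train_set_x, batch_size, pretraining_lr):
        index = T.lscalar()  # minibatch index
        # one T.grad call returns the gradient of the cost w.r.t. each parameter
        gW, gb, gb_prime = T.grad(layer.cost, [layer.W, layer.b, layer.b_prime])
        updates = {layer.W:       layer.W       - gW       * pretraining_lr,
                   layer.b:       layer.b       - gb       * pretraining_lr,
                   layer.b_prime: layer.b_prime - gb_prime * pretraining_lr}
        # each call returns the minibatch cost and applies one SGD step
        return theano.function([index], layer.cost, updates=updates,
            givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

The sketch deliberately keeps the slice without the new -1. Python (and Theano) slicing is end-exclusive, so train_set_x[index*batch_size:(index+1)*batch_size] already yields exactly batch_size rows; the -1 introduced in this hunk drops the last example of every minibatch, which may be the "possible bug" the commit message refers to. A quick numpy check of the two slices:

    import numpy

    batch_size = 20
    train_set_x = numpy.zeros((100, 784))
    print(train_set_x[0 * batch_size:1 * batch_size].shape[0])      # 20
    print(train_set_x[0 * batch_size:1 * batch_size - 1].shape[0])  # 19: one row lost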