
Commit 9d2f5d3 (parent bf266e5)
Author: Razvan Pascanu

Using index instead of offset; some bug fixes for SdA.py; partially finished SdA tutorial (I got stuck trying to fix the code and didn't manage to finish everything).
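
The central change is the minibatch addressing convention: Theano functions now take a minibatch index and compute the slice themselves, instead of taking a raw row offset. A minimal sketch of the new pattern, using a hypothetical toy dataset in place of the tutorial's MNIST shared variables:

import numpy
import theano
import theano.tensor as T

batch_size = 20
# hypothetical stand-in for train_set_x; the real code loads MNIST into a shared variable
train_set_x = theano.shared(numpy.random.rand(100, 784))

index = T.lscalar()   # index to a [mini]batch
x = T.matrix('x')

# old convention: givens = { x: train_set_x[minibatch_offset : minibatch_offset + batch_size] }
# new convention: the index is scaled by batch_size inside the graph
get_batch_shape = theano.function([index], x.shape,
        givens = { x: train_set_x[index * batch_size : (index + 1) * batch_size] })

print get_batch_shape(3)   # the fourth minibatch, i.e. rows 60..79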

6 files changed: 463 additions & 123 deletions

code/SdA.py: 60 additions & 40 deletions
@@ -137,11 +137,11 @@ class dA(object):
 
     def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         """
-        Initialize the DAE class by specifying the number of visible units (the
+        Initialize the dA class by specifying the number of visible units (the
         dimension d of the input ), the number of hidden units ( the dimension
         d' of the latent or hidden space ) and by giving a symbolic variable
         for the input. Such a symbolic variable is useful when the input is
-        the result of some computations. For example when dealing with SDAEs,
+        the result of some computations. For example when dealing with SdAs,
         the dA on layer 2 gets as input the output of the DAE on layer 1.
         This output can be written as a function of the input to the entire
         model, and as such can be computed by theano whenever needed.
@@ -188,9 +188,9 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         if input == None :
             # we use a matrix because we expect a minibatch of several examples,
             # each example being a row
-            x = T.dmatrix(name = 'input')
+            self.x = T.dmatrix(name = 'input')
         else:
-            x = input
+            self.x = input
         # Equation (1)
         # note : first argument of theano.rng.binomial is the shape(size) of
         #        random numbers that it should produce
@@ -199,15 +199,15 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         #
         # this will produce an array of 0s and 1s where 1 has a
         # probability of 0.9 and 0 if 0.1
-        tilde_x = theano_rng.binomial( x.shape, 1, 0.9) * x
+        self.tilde_x = theano_rng.binomial( self.x.shape, 1, 0.9) * self.x
         # Equation (2)
         # note : y is stored as an attribute of the class so that it can be
         #        used later when stacking dAs.
-        self.y = T.nnet.sigmoid(T.dot(tilde_x, self.W ) + self.b)
+        self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b)
         # Equation (3)
-        z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+        self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
-        self.L = - T.sum( x*T.log(z) + (1-x)*T.log(1-z), axis=1 )
+        self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
         # note : L is now a vector, where each element is the cross-entropy cost
         #        of the reconstruction of the corresponding example of the
         #        minibatch. We need to compute the average of all these to get
@@ -217,7 +217,7 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         # we will need the hidden layer obtained from the uncorrupted
         # input when for example we will pass this as input to the layer
         # above
-        self.hidden_values = T.nnet.sigmoid( T.dot(x, self.W) + self.b)
+        self.hidden_values = T.nnet.sigmoid( T.dot(self.x, self.W) + self.b)
 
 
 
@@ -262,13 +262,17 @@ def __init__(self, input, n_ins, hidden_layers_sizes, n_outs):
             # input size is that of the previous layer
             # input is the output of the last layer inserted in our list
             # of layers `self.layers`
+            print i
+            print theano.pp(self.layers[-1].hidden_values)
             layer = dA( hidden_layers_sizes[i-1], \
                         hidden_layers_sizes[i], \
                         input = self.layers[-1].hidden_values )
             self.layers += [layer]
 
 
         self.n_layers = len(self.layers)
+        print '------------------------------------------'
+        print theano.pp(self.layers[-1].hidden_values)
         # now we need to use same weights and biases to define an MLP
         # We can simply use the `hidden_values` of the top layer, which
         # computes the input that we would normally feed to the logistic
@@ -300,8 +304,8 @@ def errors(self, y):
 
 
 
-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 20, \
-                            pretraining_lr = 0.1, n_iter = 1000, dataset='mnist.pkl.gz'):
+def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 5, \
+                            pretraining_lr = 0.1, training_epochs = 1000, dataset='mnist.pkl.gz'):
     """
     Demonstrate stochastic gradient descent optimization for a multilayer
     perceptron
@@ -345,17 +349,17 @@ def shared_dataset(data_xy):
     n_test_batches = test_set_x.value.shape[0] / batch_size
 
     # allocate symbolic variables for the data
-    minibatch_offset = T.lscalar() # offset to the start of a [mini]batch
-    x = T.matrix('x')   # the data is presented as rasterized images
-    y = T.ivector('y')  # the labels are presented as 1D vector of
-                        # [int] labels
+    index = T.lscalar()  # index to a [mini]batch
+    x = T.matrix('x')    # the data is presented as rasterized images
+    y = T.ivector('y')   # the labels are presented as 1D vector of
+                         # [int] labels
 
 
 
 
     # construct the logistic regression class
     classifier = SdA( input=x, n_ins=28*28, \
-                      hidden_layers_sizes = [700, 700,700], n_outs=10)
+                      hidden_layers_sizes = [500, 500, 500], n_outs=10)
 
     ## Pre-train layer-wise
     for i in xrange(classifier.n_layers):
@@ -369,19 +373,35 @@ def shared_dataset(data_xy):
         new_b = classifier.layers[i].b - gb * pretraining_lr
         new_b_prime = classifier.layers[i].b_prime- gb_prime* pretraining_lr
         cost = classifier.layers[i].cost
-        layer_update = theano.function([minibatch_offset], cost, \
+        print '---------------------------------------------------'
+        print ' Layer : ',i
+        print ' x : ', theano.pp(classifier.layers[i].x)
+        print ' '
+        print ' tilde_x: ', theano.pp(classifier.layers[i].tilde_x)
+        print ' '
+        print 'y :', theano.pp(classifier.layers[i].y)
+        print ' '
+        print 'z: ', theano.pp(classifier.layers[i].z)
+        print ' '
+        print 'L:', theano.pp(classifier.layers[i].L)
+        print ' '
+        print 'cost: ', theano.pp(classifier.layers[i].cost)
+        print ' '
+        print 'hid: ', theano.pp(classifier.layers[i].hidden_values)
+        print '================================================='
+        layer_update = theano.function([index], [cost, classifier.layers[i].x, classifier.layers[i].z], \
             updates = {
                 classifier.layers[i].W : new_W \
               , classifier.layers[i].b : new_b \
              , classifier.layers[i].b_prime : new_b_prime },
             givens = {
-                x :test_set_x[minibatch_offset:minibatch_offset+batch_size]})
+                x :train_set_x[index*batch_size:(index+1)*batch_size]})
         # go through pretraining epochs
         for epoch in xrange(pretraining_epochs):
             # go through the training set
-            for batch_offset in xrange(n_train_batches):
-                layer_update(i*batch_size)
-            print 'Pre-training layer %i, epoch %d'%(i,epoch)
+            for batch_index in xrange(n_train_batches):
+                c = layer_update(batch_index)
+                print 'Pre-training layer %i, epoch %d'%(i,epoch),c, batch_index
 
 
 
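The print theano.pp(...) calls added above are debugging aids: theano.pp pretty-prints the symbolic expression graph of a variable as text, which is useful for checking how the stacked layers were wired together. A tiny self-contained illustration (toy variables, not from this commit):

import theano
import theano.tensor as T

a = T.dmatrix('a')
out = T.nnet.sigmoid(T.dot(a, a.T))
# pp renders the graph that would be compiled, as a readable string
print theano.pp(out)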
@@ -393,15 +413,15 @@ def shared_dataset(data_xy):
     # compiling a theano function that computes the mistakes that are made
     # by the model on a minibatch
     # create a function to compute the mistakes that are made by the model
-    test_model = theano.function([minibatch_offset], cost,
+    test_model = theano.function([index], classifier.errors(y),
             givens = {
-                x: test_set_x[minibatch_offset:minibatch_offset+batch_size],
-                y: test_set_y[minibatch_offset:minibatch_offset+batch_size]})
+                x: test_set_x[index*batch_size:(index+1)*batch_size],
+                y: test_set_y[index*batch_size:(index+1)*batch_size]})
 
-    validate_model = theano.function([minibatch_offset], cost,
+    validate_model = theano.function([index], classifier.errors(y),
             givens = {
-                x: valid_set_x[minibatch_offset:minibatch_offset+batch_size],
-                y: valid_set_y[minibatch_offset:minibatch_offset+batch_size]})
+                x: valid_set_x[index*batch_size:(index+1)*batch_size],
+                y: valid_set_y[index*batch_size:(index+1)*batch_size]})
 
 
     # compute the gradient of cost with respect to theta and add them to the
@@ -425,10 +445,10 @@ def shared_dataset(data_xy):
     # compiling a theano function `train_model` that returns the cost, but
     # in the same time updates the parameter of the model based on the rules
     # defined in `updates`
-    train_model = theano.function([minibatch_offset], cost, updates=updates,
+    train_model = theano.function([index], cost, updates=updates,
             givens = {
-                x: train_set_x[minibatch_offset:minibatch_offset+batch_size],
-                y: train_set_y[minibatch_offset:minibatch_offset+batch_size]})
+                x: train_set_x[index*batch_size:(index+1)*batch_size],
+                y: train_set_y[index*batch_size:(index+1)*batch_size]})
 
     # early-stopping parameters
     patience = 10000 # look as this many examples regardless
@@ -447,18 +467,18 @@ def shared_dataset(data_xy):
     best_validation_loss = float('inf')
     test_score = 0.
     start_time = time.clock()
-    # have a maximum of `n_iter` iterations through the entire dataset
-    for iter in xrange(n_iter* n_train_batches):
+    cost_ij = []
+    for epoch in xrange(training_epochs):
+        for minibatch_index in xrange(n_train_batches):
 
-        # get epoch and minibatch index
-        epoch = iter / n_train_batches
-        minibatch_index = iter % n_train_batches
-        minibatch_offset = minibatch_index * batch_size
-
-        cost_ij = train_model(minibatch_offset)
+            cost_ij += [train_model(minibatch_index)]
+            iter = epoch * n_train_batches + minibatch_index
 
             if (iter+1) % validation_frequency == 0:
-                validation_losses = [validate_model(i*batch_size) for i in xrange(n_valid_batches)]
+                print cost_ij
+                cost_ij = []
+                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
+                print validation_losses
                 this_validation_loss = numpy.mean(validation_losses)
                 print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                       (epoch, minibatch_index+1, n_train_batches, \
@@ -478,7 +498,7 @@ def shared_dataset(data_xy):
                     best_iter = iter
 
                     # test it on the test set
-                    test_losses = [test_model(i*batch_size) for i in xrange(n_test_batches)]
+                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                     test_score = numpy.mean(test_losses)
                     print((' epoch %i, minibatch %i/%i, test error of best '
                            'model %f %%') %
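
For reference, the Equations (1)-(4) cited in the dA comments are the standard denoising autoencoder steps: corrupt the input, encode, decode, and score the reconstruction with cross-entropy. A minimal NumPy sketch under the tutorial's assumptions (tied weights, so the decoder uses W.T; shapes are illustrative):

import numpy

def sigmoid(a):
    return 1.0 / (1.0 + numpy.exp(-a))

rng = numpy.random.RandomState(0)
x = rng.rand(20, 784)                    # one minibatch, one example per row
W = rng.uniform(-0.1, 0.1, (784, 500))   # encoder weights; decoder weights are tied
b = numpy.zeros(500)
b_prime = numpy.zeros(784)

tilde_x = rng.binomial(1, 0.9, x.shape) * x        # Eq. (1): zero out ~10% of inputs
y = sigmoid(numpy.dot(tilde_x, W) + b)             # Eq. (2): hidden code
z = sigmoid(numpy.dot(y, W.T) + b_prime)           # Eq. (3): reconstruction
L = -numpy.sum(x * numpy.log(z) + (1 - x) * numpy.log(1 - z), axis=1)  # Eq. (4)
print L.mean()                                     # cost averaged over the minibatch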

code/convolutional_mlp.py: 21 additions & 25 deletions
@@ -186,7 +186,7 @@ def errors(self, y):
         raise NotImplementedError()
 
 
-def evaluate_lenet5(learning_rate=0.1, n_iter=200, dataset='mnist.pkl.gz', nkerns=[20,50]):
+def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='mnist.pkl.gz', nkerns=[20,50]):
     rng = numpy.random.RandomState(23455)
 
     # Load the dataset
@@ -213,10 +213,10 @@ def shared_dataset(data_xy):
     n_test_batches = test_set_x.value.shape[0] / batch_size
 
     # allocate symbolic variables for the data
-    minibatch_offset = T.lscalar() # offset to the start of a [mini]batch
-    x = T.matrix('x')   # the data is presented as rasterized images
-    y = T.ivector('y')  # the labels are presented as 1D vector of
-                        # [int] labels
+    index = T.lscalar()  # index to a [mini]batch
+    x = T.matrix('x')    # the data is presented as rasterized images
+    y = T.ivector('y')   # the labels are presented as 1D vector of
+                         # [int] labels
 
 
     ishape = (28,28)  # this is the size of MNIST images
@@ -261,15 +261,15 @@ def shared_dataset(data_xy):
     cost = layer3.negative_log_likelihood(y)
 
     # create a function to compute the mistakes that are made by the model
-    test_model = theano.function([minibatch_offset], layer3.errors(y),
+    test_model = theano.function([index], layer3.errors(y),
             givens = {
-                x: test_set_x[minibatch_offset:minibatch_offset+batch_size],
-                y: test_set_y[minibatch_offset:minibatch_offset+batch_size]})
+                x: test_set_x[index*batch_size:(index+1)*batch_size],
+                y: test_set_y[index*batch_size:(index+1)*batch_size]})
 
-    validate_model = theano.function([minibatch_offset], layer3.errors(y),
+    validate_model = theano.function([index], layer3.errors(y),
             givens = {
-                x: valid_set_x[minibatch_offset:minibatch_offset+batch_size],
-                y: valid_set_y[minibatch_offset:minibatch_offset+batch_size]})
+                x: valid_set_x[index*batch_size:(index+1)*batch_size],
+                y: valid_set_y[index*batch_size:(index+1)*batch_size]})
 
     # create a list of all model parameters to be fit by gradient descent
     params = layer3.params+ layer2.params+ layer1.params + layer0.params
@@ -285,10 +285,10 @@ def shared_dataset(data_xy):
     for param_i, grad_i in zip(params, grads):
         updates[param_i] = param_i - learning_rate * grad_i
 
-    train_model = theano.function([minibatch_offset], cost, updates=updates,
+    train_model = theano.function([index], cost, updates=updates,
             givens = {
-                x: train_set_x[minibatch_offset:minibatch_offset+batch_size],
-                y: train_set_y[minibatch_offset:minibatch_offset+batch_size]})
+                x: train_set_x[index*batch_size:(index+1)*batch_size],
+                y: train_set_y[index*batch_size:(index+1)*batch_size]})
 
 
     ###############
@@ -313,23 +313,19 @@ def shared_dataset(data_xy):
     test_score = 0.
     start_time = time.clock()
 
-    # have a maximum of `n_iter` iterations through the entire dataset
-    for iter in xrange(n_iter * n_train_batches):
-
-        # get epoch and minibatch index
-        epoch = iter / n_train_batches
-        minibatch_index = iter % n_train_batches
-        minibatch_offset = minibatch_index * batch_size
-
+    for epoch in xrange(n_epochs):
+        for minibatch_index in xrange(n_train_batches):
+
+            iter = epoch * n_train_batches + minibatch_index
 
             if iter %100 == 0:
                 print 'training @ iter = ', iter
-            cost_ij = train_model(minibatch_offset)
+            cost_ij = train_model(minibatch_index)
 
             if (iter+1) % validation_frequency == 0:
 
                 # compute zero-one loss on validation set
-                validation_losses = [validate_model(i*batch_size) for i in xrange(n_valid_batches)]
+                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                 this_validation_loss = numpy.mean(validation_losses)
                 print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                       (epoch, minibatch_index+1, n_train_batches, \
@@ -349,7 +345,7 @@ def shared_dataset(data_xy):
                     best_iter = iter
 
                     # test it on the test set
-                    test_losses = [test_model(i*batch_size) for i in xrange(n_test_batches)]
+                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                     test_score = numpy.mean(test_losses)
                     print((' epoch %i, minibatch %i/%i, test error of best '
                            'model %f %%') %
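
Both files make the same structural change to the training loop: the flat loop over n_iter * n_train_batches becomes an explicit epoch/minibatch double loop, and iter is derived from the loop variables instead of driving them, so the early-stopping checks keep working unchanged. A standalone sketch of the resulting control flow (toy constants, no Theano):

n_epochs = 3
n_train_batches = 5
validation_frequency = 5    # toy value, just to trigger the check below

for epoch in xrange(n_epochs):
    for minibatch_index in xrange(n_train_batches):
        # the old flat loop counter is now computed from the loop variables
        iter = epoch * n_train_batches + minibatch_index
        # cost = train_model(minibatch_index) would go here
        if (iter + 1) % validation_frequency == 0:
            print 'epoch %i, minibatch %i/%i: run validation here' % \
                  (epoch, minibatch_index + 1, n_train_batches)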
