
Commit f84b517

Merge branch 'master' of git@github.com:lisa-lab/DeepLearningTutorials
2 parents: 7ffaf89 + b241bde

11 files changed

Lines changed: 612 additions & 152 deletions


code/SdA.py

Lines changed: 53 additions & 48 deletions
@@ -137,11 +137,11 @@ class dA(object):
 
     def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         """
-        Initialize the DAE class by specifying the number of visible units (the
+        Initialize the dA class by specifying the number of visible units (the
         dimension d of the input ), the number of hidden units ( the dimension
         d' of the latent or hidden space ) and by giving a symbolic variable
         for the input. Such a symbolic variable is useful when the input is
-        the result of some computations. For example when dealing with SDAEs,
+        the result of some computations. For example when dealing with SdAs,
         the dA on layer 2 gets as input the output of the DAE on layer 1.
         This output can be written as a function of the input to the entire
         model, and as such can be computed by theano whenever needed.
@@ -170,8 +170,8 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         # the output of uniform if converted using asarray to dtype
         # theano.config.floatX so that the code is runable on GPU
         initial_W = numpy.asarray( numpy.random.uniform( \
-              low = -numpy.sqrt(6./(n_visible+n_hidden)), \
-              high = numpy.sqrt(6./(n_visible+n_hidden)), \
+              low = -numpy.sqrt(6./(n_hidden+n_visible)), \
+              high = numpy.sqrt(6./(n_hidden+n_visible)), \
               size = (n_visible, n_hidden)), dtype = theano.config.floatX)
         initial_b = numpy.zeros(n_hidden)
         initial_b_prime= numpy.zeros(n_visible)
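Note: the uniform range above is the fan-in/fan-out heuristic for sigmoid units, and since the sum is commutative, the swap from n_visible+n_hidden to n_hidden+n_visible does not change the interval. A minimal standalone numpy sketch of the sampling (the helper name is illustrative, not part of the diff):

    import numpy

    def init_weights(n_visible, n_hidden, rng=numpy.random):
        # symmetric interval +/- sqrt(6 / (fan_in + fan_out));
        # either order of the terms in the sum gives the same bound
        bound = numpy.sqrt(6. / (n_visible + n_hidden))
        return numpy.asarray(rng.uniform(low=-bound, high=bound,
                                         size=(n_visible, n_hidden)),
                             dtype='float32')

    W = init_weights(784, 500)  # the dA defaults: 784 visible, 500 hidden units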
@@ -188,9 +188,9 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
         if input == None :
             # we use a matrix because we expect a minibatch of several examples,
             # each example being a row
-            x = T.dmatrix(name = 'input')
+            self.x = T.dmatrix(name = 'input')
         else:
-            x = input
+            self.x = input
         # Equation (1)
         # note : first argument of theano.rng.binomial is the shape(size) of
         #        random numbers that it should produce
@@ -199,15 +199,15 @@
         #
         # this will produce an array of 0s and 1s where 1 has a
         # probability of 0.9 and 0 if 0.1
-        tilde_x = theano_rng.binomial( x.shape, 1, 0.9) * x
+        self.tilde_x = theano_rng.binomial( self.x.shape, 1, 0.9) * self.x
         # Equation (2)
         # note : y is stored as an attribute of the class so that it can be
         #        used later when stacking dAs.
-        self.y = T.nnet.sigmoid(T.dot(tilde_x, self.W ) + self.b)
+        self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b)
         # Equation (3)
-        z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+        self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
-        self.L = - T.sum( x*T.log(z) + (1-x)*T.log(1-z), axis=1 )
+        self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
         # note : L is now a vector, where each element is the cross-entropy cost
         #        of the reconstruction of the corresponding example of the
         #        minibatch. We need to compute the average of all these to get
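Note: Equations (1)-(4) in the comments are the corruption, encoding, decoding, and cross-entropy reconstruction cost of the denoising autoencoder. A standalone numpy sketch of the same forward pass, assuming tied weights (W_prime = W.T) as in the tutorial code:

    import numpy

    def sigmoid(a):
        return 1. / (1. + numpy.exp(-a))

    def da_cost(x, W, b, b_prime, rng=numpy.random):
        # Eq. (1): keep each input component with probability 0.9
        tilde_x = rng.binomial(1, 0.9, size=x.shape) * x
        # Eq. (2): encode the corrupted input
        y = sigmoid(numpy.dot(tilde_x, W) + b)
        # Eq. (3): decode back to the visible space (tied weights)
        z = sigmoid(numpy.dot(y, W.T) + b_prime)
        # Eq. (4): per-example cross-entropy, then average over the minibatch
        L = -numpy.sum(x * numpy.log(z) + (1 - x) * numpy.log(1 - z), axis=1)
        return L.mean()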
@@ -217,7 +217,7 @@
         # we will need the hidden layer obtained from the uncorrupted
         # input when for example we will pass this as input to the layer
         # above
-        self.hidden_values = T.nnet.sigmoid( T.dot(x, self.W) + self.b)
+        self.hidden_values = T.nnet.sigmoid( T.dot(self.x, self.W) + self.b)
 
 
 
@@ -260,13 +260,17 @@ def __init__(self, input, n_ins, hidden_layers_sizes, n_outs):
             # input size is that of the previous layer
             # input is the output of the last layer inserted in our list
             # of layers `self.layers`
+            print i
+            print theano.pp(self.layers[-1].hidden_values)
             layer = dA( hidden_layers_sizes[i-1], \
                         hidden_layers_sizes[i], \
                         input = self.layers[-1].hidden_values )
             self.layers += [layer]
 
 
         self.n_layers = len(self.layers)
+        print '------------------------------------------'
+        print theano.pp(self.layers[-1].hidden_values)
         # now we need to use same weights and biases to define an MLP
         # We can simply use the `hidden_values` of the top layer, which
         # computes the input that we would normally feed to the logistic
@@ -298,8 +302,8 @@ def errors(self, y):
 
 
 
-def sgd_optimization_mnist( learning_rate=0.01, pretraining_epochs = 10, \
-                            pretraining_lr = 0.1, n_iter = 1000, dataset='mnist.pkl.gz'):
+def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 10, \
+                            pretraining_lr = 0.1, training_epochs = 1000, dataset='mnist.pkl.gz'):
     """
     Demonstrate stochastic gradient descent optimization for a multilayer
     perceptron
@@ -335,51 +339,52 @@ def shared_dataset(data_xy):
     valid_set_x, valid_set_y = shared_dataset(valid_set)
     train_set_x, train_set_y = shared_dataset(train_set)
 
-    batch_size = 500    # size of the minibatch
+    batch_size = 20    # size of the minibatch
 
     # compute number of minibatches for training, validation and testing
     n_train_batches = train_set_x.value.shape[0] / batch_size
     n_valid_batches = valid_set_x.value.shape[0] / batch_size
     n_test_batches  = test_set_x.value.shape[0]  / batch_size
 
     # allocate symbolic variables for the data
-    minibatch_offset = T.lscalar() # offset to the start of a [mini]batch
-    x = T.matrix('x')  # the data is presented as rasterized images
-    y = T.ivector('y') # the labels are presented as 1D vector of
-                       # [int] labels
+    index = T.lscalar()    # index to a [mini]batch
+    x     = T.matrix('x')  # the data is presented as rasterized images
+    y     = T.ivector('y') # the labels are presented as 1D vector of
+                           # [int] labels
 
 
 
 
     # construct the logistic regression class
     classifier = SdA( input=x, n_ins=28*28, \
-                      hidden_layers_sizes = [500, 500,500], n_outs=10)
+                      hidden_layers_sizes = [700, 700, 700], n_outs=10)
 
     ## Pre-train layer-wise
     for i in xrange(classifier.n_layers):
+        cost = classifier.layers[i].cost
         # compute gradients of layer parameters
-        gW       = T.grad(classifier.layers[i].cost, classifier.layers[i].W)
-        gb       = T.grad(classifier.layers[i].cost, classifier.layers[i].b)
-        gb_prime = T.grad(classifier.layers[i].cost, \
-                          classifier.layers[i].b_prime)
+        gW       = T.grad(cost, classifier.layers[i].W)
+        gb       = T.grad(cost, classifier.layers[i].b)
+        gb_prime = T.grad(cost, classifier.layers[i].b_prime)
         # updated value of parameters after each step
         new_W       = classifier.layers[i].W       - gW       * pretraining_lr
         new_b       = classifier.layers[i].b       - gb       * pretraining_lr
        new_b_prime = classifier.layers[i].b_prime - gb_prime * pretraining_lr
-        cost = classifier.layers[i].cost
-        layer_update = theano.function([minibatch_offset], cost, \
+
+        layer_update = theano.function([index], [cost], \
                 updates = {
                     classifier.layers[i].W       : new_W \
                   , classifier.layers[i].b       : new_b \
                   , classifier.layers[i].b_prime : new_b_prime },
                 givens = {
-                    x : test_set_x[minibatch_offset:minibatch_offset+batch_size]})
+                    x : train_set_x[index*batch_size:(index+1)*batch_size-1]})
         # go through pretraining epochs
         for epoch in xrange(pretraining_epochs):
            # go through the training set
-            for batch_offset in xrange(n_train_batches):
-                layer_update(i*batch_size)
-            print 'Pre-training layer %i, epoch %d'%(i,epoch)
+            for batch_index in xrange(n_train_batches):
+                c = layer_update(batch_index)
+            print 'Pre-training layer %i, epoch %d'%(i,epoch),c
+
 
 
 
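Note: Python slices are end-exclusive, so the new givens slice train_set_x[index*batch_size:(index+1)*batch_size-1] leaves each pretraining minibatch one example short; the later hunks use the full [index*batch_size:(index+1)*batch_size] form. A standalone sketch of the intended arithmetic:

    batch_size = 20

    def minibatch_bounds(index):
        # rows [index*batch_size, (index+1)*batch_size): exactly batch_size rows
        return index * batch_size, (index + 1) * batch_size

    lo, hi = minibatch_bounds(3)
    assert (lo, hi) == (60, 80) and hi - lo == batch_size
    # by contrast, x[lo:hi-1] would select only batch_size - 1 rows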

@@ -391,15 +396,15 @@ def shared_dataset(data_xy):
     # compiling a theano function that computes the mistakes that are made
     # by the model on a minibatch
     # create a function to compute the mistakes that are made by the model
-    test_model = theano.function([minibatch_offset], cost,
+    test_model = theano.function([index], classifier.errors(y),
             givens = {
-              x: test_set_x[minibatch_offset:minibatch_offset+batch_size],
-              y: test_set_y[minibatch_offset:minibatch_offset+batch_size]})
+              x: test_set_x[index*batch_size:(index+1)*batch_size],
+              y: test_set_y[index*batch_size:(index+1)*batch_size]})
 
-    validate_model = theano.function([minibatch_offset], cost,
+    validate_model = theano.function([index], classifier.errors(y),
             givens = {
-              x: valid_set_x[minibatch_offset:minibatch_offset+batch_size],
-              y: valid_set_y[minibatch_offset:minibatch_offset+batch_size]})
+              x: valid_set_x[index*batch_size:(index+1)*batch_size],
+              y: valid_set_y[index*batch_size:(index+1)*batch_size]})
 
 
     # compute the gradient of cost with respect to theta and add them to the
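Note: test_model and validate_model now return classifier.errors(y), the mean zero-one loss on a minibatch, instead of the cost expression used before. A minimal numpy analogue of that measure (illustrative only):

    import numpy

    def zero_one_error(y_pred, y_true):
        # fraction of misclassified examples in the minibatch
        return numpy.mean(y_pred != y_true)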
@@ -423,10 +428,10 @@ def shared_dataset(data_xy):
     # compiling a theano function `train_model` that returns the cost, but
     # in the same time updates the parameter of the model based on the rules
     # defined in `updates`
-    train_model = theano.function([minibatch_offset], cost, updates=updates,
+    train_model = theano.function([index], cost, updates=updates,
             givens = {
-              x: train_set_x[minibatch_offset:minibatch_offset+batch_size],
-              y: train_set_y[minibatch_offset:minibatch_offset+batch_size]})
+              x: train_set_x[index*batch_size:(index+1)*batch_size],
+              y: train_set_y[index*batch_size:(index+1)*batch_size]})
 
     # early-stopping parameters
     patience = 10000 # look as this many examples regardless
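Note: the early-stopping constants below this hunk follow the tutorials' usual patience scheme. A hedged sketch of how that scheme typically updates patience (patience_increase and improvement_threshold are assumed values, not shown in this diff):

    patience = 10000              # minimum number of minibatch updates to run
    patience_increase = 2         # assumed: wait this much longer on improvement
    improvement_threshold = 0.995 # assumed: relative gain considered significant

    def revise(iter, this_loss, patience, best_loss):
        # extend patience when validation improves significantly,
        # and track the best validation loss seen so far
        if this_loss < best_loss * improvement_threshold:
            patience = max(patience, iter * patience_increase)
        return patience, min(best_loss, this_loss)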
@@ -445,18 +450,18 @@ def shared_dataset(data_xy):
     best_validation_loss = float('inf')
     test_score = 0.
     start_time = time.clock()
-    # have a maximum of `n_iter` iterations through the entire dataset
-    for iter in xrange(n_iter* n_train_batches):
-
-        # get epoch and minibatch index
-        epoch = iter / n_train_batches
-        minibatch_index = iter % n_train_batches
-        minibatch_offset = minibatch_index * batch_size
+    cost_ij = []
+    for epoch in xrange(training_epochs):
+        for minibatch_index in xrange(n_train_batches):
 
-        cost_ij = train_model(minibatch_offset)
+            cost_ij += [train_model(minibatch_index)]
+            iter = epoch * n_train_batches + minibatch_index
 
         if (iter+1) % validation_frequency == 0:
-            validation_losses = [validate_model(i*batch_size) for i in xrange(n_valid_batches)]
+            print cost_ij
+            cost_ij = []
+            validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
+            print validation_losses
             this_validation_loss = numpy.mean(validation_losses)
             print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                   (epoch, minibatch_index+1, n_train_batches, \
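Note: in the rewritten loop, iter is recovered from the epoch and minibatch counters rather than driving them, so the validation check fires at the same global iteration count as before. A standalone sketch of the bookkeeping, assuming validation once per epoch:

    n_train_batches = 2500                   # e.g. 50000 examples / batch_size 20
    validation_frequency = n_train_batches   # assumed: validate once per epoch

    for epoch in xrange(2):
        for minibatch_index in xrange(n_train_batches):
            # same global count as the old single-loop formulation
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                pass  # run validate_model over every validation minibatch here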
@@ -476,7 +481,7 @@ def shared_dataset(data_xy):
                 best_iter = iter
 
             # test it on the test set
-            test_losses = [test_model(i*batch_size) for i in xrange(n_test_batches)]
+            test_losses = [test_model(i) for i in xrange(n_test_batches)]
             test_score = numpy.mean(test_losses)
             print(('     epoch %i, minibatch %i/%i, test error of best '
                    'model %f %%') %

code/convolutional_mlp.py

Lines changed: 21 additions & 25 deletions
@@ -186,7 +186,7 @@ def errors(self, y):
         raise NotImplementedError()
 
 
-def evaluate_lenet5(learning_rate=0.1, n_iter=200, dataset='mnist.pkl.gz', nkerns=[20,50]):
+def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='mnist.pkl.gz', nkerns=[20,50]):
     rng = numpy.random.RandomState(23455)
 
     # Load the dataset
@@ -213,10 +213,10 @@ def shared_dataset(data_xy):
     n_test_batches  = test_set_x.value.shape[0]  / batch_size
 
     # allocate symbolic variables for the data
-    minibatch_offset = T.lscalar() # offset to the start of a [mini]batch
-    x = T.matrix('x')  # the data is presented as rasterized images
-    y = T.ivector('y') # the labels are presented as 1D vector of
-                       # [int] labels
+    index = T.lscalar()    # index to a [mini]batch
+    x     = T.matrix('x')  # the data is presented as rasterized images
+    y     = T.ivector('y') # the labels are presented as 1D vector of
+                           # [int] labels
 
 
     ishape = (28,28)     # this is the size of MNIST images
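Note: with ishape = (28,28) and nkerns=[20,50] from the signature above, the feature-map sizes follow from the usual LeNet recipe. A sketch of the size arithmetic, assuming 5x5 filters and 2x2 max-pooling at each stage (those shapes are not shown in this diff):

    def conv_pool_out(size, filter_size=5, pool=2):
        # valid convolution shrinks the map, pooling then halves it
        return (size - filter_size + 1) // pool

    s0 = conv_pool_out(28)  # 28 -> 24 -> 12: layer0 yields nkerns[0] maps of 12x12
    s1 = conv_pool_out(s0)  # 12 ->  8 ->  4: layer1 yields nkerns[1] maps of 4x4
    # flattened input to the fully-connected layer: 50 * 4 * 4 = 800 units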
@@ -261,15 +261,15 @@ def shared_dataset(data_xy):
     cost = layer3.negative_log_likelihood(y)
 
     # create a function to compute the mistakes that are made by the model
-    test_model = theano.function([minibatch_offset], layer3.errors(y),
+    test_model = theano.function([index], layer3.errors(y),
             givens = {
-              x: test_set_x[minibatch_offset:minibatch_offset+batch_size],
-              y: test_set_y[minibatch_offset:minibatch_offset+batch_size]})
+              x: test_set_x[index*batch_size:(index+1)*batch_size],
+              y: test_set_y[index*batch_size:(index+1)*batch_size]})
 
-    validate_model = theano.function([minibatch_offset], layer3.errors(y),
+    validate_model = theano.function([index], layer3.errors(y),
             givens = {
-              x: valid_set_x[minibatch_offset:minibatch_offset+batch_size],
-              y: valid_set_y[minibatch_offset:minibatch_offset+batch_size]})
+              x: valid_set_x[index*batch_size:(index+1)*batch_size],
+              y: valid_set_y[index*batch_size:(index+1)*batch_size]})
 
     # create a list of all model parameters to be fit by gradient descent
     params = layer3.params+ layer2.params+ layer1.params + layer0.params
@@ -285,10 +285,10 @@ def shared_dataset(data_xy):
     for param_i, grad_i in zip(params, grads):
         updates[param_i] = param_i - learning_rate * grad_i
 
-    train_model = theano.function([minibatch_offset], cost, updates=updates,
+    train_model = theano.function([index], cost, updates=updates,
             givens = {
-              x: train_set_x[minibatch_offset:minibatch_offset+batch_size],
-              y: train_set_y[minibatch_offset:minibatch_offset+batch_size]})
+              x: train_set_x[index*batch_size:(index+1)*batch_size],
+              y: train_set_y[index*batch_size:(index+1)*batch_size]})
 
 
     ###############
@@ -313,23 +313,19 @@ def shared_dataset(data_xy):
     test_score = 0.
     start_time = time.clock()
 
-    # have a maximum of `n_iter` iterations through the entire dataset
-    for iter in xrange(n_iter * n_train_batches):
-
-        # get epoch and minibatch index
-        epoch = iter / n_train_batches
-        minibatch_index = iter % n_train_batches
-        minibatch_offset = minibatch_index * batch_size
-
+    for epoch in xrange(n_epochs):
+        for minibatch_index in xrange(n_train_batches):
+
+            iter = epoch * n_train_batches + minibatch_index
 
         if iter %100 == 0:
             print 'training @ iter = ', iter
-        cost_ij = train_model(minibatch_offset)
+        cost_ij = train_model(minibatch_index)
 
         if (iter+1) % validation_frequency == 0:
 
             # compute zero-one loss on validation set
-            validation_losses = [validate_model(i*batch_size) for i in xrange(n_valid_batches)]
+            validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
             this_validation_loss = numpy.mean(validation_losses)
             print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                   (epoch, minibatch_index+1, n_train_batches, \
@@ -349,7 +345,7 @@ def shared_dataset(data_xy):
                 best_iter = iter
 
             # test it on the test set
-            test_losses = [test_model(i*batch_size) for i in xrange(n_test_batches)]
+            test_losses = [test_model(i) for i in xrange(n_test_batches)]
             test_score = numpy.mean(test_losses)
             print(('     epoch %i, minibatch %i/%i, test error of best '
                    'model %f %%') %
