Commit 081f668

pep8
1 parent c459ead commit 081f668

1 file changed

code/convolutional_mlp.py: 134 additions & 118 deletions

@@ -1,24 +1,33 @@
-"""
-This tutorial introduces the LeNet5 neural network architecture using Theano. LeNet5 is a
-convolutional neural network, good for classifying images. This tutorial shows how to build the
-architecture, and comes with all the hyper-parameters you need to reproduce the paper's MNIST
-results.
+"""This tutorial introduces the LeNet5 neural network architecture
+using Theano. LeNet5 is a convolutional neural network, good for
+classifying images. This tutorial shows how to build the architecture,
+and comes with all the hyper-parameters you need to reproduce the
+paper's MNIST results.
 
 
 This implementation simplifies the model in the following ways:
 
  - LeNetConvPool doesn't implement location-specific gain and bias parameters
- - LeNetConvPool doesn't implement pooling by average, it implements pooling by max.
- - Digit classification is implemented with a logistic regression rather than an RBF network
+ - LeNetConvPool doesn't implement pooling by average, it implements pooling
+   by max.
+ - Digit classification is implemented with a logistic regression rather than
+   an RBF network
  - LeNet5 was not fully-connected convolutions at second layer
 
 References:
- - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner: Gradient-Based Learning Applied to Document
+ - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner:
+   Gradient-Based Learning Applied to Document
    Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
    http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf
+
 """
+import cPickle
+import gzip
+import os
+import sys
+import time
 
-import numpy, time, cPickle, gzip, sys, os
+import numpy
 
 import theano
 import theano.tensor as T
@@ -32,7 +41,7 @@
 class LeNetConvPoolLayer(object):
     """Pool Layer of a convolutional network """
 
-    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
+    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
         """
         Allocate a LeNetConvPoolLayer with shared variable internal parameters.
 
@@ -54,56 +63,59 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
         :param poolsize: the downsampling (pooling) factor (#rows,#cols)
         """
 
-        assert image_shape[1]==filter_shape[1]
+        assert image_shape[1] == filter_shape[1]
         self.input = input
-
-        # initialize weights to temporary values until we know the shape of the output feature
-        # maps
+
+        # initialize weights to temporary values until we know the
+        # shape of the output feature maps
         W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
-        self.W = theano.shared(value = W_values)
+        self.W = theano.shared(value=W_values)
 
         # the bias is a 1D tensor -- one bias per output feature map
-        b_values = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX)
-        self.b = theano.shared(value= b_values)
+        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
+        self.b = theano.shared(value=b_values)
 
         # convolve input feature maps with filters
-        conv_out = conv.conv2d(input = input, filters = self.W,
+        conv_out = conv.conv2d(input=input, filters=self.W,
                 filter_shape=filter_shape, image_shape=image_shape)
 
-        # there are "num input feature maps * filter height * filter width" inputs
-        # to each hidden unit
+        # there are "num input feature maps * filter height * filter width"
+        # inputs to each hidden unit
         fan_in = numpy.prod(filter_shape[1:])
         # each unit in the lower layer receives a gradient from:
-        # "num output feature maps * filter height * filter width" / pooling size
-        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
+        # "num output feature maps * filter height * filter width" /
+        #   pooling size
+        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
+                   numpy.prod(poolsize))
         # replace weight values with random weights
-        W_bound = numpy.sqrt(6./(fan_in + fan_out))
+        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
         self.W.set_value(numpy.asarray(
             rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
-            dtype = theano.config.floatX),
+            dtype=theano.config.floatX),
             borrow=True)
 
         # downsample each feature map individually, using maxpooling
-        pooled_out = downsample.max_pool_2d( input = conv_out,
-                                            ds = poolsize, ignore_border=True)
+        pooled_out = downsample.max_pool_2d(input=conv_out,
+                                            ds=poolsize, ignore_border=True)
 
         # add the bias term. Since the bias is a vector (1D array), we first
-        # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will thus
-        # be broadcasted across mini-batches and feature map width & height
+        # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will
+        # thus be broadcasted across mini-batches and feature map
+        # width & height
         self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
 
         # store parameters of this layer
         self.params = [self.W, self.b]
 
 
-
-def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='../data/mnist.pkl.gz',
-                    nkerns=[20,50], batch_size = 500):
+def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
+                    dataset='../data/mnist.pkl.gz',
+                    nkerns=[20, 50], batch_size=500):
     """ Demonstrates lenet on MNIST dataset
 
     :type learning_rate: float
     :param learning_rate: learning rate used (factor for the stochastic
-                      gradient)
+                          gradient)
 
     :type n_epochs: int
     :param n_epochs: maximal number of epochs to run the optimizer
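
Note: the re-wrapped fan_out expression is the one place in this hunk where the new line breaks could be misread, so it is worth checking the arithmetic it encodes. A minimal sketch in plain numpy, using the filter_shape=(20, 1, 5, 5) and poolsize=(2, 2) that evaluate_lenet5 passes to layer0 below (values taken from this file, which targets Python 2, where / on ints is integer division):

    import numpy

    filter_shape = (20, 1, 5, 5)  # (n_filters, n_input_maps, height, width)
    poolsize = (2, 2)

    fan_in = numpy.prod(filter_shape[1:])                    # 1 * 5 * 5 = 25
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))                         # 20 * 25 / 4 = 125
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))            # sqrt(6 / 150) = 0.2

Wrapping the division inside parentheses changes only the layout, not the precedence: the bound is still sqrt(6 / (fan_in + fan_out)), used to scale the uniform draw that replaces the temporary zero weights.
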
@@ -121,22 +133,23 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='../data/mnist.pkl.
 
     train_set_x, train_set_y = datasets[0]
     valid_set_x, valid_set_y = datasets[1]
-    test_set_x , test_set_y = datasets[2]
-
+    test_set_x, test_set_y = datasets[2]
 
     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
+    n_train_batches /= batch_size
+    n_valid_batches /= batch_size
+    n_test_batches /= batch_size
 
     # allocate symbolic variables for the data
-    index = T.lscalar()    # index to a [mini]batch
-    x     = T.matrix('x')  # the data is presented as rasterized images
-    y     = T.ivector('y')  # the labels are presented as 1D vector of
-                            # [int] labels
+    index = T.lscalar()  # index to a [mini]batch
+    x = T.matrix('x')   # the data is presented as rasterized images
+    y = T.ivector('y')  # the labels are presented as 1D vector of
+                        # [int] labels
 
-
-    ishape = (28,28)     # this is the size of MNIST images
+    ishape = (28, 28)  # this is the size of MNIST images
 
     ######################
     # BUILD ACTUAL MODEL #
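
Note: splitting the batch-count computation into a shape lookup followed by /= keeps the lines short without changing the result, but it still leans on Python 2 semantics: with no __future__ division in the imports above, /= between ints is floor division, so any trailing partial batch is silently dropped. A small sketch of the assumed behaviour, with the mnist.pkl.gz training-set size:

    n_train_batches = 50000   # train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= 500    # Python 2 int division -> 100 full minibatches
    # 50001 examples would also give 100 batches; the leftover example is unused
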
@@ -145,32 +158,32 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='../data/mnist.pkl.
 
     # Reshape matrix of rasterized images of shape (batch_size,28*28)
     # to a 4D tensor, compatible with our LeNetConvPoolLayer
-    layer0_input = x.reshape((batch_size,1,28,28))
+    layer0_input = x.reshape((batch_size, 1, 28, 28))
 
     # Construct the first convolutional pooling layer:
     # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
     # maxpooling reduces this further to (24/2,24/2) = (12,12)
     # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
     layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
-            image_shape=(batch_size,1,28,28),
-            filter_shape=(nkerns[0],1,5,5), poolsize=(2,2))
+            image_shape=(batch_size, 1, 28, 28),
+            filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))
 
     # Construct the second convolutional pooling layer
     # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
     # maxpooling reduces this further to (8/2,8/2) = (4,4)
     # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4)
     layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
-            image_shape=(batch_size,nkerns[0],12,12),
-            filter_shape=(nkerns[1],nkerns[0],5,5), poolsize=(2,2))
+            image_shape=(batch_size, nkerns[0], 12, 12),
+            filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))
 
     # the TanhLayer being fully-connected, it operates on 2D matrices of
     # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
     # This will generate a matrix of shape (20,32*4*4) = (20,512)
     layer2_input = layer1.output.flatten(2)
 
     # construct a fully-connected sigmoidal layer
-    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1]*4*4,
-                         n_out=500, activation = T.tanh)
+    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4,
+                         n_out=500, activation=T.tanh)
 
     # classify the values of the fully-connected sigmoidal layer
     layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
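
Note: the shape comments carried through this hunk all follow one rule: a 'valid' convolution with a k x k filter maps an n x n feature map to (n - k + 1) x (n - k + 1), and the (2, 2) max-pooling then halves each side. A short sketch checking the sizes used here (5 x 5 filters, as in both LeNetConvPoolLayer calls; the helper name is made up):

    def conv_pool_size(n, k=5, pool=2):
        # 'valid' convolution, then non-overlapping max-pooling
        return (n - k + 1) // pool

    s0 = conv_pool_size(28)   # layer0: (28 - 5 + 1) / 2 = 12
    s1 = conv_pool_size(s0)   # layer1: (12 - 5 + 1) / 2 = 4

With nkerns=[20, 50] and batch_size=500, layer2_input is therefore (500, 50 * 4 * 4) = (500, 800); the (20, 32*4*4) = (20, 512) figure in the untouched comment appears to date from older hyper-parameters.
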
@@ -180,113 +193,116 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='../data/mnist.pkl.
 
     # create a function to compute the mistakes that are made by the model
     test_model = theano.function([index], layer3.errors(y),
-             givens = {
-                x: test_set_x[index*batch_size:(index+1)*batch_size],
-                y: test_set_y[index*batch_size:(index+1)*batch_size]})
+             givens={
+                x: test_set_x[index * batch_size: (index + 1) * batch_size],
+                y: test_set_y[index * batch_size: (index + 1) * batch_size]})
 
     validate_model = theano.function([index], layer3.errors(y),
-            givens = {
-                x: valid_set_x[index*batch_size:(index+1)*batch_size],
-                y: valid_set_y[index*batch_size:(index+1)*batch_size]})
+            givens={
+                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
+                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})
 
     # create a list of all model parameters to be fit by gradient descent
-    params = layer3.params+ layer2.params+ layer1.params + layer0.params
-
+    params = layer3.params + layer2.params + layer1.params + layer0.params
+
     # create a list of gradients for all model parameters
     grads = T.grad(cost, params)
 
-    # train_model is a function that updates the model parameters by SGD
-    # Since this model has many parameters, it would be tedious to manually
-    # create an update rule for each model parameter. We thus create the updates
-    # dictionary by automatically looping over all (params[i],grads[i]) pairs.
+    # train_model is a function that updates the model parameters by
+    # SGD Since this model has many parameters, it would be tedious to
+    # manually create an update rule for each model parameter. We thus
+    # create the updates dictionary by automatically looping over all
+    # (params[i],grads[i]) pairs.
     updates = {}
     for param_i, grad_i in zip(params, grads):
         updates[param_i] = param_i - learning_rate * grad_i
-
-    train_model = theano.function([index], cost, updates=updates,
-            givens = {
-                x: train_set_x[index*batch_size:(index+1)*batch_size],
-                y: train_set_y[index*batch_size:(index+1)*batch_size]})
 
+    train_model = theano.function([index], cost, updates=updates,
+          givens={
+            x: train_set_x[index * batch_size: (index + 1) * batch_size],
+            y: train_set_y[index * batch_size: (index + 1) * batch_size]})
 
     ###############
     # TRAIN MODEL #
     ###############
     print '... training'
     # early-stopping parameters
-    patience = 10000 # look as this many examples regardless
-    patience_increase = 2 # wait this much longer when a new best is
-                          # found
-    improvement_threshold = 0.995 # a relative improvement of this much is
-                                  # considered significant
-    validation_frequency = min(n_train_batches, patience/2)
-                                  # go through this many
-                                  # minibatche before checking the network
-                                  # on the validation set; in this case we
-                                  # check every epoch
-
-    best_params = None
+    patience = 10000  # look as this many examples regardless
+    patience_increase = 2  # wait this much longer when a new best is
+                           # found
+    improvement_threshold = 0.995  # a relative improvement of this much is
+                                   # considered significant
+    validation_frequency = min(n_train_batches, patience / 2)
+                                  # go through this many
+                                  # minibatche before checking the network
+                                  # on the validation set; in this case we
+                                  # check every epoch
+
+    best_params = None
     best_validation_loss = numpy.inf
-    best_iter = 0
-    test_score = 0.
+    best_iter = 0
+    test_score = 0.
     start_time = time.clock()
 
-    epoch = 0
+    epoch = 0
     done_looping = False
 
     while (epoch < n_epochs) and (not done_looping):
-        epoch = epoch + 1
-        for minibatch_index in xrange(n_train_batches):
-
-            iter = epoch * n_train_batches + minibatch_index
+        epoch = epoch + 1
+        for minibatch_index in xrange(n_train_batches):
 
-            if iter %100 == 0:
-                print 'training @ iter = ', iter
-            cost_ij = train_model(minibatch_index)
+            iter = epoch * n_train_batches + minibatch_index
 
-            if (iter+1) % validation_frequency == 0:
+            if iter % 100 == 0:
+                print 'training @ iter = ', iter
+            cost_ij = train_model(minibatch_index)
 
-                # compute zero-one loss on validation set
-                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
-                this_validation_loss = numpy.mean(validation_losses)
-                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
-                      (epoch, minibatch_index+1, n_train_batches, \
-                       this_validation_loss*100.))
+            if (iter + 1) % validation_frequency == 0:
 
+                # compute zero-one loss on validation set
+                validation_losses = [validate_model(i) for i
+                                     in xrange(n_valid_batches)]
+                this_validation_loss = numpy.mean(validation_losses)
+                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
+                      (epoch, minibatch_index + 1, n_train_batches, \
+                       this_validation_loss * 100.))
 
-                # if we got the best validation score until now
-                if this_validation_loss < best_validation_loss:
+                # if we got the best validation score until now
+                if this_validation_loss < best_validation_loss:
 
-                    #improve patience if loss improvement is good enough
-                    if this_validation_loss < best_validation_loss * \
-                       improvement_threshold :
-                        patience = max(patience, iter * patience_increase)
+                    #improve patience if loss improvement is good enough
+                    if this_validation_loss < best_validation_loss * \
+                       improvement_threshold:
+                        patience = max(patience, iter * patience_increase)
 
-                    # save best validation score and iteration number
-                    best_validation_loss = this_validation_loss
-                    best_iter = iter
+                    # save best validation score and iteration number
+                    best_validation_loss = this_validation_loss
+                    best_iter = iter
 
-                    # test it on the test set
-                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
-                    test_score = numpy.mean(test_losses)
-                    print((' epoch %i, minibatch %i/%i, test error of best '
-                           'model %f %%') %
-                          (epoch, minibatch_index+1, n_train_batches,
-                           test_score*100.))
+                    # test it on the test set
+                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
+                    test_score = numpy.mean(test_losses)
+                    print((' epoch %i, minibatch %i/%i, test error of best '
+                           'model %f %%') %
+                          (epoch, minibatch_index + 1, n_train_batches,
+                           test_score * 100.))
 
-            if patience <= iter :
-                done_looping = True
-                break
+            if patience <= iter:
+                done_looping = True
+                break
 
     end_time = time.clock()
     print('Optimization complete.')
     print('Best validation score of %f %% obtained at iteration %i,'\
-          'with test performance %f %%' %
-          (best_validation_loss * 100., best_iter, test_score*100.))
-    print >> sys.stderr, ('The code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((end_time-start_time)/60.))
+          'with test performance %f %%' %
+          (best_validation_loss * 100., best_iter, test_score * 100.))
+    print >> sys.stderr, ('The code for file ' +
+                          os.path.split(__file__)[1] +
+                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 if __name__ == '__main__':
     evaluate_lenet5()
 
+
 def experiment(state, channel):
     evaluate_lenet5(state.learning_rate, dataset=state.dataset)
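
Note: the updates dictionary rebuilt in this last hunk is plain stochastic gradient descent: every parameter is mapped to itself minus learning_rate times its gradient, and theano.function applies all of those assignments on each call to train_model. Outside of Theano the same rule is one line per step; a toy sketch with made-up numbers:

    learning_rate = 0.1
    params = [0.5, -0.3]   # stand-ins for entries of W and b
    grads = [0.2, -0.4]    # pretend gradients of the cost
    # same rule as the updates dict above: param <- param - lr * grad
    params = [p - learning_rate * g for p, g in zip(params, grads)]
    # params is now [0.48, -0.26]: each parameter moved against its gradient

The reflowed early-stopping block keeps its original logic: patience is extended to iter * patience_increase whenever the validation loss beats the previous best by more than the improvement_threshold factor, and training stops once iter reaches patience.
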
