1- """
2- This tutorial introduces the LeNet5 neural network architecture using Theano. LeNet5 is a
3- convolutional neural network, good for classifying images. This tutorial shows how to build the
4- architecture, and comes with all the hyper-parameters you need to reproduce the paper's MNIST
5- results.
1+ """This tutorial introduces the LeNet5 neural network architecture
2+ using Theano. LeNet5 is a convolutional neural network, good for
3+ classifying images. This tutorial shows how to build the architecture,
4+ and comes with all the hyper-parameters you need to reproduce the
5+ paper's MNIST results.
66
77
88This implementation simplifies the model in the following ways:

 - LeNetConvPool doesn't implement location-specific gain and bias parameters
 - LeNetConvPool doesn't implement pooling by average, it implements pooling
   by max.
 - Digit classification is implemented with a logistic regression rather than
   an RBF network
 - Unlike the original LeNet5, the second-layer convolutions here are fully
   connected to all first-layer feature maps (LeNet5 used a sparse
   connection scheme)

References:
 - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner:
   Gradient-Based Learning Applied to Document
   Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
   http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf

"""
import cPickle
import gzip
import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv

# LogisticRegression, load_data and HiddenLayer are defined in the earlier
# tutorials of this series (logistic_sgd.py and mlp.py)
from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer


class LeNetConvPoolLayer(object):
    """Pool Layer of a convolutional network """

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # initialize weights to temporary values until we know the
        # shape of the output feature maps
        W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
        self.W = theano.shared(value=W_values)

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values)

        # convolve input feature maps with filters
        conv_out = conv.conv2d(input=input, filters=self.W,
                filter_shape=filter_shape, image_shape=image_shape)

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        # replace weight values with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W.set_value(numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)

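        # Worked example with the first layer's settings from
        # evaluate_lenet5 below (filter_shape=(20, 1, 5, 5),
        # poolsize=(2, 2)):
        #   fan_in  = 1 * 5 * 5             = 25
        #   fan_out = 20 * 5 * 5 / (2 * 2)  = 125
        #   W_bound = sqrt(6. / (25 + 125)) = 0.2
        # so those weights are drawn uniformly from [-0.2, 0.2].
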
        # downsample each feature map individually, using maxpooling
        pooled_out = downsample.max_pool_2d(input=conv_out,
                                            ds=poolsize, ignore_border=True)

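        # (With poolsize=(2, 2), each non-overlapping 2x2 patch of a feature
        # map is reduced to its maximum, halving both spatial dimensions:
        # a (batch, n_filters, 24, 24) conv_out becomes
        # (batch, n_filters, 12, 12).)
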
        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias
        # will thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]


def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='../data/mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training / testing
                    (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size
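    # With the standard MNIST split (50000/10000/10000 examples) and
    # batch_size=500, this gives 100 training, 20 validation and 20 test
    # minibatches; the integer division drops any examples that do not
    # fill a complete minibatch.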

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    ishape = (28, 28)  # this is the size of MNIST images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 28, 28))

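    # (x arrives as a (batch_size, 28 * 28) matrix of flattened images;
    # the extra dimension of size 1 is the single grayscale input channel.)
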
    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], 12, 12),
            filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # i.e. (500, 800) with the default values
    layer2_input = layer1.output.flatten(2)

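    # flatten(2) keeps the leading (batch) dimension and collapses the
    # rest, so a (500, 50, 4, 4) tensor becomes (500, 50 * 4 * 4) = (500, 800).
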
    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4,
                         n_out=500, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([index], layer3.errors(y),
             givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

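    # In both functions above, `givens` substitutes the symbolic variables
    # x and y with slices of the shared (possibly GPU-resident) dataset
    # variables, so e.g. validate_model(3) scores minibatch 3 without
    # copying data into the graph on every call.
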
    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates dictionary by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = {}
    for param_i, grad_i in zip(params, grads):
        updates[param_i] = param_i - learning_rate * grad_i
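
    # Each entry in `updates` implements plain stochastic gradient descent,
    #   param <- param - learning_rate * d(cost)/d(param),
    # with one global learning rate (0.1 by default) and no momentum or
    # weight decay.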

    train_model = theano.function([index], cost, updates=updates,
          givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

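    # Each call train_model(i) performs one SGD step on minibatch i and
    # returns that minibatch's cost; the parameter updates happen as a
    # side effect via the `updates` dictionary.
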
    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

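    # With the defaults, validation_frequency = min(100, 10000 / 2) = 100,
    # so the validation error is computed once per epoch.
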
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = epoch * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

if __name__ == '__main__':
    evaluate_lenet5()

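# `experiment` below is a hook for experiment-driver frameworks (the naming
# follows a jobman-style convention, where `state` carries hyper-parameters
# and `channel` is used for reporting); only state.learning_rate and
# state.dataset are read here.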

def experiment(state, channel):
    evaluate_lenet5(state.learning_rate, dataset=state.dataset)