 
 
 """
-import numpy
-from theano.compile.sandbox import shared, pfunc
-from theano import tensor
-from pylearn.shared.layers import LogisticRegression, SigmoidalLayer
+import numpy, theano, cPickle, gzip, time
+import theano.tensor as T
 import theano.sandbox.softsign
 import pylearn.datasets.MNIST
+from theano.sandbox import conv, downsample
 
-
-try:
-    # this tells theano to use the GPU if possible
-    from theano.sandbox.cuda import use
-    use()
-except Exception, e:
-    print('Warning: Attempt to use GPU resulted in error "%s"' % str(e))
-
-class LeNetConvPool(object):
+class LeNetConvPoolLayer(object):
     """WRITEME
 
     Math of what the layer does, and what symbolic variables are created by the class (w, b,
@@ -55,21 +46,17 @@ class LeNetConvPool(object):
     # - one bias & scale per downsample feature location (a 2d bias)
     # - more?
 
-    def __init__(self, rng, input, n_examples, n_imgs, img_shape, n_filters, filter_shape=(5,5),
+    def __init__(self, rng, input, n_imgs, n_filters, filter_shape=(5,5),
             poolsize=(2,2)):
         """
-        Allocate a LeNetConvPool layer with shared variable internal parameters.
+        Allocate a LeNetConvPoolLayer with shared variable internal parameters.
 
         :param rng: a random number generator used to initialize weights
 
-        :param input: symbolic images. Shape: (n_examples, n_imgs, img_shape[0], img_shape[1])
-
-        :param n_examples: input's shape[0] at runtime
+        :param input: symbolic images. Shape: (<mini-batch size>, n_imgs, <img height>, <img width>)
 
         :param n_imgs: input's shape[1] at runtime
 
-        :param img_shape: input's shape[2:4] at runtime
-
         :param n_filters: the number of filters to apply to the image.
 
         :param filter_shape: the size of the filters to apply
@@ -79,74 +66,67 @@ def __init__(self, rng, input, n_examples, n_imgs, img_shape, n_filters, filter_
         :type poolsize: pair (rows, cols)
         """
 
-        #TODO: make a simpler convolution constructor!!
-        #  - make dx and dy optional
-        #  - why do we have to pass shapes? (Can we make them optional at least?)
-        conv_op = ConvOp((n_imgs,)+img_shape, filter_shape, n_filters, n_examples,
-                dx=1, dy=1, output_mode='valid')
-
-        # - why is poolsize an op parameter here?
-        # - can we just have a maxpool function that creates this Op internally?
-        ds_op = DownsampleFactorMax(poolsize, ignore_border=True)
-
         # the filter tensor that we will apply is a 4D tensor
         w_shp = (n_filters, n_imgs) + filter_shape
-
-        # the bias we add is a 1D tensor
-        b_shp = (n_filters,)
-
-        self.w = shared(
-                numpy.asarray(
+        w_bound = numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs)
+        self.w = theano.shared(numpy.asarray(
                 rng.uniform(
-                    low=-1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
-                    high=1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
+                    low=-1.0 / w_bound,
+                    high=1.0 / w_bound,
                     size=w_shp),
                 dtype=input.dtype))
-        self.b = shared(
-                numpy.asarray(
+
+        # the bias we add is a 1D tensor
+        b_shp = (n_filters,)
+        self.b = theano.shared(numpy.asarray(
                 rng.uniform(low=-.0, high=0., size=(n_filters,)),
                 dtype=input.dtype))
 
         self.input = input
-        conv_out = conv_op(input, self.w)
-        self.output = tensor.tanh(ds_op(conv_out) + b.dimshuffle('x', 0, 'x', 'x'))
+        conv_out = conv.conv2d(input, self.w)
+
+        # - why is poolsize an op parameter here?
+        # - can we just have a maxpool function that creates this Op internally?
+        ds_op = downsample.DownsampleFactorMax(poolsize, ignore_border=True)
+        self.output = T.tanh(ds_op(conv_out) + self.b.dimshuffle('x', 0, 'x', 'x'))
         self.params = [self.w, self.b]
 
+
 class SigmoidalLayer(object):
-    def __init__(self, input, n_in, n_out):
+    def __init__(self, rng, input, n_in, n_out):
         """
         :param input: a symbolic tensor of shape (n_examples, n_in)
         :param w: a symbolic weight matrix of shape (n_in, n_out)
         :param b: symbolic bias terms of shape (n_out,)
         :param squash: a squashing function
         """
         self.input = input
-        self.w = shared(
+        self.w = theano.shared(
                 numpy.asarray(
                     rng.uniform(low=-2/numpy.sqrt(n_in), high=2/numpy.sqrt(n_in),
                     size=(n_in, n_out)), dtype=input.dtype))
-        self.b = shared(numpy.asarray(numpy.zeros(n_out), dtype=input.dtype))
-        self.output = tensor.tanh(tensor.dot(input, self.w) + self.b)
+        self.b = theano.shared(numpy.asarray(numpy.zeros(n_out), dtype=input.dtype))
+        self.output = T.tanh(T.dot(input, self.w) + self.b)
         self.params = [self.w, self.b]
 
 class LogisticRegression(object):
     """WRITEME"""
 
     def __init__(self, input, n_in, n_out):
-        self.w = shared(numpy.zeros((n_in, n_out), dtype=input.dtype))
-        self.b = shared(numpy.zeros((n_out,), dtype=input.dtype))
-        self.l1 = abs(self.w).sum()
+        self.w = theano.shared(numpy.zeros((n_in, n_out), dtype=input.dtype))
+        self.b = theano.shared(numpy.zeros((n_out,), dtype=input.dtype))
+        self.l1 = abs(self.w).sum()
         self.l2_sqr = (self.w**2).sum()
-        self.output = nnet.softmax(theano.dot(input, self.w)+self.b)
-        self.argmax = theano.tensor.argmax(self.output, axis=1)
+        self.output = T.nnet.softmax(theano.dot(input, self.w)+self.b)
+        self.argmax = T.argmax(self.output, axis=1)
         self.params = [self.w, self.b]
 
     def nll(self, target):
         """Return the negative log-likelihood of the prediction of this model under a given
         target distribution. Passing symbolic integers here means 1-hot.
         WRITEME
         """
-        return nnet.categorical_crossentropy(self.output, target)
+        return T.nnet.categorical_crossentropy(self.output, target)
 
     def errors(self, target):
         """Return a vector of 0s and 1s, with 1s on every line that was mis-classified.
@@ -155,75 +135,179 @@ def errors(self, target):
             raise TypeError('target should have the same shape as self.argmax', ('target', target.type,
                 'argmax', self.argmax.type))
         if target.dtype.startswith('int'):
-            return theano.tensor.neq(self.argmax, target)
+            return T.neq(self.argmax, target)
         else:
             raise NotImplementedError()
 
-def evaluate_lenet5(batch_size=30, n_iter=1000):
+def load_dataset():
+
+    # Load the dataset
+    f = gzip.open('mnist.pkl.gz','rb')
+    train_set, valid_set, test_set = cPickle.load(f)
+    f.close()
+
+    # make minibatches of size 20
+    batch_size = 20    # size of the minibatch
+
+    # Dealing with the training set
+    # get the list of training images (x) and their labels (y)
+    (train_set_x, train_set_y) = train_set
+    # initialize the list of training minibatches with empty list
+    train_batches = []
+    for i in xrange(0, len(train_set_x), batch_size):
+        # add to the list of minibatches the minibatch starting at
+        # position i, ending at position i+batch_size
+        # a minibatch is a pair; the first element of the pair is a list
+        # of datapoints, the second element is the list of corresponding
+        # labels
+        train_batches = train_batches + \
+               [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]
+
+    # Dealing with the validation set
+    (valid_set_x, valid_set_y) = valid_set
+    # initialize the list of validation minibatches
+    valid_batches = []
+    for i in xrange(0, len(valid_set_x), batch_size):
+        valid_batches = valid_batches + \
+               [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]
+
+    # Dealing with the testing set
+    (test_set_x, test_set_y) = test_set
+    # initialize the list of testing minibatches
+    test_batches = []
+    for i in xrange(0, len(test_set_x), batch_size):
+        test_batches = test_batches + \
+               [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]
+
+    return train_batches, valid_batches, test_batches
+
+
+def evaluate_lenet5(learning_rate=0.01, n_iter=1000):
+
     rng = numpy.random.RandomState(23455)
 
-    mnist = pylearn.datasets.MNIST.train_valid_test()
+    train_batches, valid_batches, test_batches = load_dataset()
 
-    ishape = (28,28) #this is the size of MNIST images
+    ishape = (28,28)   # this is the size of MNIST images
+    batch_size = 20    # size of the minibatch
 
     # allocate symbolic variables for the data
-    x = tensor.fmatrix()  # the data is presented as rasterized images
-    y = tensor.lvector()  # the labels are presented as 1D vector of [long int] labels
+    x = T.fmatrix()  # the data is presented as rasterized images
+    y = T.lvector()  # the labels are presented as 1D vector of [long int] labels
+
+
+    ######################
+    # BUILD ACTUAL MODEL #
+    ######################
 
     # construct the first convolutional pooling layer
-    layer0 = LeNetConvPool.new(rng, input=x.reshape((batch_size,1,28,28)), n_examples=batch_size,
-            n_imgs=1, img_shape=ishape,
-            n_filters=6, filter_shape=(5,5),
-            poolsize=(2,2))
+    layer0 = LeNetConvPoolLayer(rng, input=x.reshape((batch_size,1,28,28)),
+            n_imgs=1, n_filters=6, filter_shape=(5,5), poolsize=(2,2))
 
     # construct the second convolutional pooling layer
-    layer1 = LeNetConvPool.new(rng, input=layer0.output, n_examples=batch_size,
-            n_imgs=6, img_shape=(12,12),
-            n_filters=16, filter_shape=(5,5),
-            poolsize=(2,2))
+    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
+            n_imgs=6, n_filters=16, filter_shape=(5,5), poolsize=(2,2))
 
     # construct a fully-connected sigmoidal layer
-    layer2 = SigmoidalLayer.new(rng, input=layer1.output.flatten(2), n_in=16*16, n_out=128) # 128 ?
+    layer2 = SigmoidalLayer(rng, input=layer1.output.flatten(2), n_in=16*4*4, n_out=128) # 128 ?
 
     # classify the values of the fully-connected sigmoidal layer
-    layer3 = LogisticRegression.new(input=layer2.output, n_in=128, n_out=10)
+    layer3 = LogisticRegression(input=layer2.output, n_in=128, n_out=10)
 
     # the cost we minimize during training is the NLL of the model
     cost = layer3.nll(y).mean()
 
     # create a function to compute the mistakes that are made by the model
-    test_model = pfunc([x,y], layer3.errors(y))
+    test_model = theano.function([x,y], layer3.errors(y))
 
     # create a list of all model parameters to be fit by gradient descent
     params = layer3.params + layer2.params + layer1.params + layer0.params
-    learning_rate = numpy.asarray(0.01, dtype='float32')
+    learning_rate = numpy.asarray(learning_rate, dtype='float32')
 
     # train_model is a function that updates the model parameters by SGD
-    train_model = pfunc([x, y], cost,
-            updates=[(p, p - learning_rate*gp) for p,gp in zip(params, tensor.grad(cost, params))])
-
-    # IS IT MORE SIMPLE TO USE A MINIMIZER OR THE DIRECT CODE?
-
-    best_valid_score = float('inf')
-    for i in xrange(n_iter):
-        for j in xrange(len(mnist.train.x)/batch_size):
-            cost_ij = train_model(
-                    mnist.train.x[j*batch_size:(j+1)*batch_size],
-                    mnist.train.y[j*batch_size:(j+1)*batch_size])
-            #if 0 == j % 100:
-                #print('epoch %i:%i, training error %f' % (i, j*batch_size, cost_ij))
-        valid_score = numpy.mean([test_model(
-            mnist.valid.x[j*batch_size:(j+1)*batch_size],
-            mnist.valid.y[j*batch_size:(j+1)*batch_size])
-            for j in xrange(len(mnist.valid.x)/batch_size)])
-        print('epoch %i, validation error %f' % (i, valid_score))
-        if valid_score < best_valid_score:
-            best_valid_score = valid_score
-            test_score = numpy.mean([test_model(
-                mnist.test.x[j*batch_size:(j+1)*batch_size],
-                mnist.test.y[j*batch_size:(j+1)*batch_size])
-                for j in xrange(len(mnist.test.x)/batch_size)])
-            print('epoch %i, test error of best model %f' % (i, test_score))
+    train_model = theano.function([x, y], cost,
+            updates=[(p, p - learning_rate*gp) for p,gp in zip(params, T.grad(cost, params))])
+
+
+    ###############
+    # TRAIN MODEL #
+    ###############
+
+    n_minibatches = len(train_batches)
+
+    # early-stopping parameters
+    patience = 10000                # look at this many examples regardless
+    patience_increase = 2           # wait this much longer when a new best is
+                                    # found
+    improvement_threshold = 0.995   # a relative improvement of this much is
+                                    # considered significant
+    validation_frequency = n_minibatches  # go through this many
+                                          # minibatches before checking the network
+                                          # on the validation set; in this case we
+                                          # check every epoch
+
+    best_params = None
+    best_validation_loss = float('inf')
+    test_score = 0.
+    start_time = time.clock()
+
+    # have a maximum of `n_iter` iterations through the entire dataset
+    for iter in xrange(n_iter * n_minibatches):
+
+        # get epoch and minibatch index
+        epoch = iter / n_minibatches
+        minibatch_index = iter % n_minibatches
+
+        # get the minibatch corresponding to `iter` modulo
+        # `len(train_batches)`
+        x, y = train_batches[minibatch_index]
+
+        print 'training @ iter = ', iter
+        cost_ij = train_model(x, y)
+
+        if (iter+1) % validation_frequency == 0:
+            # compute zero-one loss on validation set
+            this_validation_loss = 0.
+            for x, y in valid_batches:
+                # sum up the errors for each minibatch
+                this_validation_loss += test_model(x, y)
+            # get the average by dividing with the number of minibatches
+            this_validation_loss /= len(valid_batches)
+
+            print('epoch %i, minibatch %i/%i, validation error %f %%' % \
+                  (epoch, minibatch_index+1, n_minibatches, \
+                   this_validation_loss*100.))
+
+
+            # if we got the best validation score until now
+            if this_validation_loss < best_validation_loss:
+
+                # improve patience if loss improvement is good enough
+                if this_validation_loss < best_validation_loss * \
+                       improvement_threshold:
+                    patience = max(patience, iter * patience_increase)
+
+                best_validation_loss = this_validation_loss
+                # test it on the test set
+
+                test_score = 0.
+                for x, y in test_batches:
+                    test_score += test_model(x, y)
+                test_score /= len(test_batches)
+                print(('     epoch %i, minibatch %i/%i, test error of best '
+                       'model %f %%') %
+                      (epoch, minibatch_index+1, n_minibatches,
+                       test_score*100.))
+
+        if patience <= iter:
+            break
+
+    end_time = time.clock()
+    print(('Optimization complete with best validation score of %f %%, '
+           'with test performance %f %%') %
+          (best_validation_loss * 100., test_score * 100.))
+    print('The code ran for %f minutes' % ((end_time-start_time)/60.))
+
 
 if __name__ == '__main__':
     evaluate_lenet5()
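
The n_in change above (16*16 -> 16*4*4) follows from the shape bookkeeping of the two conv/pool stages. A minimal sketch of that arithmetic, assuming 'valid' convolutions and non-overlapping 2x2 max-pooling with ignore_border=True, as the code uses; the helper conv_pool_shape is hypothetical, for illustration only:

# A 'valid' convolution with an (fh, fw) filter maps (h, w) -> (h-fh+1, w-fw+1);
# 2x2 max-pooling with ignore_border=True then halves each dimension.
def conv_pool_shape(img, filt=(5, 5), pool=(2, 2)):  # hypothetical helper
    h = (img[0] - filt[0] + 1) // pool[0]
    w = (img[1] - filt[1] + 1) // pool[1]
    return (h, w)

s0 = conv_pool_shape((28, 28))  # layer0: (28,28) -> conv -> (24,24) -> pool -> (12,12)
s1 = conv_pool_shape(s0)        # layer1: (12,12) -> conv -> (8,8)  -> pool -> (4,4)
assert (s0, s1) == ((12, 12), (4, 4))
# layer1 emits 16 feature maps of size 4x4, so the flattened input to layer2
# has 16 * 4 * 4 = 256 units -- hence n_in=16*4*4 rather than the old 16*16,
# and the img_shape=(12,12) that the removed code passed to layer1.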
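The early-stopping constants are easier to read with numbers plugged in. With the 50,000-example MNIST training set and batch_size=20, n_minibatches is 2500, so validation_frequency fires once per epoch. A rough trace of the patience rule, using hypothetical improvement iterations:

patience, patience_increase = 10000, 2
# suppose significant validation improvements land at these iters
# (hypothetical values: ends of the 1st, 2nd and 4th passes over the data)
for it in (2499, 4999, 9999):
    patience = max(patience, it * patience_increase)
    print('improvement at iter %i -> patience now %i' % (it, patience))
# prints 10000, 10000, 19998: only the improvement at iter 9999
# extends training beyond the initial patience.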