11"""
22This tutorial introduces the multi-layer perceptron using Theano.
33
4- Long description with formulas
4+ Multilayer perceptron
55
66
77..math::
1313 - textbooks: "Pattern Recognition and Machine Learning" -
1414 Christopher M. Bishop, section 5
1515
16+
17+ 99 epochs : 259.218667 mins
18+ validation score : 1.930000 %
19+ test score 1.9200000 %
20+
1621TODO: recommended preprocessing, lr ranges, regularization ranges (explain
1722 to do lr first, then add regularization)
1823
@@ -26,6 +31,8 @@
 import theano
 import theano.tensor as T
 
+import time
+
 from theano.compile.sandbox import shared, pfunc
 import theano.tensor.nnet
 
@@ -135,8 +142,8 @@ def errors(self, y):
 
 
 
-def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0001, \
-                           L2_reg=0.0001, n_iter=100):
+def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0, \
+                           L2_reg=0.0, n_iter=100):
     """
     Demonstrate stochastic gradient descent optimization for a multilayer
     perceptron
@@ -171,7 +178,7 @@ def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0001, \
 
     # construct the MLP classifier
     classifier = MLP(input=x.reshape((batch_size, 28 * 28)),\
-                     n_in=28 * 28, n_hidden=500, n_out=10)
+                     n_in=28 * 28, n_hidden=1000, n_out=10)
 
     # the cost we minimize during training is the negative log likelihood of
     # the model plus the regularization terms (L1 and L2); cost is expressed
@@ -203,18 +210,19 @@ def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0001, \
     train_model = pfunc([x, y], cost, updates=updates)
 
     # early-stopping parameters
-    patience = 5000                # look at this many examples regardless
+    patience = 10000               # look at this many examples regardless
     patience_increase = 2          # wait this much longer when a new best is
                                    # found
     improvement_threshold = 0.995  # a relative improvement of this much is
                                    # considered significant
-    validation_frequency = 1000    # make this many SGD updates between
+    validation_frequency = 3000    # make this many SGD updates between
                                    # validations
 
     best_params = None
     best_validation_loss = float('inf')
     test_score = 0.
-
+
+    start_time = time.clock()
     # have a maximum of `n_iter` iterations through the entire dataset
     for iter in xrange(n_iter * len(train_batches)):
 
@@ -236,8 +244,8 @@ def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0001, \
             # get the average by dividing by the number of minibatches
             this_validation_loss /= len(valid_batches)
 
-            print('epoch %i, validation error %f' %
-                  (epoch, this_validation_loss))
+            print('epoch %i, validation error %f %%' %
+                  (epoch, this_validation_loss * 100.))
 
             # improve patience
             if this_validation_loss < best_validation_loss * \
@@ -254,15 +262,19 @@ def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0001, \
                 for x, y in test_batches:
                     test_score += test_model(x, y)
                 test_score /= len(test_batches)
-                print('     epoch %i, test error of best model %f' %
-                      (epoch, test_score))
+                print('     epoch %i, test error of best model %f %%' %
+                      (epoch, test_score * 100.))
 
         if patience <= iter:
             break
 
+    end_time = time.clock()
+    print(('Optimization complete with best validation score of %f %%,'
+           ' with test performance %f %%') %
+          (best_validation_loss * 100., test_score * 100.))
+    print('The code ran for %f minutes' % ((end_time - start_time) / 60.))
+
 
-    print(('Optimization complete with best validation score of %f,'
-           'with test performance %f') % (best_validation_loss, test_score))
 
 
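A note on the `L1_reg`/`L2_reg` defaults this commit zeroes out: per the comment in the hunk around line 178, the training cost is the negative log likelihood plus the L1 and L2 regularization terms, each weighted by its coefficient. A minimal numeric sketch (dummy values, not part of the commit) of how those coefficients enter the cost:

    # dummy magnitudes standing in for the NLL and the L1/L2 penalty terms
    nll, L1_term, L2_sqr_term = 0.35, 12.0, 4.5
    L1_reg, L2_reg = 0.0, 0.0   # the new defaults in this commit
    cost = nll + L1_reg * L1_term + L2_reg * L2_sqr_term
    print(cost)                 # 0.35: with zero coefficients, cost is just the NLL

Setting both coefficients to 0.0 thus disables regularization entirely rather than merely weakening it, which is consistent with the docstring's TODO: tune the learning rate first, then add regularization.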
0 commit comments
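Taken together, the constants retuned here (`patience`, `patience_increase`, `improvement_threshold`, `validation_frequency`) implement the tutorial's patience-based early stopping: validate every `validation_frequency` updates, and whenever the validation loss improves by a significant relative margin, extend the training budget to twice the current iteration count. Below is a minimal, self-contained sketch of that schedule, not part of the commit; `validate()` is a hypothetical stand-in for the real validation pass over `valid_batches`, returning dummy loss values:

    import random

    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # relative improvement considered significant
    validation_frequency = 3000    # SGD updates between validations

    def validate(iter):
        # hypothetical stand-in for the validation pass: a noisy, decaying loss
        return 1.0 / (1.0 + iter / 5000.0) + random.uniform(0.0, 0.01)

    best_validation_loss = float('inf')
    for iter in xrange(100000):
        # ... one SGD update on a minibatch would happen here ...
        if (iter + 1) % validation_frequency == 0:
            this_validation_loss = validate(iter)
            if this_validation_loss < best_validation_loss * improvement_threshold:
                # significant improvement: let training run up to twice as long
                patience = max(patience, iter * patience_increase)
            best_validation_loss = min(best_validation_loss, this_validation_loss)
        if patience <= iter:
            break                  # patience exhausted, stop early

    print('stopped at iteration %i, best validation loss %f' %
          (iter, best_validation_loss))

Under this schedule, raising `patience` from 5000 to 10000 guarantees at least three validations (after 3000, 6000, and 9000 updates) before the loop can stop, so the less frequent validation introduced here never lets a run end unvalidated.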