
Commit 750a2cc

Stats for logistic_sgd and mlp
1 parent aef8e8c commit 750a2cc

4 files changed: 60 additions & 26 deletions


code/logistic_cg.py

Lines changed: 7 additions & 3 deletions

@@ -209,7 +209,7 @@ def callback(theta_value):

        this_validation_loss /= len(valid_batches)

-       print('validation error %f' % (this_validation_loss,))
+       print('validation error %f %%' % (this_validation_loss*100.,))

        # check if it is better then best validation score got until now
        if this_validation_loss < validation_scores[0]:
@@ -224,16 +224,20 @@ def callback(theta_value):
    # using scipy conjugate gradient optimizer
    import scipy.optimize
    print ("Optimizing using scipy.optimize.fmin_cg...")
+   start_time = time.clock()
    best_w_b = scipy.optimize.fmin_cg(
           f=train_fn,
           x0=numpy.zeros((n_in+1)*n_out, dtype=x.dtype),
           fprime=train_fn_grad,
           callback=callback,
           disp=0,
           maxiter=n_iter)
+   end_time = time.clock()
+   print(('Optimization complete with best validation score of %f %%, with'
+          'test performance %f %%') %
+         (best_validation_loss*100., test_score*100.))

-   print(('Optimization complete with best validation score of %f, with'
-          'test performance %f') % (best_validation_loss, test_score))
+   print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
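
For readers following along, the pattern this diff introduces — wrapping a scipy.optimize.fmin_cg run in wall-clock timing, with a per-iteration callback reporting progress — is sketched below on a toy quadratic objective. This is a minimal, self-contained illustration, not the repository's code: loss, grad, and the use of time.time() (time.clock() was removed in Python 3.8) are substitutions.

    import time
    import numpy
    import scipy.optimize

    # Toy stand-in for the tutorial's logistic loss: a quadratic bowl
    # minimized at theta = 3.
    def loss(theta):
        return 0.5 * numpy.sum((theta - 3.0) ** 2)

    def grad(theta):
        return theta - 3.0

    def callback(theta):
        # fmin_cg calls this once per iteration with the current parameters;
        # the tutorial computes its validation error at this point.
        print('current loss %f' % loss(theta))

    start_time = time.time()
    best_theta = scipy.optimize.fmin_cg(
        f=loss,
        x0=numpy.zeros(5),
        fprime=grad,
        callback=callback,
        disp=0,
        maxiter=50)
    end_time = time.time()
    print('The code ran for %f minutes' % ((end_time - start_time) / 60.))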

code/logistic_sgd.py

Lines changed: 11 additions & 7 deletions

@@ -41,6 +41,7 @@

 import numpy, cPickle, gzip

+import time

 import theano
 import theano.tensor as T
@@ -197,6 +198,7 @@ def sgd_optimization_mnist( learning_rate=0.01, n_iter=100):
    best_validation_loss = float('inf')
    test_score = 0.

+   start_time = time.clock()
    # have a maximum of `n_iter` iterations through the entire dataset
    for iter in xrange(n_iter* len(train_batches)):

@@ -218,8 +220,8 @@ def sgd_optimization_mnist( learning_rate=0.01, n_iter=100):
            # get the average by dividing with the number of minibatches
            this_validation_loss /= len(valid_batches)

-           print('epoch %i, validation error %f' %
-                 (epoch, this_validation_loss))
+           print('epoch %i, validation error %f %%' %
+                 (epoch, this_validation_loss*100.))

            #improve patience
            if this_validation_loss < best_validation_loss * \
@@ -236,15 +238,17 @@ def sgd_optimization_mnist( learning_rate=0.01, n_iter=100):
                for x,y in test_batches:
                    test_score += test_model(x,y)
                test_score /= len(test_batches)
-               print(' epoch %i, test error of best model %f' %
-                     (epoch, test_score))
+               print(' epoch %i, test error of best model %f %%' %
+                     (epoch, test_score*100.))

        if patience <= iter :
            break

-
-   print(('Optimization complete with best validation score of %f,'
-          'with test performance %f') % (best_validation_loss, test_score))
+   end_time = time.clock()
+   print(('Optimization complete with best validation score of %f %%,'
+          'with test performance %f %%') %
+         (best_validation_loss * 100., test_score*100.))
+   print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
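
The start_time/end_time bookkeeping added here brackets the existing patience-based early-stopping loop. To see that mechanism in isolation, here is a minimal sketch with synthetic validation losses standing in for the tutorial's minibatch evaluation; the constants and loss values are illustrative only, not the repository's.

    import time

    # Synthetic validation losses; in the tutorial these come from
    # averaging the model's error over the validation minibatches.
    fake_losses = [0.12, 0.09, 0.08, 0.079, 0.0789, 0.0788, 0.0788]

    patience = 5                   # look at this many steps regardless
    patience_increase = 2          # wait this much longer on a new best
    improvement_threshold = 0.995  # relative improvement that counts

    best_validation_loss = float('inf')
    start_time = time.time()       # time.clock() is gone in Python 3
    for iter, this_validation_loss in enumerate(fake_losses):
        print('step %i, validation error %f %%' %
              (iter, this_validation_loss * 100.))
        if this_validation_loss < best_validation_loss * improvement_threshold:
            # significant improvement: extend the patience window
            patience = max(patience, iter * patience_increase)
        best_validation_loss = min(best_validation_loss, this_validation_loss)
        if patience <= iter:
            break
    end_time = time.time()

    print('best validation score %f %%, ran for %f minutes' %
          (best_validation_loss * 100., (end_time - start_time) / 60.))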

code/mlp.py

Lines changed: 25 additions & 13 deletions

@@ -1,7 +1,7 @@
 """
 This tutorial introduces the multi-layer perceptron using Theano.

-Long description with formulas
+Multilayer perceptron


 ..math::
@@ -13,6 +13,11 @@
 - textbooks: "Pattern Recognition and Machine Learning" -
              Christopher M. Bishop, section 5

+
+99 epochs : 259.218667 mins
+validation score : 1.930000 %
+test score 1.9200000 %
+
 TODO: recommended preprocessing, lr ranges, regularization ranges (explain
       to do lr first, then add regularization)

@@ -26,6 +31,8 @@
 import theano
 import theano.tensor as T

+import time
+
 from theano.compile.sandbox import shared, pfunc
 import theano.tensor.nnet

@@ -135,8 +142,8 @@ def errors(self, y):



-def sgd_optimization_mnist( learning_rate=0.01, L1_reg = 0.0001, \
-                            L2_reg = 0.0001, n_iter=100):
+def sgd_optimization_mnist( learning_rate=0.01, L1_reg = 0.0, \
+                            L2_reg = 0.0, n_iter=100):
     """
     Demonstrate stochastic gradient descent optimization for a multilayer
     perceptron
@@ -171,7 +178,7 @@ def sgd_optimization_mnist( learning_rate=0.01, L1_reg = 0.0001, \

     # construct the logistic regression class
     classifier = MLP( input=x.reshape((batch_size,28*28)),\
-                      n_in=28*28, n_hidden = 500, n_out=10)
+                      n_in=28*28, n_hidden = 1000, n_out=10)

     # the cost we minimize during training is the negative log likelihood of
     # the model plus the regularization terms (L1 and L2); cost is expressed
@@ -203,18 +210,19 @@ def sgd_optimization_mnist( learning_rate=0.01, L1_reg = 0.0001, \
     train_model = pfunc([x, y], cost, updates = updates )

     # early-stopping parameters
-    patience = 5000   # look as this many examples regardless
+    patience = 10000  # look as this many examples regardless
     patience_increase = 2     # wait this much longer when a new best is
                               # found
     improvement_threshold = 0.995 # a relative improvement of this much is
                                   # considered significant
-    validation_frequency = 1000 # make this many SGD updates between
+    validation_frequency = 3000 # make this many SGD updates between
                                 # validations

     best_params = None
     best_validation_loss = float('inf')
     test_score = 0.
-
+
+    start_time = time.clock()
     # have a maximum of `n_iter` iterations through the entire dataset
     for iter in xrange(n_iter* len(train_batches)):

@@ -236,8 +244,8 @@ def sgd_optimization_mnist( learning_rate=0.01, L1_reg = 0.0001, \
             # get the average by dividing with the number of minibatches
             this_validation_loss /= len(valid_batches)

-            print('epoch %i, validation error %f' %
-                  (epoch, this_validation_loss))
+            print('epoch %i, validation error %f %%' %
+                  (epoch, this_validation_loss*100.))

             #improve patience
             if this_validation_loss < best_validation_loss * \
@@ -254,15 +262,19 @@ def sgd_optimization_mnist( learning_rate=0.01, L1_reg = 0.0001, \
                 for x,y in test_batches:
                     test_score += test_model(x,y)
                 test_score /= len(test_batches)
-                print(' epoch %i, test error of best model %f' %
-                      (epoch, test_score))
+                print(' epoch %i, test error of best model %f %%' %
+                      (epoch, test_score*100.))

         if patience <= iter :
             break

+    end_time = time.clock()
+    print(('Optimization complete with best validation score of %f %%,'
+           'with test performance %f %%') %
+          (best_validation_loss * 100., test_score*100.))
+    print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
+

-    print(('Optimization complete with best validation score of %f,'
-           'with test performance %f') % (best_validation_loss, test_score))
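
Besides the timing, this diff zeroes the L1_reg/L2_reg defaults, so the cost now reduces by default to the plain negative log-likelihood. To make the role of those two coefficients concrete, here is a numpy sketch of the cost the tutorial builds symbolically in Theano (function and variable names are illustrative, not the repository's):

    import numpy

    def regularized_cost(nll, W_hidden, W_out, L1_reg=0.0, L2_reg=0.0):
        # cost = NLL + L1_reg * L1 + L2_reg * L2_sqr, where the penalties
        # sum over both weight matrices of the MLP.
        L1 = abs(W_hidden).sum() + abs(W_out).sum()
        L2_sqr = (W_hidden ** 2).sum() + (W_out ** 2).sum()
        return nll + L1_reg * L1 + L2_reg * L2_sqr

    W_h = 0.01 * numpy.ones((28 * 28, 1000))   # 1000 hidden units, as above
    W_o = 0.01 * numpy.ones((1000, 10))
    print(regularized_cost(2.3, W_h, W_o))                 # 2.3: penalties vanish
    print(regularized_cost(2.3, W_h, W_o, L1_reg=0.0001))  # old default adds ~0.79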

doc/logreg.txt

Lines changed: 17 additions & 3 deletions

@@ -82,7 +82,7 @@ Up to this point, we have only defined the graph of computations which theano
 should perform. To get the actual numerical value of :math:`P(Y|x, W,b)`, we
 must create a function ``get_p_y_given_x``, which takes as input ``x`` and
 returns ``p_y_given_x``. We can then index its return value with the
-index :math:`i` to get the membership probability of the :math:`i`th class.
+index :math:`i` to get the membership probability of the :math:`i` th class.

 Now let's finishing building the theano graph. To get the actual model
 prediction, we can use the ``T.argmax`` operator, which will return the index at
@@ -313,12 +313,26 @@ within the DeepLearningTutorials folder:

    python code/logistic_sgd.py

+The output one should expect is of the form :

+.. code-block:: bash
+
+  epoch 0, validation error 12.210000 %
+  epoch 0, test error of best model 12.660000 %
+  ...
+  epoch 25, validation error 7.090000 %
+  Optimization complete with best validation score of 6.97%, with test performance 7.62%
+  The code ran for 2.840833 minutes
+
+On a **what type of machine is simplet??** the code run with
+approximately 6.8179992 sec/epoch and it took 25 epochs to reach a test
+error of 7.62%.

 .. rubric:: Footnotes

 .. [#f1] For smaller datasets and simpler models, more sophisticated descent
-         algorithms can be more effective. The sample code for logistic regression
-         demonstrates how to use SciPy's conjugate gradient solver with theano.
+         algorithms can be more effective. The sample code logistic_cg.py
+         demonstrates how to use SciPy's conjugate gradient solver with theano
+         on the logistic regression task.
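
The sec/epoch figure in the paragraph added above is consistent with the total runtime the script prints; a quick sanity check, assuming the run's 25 epochs:

    # Sanity check for the sec/epoch figure quoted above, from the doc's own numbers.
    total_minutes = 2.840833
    n_epochs = 25
    print('%f sec/epoch' % (total_minutes * 60. / n_epochs))  # 6.8179992 sec/epoch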
