11"""
22This tutorial introduces the multi-layer perceptron using Theano.
33
4- Long description with formulas
4+ Multilayer perceptron
55
66
77..math::
1313 - textbooks: "Pattern Recognition and Machine Learning" -
1414 Christopher M. Bishop, section 5
1515
16+
17+ 99 epochs : 259.218667 mins
18+ validation score : 1.930000 %
19+ test score 1.9200000 %
20+
1621TODO: recommended preprocessing, lr ranges, regularization ranges (explain
1722 to do lr first, then add regularization)
1823
@@ -26,6 +31,8 @@
 import theano
 import theano.tensor as T
 
+import time
+
 from theano.compile.sandbox import shared, pfunc
 import theano.tensor.nnet
 
@@ -135,8 +142,8 @@ def errors(self, y):
 
 
 
-def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0001, \
-                           L2_reg=0.0001, n_iter=100):
+def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0, \
+                           L2_reg=0.0, n_iter=100):
     """
     Demonstrate stochastic gradient descent optimization for a multilayer
     perceptron
@@ -171,7 +178,7 @@ def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0001, \
 
     # construct the MLP classifier
     classifier = MLP(input=x.reshape((batch_size, 28 * 28)),\
-                     n_in=28 * 28, n_hidden=500, n_out=10)
+                     n_in=28 * 28, n_hidden=1000, n_out=10)
 
     # the cost we minimize during training is the negative log likelihood of
     # the model plus the regularization terms (L1 and L2); cost is expressed
@@ -203,18 +210,19 @@ def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0001, \
     train_model = pfunc([x, y], cost, updates=updates)
 
     # early-stopping parameters
-    patience = 5000                # look at this many examples regardless
+    patience = 10000               # look at this many examples regardless
     patience_increase = 2          # wait this much longer when a new best is
                                    # found
     improvement_threshold = 0.995  # a relative improvement of this much is
                                    # considered significant
-    validation_frequency = 1000    # make this many SGD updates between
+    validation_frequency = 3000    # make this many SGD updates between
                                    # validations
 
     best_params = None
     best_validation_loss = float('inf')
     test_score = 0.
-
+
+    start_time = time.clock()
     # have a maximum of `n_iter` iterations through the entire dataset
     for iter in xrange(n_iter * len(train_batches)):
 
@@ -236,8 +244,8 @@ def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0001, \
             # get the average by dividing by the number of minibatches
             this_validation_loss /= len(valid_batches)
 
-            print('epoch %i, validation error %f' %
-                  (epoch, this_validation_loss))
+            print('epoch %i, validation error %f %%' %
+                  (epoch, this_validation_loss * 100.))
 
             # improve patience
             if this_validation_loss < best_validation_loss * \
@@ -254,15 +262,19 @@ def sgd_optimization_mnist(learning_rate=0.01, L1_reg=0.0001, \
                 for x, y in test_batches:
                     test_score += test_model(x, y)
                 test_score /= len(test_batches)
-                print('     epoch %i, test error of best model %f' %
-                      (epoch, test_score))
+                print('     epoch %i, test error of best model %f %%' %
+                      (epoch, test_score * 100.))
 
         if patience <= iter:
             break
 
+    end_time = time.clock()
+    print(('Optimization complete with best validation score of %f %%,'
+           ' with test performance %f %%') %
+          (best_validation_loss * 100., test_score * 100.))
+    print('The code ran for %f minutes' % ((end_time - start_time) / 60.))
+
 
-    print(('Optimization complete with best validation score of %f,'
-           'with test performance %f') % (best_validation_loss, test_score))
 
 
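A note on the `L1_reg`/`L2_reg` defaults this commit zeroes out: per the comment in the hunk around line 178, the training cost is the negative log likelihood plus the L1 and L2 regularization terms, each weighted by its coefficient. A minimal numeric sketch (dummy values, not part of the commit) of how those coefficients enter the cost:

    # dummy magnitudes standing in for the NLL and the L1/L2 penalty terms
    nll, L1_term, L2_sqr_term = 0.35, 12.0, 4.5
    L1_reg, L2_reg = 0.0, 0.0   # the new defaults in this commit
    cost = nll + L1_reg * L1_term + L2_reg * L2_sqr_term
    print(cost)                 # 0.35: with zero coefficients, cost is just the NLL

Setting both coefficients to 0.0 thus disables regularization entirely rather than merely weakening it, which is consistent with the docstring's TODO: tune the learning rate first, then add regularization.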
0 commit comments
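Taken together, the constants retuned here (`patience`, `patience_increase`, `improvement_threshold`, `validation_frequency`) implement the tutorial's patience-based early stopping: validate every `validation_frequency` updates, and whenever the validation loss improves by a significant relative margin, extend the training budget to twice the current iteration count. Below is a minimal, self-contained sketch of that schedule, not part of the commit; `validate()` is a hypothetical stand-in for the real validation pass over `valid_batches`, returning dummy loss values:

    import random

    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # relative improvement considered significant
    validation_frequency = 3000    # SGD updates between validations

    def validate(iter):
        # hypothetical stand-in for the validation pass: a noisy, decaying loss
        return 1.0 / (1.0 + iter / 5000.0) + random.uniform(0.0, 0.01)

    best_validation_loss = float('inf')
    for iter in xrange(100000):
        # ... one SGD update on a minibatch would happen here ...
        if (iter + 1) % validation_frequency == 0:
            this_validation_loss = validate(iter)
            if this_validation_loss < best_validation_loss * improvement_threshold:
                # significant improvement: let training run up to twice as long
                patience = max(patience, iter * patience_increase)
            best_validation_loss = min(best_validation_loss, this_validation_loss)
        if patience <= iter:
            break                  # patience exhausted, stop early

    print('stopped at iteration %i, best validation loss %f' %
          (iter, best_validation_loss))

Under this schedule, raising `patience` from 5000 to 10000 guarantees at least three validations (after 3000, 6000, and 9000 updates) before the loop can stop, so the less frequent validation introduced here never lets a run end unvalidated.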