
Commit 36c0080

I changed the pickled file so that it is not divided into minibatches beforehand. Note that the code now generates some warnings; this is because of an issue with advanced indexing that Pascal will fix later today. The error is caused by the fact that I am using mean instead of sum in the negative log likelihood function.
1 parent 483a28f commit 36c0080

6 files changed

Lines changed: 175 additions & 38 deletions
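As a side note on the reduction mentioned in the commit message, here is a minimal numpy sketch (not part of the commit; all numbers and names are illustrative) of how averaging the per-example log likelihoods, rather than summing them, rescales the cost, and therefore the gradients, by the minibatch size:

    import numpy

    # Illustration only: uniform predicted probabilities over 10 classes for a
    # minibatch of 4 examples, and the correct label of each example
    log_p_y_given_x = numpy.log(numpy.ones((4, 10)) * 0.1)
    y = numpy.array([3, 1, 7, 0])

    # log-probability of the correct label for each example, via advanced indexing
    correct = log_p_y_given_x[numpy.arange(4), y]

    nll_sum  = -correct.sum()    # grows with the size of the minibatch
    nll_mean = -correct.mean()   # nll_sum / batch_size, so gradients shrink by that factor
    print nll_sum, nll_mean      # Python 2 print, matching the rest of the repository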


code/logistic_cg.py

Lines changed: 36 additions & 7 deletions
@@ -32,8 +32,6 @@
     - textbooks: "Pattern Recognition and Machine Learning" -
                  Christopher M. Bishop, section 4.3.2

-TODO: recommended preprocessing, lr ranges, regularization ranges (explain
-      to do lr first, then add regularization)

"""
__docformat__ = 'restructedtext en'

@@ -146,16 +144,47 @@ def cg_optimization_mnist( n_iter=50 ):
    :param n_iter: number of iterations ot run the optimizer

    """
-    #TODO: Tzanetakis

-    # Load the dataset ; note that the dataset is already divided in
-    # minibatches of size 20;
+    # Load the dataset
    f = gzip.open('mnist.pkl.gz','rb')
-    train_batches, valid_batches, test_batches = cPickle.load(f)
+    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

+    # make minibatches of size 20
+    batch_size = 20    # sized of the minibatch

+    # Dealing with the training set
+    # get the list of training images (x) and their labels (y)
+    (train_set_x, train_set_y) = train_set
+    # initialize the list of training minibatches with empty list
+    train_batches = []
+    for i in xrange(0, len(train_set_x), batch_size):
+        # add to the list of minibatches the minibatch starting at
+        # position i, ending at position i+batch_size
+        # a minibatch is a pair ; the first element of the pair is a list
+        # of datapoints, the second element is the list of corresponding
+        # labels
+        train_batches = train_batches + \
+               [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]

+    # Dealing with the validation set
+    (valid_set_x, valid_set_y) = valid_set
+    # initialize the list of validation minibatches
+    valid_batches = []
+    for i in xrange(0, len(valid_set_x), batch_size):
+        valid_batches = valid_batches + \
+               [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]

+    # Dealing with the testing set
+    (test_set_x, test_set_y) = test_set
+    # initialize the list of testing minibatches
+    test_batches = []
+    for i in xrange(0, len(test_set_x), batch_size):
+        test_batches = test_batches + \
+               [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]

    ishape     = (28,28) # this is the size of MNIST images
-    batch_size = 20    # size of the minibatch
    n_in  = 28*28 # number of input units
    n_out = 10    # number of output units
    # allocate symbolic variables for the data
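A sketch (not part of the commit) of what the re-pickled file now contains, assuming mnist.pkl.gz has already been downloaded into the working directory; the expected shapes are noted as comments:

    import cPickle, gzip

    # Load the re-pickled dataset: a tuple of three (images, labels) pairs
    # instead of three lists of pre-built minibatches
    f = gzip.open('mnist.pkl.gz', 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    (train_set_x, train_set_y) = train_set
    print train_set_x.shape              # expected (50000, 784): one row per flattened 28x28 image
    print train_set_y.shape              # expected (50000,): one digit label per image

    batch_size = 20
    print len(train_set_x) / batch_size  # 2500 minibatches after the split shown above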

code/logistic_sgd.py

Lines changed: 36 additions & 6 deletions
@@ -32,8 +32,6 @@
     - textbooks: "Pattern Recognition and Machine Learning" -
                  Christopher M. Bishop, section 4.3.2

-TODO: recommended preprocessing, lr ranges, regularization ranges (explain
-      to do lr first, then add regularization)

"""
__docformat__ = 'restructedtext en'

@@ -150,14 +148,46 @@ def sgd_optimization_mnist( learning_rate=0.01, n_iter=100):

    """

-    # Load the dataset ; note that the dataset is already divided in
-    # minibatches of size 10;
+    # Load the dataset
    f = gzip.open('mnist.pkl.gz','rb')
-    train_batches, valid_batches, test_batches = cPickle.load(f)
+    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

+    # make minibatches of size 20
+    batch_size = 20    # sized of the minibatch

+    # Dealing with the training set
+    # get the list of training images (x) and their labels (y)
+    (train_set_x, train_set_y) = train_set
+    # initialize the list of training minibatches with empty list
+    train_batches = []
+    for i in xrange(0, len(train_set_x), batch_size):
+        # add to the list of minibatches the minibatch starting at
+        # position i, ending at position i+batch_size
+        # a minibatch is a pair ; the first element of the pair is a list
+        # of datapoints, the second element is the list of corresponding
+        # labels
+        train_batches = train_batches + \
+               [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]

+    # Dealing with the validation set
+    (valid_set_x, valid_set_y) = valid_set
+    # initialize the list of validation minibatches
+    valid_batches = []
+    for i in xrange(0, len(valid_set_x), batch_size):
+        valid_batches = valid_batches + \
+               [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]

+    # Dealing with the testing set
+    (test_set_x, test_set_y) = test_set
+    # initialize the list of testing minibatches
+    test_batches = []
+    for i in xrange(0, len(test_set_x), batch_size):
+        test_batches = test_batches + \
+               [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]

    ishape     = (28,28) # this is the size of MNIST images
-    batch_size = 20    # size of the minibatch

    # allocate symbolic variables for the data
    x = T.fmatrix()  # the data is presented as rasterized images
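To see what the slicing loop above produces without downloading MNIST, here is a self-contained sketch on synthetic arrays (illustration only, not part of the commit):

    import numpy

    toy_x = numpy.zeros((100, 784), dtype='float32')   # 100 fake "images"
    toy_y = numpy.arange(100) % 10                      # 100 fake labels
    batch_size = 20

    toy_batches = []
    for i in xrange(0, len(toy_x), batch_size):
        toy_batches = toy_batches + \
                [(toy_x[i:i + batch_size], toy_y[i:i + batch_size])]

    print len(toy_batches)           # 5 minibatches
    print toy_batches[0][0].shape    # (20, 784): the images of the first minibatch
    print toy_batches[0][1].shape    # (20,): their labels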

code/mlp.py

Lines changed: 38 additions & 6 deletions
@@ -170,14 +170,46 @@ def sgd_optimization_mnist( learning_rate=0.01, L1_reg = 0.0, \
               regularization)
    """

-    # Load the dataset ; note that the dataset is already divided in
-    # minibatches of size 10;
+    # Load the dataset
    f = gzip.open('mnist.pkl.gz','rb')
-    train_batches, valid_batches, test_batches = cPickle.load(f)
+    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

+    # make minibatches of size 20
+    batch_size = 20    # sized of the minibatch

+    # Dealing with the training set
+    # get the list of training images (x) and their labels (y)
+    (train_set_x, train_set_y) = train_set
+    # initialize the list of training minibatches with empty list
+    train_batches = []
+    for i in xrange(0, len(train_set_x), batch_size):
+        # add to the list of minibatches the minibatch starting at
+        # position i, ending at position i+batch_size
+        # a minibatch is a pair ; the first element of the pair is a list
+        # of datapoints, the second element is the list of corresponding
+        # labels
+        train_batches = train_batches + \
+               [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]

+    # Dealing with the validation set
+    (valid_set_x, valid_set_y) = valid_set
+    # initialize the list of validation minibatches
+    valid_batches = []
+    for i in xrange(0, len(valid_set_x), batch_size):
+        valid_batches = valid_batches + \
+               [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]

+    # Dealing with the testing set
+    (test_set_x, test_set_y) = test_set
+    # initialize the list of testing minibatches
+    test_batches = []
+    for i in xrange(0, len(test_set_x), batch_size):
+        test_batches = test_batches + \
+               [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]

    ishape     = (28,28) # this is the size of MNIST images
-    batch_size = 20    # size of the minibatch

    # allocate symbolic variables for the data
    x = T.fmatrix()  # the data is presented as rasterized images

@@ -216,7 +248,8 @@ def sgd_optimization_mnist( learning_rate=0.01, L1_reg = 0.0, \
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function([x, y], cost, updates = updates )
+    n_minibatches = len(train_batches)

    # early-stopping parameters
    patience = 10000 # look as this many examples regardless
    patience_increase = 2 # wait this much longer when a new best is

@@ -232,7 +265,6 @@ def sgd_optimization_mnist( learning_rate=0.01, L1_reg = 0.0, \
    best_params = None
    best_validation_loss = float('inf')
    test_score = 0.
-    n_minibatches = len(train_batches)
    start_time = time.clock()
    # have a maximum of `n_iter` iterations through the entire dataset
    for iter in xrange(n_iter* n_minibatches):
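The loop counter `iter` above runs over every minibatch of every pass through the data. One way (illustration only; the file's own bookkeeping may differ) to read an epoch number and a minibatch index out of it:

    n_minibatches = 2500                     # e.g. 50000 training examples / batch size of 20
    iter = 7503                              # an arbitrary value of the loop counter
    epoch = iter / n_minibatches             # 3: integer division in Python 2
    minibatch_index = iter % n_minibatches   # 3: the 4th minibatch of the 4th pass
    print epoch, minibatch_index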

doc/gettingstarted.txt

Lines changed: 49 additions & 14 deletions
@@ -47,24 +47,59 @@ MNIST Dataset
For convenience we pickled the dataset to make it easier to use in python.
It is available for download `here <http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz>`_.
The pickled file represents a tuple of 3 lists : the training set, the
-validation set and the testing set. Each element of any of the three lists
-represents a minibatch of 20 examples. Such an element is a tuple composed
-of the list of 20 images and the list of class labels for each of the
-images. An image is represented as numpy 1-dimensional array of 784 (28 x 28) float
-values between 0 and 1 ( 0 stands for black, 1 for white). The labels
-are numbers between 0 and 9 indicating which digit the image
-represents. Loading and accessing the dataset in the python can be done as
-follows:
+validation set and the testing set. Each of the three lists is a pair
+formed from a list of images and a list of class labels for each of the
+images. An image is represented as numpy 1-dimensional array of 784 (28
+x 28) float values between 0 and 1 (0 stands for black, 1 for white).
+The labels are numbers between 0 and 9 indicating which digit the image
+represents. When using the dataset, we usually divide it in minibatches
+(see :ref:`opt_SGD`). The code block below shows how to load the
+dataset and how to divide it in minibatches of a given size :

.. code-block:: python

    import cPickle, gzip, numpy

+    # Load the dataset
+    f = gzip.open('mnist.pkl.gz','rb')
+    train_set, valid_set, test_set = cPickle.load(f)
+    f.close()

+    # make minibatches of size 20
+    batch_size = 20    # sized of the minibatch

+    # Dealing with the training set
+    # get the list of training images (x) and their labels (y)
+    (train_set_x, train_set_y) = train_set
+    # initialize the list of training minibatches with empty list
+    train_batches = []
+    for i in xrange(0, len(train_set_x), batch_size):
+        # add to the list of minibatches the minibatch starting at
+        # position i, ending at position i+batch_size
+        # a minibatch is a pair ; the first element of the pair is a list
+        # of datapoints, the second element is the list of corresponding
+        # labels
+        train_batches = train_batches + \
+               [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]

+    # Dealing with the validation set
+    (valid_set_x, valid_set_y) = valid_set
+    # initialize the list of validation minibatches
+    valid_batches = []
+    for i in xrange(0, len(valid_set_x), batch_size):
+        valid_batches = valid_batches + \
+               [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]

+    # Dealing with the testing set
+    (test_set_x, test_set_y) = test_set
+    # initialize the list of testing minibatches
+    test_batches = []
+    for i in xrange(0, len(test_set_x), batch_size):
+        test_batches = test_batches + \
+               [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]

-    f = gzip.open('mnist.pkl.gz','rb')
-    (training_set, validation_set, testing_set) = cPickle.load(f)
-    f.close()

    # accessing training example i of minibatch j
    image = training_set[j][0][i]
    label = training_set[j][1][i]
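The unchanged context lines just above still index the old pre-batched training_set. A short sketch (assuming the minibatch-building code block above has been run) of the equivalent access with the new layout:

    # accessing training example i of minibatch j through the freshly built lists
    j, i = 0, 5
    image = train_batches[j][0][i]   # a 784-element float vector with values in [0, 1]
    label = train_batches[j][1][i]   # an integer between 0 and 9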
@@ -250,10 +285,10 @@ This can be computed using the following line of code :
    # syntax to retrieve the log-probability of the correct labels, y.


-.. _opt_SGD:
-
.. index:: Stochastic Gradient Descent

+.. _opt_SGD:
+
Stochastic Gradient Descent
+++++++++++++++++++++++++++

doc/logreg.txt

Lines changed: 4 additions & 3 deletions
@@ -1,7 +1,8 @@
-.. _logreg:
-
.. index:: Logistic Regression

+.. _logreg :
+
+
Classifying MNIST digits using Logistic Regression
==================================================

@@ -14,7 +15,7 @@ TODO : shared variables documentation not up !!

.. _shared variables: http://www.pylearn.org/theano/basic_tutorial

-.. _basic arthmetic ops: http://www.pylearn.org/theano/basic_tutorial/adding.html
+.. _basic arithmetic ops: http://www.pylearn.org/theano/basic_tutorial/adding.html

.. _T.grad: http://www.pylearn.org/theano/basic_tutorial/examples.html#computing-gradients

doc/mlp.txt

Lines changed: 12 additions & 2 deletions
@@ -6,9 +6,19 @@ Multilayer Perceptron
=====================

.. note::
-    This section assumes the reader has already read through :doc:`logreg.txt`.
+    This section assumes the reader has already read through :ref:`logreg` .
    Additionally, it uses the following new Theano functions and concepts:
-    T.tanh, abs, L1 and L2 regularization
+    `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_,
+    :ref:`L1_L2_regularization`
+
+.. _T.tanh: http://www.pylearn.org/theano/basic_tutorial/examples.html?highlight=tanh#logistic-function
+
+.. _shared variables: http://www.pylearn.org/theano/basic_tutorial
+
+.. _basic arithmetic ops: http://www.pylearn.org/theano/basic_tutorial/adding.html
+
+.. _T.grad: http://www.pylearn.org/theano/basic_tutorial/examples.html#computing-gradients
+

The next architecture we are going to present using Theano is the single-hidden
layer Multi-Layer Perceptron (MLP). An MLP can be viewed as a logistic

Comments (0)