11"""
2- This tutorial introduces deep belief networks (DBN) using Theano.
32"""
3+ import os
44
5- import numpy , time , cPickle , gzip
5+ import numpy , time , cPickle , gzip
66
77import theano
88import theano .tensor as T
1313from rbm import RBM
1414
1515
16+
1617class DBN (object ):
17- """ DBN """
18+ """
19+ """
1820
1921 def __init__ (self , numpy_rng , theano_rng = None , n_ins = 784 ,
2022 hidden_layers_sizes = [500 ,500 ], n_outs = 10 ):
-
+        """This class is made to support a variable number of layers.
+
+        :type numpy_rng: numpy.random.RandomState
+        :param numpy_rng: numpy random number generator used to draw initial
+                          weights
+
+        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
+        :param theano_rng: Theano random generator; if None is given one is
+                           generated based on a seed drawn from `numpy_rng`
+
+        :type n_ins: int
+        :param n_ins: dimension of the input to the DBN
+
+        :type hidden_layers_sizes: list of ints
+        :param hidden_layers_sizes: intermediate layer sizes, must contain
+                                    at least one value
+
+        :type n_outs: int
+        :param n_outs: dimension of the output of the network
+        """
+
         self.sigmoid_layers = []
-        self.rbms = []
+        self.rbm_layers = []
         self.params = []
         self.n_layers = len(hidden_layers_sizes)

-        assert self.n_layers > 0
+        assert self.n_layers > 0

         if not theano_rng:
             theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

-        self.x = T.matrix('x')
-        self.y = T.ivector('y')
+        # allocate symbolic variables for the data
+        self.x = T.matrix('x')   # the data is presented as rasterized images
+        self.y = T.ivector('y')  # the labels are presented as 1D vector of
+                                 # [int] labels

-        for i in xrange(self.n_layers):
-            if i == 0:
+        # The DBN is an MLP, for which all weights of intermediate layers are shared with a
+        # different RBM. We will first construct the DBN as a deep multilayer perceptron, and
+        # when constructing each sigmoidal layer we also construct an RBM that shares weights
+        # with that layer. During pretraining we will train these RBMs (which will lead
+        # to changing the weights of the MLP as well). During finetuning we will finish
+        # training the DBN by doing stochastic gradient descent on the MLP.
+
+        for i in xrange(self.n_layers):
+            # construct the sigmoidal layer
+
+            # the size of the input is either the number of hidden units of the layer below or
+            # the input size if we are on the first layer
+            if i == 0:
                 input_size = n_ins
-                layer_input = self.x
             else:
                 input_size = hidden_layers_sizes[i - 1]
-                layer_input = self.sigmoid_layers[-1].output

-            sigmoid_layer = HiddenLayer(rng = numpy_rng, input = layer_input,
-                                        n_in = input_size,
-                                        n_out = hidden_layers_sizes[i],
-                                        activation = T.nnet.sigmoid)
+            # the input to this layer is either the activation of the hidden layer below or the
+            # input of the DBN if you are on the first layer
+            if i == 0:
+                layer_input = self.x
+            else:
+                layer_input = self.sigmoid_layers[-1].output

+            sigmoid_layer = HiddenLayer(rng = numpy_rng,
+                                        input = layer_input,
+                                        n_in = input_size,
+                                        n_out = hidden_layers_sizes[i],
+                                        activation = T.nnet.sigmoid)
+
+            # add the layer to our list of layers
             self.sigmoid_layers.append(sigmoid_layer)
-            self.params.extend(sigmoid_layer.params)
-
-            rbm = RBM(numpy_rng = numpy_rng, theano_rng = theano_rng, input = layer_input,
-                      n_visible = input_size,
-                      n_hidden = hidden_layers_sizes[i],
-                      W = sigmoid_layer.W, hbias = sigmoid_layer.b)
-            self.rbms.append(rbm)
-
-        self.logLayer = LogisticRegression(
-            input = self.sigmoid_layers[-1].output,
-            n_in = hidden_layers_sizes[-1], n_out = n_outs)

-        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
-        self.errors = self.logLayer.errors(self.y)
+            # it's arguably a philosophical question... but we are going to only declare that
+            # the parameters of the sigmoid_layers are parameters of the DBN. The visible
+            # biases in the RBM are parameters of those RBMs, but not of the DBN.
+            self.params.extend(sigmoid_layer.params)
+
+            # Construct an RBM that shares weights with this layer
+            rbm_layer = RBM(numpy_rng = numpy_rng, theano_rng = theano_rng,
+                            input = layer_input,
+                            n_visible = input_size,
+                            n_hidden = hidden_layers_sizes[i],
+                            W = sigmoid_layer.W,
+                            hbias = sigmoid_layer.b)
+            self.rbm_layers.append(rbm_layer)

+
+        # We now need to add a logistic layer on top of the MLP
+        self.logLayer = LogisticRegression(\
+            input = self.sigmoid_layers[-1].output,\
+            n_in = hidden_layers_sizes[-1], n_out = n_outs)
         self.params.extend(self.logLayer.params)
-        self.PCD_chains = {}

+        # compute the cost for the second phase of training (fine-tuning),
+        # defined as the negative log likelihood of the logistic regression layer
+        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

-
-    def build_pretraining_functions(self, train_set_x, batch_size, type = 'CD'):
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+        self.errors = self.logLayer.errors(self.y)

-        index = T.lscalar()
-        lr = T.scalar()
-
-        n_batches = train_set_x.value.shape[0] / batch_size
-        batch_begin = (index % n_batches) * batch_size
-        batch_end = batch_begin + batch_size
-        data_size = train_set_x.value.shape[1]
+    def pretraining_functions(self, train_set_x, batch_size):
+        ''' Generates a list of functions, one per layer, each performing one step of
+        gradient descent on the corresponding RBM. Each function requires the minibatch
+        index as input; to train an RBM you just iterate over all minibatch indexes,
+        calling the corresponding function.
+
+        :type train_set_x: theano.tensor.TensorType
+        :param train_set_x: Shared var. that contains all datapoints used for training the RBM
+        :type batch_size: int
+        :param batch_size: size of a [mini]batch
+        '''
+
+        # index to a [mini]batch
+        index = T.lscalar('index')      # index to a minibatch
+        learning_rate = T.scalar('lr')  # learning rate to use
+
+        # number of batches
+        n_batches = train_set_x.value.shape[0] / batch_size
+        # beginning of a batch, given `index`
+        batch_begin = index * batch_size
+        # ending of a batch, given `index`
+        batch_end = batch_begin + batch_size

         pretrain_fns = []
-        for rbm in self.rbms:
-            if type == "CD":
-                updates = rbm.cd(lr = lr)
-            elif type == 'PCD':
-                persistent_chain = theano.shared(numpy.zeros((batch_size, data_size)))
-                self.PCD_chain[rbm] = persistent_chain
-                updates = rbm.cd(lr = lr, presistent = persistent_chain)
-            else:
-                raise NotImplementedError()
-
-            fn = theano.function([index, theano.Param(lr, default = 0.1)], [],
-                                 updates = updates,
-                                 givens = {self.x: train_set_x[batch_begin:batch_end]})
-
+        for rbm in self.rbm_layers:
+
+            # get the cost and the updates list
+            # TODO: change cost function to reconstruction error
+            cost, updates = rbm.cd(learning_rate, persistent = None)
+
+            # compile the theano function
+            fn = theano.function(inputs = [index,
+                                           theano.Param(learning_rate, default = 0.1)],
+                                 outputs = cost,
+                                 updates = updates,
+                                 givens = {self.x: train_set_x[batch_begin:batch_end]})
+            # append `fn` to the list of functions
             pretrain_fns.append(fn)

         return pretrain_fns
+

-
-    def finetune(self, datasets, batch_size):
+    def build_finetune_functions(self, datasets, batch_size, learning_rate):
+        '''Generates a function `train` that implements one step of finetuning, a function
+        `validate` that computes the error on a batch from the validation set, and a function
+        `test` that computes the error on a batch from the testing set
+
+        :type datasets: list of pairs of theano.tensor.TensorType
+        :param datasets: It is a list that contains all the datasets; it has to contain three
+        pairs, `train`, `valid`, `test` in this order, where each pair is formed of two Theano
+        variables, one for the datapoints, the other for the labels
+        :type batch_size: int
+        :param batch_size: size of a minibatch
+        :type learning_rate: float
+        :param learning_rate: learning rate used during finetune stage
+        '''

         (train_set_x, train_set_y) = datasets[0]
         (valid_set_x, valid_set_y) = datasets[1]
@@ -106,27 +184,25 @@ def finetune(self, datasets, batch_size):
         n_valid_batches = valid_set_x.value.shape[0] / batch_size
         n_test_batches = test_set_x.value.shape[0] / batch_size

-        index = T.lscalar()    # index to a [mini]batch
-        lr = T.scalar()
+        index = T.lscalar('index')    # index to a [mini]batch

-
         # compute the gradients with respect to the model parameters
         gparams = T.grad(self.finetune_cost, self.params)

         # compute list of fine-tuning updates
         updates = {}
         for param, gparam in zip(self.params, gparams):
-            updates[param] = param - gparam * lr
+            updates[param] = param - gparam * learning_rate

-        train_fn = theano.function(inputs = [index, theano.Param(lr, default = 0.1)],
-              outputs = self.finetune_cost,
-              updates = updates,
-              givens = {
-                self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
-                self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})
+        train_fn = theano.function(inputs = [index],
+              outputs = self.finetune_cost,
+              updates = updates,
+              givens = {
+                self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
+                self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

         test_score_i = theano.function([index], self.errors,
-                 givens = {
+                 givens = {
                   self.x: test_set_x[index * batch_size: (index + 1) * batch_size],
                   self.y: test_set_y[index * batch_size: (index + 1) * batch_size]})

@@ -137,7 +213,7 @@ def finetune(self, datasets, batch_size):

         # Create a function that scans the entire validation set
         def valid_score():
-            return [valid_score_i(i) for i in xrange(n_valid_batches)]
+            return [valid_score_i(i) for i in xrange(n_valid_batches)]

         # Create a function that scans the entire test set
         def test_score():
@@ -146,9 +222,32 @@ def test_score():
         return train_fn, valid_score, test_score


-def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
+
+
+
+
+def test_DBN( finetune_lr = 0.1, pretraining_epochs = 10, \
               pretrain_lr = 0.1, training_epochs = 1000, \
               dataset = 'mnist.pkl.gz'):
232+ """
233+ Demonstrates how to train and test a Deep Belief Network.
234+
235+ This is demonstrated on MNIST.
236+
237+ :type learning_rate: float
238+ :param learning_rate: learning rate used in the finetune stage
239+ :type pretraining_epochs: int
240+ :param pretraining_epochs: number of epoch to do pretraining
241+ :type pretrain_lr: float
242+ :param pretrain_lr: learning rate to be used during pre-training
243+ :type n_iter: int
244+ :param n_iter: maximal number of iterations ot run the optimizer
245+ :type dataset: string
246+ :param dataset: path the the pickled dataset
247+ """
248+
249+ print 'finetune_lr = ' , finetune_lr
250+ print 'pretrain_lr = ' , pretrain_lr

     datasets = load_data(dataset)

@@ -157,7 +256,6 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
     test_set_x, test_set_y = datasets[2]


-
     batch_size = 20    # size of the minibatch

     # compute number of minibatches for training, validation and testing
@@ -166,20 +264,19 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
     # numpy random generator
     numpy_rng = numpy.random.RandomState(123)
     print '... building the model'
-    # construct the stacked denoising autoencoder class
-    dbn = DBN(numpy_rng = numpy_rng, n_ins = 28 * 28,
-              hidden_layers_sizes = [100, 100, 100],
-              n_outs = 10)
+    # construct the Deep Belief Network
+    dbn = DBN(numpy_rng = numpy_rng, n_ins = 28 * 28,
+              hidden_layers_sizes = [1000, 1000, 1000],
+              n_outs = 10)


     #########################
     # PRETRAINING THE MODEL #
     #########################
     print '... getting the pretraining functions'
-    pretraining_fns = dbn.build_pretraining_functions(
-            train_set_x = train_set_x,
-            batch_size = batch_size,
-            type = 'CD')
+    pretraining_fns = dbn.pretraining_functions(
+            train_set_x = train_set_x,
+            batch_size = batch_size)

     print '... pre-training the model'
     start_time = time.clock()
@@ -188,9 +285,11 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
         # go through pretraining epochs
         for epoch in xrange(pretraining_epochs):
             # go through the training set
+            c = []
             for batch_index in xrange(n_train_batches):
-                pretraining_fns[i](batch_index, pretrain_lr)
-            print 'Pre-training layer %i, epoch %d ' % (i, epoch)
+                c.append(pretraining_fns[i](index = batch_index,
+                                            lr = pretrain_lr))
+            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), numpy.mean(c)

     end_time = time.clock()

@@ -202,8 +301,9 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \

     # get the training, validation and testing function for the model
     print '... getting the finetuning functions'
-    train_fn, validate_model, test_model = dbn.finetune(
-                datasets = datasets, batch_size = batch_size)
+    train_fn, validate_model, test_model = dbn.build_finetune_functions(
+                datasets = datasets, batch_size = batch_size,
+                learning_rate = finetune_lr)

     print '... finetuning the model'
     # early-stopping parameters
@@ -231,7 +331,7 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
         epoch = epoch + 1
         for minibatch_index in xrange(n_train_batches):

-            minibatch_avg_cost = train_fn(minibatch_index, finetune_lr)
+            minibatch_avg_cost = train_fn(minibatch_index)
             iter = epoch * n_train_batches + minibatch_index

             if (iter + 1) % validation_frequency == 0:
@@ -278,8 +378,7 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \



-
 if __name__ == '__main__':
-    test_DBN()
-
-
+    pretrain_lr = numpy.float(os.sys.argv[1])
+    finetune_lr = numpy.float(os.sys.argv[2])
+    test_DBN(pretrain_lr = pretrain_lr, finetune_lr = finetune_lr)
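
Taken together, the change reshapes how the script is driven: each pretraining function now returns its cost, the finetuning learning rate is fixed when build_finetune_functions is compiled, and the entry point reads the two learning rates from the command line. Below is a minimal driver sketch, not part of the commit; it only repeats calls that test_DBN() itself makes, and the module names (DBN, logistic_sgd) are assumed to match the files in this tutorial repository.

    # hypothetical driver sketch exercising the new API introduced by this commit
    import numpy
    from logistic_sgd import load_data   # assumed helper from the same tutorial repo
    from DBN import DBN                  # assumes this file is saved as DBN.py

    datasets = load_data('mnist.pkl.gz')
    train_set_x, train_set_y = datasets[0]

    numpy_rng = numpy.random.RandomState(123)
    dbn = DBN(numpy_rng = numpy_rng, n_ins = 28 * 28,
              hidden_layers_sizes = [1000, 1000, 1000], n_outs = 10)

    # one pretraining function per RBM layer; each call now returns the CD cost
    pretraining_fns = dbn.pretraining_functions(train_set_x = train_set_x,
                                                batch_size = 20)
    cost = pretraining_fns[0](index = 0, lr = 0.01)

    # the finetuning learning rate is baked in at compile time,
    # so train_fn takes only the minibatch index
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
            datasets = datasets, batch_size = 20, learning_rate = 0.1)
    avg_cost = train_fn(0)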