11"""
2- This tutorial introduces deep belief networks (DBN) using Theano.
32"""
3+ import os
44
5- import numpy , time , cPickle , gzip
5+ import numpy , time , cPickle , gzip
66
77import theano
88import theano .tensor as T
1313from rbm import RBM
1414
1515
16+
1617class DBN (object ):
17- """ DBN """
18+ """
19+ """
1820
1921 def __init__ (self , numpy_rng , theano_rng = None , n_ins = 784 ,
2022 hidden_layers_sizes = [500 ,500 ], n_outs = 10 ):
-
+        """This class is made to support a variable number of layers.
+
+        :type numpy_rng: numpy.random.RandomState
+        :param numpy_rng: numpy random number generator used to draw initial
+                          weights
+
+        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
+        :param theano_rng: Theano random generator; if None is given one is
+                           generated based on a seed drawn from `numpy_rng`
+
+        :type n_ins: int
+        :param n_ins: dimension of the input to the DBN
+
+        :type hidden_layers_sizes: list of ints
+        :param hidden_layers_sizes: intermediate layer sizes, must contain
+                                    at least one value
+
+        :type n_outs: int
+        :param n_outs: dimension of the output of the network
+        """
+
         self.sigmoid_layers = []
-        self.rbms = []
+        self.rbm_layers = []
         self.params = []
         self.n_layers = len(hidden_layers_sizes)

-        assert self.n_layers > 0
+        assert self.n_layers > 0

         if not theano_rng:
             theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

-        self.x = T.matrix('x')
-        self.y = T.ivector('y')
+        # allocate symbolic variables for the data
+        self.x = T.matrix('x')   # the data is presented as rasterized images
+        self.y = T.ivector('y')  # the labels are presented as 1D vector of
+                                 # [int] labels

-        for i in xrange(self.n_layers):
-            if i == 0:
+        # The DBN is an MLP, for which all weights of intermediate layers are shared with a
+        # different RBM. We will first construct the DBN as a deep multilayer perceptron, and
+        # when constructing each sigmoidal layer we also construct an RBM that shares weights
+        # with that layer. During pretraining we will train these RBMs (which will lead
+        # to changing the weights of the MLP as well). During finetuning we will finish
+        # training the DBN by doing stochastic gradient descent on the MLP.
+
+        for i in xrange(self.n_layers):
+            # construct the sigmoidal layer
+
+            # the size of the input is either the number of hidden units of the layer below or
+            # the input size if we are on the first layer
+            if i == 0:
                 input_size = n_ins
-                layer_input = self.x
             else:
                 input_size = hidden_layers_sizes[i - 1]
-                layer_input = self.sigmoid_layers[-1].output

-            sigmoid_layer = HiddenLayer(rng = numpy_rng, input = layer_input,
-                                        n_in = input_size,
-                                        n_out = hidden_layers_sizes[i],
-                                        activation = T.nnet.sigmoid)
+            # the input to this layer is either the activation of the hidden layer below or the
+            # input of the DBN if you are on the first layer
+            if i == 0:
+                layer_input = self.x
+            else:
+                layer_input = self.sigmoid_layers[-1].output

+            sigmoid_layer = HiddenLayer(rng = numpy_rng,
+                                        input = layer_input,
+                                        n_in = input_size,
+                                        n_out = hidden_layers_sizes[i],
+                                        activation = T.nnet.sigmoid)
+
+            # add the layer to our list of layers
             self.sigmoid_layers.append(sigmoid_layer)
-            self.params.extend(sigmoid_layer.params)
-
-            rbm = RBM(numpy_rng = numpy_rng, theano_rng = theano_rng, input = layer_input,
-                      n_visible = input_size,
-                      n_hidden = hidden_layers_sizes[i],
-                      W = sigmoid_layer.W, hbias = sigmoid_layer.b)
-            self.rbms.append(rbm)
-
-        self.logLayer = LogisticRegression(
-            input = self.sigmoid_layers[-1].output,
-            n_in = hidden_layers_sizes[-1], n_out = n_outs)

-        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
-        self.errors = self.logLayer.errors(self.y)
+            # it's arguably a philosophical question... but we are going to only declare that
+            # the parameters of the sigmoid_layers are parameters of the DBN. The visible
+            # biases in the RBM are parameters of those RBMs, but not of the DBN.
+            self.params.extend(sigmoid_layer.params)
+
+            # Construct an RBM that shares weights with this layer
+            rbm_layer = RBM(numpy_rng = numpy_rng, theano_rng = theano_rng,
+                            input = layer_input,
+                            n_visible = input_size,
+                            n_hidden = hidden_layers_sizes[i],
+                            W = sigmoid_layer.W,
+                            hbias = sigmoid_layer.b)
+            self.rbm_layers.append(rbm_layer)

+
+        # We now need to add a logistic layer on top of the MLP
+        self.logLayer = LogisticRegression(\
+            input = self.sigmoid_layers[-1].output,\
+            n_in = hidden_layers_sizes[-1], n_out = n_outs)
         self.params.extend(self.logLayer.params)
-        self.PCD_chains = {}

+        # compute the cost for the second phase of training (fine-tuning),
+        # defined as the negative log likelihood of the logistic regression layer
+        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

-
-    def build_pretraining_functions(self, train_set_x, batch_size, type = 'CD'):
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+        self.errors = self.logLayer.errors(self.y)

-        index = T.lscalar()
-        lr = T.scalar()
-
-        n_batches = train_set_x.value.shape[0] / batch_size
-        batch_begin = (index % n_batches) * batch_size
-        batch_end = batch_begin + batch_size
-        data_size = train_set_x.value.shape[1]
+    def pretraining_functions(self, train_set_x, batch_size):
+        ''' Generates a list of functions, one per layer, each performing one step of
+        gradient descent on the corresponding RBM. Each function requires the minibatch
+        index as input; to train an RBM you just iterate over all minibatch indexes,
+        calling the corresponding function.
+
+        :type train_set_x: theano.tensor.TensorType
+        :param train_set_x: Shared var. that contains all datapoints used for training the RBM
+        :type batch_size: int
+        :param batch_size: size of a [mini]batch
+        '''
+
+        # index to a [mini]batch
+        index = T.lscalar('index')      # index to a minibatch
+        learning_rate = T.scalar('lr')  # learning rate to use
+
+        # number of batches
+        n_batches = train_set_x.value.shape[0] / batch_size
+        # beginning of a batch, given `index`
+        batch_begin = index * batch_size
+        # ending of a batch, given `index`
+        batch_end = batch_begin + batch_size

         pretrain_fns = []
-        for rbm in self.rbms:
-            if type == "CD":
-                updates = rbm.cd(lr = lr)
-            elif type == 'PCD':
-                persistent_chain = theano.shared(numpy.zeros((batch_size, data_size)))
-                self.PCD_chain[rbm] = persistent_chain
-                updates = rbm.cd(lr = lr, presistent = persistent_chain)
-            else:
-                raise NotImplementedError()
-
-            fn = theano.function([index, theano.Param(lr, default = 0.1)], [],
-                                 updates = updates,
-                                 givens = {self.x: train_set_x[batch_begin:batch_end]})
-
+        for rbm in self.rbm_layers:
+
+            # get the cost and the updates list
+            # TODO: change cost function to reconstruction error
+            cost, updates = rbm.cd(learning_rate, persistent = None)
+
+            # compile the theano function
+            fn = theano.function(inputs = [index,
+                                           theano.Param(learning_rate, default = 0.1)],
+                                 outputs = cost,
+                                 updates = updates,
+                                 givens = {self.x: train_set_x[batch_begin:batch_end]})
+            # append `fn` to the list of functions
             pretrain_fns.append(fn)

         return pretrain_fns
+

-
-    def finetune(self, datasets, batch_size):
+    def build_finetune_functions(self, datasets, batch_size, learning_rate):
+        '''Generates a function `train` that implements one step of finetuning, a function
+        `validate` that computes the error on a batch from the validation set, and a function
+        `test` that computes the error on a batch from the testing set
+
+        :type datasets: list of pairs of theano.tensor.TensorType
+        :param datasets: It is a list that contains all the datasets; it has to contain three
+        pairs, `train`, `valid`, `test` in this order, where each pair is formed of two Theano
+        variables, one for the datapoints, the other for the labels
+        :type batch_size: int
+        :param batch_size: size of a minibatch
+        :type learning_rate: float
+        :param learning_rate: learning rate used during finetune stage
+        '''

         (train_set_x, train_set_y) = datasets[0]
         (valid_set_x, valid_set_y) = datasets[1]
@@ -106,27 +184,25 @@ def finetune(self, datasets, batch_size):
         n_valid_batches = valid_set_x.value.shape[0] / batch_size
         n_test_batches = test_set_x.value.shape[0] / batch_size

-        index = T.lscalar()    # index to a [mini]batch
-        lr = T.scalar()
+        index = T.lscalar('index')    # index to a [mini]batch

-
         # compute the gradients with respect to the model parameters
         gparams = T.grad(self.finetune_cost, self.params)

         # compute list of fine-tuning updates
         updates = {}
         for param, gparam in zip(self.params, gparams):
-            updates[param] = param - gparam * lr
+            updates[param] = param - gparam * learning_rate

-        train_fn = theano.function(inputs = [index, theano.Param(lr, default = 0.1)],
-              outputs = self.finetune_cost,
-              updates = updates,
-              givens = {
-                self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
-                self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})
+        train_fn = theano.function(inputs = [index],
+              outputs = self.finetune_cost,
+              updates = updates,
+              givens = {
+                self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
+                self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

         test_score_i = theano.function([index], self.errors,
-                 givens = {
+                 givens = {
                   self.x: test_set_x[index * batch_size: (index + 1) * batch_size],
                   self.y: test_set_y[index * batch_size: (index + 1) * batch_size]})

@@ -137,7 +213,7 @@ def finetune(self, datasets, batch_size):

         # Create a function that scans the entire validation set
         def valid_score():
-            return [valid_score_i(i) for i in xrange(n_valid_batches)]
+            return [valid_score_i(i) for i in xrange(n_valid_batches)]

         # Create a function that scans the entire test set
         def test_score():
@@ -146,9 +222,32 @@ def test_score():
         return train_fn, valid_score, test_score


-def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
+
+
+
+
+def test_DBN( finetune_lr = 0.1, pretraining_epochs = 10, \
               pretrain_lr = 0.1, training_epochs = 1000, \
               dataset = 'mnist.pkl.gz'):
232+ """
233+ Demonstrates how to train and test a Deep Belief Network.
234+
235+ This is demonstrated on MNIST.
236+
237+ :type learning_rate: float
238+ :param learning_rate: learning rate used in the finetune stage
239+ :type pretraining_epochs: int
240+ :param pretraining_epochs: number of epoch to do pretraining
241+ :type pretrain_lr: float
242+ :param pretrain_lr: learning rate to be used during pre-training
243+ :type n_iter: int
244+ :param n_iter: maximal number of iterations ot run the optimizer
245+ :type dataset: string
246+ :param dataset: path the the pickled dataset
247+ """
248+
249+ print 'finetune_lr = ' , finetune_lr
250+ print 'pretrain_lr = ' , pretrain_lr

     datasets = load_data(dataset)

@@ -157,7 +256,6 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
     test_set_x, test_set_y = datasets[2]


-
     batch_size = 20    # size of the minibatch

     # compute number of minibatches for training, validation and testing
@@ -166,20 +264,19 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
     # numpy random generator
     numpy_rng = numpy.random.RandomState(123)
     print '... building the model'
-    # construct the stacked denoising autoencoder class
-    dbn = DBN(numpy_rng = numpy_rng, n_ins = 28 * 28,
-              hidden_layers_sizes = [100, 100, 100],
-              n_outs = 10)
+    # construct the Deep Belief Network
+    dbn = DBN(numpy_rng = numpy_rng, n_ins = 28 * 28,
+              hidden_layers_sizes = [1000, 1000, 1000],
+              n_outs = 10)


     #########################
     # PRETRAINING THE MODEL #
     #########################
     print '... getting the pretraining functions'
-    pretraining_fns = dbn.build_pretraining_functions(
-            train_set_x = train_set_x,
-            batch_size = batch_size,
-            type = 'CD')
+    pretraining_fns = dbn.pretraining_functions(
+            train_set_x = train_set_x,
+            batch_size = batch_size)

     print '... pre-training the model'
     start_time = time.clock()
@@ -188,9 +285,11 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
         # go through pretraining epochs
         for epoch in xrange(pretraining_epochs):
             # go through the training set
+            c = []
             for batch_index in xrange(n_train_batches):
-                pretraining_fns[i](batch_index, pretrain_lr)
-            print 'Pre-training layer %i, epoch %d ' % (i, epoch)
+                c.append(pretraining_fns[i](index = batch_index,
+                                            lr = pretrain_lr))
+            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), numpy.mean(c)

     end_time = time.clock()

@@ -202,8 +301,9 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \

     # get the training, validation and testing function for the model
     print '... getting the finetuning functions'
-    train_fn, validate_model, test_model = dbn.finetune(
-                datasets = datasets, batch_size = batch_size)
+    train_fn, validate_model, test_model = dbn.build_finetune_functions(
+                datasets = datasets, batch_size = batch_size,
+                learning_rate = finetune_lr)

     print '... finetuning the model'
     # early-stopping parameters
@@ -231,7 +331,7 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
         epoch = epoch + 1
         for minibatch_index in xrange(n_train_batches):

-            minibatch_avg_cost = train_fn(minibatch_index, finetune_lr)
+            minibatch_avg_cost = train_fn(minibatch_index)
             iter = epoch * n_train_batches + minibatch_index

             if (iter + 1) % validation_frequency == 0:
@@ -278,8 +378,7 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \



-
 if __name__ == '__main__':
-    test_DBN()
-
-
+    pretrain_lr = numpy.float(os.sys.argv[1])
+    finetune_lr = numpy.float(os.sys.argv[2])
+    test_DBN(pretrain_lr = pretrain_lr, finetune_lr = finetune_lr)
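
Taken together, the change reshapes how the script is driven: each pretraining function now returns its cost, the finetuning learning rate is fixed when build_finetune_functions is compiled, and the entry point reads the two learning rates from the command line. Below is a minimal driver sketch, not part of the commit; it only repeats calls that test_DBN() itself makes, and the module names (DBN, logistic_sgd) are assumed to match the files in this tutorial repository.

    # hypothetical driver sketch exercising the new API introduced by this commit
    import numpy
    from logistic_sgd import load_data   # assumed helper from the same tutorial repo
    from DBN import DBN                  # assumes this file is saved as DBN.py

    datasets = load_data('mnist.pkl.gz')
    train_set_x, train_set_y = datasets[0]

    numpy_rng = numpy.random.RandomState(123)
    dbn = DBN(numpy_rng = numpy_rng, n_ins = 28 * 28,
              hidden_layers_sizes = [1000, 1000, 1000], n_outs = 10)

    # one pretraining function per RBM layer; each call now returns the CD cost
    pretraining_fns = dbn.pretraining_functions(train_set_x = train_set_x,
                                                batch_size = 20)
    cost = pretraining_fns[0](index = 0, lr = 0.01)

    # the finetuning learning rate is baked in at compile time,
    # so train_fn takes only the minibatch index
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
            datasets = datasets, batch_size = 20, learning_rate = 0.1)
    avg_cost = train_fn(0)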