Commit 76a57eb

skeleton of DBN tutorial (removed old dbn.py file)
1 parent 0245100 commit 76a57eb

3 files changed

Lines changed: 239 additions & 110 deletions

code/DBN.py

Lines changed: 185 additions & 86 deletions
@@ -1,8 +1,8 @@
 """
-This tutorial introduces deep belief networks (DBN) using Theano.
 """
+import os
 
-import numpy, time, cPickle, gzip
+import numpy, time, cPickle, gzip
 
 import theano
 import theano.tensor as T
@@ -13,90 +13,168 @@
 from rbm import RBM
 
 
+
 class DBN(object):
-    """ DBN """
+    """
+    """
 
     def __init__(self, numpy_rng, theano_rng = None, n_ins = 784,
                  hidden_layers_sizes = [500,500], n_outs = 10):
-
+        """This class is made to support a variable number of layers.
+
+        :type numpy_rng: numpy.random.RandomState
+        :param numpy_rng: numpy random number generator used to draw initial
+                          weights
+
+        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
+        :param theano_rng: Theano random generator; if None is given one is
+                           generated based on a seed drawn from `numpy_rng`
+
+        :type n_ins: int
+        :param n_ins: dimension of the input to the DBN
+
+        :type hidden_layers_sizes: list of ints
+        :param hidden_layers_sizes: intermediate layers sizes, must contain
+                                    at least one value
+
+        :type n_outs: int
+        :param n_outs: dimension of the output of the network
+        """
+
         self.sigmoid_layers = []
-        self.rbms = []
+        self.rbm_layers = []
         self.params = []
         self.n_layers = len(hidden_layers_sizes)
 
-        assert self.n_layers > 0
+        assert self.n_layers > 0
 
         if not theano_rng:
             theano_rng = RandomStreams(numpy_rng.randint(2**30))
 
-        self.x = T.matrix('x')
-        self.y = T.ivector('y')
+        # allocate symbolic variables for the data
+        self.x = T.matrix('x')   # the data is presented as rasterized images
+        self.y = T.ivector('y')  # the labels are presented as a 1D vector
+                                 # of [int] labels
 
-        for i in xrange(self.n_layers):
-            if i == 0 :
+        # The DBN is an MLP for which all weights of the intermediate layers
+        # are shared with a different RBM. We will first construct the DBN as
+        # a deep multilayer perceptron, and when constructing each sigmoidal
+        # layer we also construct an RBM that shares weights with that layer.
+        # During pretraining we will train these RBMs (which will lead to
+        # changing the weights of the MLP as well). During finetuning we will
+        # finish training the DBN by doing stochastic gradient descent on
+        # the MLP.
+
+        for i in xrange( self.n_layers ):
+            # construct the sigmoidal layer
+
+            # the size of the input is either the number of hidden units of
+            # the layer below or the input size if we are on the first layer
+            if i == 0 :
                 input_size = n_ins
-                layer_input = self.x
             else:
                 input_size = hidden_layers_sizes[i-1]
-                layer_input = self.sigmoid_layers[-1].output
 
-            sigmoid_layer = HiddenLayer(rng = numpy_rng, input = layer_input,
-                                        n_in = input_size,
-                                        n_out = hidden_layers_sizes[i],
-                                        activation = T.nnet.sigmoid)
+            # the input to this layer is either the activation of the hidden
+            # layer below or the input of the DBN if we are on the first layer
+            if i == 0 :
+                layer_input = self.x
+            else:
+                layer_input = self.sigmoid_layers[-1].output
 
+            sigmoid_layer = HiddenLayer(rng = numpy_rng,
+                                        input = layer_input,
+                                        n_in = input_size,
+                                        n_out = hidden_layers_sizes[i],
+                                        activation = T.nnet.sigmoid)
+
+            # add the layer to our list of layers
             self.sigmoid_layers.append(sigmoid_layer)
-            self.params.extend(sigmoid_layer.params)
-
-            rbm = RBM(numpy_rng = numpy_rng, theano_rng = theano_rng, input = layer_input,
-                      n_visible = input_size,
-                      n_hidden = hidden_layers_sizes[i],
-                      W = sigmoid_layer.W, hbias = sigmoid_layer.b)
-            self.rbms.append(rbm)
-
-        self.logLayer = LogisticRegression(
-            input = self.sigmoid_layers[-1].output,
-            n_in = hidden_layers_sizes[-1], n_out = n_outs)
 
-        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
-        self.errors = self.logLayer.errors(self.y)
+            # it is arguably a philosophical question... but we are going to
+            # only declare that the parameters of the sigmoid_layers are
+            # parameters of the DBN. The visible biases in the RBMs are
+            # parameters of those RBMs, but not of the DBN.
+            self.params.extend(sigmoid_layer.params)
+
+            # Construct an RBM that shares weights with this layer
+            rbm_layer = RBM(numpy_rng = numpy_rng, theano_rng = theano_rng,
+                            input = layer_input,
+                            n_visible = input_size,
+                            n_hidden = hidden_layers_sizes[i],
+                            W = sigmoid_layer.W,
+                            hbias = sigmoid_layer.b)
+            self.rbm_layers.append(rbm_layer)
 
+
+        # We now need to add a logistic layer on top of the MLP
+        self.logLayer = LogisticRegression(\
+            input = self.sigmoid_layers[-1].output,\
+            n_in = hidden_layers_sizes[-1], n_out = n_outs)
         self.params.extend(self.logLayer.params)
-        self.PCD_chains = {}
 
+        # compute the cost for the second phase of training, defined as the
+        # negative log likelihood of the logistic regression (output) layer
+        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
 
-
-    def build_pretraining_functions(self, train_set_x, batch_size,type = 'CD' ):
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+        self.errors = self.logLayer.errors(self.y)
 
-        index = T.lscalar()
-        lr = T.scalar()
-
-        n_batches = train_set_x.value.shape[0] / batch_size
-        batch_begin = (index % n_batches) * batch_size
-        batch_end = batch_begin + batch_size
-        data_size = train_set_x.value.shape[1]
+    def pretraining_functions(self, train_set_x, batch_size):
+        ''' Generates a list of functions for performing one step of
+        gradient descent at a given layer. The function will require as
+        input the minibatch index, and to train an RBM you just need to
+        iterate, calling the corresponding function on all minibatch
+        indexes.
+
+        :type train_set_x: theano.tensor.TensorType
+        :param train_set_x: Shared var. that contains all datapoints used
+                            for training the RBM
+        :type batch_size: int
+        :param batch_size: size of a [mini]batch
+        '''
+
+        # index to a [mini]batch
+        index = T.lscalar('index')
+        learning_rate = T.scalar('lr')   # learning rate to use
+
+        # number of batches
+        n_batches = train_set_x.value.shape[0] / batch_size
+        # beginning of a batch, given `index`
+        batch_begin = index * batch_size
+        # ending of a batch, given `index`
+        batch_end = batch_begin + batch_size
 
         pretrain_fns = []
-        for rbm in self.rbms :
-            if type == "CD":
-                updates = rbm.cd(lr = lr)
-            elif type == 'PCD':
-                persistent_chain = theano.shared( numpy.zeros((batch_size,data_size)))
-                self.PCD_chain[rbm] = persistent_chain
-                updates = rbm.cd(lr = lr, presistent = persistent_chain)
-            else:
-                raise NotImplementedError()
-
-            fn = theano.function([index, theano.Param(lr, default = 0.1)], [],
-                                 updates = updates,
-                                 givens = {self.x: train_set_x[batch_begin:batch_end]})
-
+        for rbm in self.rbm_layers:
+
+            # get the cost and the updates list
+            # TODO: change cost function to reconstruction error
+            cost, updates = rbm.cd(learning_rate, persistent = None)
+
+            # compile the theano function
+            fn = theano.function(
+                     inputs = [index, theano.Param(learning_rate, default = 0.1)],
+                     outputs = cost,
+                     updates = updates,
+                     givens = {self.x: train_set_x[batch_begin:batch_end]})
+            # append `fn` to the list of functions
             pretrain_fns.append(fn)
 
         return pretrain_fns
+
 
-
-    def finetune(self, datasets, batch_size):
+    def build_finetune_functions(self, datasets, batch_size, learning_rate):
+        '''Generates a function `train` that implements one step of
+        finetuning, a function `validate` that computes the error on a
+        batch from the validation set, and a function `test` that computes
+        the error on a batch from the testing set
+
+        :type datasets: list of pairs of theano.tensor.TensorType
+        :param datasets: It is a list that contains all the datasets; it
+            has to contain three pairs, `train`, `valid`, `test`, in this
+            order, where each pair is formed of two Theano variables, one
+            for the datapoints, the other for the labels
+        :type batch_size: int
+        :param batch_size: size of a minibatch
+        :type learning_rate: float
+        :param learning_rate: learning rate used during finetune stage
+        '''
 
         (train_set_x, train_set_y) = datasets[0]
         (valid_set_x, valid_set_y) = datasets[1]
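
Note on the weight sharing set up in this hunk: because each RBM is
constructed with W = sigmoid_layer.W and hbias = sigmoid_layer.b, the RBM and
the corresponding MLP layer hold the very same Theano shared variables, so
the contrastive-divergence updates applied during pretraining move the MLP's
weights as well. A minimal sketch of how one might check this, assuming the
tutorial's HiddenLayer and RBM classes keep the shared variables they are
given as .W, .b and .hbias (the import paths here are assumptions):

    import numpy
    import theano.tensor as T
    from theano.tensor.shared_randomstreams import RandomStreams

    from mlp import HiddenLayer    # tutorial modules, assumed importable
    from rbm import RBM

    numpy_rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(numpy_rng.randint(2**30))
    x = T.matrix('x')

    layer = HiddenLayer(rng = numpy_rng, input = x, n_in = 784, n_out = 500,
                        activation = T.nnet.sigmoid)
    rbm = RBM(numpy_rng = numpy_rng, theano_rng = theano_rng, input = x,
              n_visible = 784, n_hidden = 500,
              W = layer.W, hbias = layer.b)

    # same objects, not copies: training the RBM also retrains the MLP layer
    assert rbm.W is layer.W
    assert rbm.hbias is layer.b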
@@ -106,27 +184,25 @@ def finetune(self, datasets, batch_size):
         n_valid_batches = valid_set_x.value.shape[0] / batch_size
         n_test_batches = test_set_x.value.shape[0] / batch_size
 
-        index = T.lscalar()           # index to a [mini]batch
-        lr = T.scalar()
+        index = T.lscalar('index')    # index to a [mini]batch
 
-
         # compute the gradients with respect to the model parameters
         gparams = T.grad(self.finetune_cost, self.params)
 
         # compute list of fine-tuning updates
         updates = {}
         for param, gparam in zip(self.params, gparams):
-            updates[param] = param - gparam*lr
+            updates[param] = param - gparam*learning_rate
 
-        train_fn = theano.function(inputs = [index, theano.Param(lr, default = 0.1)],
-              outputs = self.finetune_cost,
-              updates = updates,
-              givens = {
-                self.x : train_set_x[index*batch_size:(index+1)*batch_size],
-                self.y : train_set_y[index*batch_size:(index+1)*batch_size]})
+        train_fn = theano.function(inputs = [index],
+              outputs = self.finetune_cost,
+              updates = updates,
+              givens = {
+                self.x : train_set_x[index*batch_size:(index+1)*batch_size],
+                self.y : train_set_y[index*batch_size:(index+1)*batch_size]})
 
         test_score_i = theano.function([index], self.errors,
-                 givens = {
+                 givens = {
                    self.x: test_set_x[index*batch_size:(index+1)*batch_size],
                    self.y: test_set_y[index*batch_size:(index+1)*batch_size]})
 
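The updates dictionary built in this hunk is plain stochastic gradient
descent: each parameter is moved against its gradient by a step of size
learning_rate. The same rule on a self-contained toy cost (all names below
are made up for illustration):

    import numpy
    import theano
    import theano.tensor as T

    w = theano.shared(numpy.asarray(5.0))   # a single parameter, starting at 5
    cost = (w - 3) ** 2                     # toy cost, minimized at w == 3
    gw = T.grad(cost, w)

    # one compiled step of the update rule used above for the DBN parameters
    step = theano.function([], cost, updates = {w: w - 0.1 * gw})
    for i in xrange(50):
        step()
    # w.value has now moved close to 3.0
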
@@ -137,7 +213,7 @@ def finetune(self, datasets, batch_size):
 
         # Create a function that scans the entire validation set
         def valid_score():
-            return [valid_score_i(i) for i in xrange(n_valid_batches)]
+            return [valid_score_i(i) for i in xrange(n_valid_batches)]
 
         # Create a function that scans the entire test set
         def test_score():
@@ -146,9 +222,32 @@ def test_score():
         return train_fn, valid_score, test_score
 
 
-def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
+
+
+
+
+def test_DBN( finetune_lr = 0.1, pretraining_epochs = 10, \
              pretrain_lr = 0.1, training_epochs = 1000, \
              dataset='mnist.pkl.gz'):
+    """
+    Demonstrates how to train and test a Deep Belief Network.
+
+    This is demonstrated on MNIST.
+
+    :type finetune_lr: float
+    :param finetune_lr: learning rate used in the finetune stage
+    :type pretraining_epochs: int
+    :param pretraining_epochs: number of epochs to do pretraining
+    :type pretrain_lr: float
+    :param pretrain_lr: learning rate to be used during pre-training
+    :type training_epochs: int
+    :param training_epochs: maximal number of iterations to run the optimizer
+    :type dataset: string
+    :param dataset: path to the pickled dataset
+    """
+
+    print 'finetune_lr = ', finetune_lr
+    print 'pretrain_lr = ', pretrain_lr
 
     datasets = load_data(dataset)
 
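Given these defaults, running the experiment directly from Python amounts to
the following call (a sketch; the values are just the documented defaults,
and mnist.pkl.gz must be where load_data expects it):

    test_DBN(finetune_lr = 0.1, pretraining_epochs = 10,
             pretrain_lr = 0.1, training_epochs = 1000,
             dataset = 'mnist.pkl.gz')
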
@@ -157,7 +256,6 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
     test_set_x , test_set_y  = datasets[2]
 
 
-
     batch_size = 20    # size of the minibatch
 
     # compute number of minibatches for training, validation and testing
@@ -166,20 +264,19 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
     # numpy random generator
     numpy_rng = numpy.random.RandomState(123)
     print '... building the model'
-    # construct the stacked denoising autoencoder class
-    dbn = DBN( numpy_rng = numpy_rng, n_ins = 28*28,
-               hidden_layers_sizes = [100,100,100],
-               n_outs = 10)
+    # construct the Deep Belief Network
+    dbn = DBN(numpy_rng = numpy_rng, n_ins = 28*28,
+              hidden_layers_sizes = [1000,1000,1000],
+              n_outs = 10)
 
 
     #########################
     # PRETRAINING THE MODEL #
     #########################
     print '... getting the pretraining functions'
-    pretraining_fns = dbn.build_pretraining_functions(
-            train_set_x = train_set_x,
-            batch_size = batch_size,
-            type = 'CD' )
+    pretraining_fns = dbn.pretraining_functions(
+            train_set_x = train_set_x,
+            batch_size = batch_size)
 
     print '... pre-training the model'
     start_time = time.clock()
@@ -188,9 +285,11 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
         # go through pretraining epochs
         for epoch in xrange(pretraining_epochs):
             # go through the training set
+            c = []
             for batch_index in xrange(n_train_batches):
-                pretraining_fns[i](batch_index, pretrain_lr)
-            print 'Pre-training layer %i, epoch %d '%(i,epoch)
+                c.append(pretraining_fns[i](index = batch_index,
+                                            lr = pretrain_lr))
+            print 'Pre-training layer %i, epoch %d, cost '%(i,epoch), numpy.mean(c)
 
     end_time = time.clock()
 
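For context, the i indexing pretraining_fns in this hunk is bound by an
enclosing per-layer loop that falls outside the diff. The overall pretraining
driver has roughly this shape (a sketch reconstructed from the surrounding
code):

    # Pre-train layer-wise: for each RBM in turn, sweep the training set
    for i in xrange(dbn.n_layers):
        for epoch in xrange(pretraining_epochs):
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index = batch_index,
                                            lr = pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost '%(i,epoch), numpy.mean(c)
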
@@ -202,8 +301,9 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
 
     # get the training, validation and testing function for the model
     print '... getting the finetuning functions'
-    train_fn, validate_model, test_model = dbn.finetune(
-            datasets = datasets, batch_size = batch_size)
+    train_fn, validate_model, test_model = dbn.build_finetune_functions(
+            datasets = datasets, batch_size = batch_size,
+            learning_rate = finetune_lr)
 
     print '... finetuning the model'
     # early-stopping parameters
@@ -231,7 +331,7 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
         epoch = epoch + 1
         for minibatch_index in xrange(n_train_batches):
 
-            minibatch_avg_cost = train_fn(minibatch_index, finetune_lr)
+            minibatch_avg_cost = train_fn(minibatch_index)
             iter = epoch * n_train_batches + minibatch_index
 
             if (iter+1) % validation_frequency == 0:
@@ -278,8 +378,7 @@ def test_DBN( finetune_lr = 0.1, pretraining_epochs = 2, \
 
 
 
-
 if __name__ == '__main__':
-    test_DBN()
-
-
+    pretrain_lr = numpy.float(os.sys.argv[1])
+    finetune_lr = numpy.float(os.sys.argv[2])
+    test_DBN(pretrain_lr=pretrain_lr, finetune_lr=finetune_lr)
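
With the revised __main__ block, both learning rates must now be supplied as
positional command-line arguments (pretrain_lr first, then finetune_lr); a
hypothetical invocation such as `python code/DBN.py 0.01 0.1` amounts to:

    test_DBN(pretrain_lr = 0.01, finetune_lr = 0.1)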
