
Commit d71dce9

Author: Razvan Pascanu (committed)
I merged my version 2 into the official deep learning tutorials
1 parent 86eab7d commit d71dce9

21 files changed

Lines changed: 2424 additions & 1331 deletions

code/DBN.py

Lines changed: 285 additions & 0 deletions
@@ -0,0 +1,285 @@
"""
This tutorial introduces deep belief networks (DBNs) using Theano.
"""

import numpy, time, cPickle, gzip

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
from rbm import RBM

class DBN(object):
    """Deep Belief Network: a stack of RBMs in which each hidden layer
    also serves as a sigmoid layer of an MLP, topped by a logistic
    regression layer for classification."""

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10):

        self.sigmoid_layers = []
        self.rbms = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')   # rasterized images
        self.y = T.ivector('y')  # labels, given as a vector of ints

        for i in xrange(self.n_layers):
            # the size of the input is either the number of hidden units
            # of the layer below, or the input size for the first layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # construct an RBM that shares weights with this sigmoid layer
            rbm = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W, hbias=sigmoid_layer.b)
            self.rbms.append(rbm)

        # logistic regression layer on top, for supervised fine-tuning
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        self.params.extend(self.logLayer.params)
        # persistent Gibbs chains used for PCD pre-training, keyed by RBM
        self.PCD_chains = {}
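
    # Note: because each RBM was built with W=sigmoid_layer.W and
    # hbias=sigmoid_layer.b, the RBMs and the sigmoid layers share their
    # parameters, so unsupervised pre-training of the RBMs directly
    # initializes the weights used during supervised fine-tuning.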
    def build_pretraining_functions(self, train_set_x, batch_size, type='CD'):

        index = T.lscalar()  # index to a [mini]batch
        lr = T.scalar()      # learning rate

        n_batches = train_set_x.value.shape[0] / batch_size
        # wrap the index around the training set
        batch_begin = (index % n_batches) * batch_size
        batch_end = batch_begin + batch_size
        data_size = train_set_x.value.shape[1]

        pretrain_fns = []
        for rbm in self.rbms:
            if type == 'CD':
                updates = rbm.cd(lr=lr)
            elif type == 'PCD':
                # allocate a shared variable to hold the persistent chain
                persistent_chain = theano.shared(
                    numpy.zeros((batch_size, data_size)))
                self.PCD_chains[rbm] = persistent_chain
                updates = rbm.cd(lr=lr, persistent=persistent_chain)
            else:
                raise NotImplementedError()

            # compile a Theano function that performs one update step
            fn = theano.function([index, theano.Param(lr, default=0.1)], [],
                                 updates=updates,
                                 givens={self.x:
                                         train_set_x[batch_begin:batch_end]})
            pretrain_fns.append(fn)

        return pretrain_fns
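
    # Usage sketch: each compiled function performs one CD/PCD update of
    # its RBM on the minibatch selected by `index`, e.g.
    #
    #     fns = dbn.build_pretraining_functions(train_set_x, batch_size)
    #     fns[0](0, 0.1)  # one update of the first RBM with lr = 0.1
    #
    # (test_DBN below drives these functions over the whole training set.)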
    def finetune(self, datasets, batch_size):

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for validation and testing
        n_valid_batches = valid_set_x.value.shape[0] / batch_size
        n_test_batches = test_set_x.value.shape[0] / batch_size

        index = T.lscalar()  # index to a [mini]batch
        lr = T.scalar()      # learning rate

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates: plain SGD,
        # param := param - lr * gradient
        updates = {}
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * lr

        train_fn = theano.function(
            inputs=[index, theano.Param(lr, default=0.1)],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:(index + 1) * batch_size]})

        test_score_i = theano.function(
            [index], self.errors,
            givens={
                self.x: test_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: test_set_y[index * batch_size:(index + 1) * batch_size]})

        valid_score_i = theano.function(
            [index], self.errors,
            givens={
                self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score
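
    # Usage sketch: train_fn(index, lr) performs one SGD step and returns
    # the minibatch cost, while valid_score() and test_score() are plain
    # Python functions that sweep their whole set and return the list of
    # per-minibatch error rates (see the training loop in test_DBN below).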
def test_DBN(finetune_lr=0.1, pretraining_epochs=2,
             pretrain_lr=0.1, training_epochs=1000,
             dataset='mnist.pkl.gz'):

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    batch_size = 20  # size of the minibatch

    # compute number of minibatches for training
    n_train_batches = train_set_x.value.shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
              hidden_layers_sizes=[100, 100, 100],
              n_outs=10)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.build_pretraining_functions(
        train_set_x=train_set_x,
        batch_size=batch_size,
        type='CD')

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            for batch_index in xrange(n_train_batches):
                pretraining_fns[i](batch_index, pretrain_lr)
            print 'Pre-training layer %i, epoch %d' % (i, epoch)

    end_time = time.clock()

    print 'Pretraining took %f minutes' % ((end_time - start_time) / 60.)

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing functions for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.finetune(
        datasets=datasets, batch_size=batch_size)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                            # go through this many minibatches before
                            # checking the network on the validation set;
                            # in this case we check every epoch

    best_params = None
    best_validation_loss = float('inf')
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_fn(minibatch_index, finetune_lr)
            # iteration number (epoch starts at 1 after the increment above)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:

                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %f minutes' % ((end_time - start_time) / 60.)


if __name__ == '__main__':
    test_DBN()
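
For a quick end-to-end run, the entry point can be called with lighter settings than the defaults. A minimal sketch, assuming the file is importable as the module DBN and that mnist.pkl.gz is where load_data expects it (the hyperparameter values below are illustrative only):

    from DBN import test_DBN

    # shortened experiment: 1 pretraining epoch, 5 finetuning epochs
    test_DBN(finetune_lr=0.1, pretraining_epochs=1,
             pretrain_lr=0.1, training_epochs=5,
             dataset='mnist.pkl.gz')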
