Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 48 additions & 53 deletions code/DBN.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
"""
from __future__ import print_function, division
import os
import sys
import timeit
Expand Down Expand Up @@ -61,9 +62,12 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))

# allocate symbolic variables for the data
self.x = T.matrix('x') # the data is presented as rasterized images
self.y = T.ivector('y') # the labels are presented as 1D vector
# of [int] labels

# the data is presented as rasterized images
self.x = T.matrix('x')

# the labels are presented as a 1D vector of [int] labels
self.y = T.ivector('y')
# end-snippet-1
# The DBN is an MLP, for which all weights of intermediate
# layers are shared with a different RBM. We will first
Expand Down Expand Up @@ -156,8 +160,6 @@ def pretraining_functions(self, train_set_x, batch_size, k):
index = T.lscalar('index') # index to a minibatch
learning_rate = T.scalar('lr') # learning rate to use

# number of batches
n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
# beginning of a batch, given `index`
batch_begin = index * batch_size
# ending of a batch given `index`
Expand Down Expand Up @@ -211,9 +213,9 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate):

# compute number of minibatches for training, validation and testing
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
n_valid_batches /= batch_size
n_valid_batches //= batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0]
n_test_batches /= batch_size
n_test_batches //= batch_size

index = T.lscalar('index') # index to a [mini]batch

Expand Down Expand Up @@ -307,11 +309,11 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
test_set_x, test_set_y = datasets[2]

# compute number of minibatches for training, validation and testing
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

# numpy random generator
numpy_rng = numpy.random.RandomState(123)
print '... building the model'
print('... building the model')
# construct the Deep Belief Network
dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
hidden_layers_sizes=[1000, 1000, 1000],
Expand All @@ -321,14 +323,14 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
#########################
# PRETRAINING THE MODEL #
#########################
print '... getting the pretraining functions'
print('... getting the pretraining functions')
pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
batch_size=batch_size,
k=k)

print '... pre-training the model'
print('... pre-training the model')
start_time = timeit.default_timer()
## Pre-train layer-wise
# Pre-train layer-wise
for i in range(dbn.n_layers):
# go through pretraining epochs
for epoch in range(pretraining_epochs):
Expand All @@ -337,38 +339,40 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
for batch_index in range(n_train_batches):
c.append(pretraining_fns[i](index=batch_index,
lr=pretrain_lr))
print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
print numpy.mean(c)
print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ')
print(numpy.mean(c))

end_time = timeit.default_timer()
# end-snippet-2
print >> sys.stderr, ('The pretraining code for file ' +
os.path.split(__file__)[1] +
' ran for %.2fm' % ((end_time - start_time) / 60.))
print('The pretraining code for file ' + os.path.split(__file__)[1] +
' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr)
########################
# FINETUNING THE MODEL #
########################

# get the training, validation and testing function for the model
print '... getting the finetuning functions'
print('... getting the finetuning functions')
train_fn, validate_model, test_model = dbn.build_finetune_functions(
datasets=datasets,
batch_size=batch_size,
learning_rate=finetune_lr
)

print '... finetuning the model'
print('... finetuning the model')
# early-stopping parameters
patience = 4 * n_train_batches # look as this many examples regardless
patience_increase = 2. # wait this much longer when a new best is
# found
improvement_threshold = 0.995 # a relative improvement of this much is
# considered significant

# look at this many examples regardless
patience = 4 * n_train_batches

# wait this much longer when a new best is found
patience_increase = 2.

# a relative improvement of this much is considered significant
improvement_threshold = 0.995

# go through this many minibatches before checking the network on
# the validation set; in this case we check every epoch
validation_frequency = min(n_train_batches, patience / 2)
# go through this many
# minibatches before checking the network
# on the validation set; in this case we
# check every epoch

best_validation_loss = numpy.inf
test_score = 0.
Expand All @@ -381,31 +385,27 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
epoch = epoch + 1
for minibatch_index in range(n_train_batches):

minibatch_avg_cost = train_fn(minibatch_index)
train_fn(minibatch_index)
iter = (epoch - 1) * n_train_batches + minibatch_index

if (iter + 1) % validation_frequency == 0:

validation_losses = validate_model()
this_validation_loss = numpy.mean(validation_losses)
print(
'epoch %i, minibatch %i/%i, validation error %f %%'
% (
epoch,
minibatch_index + 1,
n_train_batches,
this_validation_loss * 100.
print('epoch %i, minibatch %i/%i, validation error %f %%' % (
epoch,
minibatch_index + 1,
n_train_batches,
this_validation_loss * 100.
)
)

# if we got the best validation score until now
if this_validation_loss < best_validation_loss:

#improve patience if loss improvement is good enough
if (
this_validation_loss < best_validation_loss *
improvement_threshold
):
# improve patience if loss improvement is good enough
if (this_validation_loss < best_validation_loss *
improvement_threshold):
patience = max(patience, iter * patience_increase)

# save best validation score and iteration number
Expand All @@ -418,24 +418,19 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
print((' epoch %i, minibatch %i/%i, test error of '
'best model %f %%') %
(epoch, minibatch_index + 1, n_train_batches,
test_score * 100.))
test_score * 100.))

if patience <= iter:
done_looping = True
break

end_time = timeit.default_timer()
print(
(
'Optimization complete with best validation score of %f %%, '
'obtained at iteration %i, '
'with test performance %f %%'
) % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
)
print >> sys.stderr, ('The fine tuning code for file ' +
os.path.split(__file__)[1] +
' ran for %.2fm' % ((end_time - start_time)
/ 60.))
print(('Optimization complete with best validation score of %f %%, '
'obtained at iteration %i, '
'with test performance %f %%'
) % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
print('The fine tuning code for file ' + os.path.split(__file__)[1] +
' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr)


if __name__ == '__main__':
Expand Down
10 changes: 5 additions & 5 deletions code/imdb_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

3) Then run this script.
"""

from __future__ import print_function
dataset_path='/Tmp/bastienf/aclImdb/'

import numpy
Expand All @@ -27,12 +27,12 @@

def tokenize(sentences):

print 'Tokenizing..',
print('Tokenizing..', end=' ')
text = "\n".join(sentences)
tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
tok_text, _ = tokenizer.communicate(text)
toks = tok_text.split('\n')[:-1]
print 'Done'
print('Done')

return toks

Expand All @@ -52,7 +52,7 @@ def build_dict(path):

sentences = tokenize(sentences)

print 'Building dictionary..',
print('Building dictionary..', end=' ')
wordcount = dict()
for ss in sentences:
words = ss.strip().lower().split()
Expand All @@ -72,7 +72,7 @@ def build_dict(path):
for idx, ss in enumerate(sorted_idx):
worddict[keys[ss]] = idx+2 # leave 0 and 1 (UNK)

print numpy.sum(counts), ' total words ', len(keys), ' unique words'
print(numpy.sum(counts), ' total words ', len(keys), ' unique words')

return worddict

Expand Down
25 changes: 11 additions & 14 deletions code/logistic_cg.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@


"""
from __future__ import print_function, division
__docformat__ = 'restructedtext en'


Expand Down Expand Up @@ -165,17 +166,17 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):

batch_size = 600 # size of the minibatch

n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

n_in = 28 * 28 # number of input units
n_out = 10 # number of output units

######################
# BUILD ACTUAL MODEL #
######################
print '... building the model'
print('... building the model')

# allocate symbolic variables for the data
minibatch_offset = T.lscalar() # offset to the start of a [mini]batch
Expand Down Expand Up @@ -260,7 +261,7 @@ def callback(theta_value):
validation_losses = [validate_model(i * batch_size)
for i in range(n_valid_batches)]
this_validation_loss = numpy.mean(validation_losses)
print('validation error %f %%' % (this_validation_loss * 100.,))
print(('validation error %f %%' % (this_validation_loss * 100.,)))

# check if it is better than the best validation score obtained so far
if this_validation_loss < validation_scores[0]:
Expand Down Expand Up @@ -288,17 +289,13 @@ def callback(theta_value):
maxiter=n_epochs
)
end_time = timeit.default_timer()
print(
(
'Optimization complete with best validation score of %f %%, with '
'test performance %f %%'
)
% (validation_scores[0] * 100., validation_scores[1] * 100.)
print(('Optimization complete with best validation score of %f %%, with '
'test performance %f %%'
) % (validation_scores[0] * 100., validation_scores[1] * 100.)
)

print >> sys.stderr, ('The code for file ' +
os.path.split(__file__)[1] +
' ran for %.1fs' % ((end_time - start_time)))
print('The code for file ' + os.path.split(__file__)[1] +
' ran for %.1fs' % (end_time - start_time), file=sys.stderr)


if __name__ == '__main__':
Expand Down
11 changes: 6 additions & 5 deletions code/test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import absolute_import, print_function, division
import sys

import numpy
Expand Down Expand Up @@ -137,12 +138,12 @@ def speed():
def time_test(m, l, idx, f, **kwargs):
if not to_exec[idx]:
return
print algo[idx]
print(algo[idx])
ts = m.call_time
try:
f(**kwargs)
except Exception, e:
print >> sys.stderr, 'test', algo[idx], 'FAILED', e
except Exception as e:
print('test', algo[idx], 'FAILED', e, file=sys.stderr)
l.append(numpy.nan)
return
te = m.call_time
Expand Down Expand Up @@ -265,7 +266,7 @@ def do_tests():
print >> sys.stderr, 'gpu % expected/get', (
expected_times_gpu / gpu_times)

print
print()
if do_float64 and do_float32:
print >> sys.stderr, 'float64/float32', (
float64_times / float32_times)
Expand All @@ -286,7 +287,7 @@ def compare(x, y):
# time and the real time, we consider this an error.
return sum((ratio < 0.95) + (ratio > 1.05))

print
print()
if do_float64:
err = compare(expected_times_64, float64_times)
print >> sys.stderr, 'speed_failure_float64=' + str(err)
Expand Down