From 75cbba67b4fdc271bae5b7020a2a3fc69b70328d Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron
Date: Wed, 13 Jul 2016 14:03:47 -0400
Subject: [PATCH] Python 3 + flake8 fixes.

---
 code/DBN.py             | 101 +++++++++++++++++++---------------
 code/imdb_preprocess.py |  10 ++--
 code/logistic_cg.py     |  25 +++++-----
 code/test.py            |  11 +++--
 4 files changed, 70 insertions(+), 77 deletions(-)

diff --git a/code/DBN.py b/code/DBN.py
index 6ca88603..3b2bd230 100644
--- a/code/DBN.py
+++ b/code/DBN.py
@@ -1,5 +1,6 @@
 """
 """
+from __future__ import print_function, division
 import os
 import sys
 import timeit
@@ -61,9 +62,12 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))
 
         # allocate symbolic variables for the data
-        self.x = T.matrix('x')  # the data is presented as rasterized images
-        self.y = T.ivector('y')  # the labels are presented as 1D vector
-                                 # of [int] labels
+
+        # the data is presented as rasterized images
+        self.x = T.matrix('x')
+
+        # the labels are presented as 1D vector of [int] labels
+        self.y = T.ivector('y')
         # end-snippet-1
         # The DBN is an MLP, for which all weights of intermediate
         # layers are shared with a different RBM.  We will first
@@ -156,8 +160,6 @@ def pretraining_functions(self, train_set_x, batch_size, k):
         index = T.lscalar('index')  # index to a minibatch
         learning_rate = T.scalar('lr')  # learning rate to use
 
-        # number of batches
-        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
         # begining of a batch, given `index`
         batch_begin = index * batch_size
         # ending of a batch given `index`
@@ -211,9 +213,9 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate):
 
         # compute number of minibatches for training, validation and testing
         n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
-        n_valid_batches /= batch_size
+        n_valid_batches //= batch_size
         n_test_batches = test_set_x.get_value(borrow=True).shape[0]
-        n_test_batches /= batch_size
+        n_test_batches //= batch_size
 
         index = T.lscalar('index')  # index to a [mini]batch
 
@@ -307,11 +309,11 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
     test_set_x, test_set_y = datasets[2]
 
     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
 
     # numpy random generator
     numpy_rng = numpy.random.RandomState(123)
-    print '... building the model'
+    print('... building the model')
     # construct the Deep Belief Network
     dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
               hidden_layers_sizes=[1000, 1000, 1000],
@@ -321,14 +323,14 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
     #########################
     # PRETRAINING THE MODEL #
     #########################
-    print '... getting the pretraining functions'
+    print('... getting the pretraining functions')
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                 batch_size=batch_size,
                                                 k=k)
 
-    print '... pre-training the model'
+    print('... pre-training the model')
     start_time = timeit.default_timer()
-    ## Pre-train layer-wise
+    # Pre-train layer-wise
     for i in range(dbn.n_layers):
         # go through pretraining epochs
         for epoch in range(pretraining_epochs):
@@ -337,38 +339,40 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
             for batch_index in range(n_train_batches):
                 c.append(pretraining_fns[i](index=batch_index,
                                             lr=pretrain_lr))
-            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
-            print numpy.mean(c)
+            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ')
+            print(numpy.mean(c))
 
     end_time = timeit.default_timer()
     # end-snippet-2
-    print >> sys.stderr, ('The pretraining code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print('The pretraining code for file ' + os.path.split(__file__)[1] +
+          ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr)
     ########################
     # FINETUNING THE MODEL #
     ########################
 
     # get the training, validation and testing function for the model
-    print '... getting the finetuning functions'
+    print('... getting the finetuning functions')
     train_fn, validate_model, test_model = dbn.build_finetune_functions(
         datasets=datasets,
         batch_size=batch_size,
         learning_rate=finetune_lr
     )
 
-    print '... finetuning the model'
+    print('... finetuning the model')
     # early-stopping parameters
-    patience = 4 * n_train_batches  # look as this many examples regardless
-    patience_increase = 2.    # wait this much longer when a new best is
-                              # found
-    improvement_threshold = 0.995  # a relative improvement of this much is
-                                   # considered significant
+
+    # look as this many examples regardless
+    patience = 4 * n_train_batches
+
+    # wait this much longer when a new best is found
+    patience_increase = 2.
+
+    # a relative improvement of this much is considered significant
+    improvement_threshold = 0.995
+
+    # go through this many minibatches before checking the network on
+    # the validation set; in this case we check every epoch
     validation_frequency = min(n_train_batches, patience / 2)
-                                  # go through this many
-                                  # minibatches before checking the network
-                                  # on the validation set; in this case we
-                                  # check every epoch
 
     best_validation_loss = numpy.inf
     test_score = 0.
@@ -381,31 +385,27 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
         epoch = epoch + 1
         for minibatch_index in range(n_train_batches):
 
-            minibatch_avg_cost = train_fn(minibatch_index)
+            train_fn(minibatch_index)
             iter = (epoch - 1) * n_train_batches + minibatch_index
 
             if (iter + 1) % validation_frequency == 0:
 
                 validation_losses = validate_model()
                 this_validation_loss = numpy.mean(validation_losses)
-                print(
-                    'epoch %i, minibatch %i/%i, validation error %f %%'
-                    % (
-                        epoch,
-                        minibatch_index + 1,
-                        n_train_batches,
-                        this_validation_loss * 100.
+                print('epoch %i, minibatch %i/%i, validation error %f %%' % (
+                    epoch,
+                    minibatch_index + 1,
+                    n_train_batches,
+                    this_validation_loss * 100.
                     )
                 )
 
                 # if we got the best validation score until now
                 if this_validation_loss < best_validation_loss:
 
-                    #improve patience if loss improvement is good enough
-                    if (
-                        this_validation_loss < best_validation_loss *
-                        improvement_threshold
-                    ):
+                    # improve patience if loss improvement is good enough
+                    if (this_validation_loss < best_validation_loss *
+                            improvement_threshold):
                         patience = max(patience, iter * patience_increase)
 
                     # save best validation score and iteration number
@@ -418,24 +418,19 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
                     print(('     epoch %i, minibatch %i/%i, test error of '
                            'best model %f %%') %
                           (epoch, minibatch_index + 1, n_train_batches,
-                          test_score * 100.))
+                           test_score * 100.))
 
             if patience <= iter:
                 done_looping = True
                 break
 
     end_time = timeit.default_timer()
-    print(
-        (
-            'Optimization complete with best validation score of %f %%, '
-            'obtained at iteration %i, '
-            'with test performance %f %%'
-        ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
-    )
-    print >> sys.stderr, ('The fine tuning code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time)
-                                              / 60.))
+    print(('Optimization complete with best validation score of %f %%, '
+           'obtained at iteration %i, '
+           'with test performance %f %%'
+           ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+    print('The fine tuning code for file ' + os.path.split(__file__)[1] +
+          ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr)
 
 
 if __name__ == '__main__':
diff --git a/code/imdb_preprocess.py b/code/imdb_preprocess.py
index c20b37b6..62ebb556 100644
--- a/code/imdb_preprocess.py
+++ b/code/imdb_preprocess.py
@@ -8,7 +8,7 @@
 
 3) Then run this script.
 """
-
+from __future__ import print_function
 dataset_path='/Tmp/bastienf/aclImdb/'
 
 import numpy
@@ -27,12 +27,12 @@
 
 def tokenize(sentences):
 
-    print 'Tokenizing..',
+    print('Tokenizing..', end=' ')
     text = "\n".join(sentences)
     tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
     tok_text, _ = tokenizer.communicate(text)
     toks = tok_text.split('\n')[:-1]
-    print 'Done'
+    print('Done')
 
     return toks
 
@@ -52,7 +52,7 @@ def build_dict(path):
 
     sentences = tokenize(sentences)
 
-    print 'Building dictionary..',
+    print('Building dictionary..', end=' ')
     wordcount = dict()
     for ss in sentences:
         words = ss.strip().lower().split()
@@ -72,7 +72,7 @@ def build_dict(path):
     for idx, ss in enumerate(sorted_idx):
         worddict[keys[ss]] = idx+2  # leave 0 and 1 (UNK)
 
-    print numpy.sum(counts), ' total words ', len(keys), ' unique words'
+    print(numpy.sum(counts), ' total words ', len(keys), ' unique words')
 
     return worddict
 
diff --git a/code/logistic_cg.py b/code/logistic_cg.py
index 40c72c2f..c2970d51 100644
--- a/code/logistic_cg.py
+++ b/code/logistic_cg.py
@@ -33,6 +33,7 @@
 
 
 """
+from __future__ import print_function, division
 __docformat__ = 'restructedtext en'
 
 
@@ -165,9 +166,9 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):
 
     batch_size = 600    # size of the minibatch
 
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
 
     n_in = 28 * 28  # number of input units
     n_out = 10  # number of output units
@@ -175,7 +176,7 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):
     ######################
     # BUILD ACTUAL MODEL #
     ######################
-    print '... building the model'
+    print('... building the model')
 
     # allocate symbolic variables for the data
     minibatch_offset = T.lscalar()  # offset to the start of a [mini]batch
@@ -260,7 +261,7 @@ def callback(theta_value):
         validation_losses = [validate_model(i * batch_size)
                              for i in range(n_valid_batches)]
         this_validation_loss = numpy.mean(validation_losses)
-        print('validation error %f %%' % (this_validation_loss * 100.,))
+        print(('validation error %f %%' % (this_validation_loss * 100.,)))
 
         # check if it is better then best validation score got until now
         if this_validation_loss < validation_scores[0]:
@@ -288,17 +289,13 @@ def callback(theta_value):
         maxiter=n_epochs
     )
     end_time = timeit.default_timer()
-    print(
-        (
-            'Optimization complete with best validation score of %f %%, with '
-            'test performance %f %%'
-        )
-        % (validation_scores[0] * 100., validation_scores[1] * 100.)
+    print(('Optimization complete with best validation score of %f %%, with '
+           'test performance %f %%'
+           ) % (validation_scores[0] * 100., validation_scores[1] * 100.)
     )
 
-    print >> sys.stderr, ('The code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.1fs' % ((end_time - start_time)))
+    print('The code for file ' + os.path.split(__file__)[1] +
+          ' ran for %.1fs' % (end_time - start_time), file=sys.stderr)
 
 
 if __name__ == '__main__':
diff --git a/code/test.py b/code/test.py
index 6aee1084..926cae7b 100644
--- a/code/test.py
+++ b/code/test.py
@@ -1,3 +1,4 @@
+from __future__ import absolute_import, print_function, division
 import sys
 
 import numpy
@@ -137,12 +138,12 @@ def speed():
     def time_test(m, l, idx, f, **kwargs):
         if not to_exec[idx]:
             return
-        print algo[idx]
+        print(algo[idx])
         ts = m.call_time
         try:
             f(**kwargs)
-        except Exception, e:
-            print >> sys.stderr, 'test', algo[idx], 'FAILED', e
+        except Exception as e:
+            print('test', algo[idx], 'FAILED', e, file=sys.stderr)
             l.append(numpy.nan)
             return
         te = m.call_time
@@ -265,7 +266,7 @@ def do_tests():
 
         print >> sys.stderr, 'gpu % expected/get', (
             expected_times_gpu / gpu_times)
-    print
+    print()
     if do_float64 and do_float32:
         print >> sys.stderr, 'float64/float32', (
             float64_times / float32_times)
@@ -286,7 +287,7 @@ def compare(x, y):
         # time and the real time, we consider this an error.
         return sum((ratio < 0.95) + (ratio > 1.05))
 
-    print
+    print()
     if do_float64:
         err = compare(expected_times_64, float64_times)
         print >> sys.stderr, 'speed_failure_float64=' + str(err)
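The conversions applied throughout this patch follow a few recurring patterns: from __future__ import print_function, division; floor division with // for minibatch counts; end=' ' in place of a trailing comma on print; print(..., file=sys.stderr) in place of print >> sys.stderr; and except ... as e. The short sketch below illustrates those patterns in isolation. It is not part of the patched files; the function name and the sample sizes in it are made up for illustration only.

from __future__ import print_function, division

import sys


def show_py3_patterns(n_examples=50000, batch_size=600):
    # Illustration only: this function and its sample sizes are not part of
    # the patch; they just mirror the idioms used in DBN.py and test.py.

    # With division imported, / always returns a float, so batch counts use
    # floor division (//) to stay integers.
    n_batches = n_examples // batch_size
    print('number of minibatches:', n_batches)

    # print is a function: end=' ' replaces the Python 2 trailing comma, and
    # file=sys.stderr replaces the Python 2 "print >> sys.stderr" form.
    print('Pre-training layer %i, epoch %d, cost ' % (0, 0), end=' ')
    print(1.25)
    print('timing information goes to stderr', file=sys.stderr)

    # "except Exception, e" is a syntax error under Python 3;
    # "except Exception as e" works in both Python 2 and 3.
    try:
        1 / 0
    except Exception as e:
        print('test FAILED', e, file=sys.stderr)


if __name__ == '__main__':
    show_py3_patterns()

Keeping the batch counts integral with // matters because the patched scripts pass them to range() and use them in index arithmetic; under true division they would become floats, which range() rejects.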