From 75cbba67b4fdc271bae5b7020a2a3fc69b70328d Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron
Date: Wed, 13 Jul 2016 14:03:47 -0400
Subject: [PATCH] Python 3 + flake8 fixes.

---
 code/DBN.py             | 101 +++++++++++++++++++---------------
 code/imdb_preprocess.py |  10 ++--
 code/logistic_cg.py     |  25 +++++-----
 code/test.py            |  11 +++--
 4 files changed, 70 insertions(+), 77 deletions(-)

diff --git a/code/DBN.py b/code/DBN.py
index 6ca88603..3b2bd230 100644
--- a/code/DBN.py
+++ b/code/DBN.py
@@ -1,5 +1,6 @@
 """
 """
+from __future__ import print_function, division
 import os
 import sys
 import timeit
@@ -61,9 +62,12 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))
 
         # allocate symbolic variables for the data
-        self.x = T.matrix('x')  # the data is presented as rasterized images
-        self.y = T.ivector('y')  # the labels are presented as 1D vector
-                                 # of [int] labels
+
+        # the data is presented as rasterized images
+        self.x = T.matrix('x')
+
+        # the labels are presented as 1D vector of [int] labels
+        self.y = T.ivector('y')
         # end-snippet-1
         # The DBN is an MLP, for which all weights of intermediate
         # layers are shared with a different RBM.  We will first
@@ -156,8 +160,6 @@ def pretraining_functions(self, train_set_x, batch_size, k):
         index = T.lscalar('index')  # index to a minibatch
         learning_rate = T.scalar('lr')  # learning rate to use
 
-        # number of batches
-        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
         # begining of a batch, given `index`
         batch_begin = index * batch_size
         # ending of a batch given `index`
@@ -211,9 +213,9 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate):
 
         # compute number of minibatches for training, validation and testing
         n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
-        n_valid_batches /= batch_size
+        n_valid_batches //= batch_size
         n_test_batches = test_set_x.get_value(borrow=True).shape[0]
-        n_test_batches /= batch_size
+        n_test_batches //= batch_size
 
         index = T.lscalar('index')  # index to a [mini]batch
 
@@ -307,11 +309,11 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
     test_set_x, test_set_y = datasets[2]
 
     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
 
     # numpy random generator
     numpy_rng = numpy.random.RandomState(123)
-    print '... building the model'
+    print('... building the model')
     # construct the Deep Belief Network
     dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
               hidden_layers_sizes=[1000, 1000, 1000],
@@ -321,14 +323,14 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
     #########################
     # PRETRAINING THE MODEL #
     #########################
-    print '... getting the pretraining functions'
+    print('... getting the pretraining functions')
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                 batch_size=batch_size,
                                                 k=k)
 
-    print '... pre-training the model'
+    print('... pre-training the model')
     start_time = timeit.default_timer()
-    ## Pre-train layer-wise
+    # Pre-train layer-wise
     for i in range(dbn.n_layers):
         # go through pretraining epochs
         for epoch in range(pretraining_epochs):
@@ -337,38 +339,40 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
             for batch_index in range(n_train_batches):
                 c.append(pretraining_fns[i](index=batch_index,
                                             lr=pretrain_lr))
-            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
-            print numpy.mean(c)
+            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ')
+            print(numpy.mean(c))
 
     end_time = timeit.default_timer()
     # end-snippet-2
-    print >> sys.stderr, ('The pretraining code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print('The pretraining code for file ' + os.path.split(__file__)[1] +
+          ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr)
     ########################
     # FINETUNING THE MODEL #
     ########################
 
     # get the training, validation and testing function for the model
-    print '... getting the finetuning functions'
+    print('... getting the finetuning functions')
     train_fn, validate_model, test_model = dbn.build_finetune_functions(
         datasets=datasets,
         batch_size=batch_size,
         learning_rate=finetune_lr
     )
 
-    print '... finetuning the model'
+    print('... finetuning the model')
     # early-stopping parameters
-    patience = 4 * n_train_batches  # look as this many examples regardless
-    patience_increase = 2.    # wait this much longer when a new best is
-                              # found
-    improvement_threshold = 0.995  # a relative improvement of this much is
-                                   # considered significant
+
+    # look as this many examples regardless
+    patience = 4 * n_train_batches
+
+    # wait this much longer when a new best is found
+    patience_increase = 2.
+
+    # a relative improvement of this much is considered significant
+    improvement_threshold = 0.995
+
+    # go through this many minibatches before checking the network on
+    # the validation set; in this case we check every epoch
     validation_frequency = min(n_train_batches, patience / 2)
-                                  # go through this many
-                                  # minibatches before checking the network
-                                  # on the validation set; in this case we
-                                  # check every epoch
 
     best_validation_loss = numpy.inf
     test_score = 0.
@@ -381,31 +385,27 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
         epoch = epoch + 1
         for minibatch_index in range(n_train_batches):
 
-            minibatch_avg_cost = train_fn(minibatch_index)
+            train_fn(minibatch_index)
             iter = (epoch - 1) * n_train_batches + minibatch_index
 
             if (iter + 1) % validation_frequency == 0:
 
                 validation_losses = validate_model()
                 this_validation_loss = numpy.mean(validation_losses)
-                print(
-                    'epoch %i, minibatch %i/%i, validation error %f %%'
-                    % (
-                        epoch,
-                        minibatch_index + 1,
-                        n_train_batches,
-                        this_validation_loss * 100.
+                print('epoch %i, minibatch %i/%i, validation error %f %%' % (
+                    epoch,
+                    minibatch_index + 1,
+                    n_train_batches,
+                    this_validation_loss * 100.
                     )
                 )
 
                 # if we got the best validation score until now
                 if this_validation_loss < best_validation_loss:
 
-                    #improve patience if loss improvement is good enough
-                    if (
-                        this_validation_loss < best_validation_loss *
-                        improvement_threshold
-                    ):
+                    # improve patience if loss improvement is good enough
+                    if (this_validation_loss < best_validation_loss *
+                            improvement_threshold):
                         patience = max(patience, iter * patience_increase)
 
                     # save best validation score and iteration number
@@ -418,24 +418,19 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
                     print(('     epoch %i, minibatch %i/%i, test error of '
                            'best model %f %%') %
                           (epoch, minibatch_index + 1, n_train_batches,
-                          test_score * 100.))
+                           test_score * 100.))
 
             if patience <= iter:
                 done_looping = True
                 break
 
     end_time = timeit.default_timer()
-    print(
-        (
-            'Optimization complete with best validation score of %f %%, '
-            'obtained at iteration %i, '
-            'with test performance %f %%'
-        ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
-    )
-    print >> sys.stderr, ('The fine tuning code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time)
-                                              / 60.))
+    print(('Optimization complete with best validation score of %f %%, '
+           'obtained at iteration %i, '
+           'with test performance %f %%'
+           ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+    print('The fine tuning code for file ' + os.path.split(__file__)[1] +
+          ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr)
 
 
 if __name__ == '__main__':
diff --git a/code/imdb_preprocess.py b/code/imdb_preprocess.py
index c20b37b6..62ebb556 100644
--- a/code/imdb_preprocess.py
+++ b/code/imdb_preprocess.py
@@ -8,7 +8,7 @@
 
 3) Then run this script.
 """
-
+from __future__ import print_function
 dataset_path='/Tmp/bastienf/aclImdb/'
 
 import numpy
@@ -27,12 +27,12 @@
 
 def tokenize(sentences):
 
-    print 'Tokenizing..',
+    print('Tokenizing..', end=' ')
     text = "\n".join(sentences)
     tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
     tok_text, _ = tokenizer.communicate(text)
     toks = tok_text.split('\n')[:-1]
-    print 'Done'
+    print('Done')
 
     return toks
 
@@ -52,7 +52,7 @@ def build_dict(path):
 
     sentences = tokenize(sentences)
 
-    print 'Building dictionary..',
+    print('Building dictionary..', end=' ')
     wordcount = dict()
     for ss in sentences:
         words = ss.strip().lower().split()
@@ -72,7 +72,7 @@ def build_dict(path):
     for idx, ss in enumerate(sorted_idx):
         worddict[keys[ss]] = idx+2  # leave 0 and 1 (UNK)
 
-    print numpy.sum(counts), ' total words ', len(keys), ' unique words'
+    print(numpy.sum(counts), ' total words ', len(keys), ' unique words')
 
     return worddict
 
diff --git a/code/logistic_cg.py b/code/logistic_cg.py
index 40c72c2f..c2970d51 100644
--- a/code/logistic_cg.py
+++ b/code/logistic_cg.py
@@ -33,6 +33,7 @@
 
 
 """
+from __future__ import print_function, division
 __docformat__ = 'restructedtext en'
 
 
@@ -165,9 +166,9 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):
 
     batch_size = 600    # size of the minibatch
 
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
 
     n_in = 28 * 28  # number of input units
     n_out = 10  # number of output units
@@ -175,7 +176,7 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):
     ######################
     # BUILD ACTUAL MODEL #
     ######################
-    print '... building the model'
+    print('... building the model')
 
     # allocate symbolic variables for the data
     minibatch_offset = T.lscalar()  # offset to the start of a [mini]batch
@@ -260,7 +261,7 @@ def callback(theta_value):
         validation_losses = [validate_model(i * batch_size)
                              for i in range(n_valid_batches)]
         this_validation_loss = numpy.mean(validation_losses)
-        print('validation error %f %%' % (this_validation_loss * 100.,))
+        print(('validation error %f %%' % (this_validation_loss * 100.,)))
 
         # check if it is better then best validation score got until now
         if this_validation_loss < validation_scores[0]:
@@ -288,17 +289,13 @@ def callback(theta_value):
         maxiter=n_epochs
     )
     end_time = timeit.default_timer()
-    print(
-        (
-            'Optimization complete with best validation score of %f %%, with '
-            'test performance %f %%'
-        )
-        % (validation_scores[0] * 100., validation_scores[1] * 100.)
+    print(('Optimization complete with best validation score of %f %%, with '
+           'test performance %f %%'
+           ) % (validation_scores[0] * 100., validation_scores[1] * 100.)
     )
 
-    print >> sys.stderr, ('The code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.1fs' % ((end_time - start_time)))
+    print('The code for file ' + os.path.split(__file__)[1] +
+          ' ran for %.1fs' % (end_time - start_time), file=sys.stderr)
 
 
 if __name__ == '__main__':
diff --git a/code/test.py b/code/test.py
index 6aee1084..926cae7b 100644
--- a/code/test.py
+++ b/code/test.py
@@ -1,3 +1,4 @@
+from __future__ import absolute_import, print_function, division
 import sys
 
 import numpy
@@ -137,12 +138,12 @@ def speed():
     def time_test(m, l, idx, f, **kwargs):
         if not to_exec[idx]:
             return
-        print algo[idx]
+        print(algo[idx])
         ts = m.call_time
         try:
             f(**kwargs)
-        except Exception, e:
-            print >> sys.stderr, 'test', algo[idx], 'FAILED', e
+        except Exception as e:
+            print('test', algo[idx], 'FAILED', e, file=sys.stderr)
             l.append(numpy.nan)
             return
         te = m.call_time
@@ -265,7 +266,7 @@ def do_tests():
 
         print >> sys.stderr, 'gpu % expected/get', (
             expected_times_gpu / gpu_times)
-    print
+    print()
     if do_float64 and do_float32:
         print >> sys.stderr, 'float64/float32', (
             float64_times / float32_times)
@@ -286,7 +287,7 @@ def compare(x, y):
         # time and the real time, we consider this an error.
         return sum((ratio < 0.95) + (ratio > 1.05))
 
-    print
+    print()
     if do_float64:
         err = compare(expected_times_64, float64_times)
         print >> sys.stderr, 'speed_failure_float64=' + str(err)
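The conversions applied throughout this patch follow a few recurring patterns: from __future__ import print_function, division; floor division with // for minibatch counts; end=' ' in place of a trailing comma on print; print(..., file=sys.stderr) in place of print >> sys.stderr; and except ... as e. The short sketch below illustrates those patterns in isolation. It is not part of the patched files; the function name and the sample sizes in it are made up for illustration only.

from __future__ import print_function, division

import sys


def show_py3_patterns(n_examples=50000, batch_size=600):
    # Illustration only: this function and its sample sizes are not part of
    # the patch; they just mirror the idioms used in DBN.py and test.py.

    # With division imported, / always returns a float, so batch counts use
    # floor division (//) to stay integers.
    n_batches = n_examples // batch_size
    print('number of minibatches:', n_batches)

    # print is a function: end=' ' replaces the Python 2 trailing comma, and
    # file=sys.stderr replaces the Python 2 "print >> sys.stderr" form.
    print('Pre-training layer %i, epoch %d, cost ' % (0, 0), end=' ')
    print(1.25)
    print('timing information goes to stderr', file=sys.stderr)

    # "except Exception, e" is a syntax error under Python 3;
    # "except Exception as e" works in both Python 2 and 3.
    try:
        1 / 0
    except Exception as e:
        print('test FAILED', e, file=sys.stderr)


if __name__ == '__main__':
    show_py3_patterns()

Keeping the batch counts integral with // matters because the patched scripts pass them to range() and use them in index arithmetic; under true division they would become floats, which range() rejects.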