diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh new file mode 100755 index 00000000..15da288b --- /dev/null +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -0,0 +1,82 @@ +#!/bin/bash + +# CUDA +export PATH=/usr/local/cuda/bin:$PATH +export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH +export LIBRARY_PATH=/usr/local/cuda/lib64:$LIBRARY_PATH + +BUILDBOT_DIR=$WORKSPACE/nightly_build + +mkdir -p ${BUILDBOT_DIR} + +date +COMPILEDIR=$HOME/.theano/lisa_theano_buildbot_deeplearning +NOSETESTS=${BUILDBOT_DIR}/Theano/bin/theano-nose +XUNIT="--with-xunit --xunit-file=" +# name test suites +SUITE="--xunit-testsuite-name=" + +FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR} +export PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/Pylearn:$PYTHONPATH + +# Install libgpuarray and pygpu +cd ${BUILDBOT_DIR} + +# Make fresh clone (with no history since we don't need it) +rm -rf libgpuarray +git clone "https://github.com/Theano/libgpuarray.git" + +(cd libgpuarray && echo "libgpuarray commit" && git rev-parse HEAD) + +# Clean up previous installs (to make sure no old files are left) +rm -rf local +mkdir local + +# Build libgpuarray and run C tests +mkdir libgpuarray/build +(cd libgpuarray/build && cmake .. -DCMAKE_BUILD_TYPE=${GPUARRAY_CONFIG} -DCMAKE_INSTALL_PREFIX=${BUILDBOT_DIR}/local && make) + +# Finally install +(cd libgpuarray/build && make install) +export LD_LIBRARY_PATH=${BUILDBOT_DIR}/local/lib:${LD_LIBRARY_PATH} +export LIBRARY_PATH=${BUILDBOT_DIR}/local/lib:${LIBRARY_PATH} +export CPATH=${BUILDBOT_DIR}/local/include:${CPATH} + +# Build the pygpu modules +(cd libgpuarray && python setup.py build_ext --inplace -I${BUILDBOT_DIR}/local/include -L${BUILDBOT_DIR}/local/lib) + +mkdir ${BUILDBOT_DIR}/local/lib/python +export PYTHONPATH=${PYTHONPATH}:${BUILDBOT_DIR}/local/lib/python +# Then install +(cd libgpuarray && python setup.py install --home=${BUILDBOT_DIR}/local) + +# Install Theano +cd ${BUILDBOT_DIR} +if [ ! 
-d ${BUILDBOT_DIR}/Theano ]; then + git clone git://github.com/Theano/Theano.git +fi +# update repo +cd ${BUILDBOT_DIR}/Theano; git pull + +cd ${WORKSPACE}/data +./download.sh + +cd ${BUILDBOT_DIR}/Theano +echo "git version for Theano:" `git rev-parse HEAD` +cd ${WORKSPACE}/code +echo "git version:" `git rev-parse HEAD` + +echo "==== Executing nosetests speed with mode=FAST_RUN" +NAME=dlt_speed +FILE=${BUILDBOT_DIR}/${NAME}_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} ${SUITE}${NAME} test.py:speed + +echo "==== Executing nosetests with mode=FAST_RUN,floatX=float32" +NAME=dlt_float32 +FILE=${BUILDBOT_DIR}/${NAME}_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} ${SUITE}${NAME} + +echo "==== Executing nosetests with mode=FAST_RUN,floatX=float32,device=cuda" +NAME=dlt_float32_cuda +FILE=${BUILDBOT_DIR}/${NAME}_tests.xml +PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/DeepLearningTutorials/code:${PYTHONPATH} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32,device=cuda nosetests test.py ${XUNIT}${FILE} ${SUITE}${NAME} diff --git a/.travis.yml b/.travis.yml index 7873dedf..e2f2d530 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,28 +1,29 @@ # After changing this file, check it on: # http://lint.travis-ci.org/ +sudo: false language: python #python: -# - "2.7" -# - "3.2" +# - "2.6" +# - "3.3" # command to install dependencies before_install: - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh - chmod +x miniconda.sh - ./miniconda.sh -b - - export PATH=/home/travis/miniconda/bin:$PATH + - export PATH=/home/travis/miniconda/bin:/home/travis/miniconda2/bin:$PATH - conda update --yes conda install: - - conda create --yes -q -n pyenv mkl python=2.7 numpy scipy pip nose yaml pyflakes pillow pyparsing=1.5 + - conda create --yes -q -n pyenv mkl python=2.7 numpy=1.10 scipy=0.16.1 pip nose yaml pyflakes pillow pyparsing=1.5 - source activate pyenv - pip install git+git://github.com/Theano/Theano.git env: - PART="test.py:test_logistic_sgd test.py:test_logistic_cg test.py:test_mlp test.py:test_convolutional_mlp test.py:test_dA" - - PART="test.py:test_SdA" + - PART="test.py:test_SdA test.py:test_lstm" - PART="test.py:test_dbn" - - PART="test.py:test_rbm test.py:test_rnnrbm" + - PART="test.py:test_rbm test.py:test_rnnrbm test.py:test_rnnslu" - PART="-e test.py" #i7-2600K CPU @ 3.40GHz diff --git a/code/DBN.py b/code/DBN.py index b54ac5bc..e1bb66df 100644 --- a/code/DBN.py +++ b/code/DBN.py @@ -1,5 +1,6 @@ """ """ +from __future__ import print_function, division import os import sys import timeit @@ -61,9 +62,12 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784, theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data - self.x = T.matrix('x') # the data is presented as rasterized images - self.y = T.ivector('y') # the labels are presented as 1D vector - # of [int] labels + + # the data is presented as rasterized images + self.x = T.matrix('x') + + # the labels are presented as 1D vector of [int] labels + self.y = T.ivector('y') # end-snippet-1 # The DBN is an MLP, for which all weights of intermediate # layers are shared with a different RBM. We will first @@ -75,7 +79,7 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784, # training the DBN by doing stochastic gradient descent on the # MLP. 
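
The same Python 2/3 compatibility pattern recurs throughout this patch: a __future__ import at the top of each module, range in place of xrange, and floor division so batch counts stay integers under true division. A minimal standalone sketch of that pattern (the sizes below are made up for illustration, not taken from the tutorial):

    from __future__ import print_function, division

    n_examples = 50000   # illustrative values only
    batch_size = 600
    # '//' keeps the batch count an integer even with true division enabled
    n_train_batches = n_examples // batch_size
    for minibatch_index in range(n_train_batches):  # xrange no longer exists in Python 3
        pass
    print('number of minibatches:', n_train_batches)
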
- for i in xrange(self.n_layers): + for i in range(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden @@ -156,8 +160,6 @@ def pretraining_functions(self, train_set_x, batch_size, k): index = T.lscalar('index') # index to a minibatch learning_rate = T.scalar('lr') # learning rate to use - # number of batches - n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # begining of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` @@ -174,7 +176,7 @@ def pretraining_functions(self, train_set_x, batch_size, k): # compile the theano function fn = theano.function( - inputs=[index, theano.Param(learning_rate, default=0.1)], + inputs=[index, theano.In(learning_rate, value=0.1)], outputs=cost, updates=updates, givens={ @@ -211,9 +213,9 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate): # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] - n_valid_batches /= batch_size + n_valid_batches //= batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] - n_test_batches /= batch_size + n_test_batches //= batch_size index = T.lscalar('index') # index to a [mini]batch @@ -267,11 +269,11 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate): # Create a function that scans the entire validation set def valid_score(): - return [valid_score_i(i) for i in xrange(n_valid_batches)] + return [valid_score_i(i) for i in range(n_valid_batches)] # Create a function that scans the entire test set def test_score(): - return [test_score_i(i) for i in xrange(n_test_batches)] + return [test_score_i(i) for i in range(n_test_batches)] return train_fn, valid_score, test_score @@ -307,11 +309,11 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size # numpy random generator numpy_rng = numpy.random.RandomState(123) - print '... building the model' + print('... building the model') # construct the Deep Belief Network dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000], @@ -321,54 +323,56 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, ######################### # PRETRAINING THE MODEL # ######################### - print '... getting the pretraining functions' + print('... getting the pretraining functions') pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, k=k) - print '... pre-training the model' + print('... 
pre-training the model') start_time = timeit.default_timer() - ## Pre-train layer-wise - for i in xrange(dbn.n_layers): + # Pre-train layer-wise + for i in range(dbn.n_layers): # go through pretraining epochs - for epoch in xrange(pretraining_epochs): + for epoch in range(pretraining_epochs): # go through the training set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) - print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), - print numpy.mean(c) + print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ') + print(numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() # end-snippet-2 - print >> sys.stderr, ('The pretraining code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) / 60.)) + print('The pretraining code for file ' + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr) ######################## # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model - print '... getting the finetuning functions' + print('... getting the finetuning functions') train_fn, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr ) - print '... finetuning the model' + print('... finetuning the model') # early-stopping parameters - patience = 4 * n_train_batches # look as this many examples regardless - patience_increase = 2. # wait this much longer when a new best is - # found - improvement_threshold = 0.995 # a relative improvement of this much is - # considered significant + + # look as this many examples regardless + patience = 4 * n_train_batches + + # wait this much longer when a new best is found + patience_increase = 2. + + # a relative improvement of this much is considered significant + improvement_threshold = 0.995 + + # go through this many minibatches before checking the network on + # the validation set; in this case we check every epoch validation_frequency = min(n_train_batches, patience / 2) - # go through this many - # minibatches before checking the network - # on the validation set; in this case we - # check every epoch best_validation_loss = numpy.inf test_score = 0. @@ -379,33 +383,29 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): - minibatch_avg_cost = train_fn(minibatch_index) + train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() - this_validation_loss = numpy.mean(validation_losses) - print( - 'epoch %i, minibatch %i/%i, validation error %f %%' - % ( - epoch, - minibatch_index + 1, - n_train_batches, - this_validation_loss * 100. + this_validation_loss = numpy.mean(validation_losses, dtype='float64') + print('epoch %i, minibatch %i/%i, validation error %f %%' % ( + epoch, + minibatch_index + 1, + n_train_batches, + this_validation_loss * 100. 
) ) # if we got the best validation score until now if this_validation_loss < best_validation_loss: - #improve patience if loss improvement is good enough - if ( - this_validation_loss < best_validation_loss * - improvement_threshold - ): + # improve patience if loss improvement is good enough + if (this_validation_loss < best_validation_loss * + improvement_threshold): patience = max(patience, iter * patience_increase) # save best validation score and iteration number @@ -414,28 +414,23 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, # test it on the test set test_losses = test_model() - test_score = numpy.mean(test_losses) + test_score = numpy.mean(test_losses, dtype='float64') print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, - test_score * 100.)) + test_score * 100.)) if patience <= iter: done_looping = True break end_time = timeit.default_timer() - print( - ( - 'Optimization complete with best validation score of %f %%, ' - 'obtained at iteration %i, ' - 'with test performance %f %%' - ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.) - ) - print >> sys.stderr, ('The fine tuning code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) - / 60.)) + print(('Optimization complete with best validation score of %f %%, ' + 'obtained at iteration %i, ' + 'with test performance %f %%' + ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) + print('The fine tuning code for file ' + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr) if __name__ == '__main__': diff --git a/code/SdA.py b/code/SdA.py index 82660e99..8da74797 100644 --- a/code/SdA.py +++ b/code/SdA.py @@ -29,6 +29,9 @@ Systems 19, 2007 """ + +from __future__ import print_function + import os import sys import timeit @@ -37,7 +40,7 @@ import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from logistic_sgd import LogisticRegression, load_data from mlp import HiddenLayer @@ -78,8 +81,8 @@ def __init__( :type n_ins: int :param n_ins: dimension of the input to the sdA - :type n_layers_sizes: list of ints - :param n_layers_sizes: intermediate layers size, must contain + :type hidden_layers_sizes: list of ints + :param hidden_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int @@ -116,7 +119,7 @@ def __init__( # stochastich gradient descent on the MLP # start-snippet-2 - for i in xrange(self.n_layers): + for i in range(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of @@ -214,8 +217,8 @@ def pretraining_functions(self, train_set_x, batch_size): fn = theano.function( inputs=[ index, - theano.Param(corruption_level, default=0.2), - theano.Param(learning_rate, default=0.1) + theano.In(corruption_level, value=0.2), + theano.In(learning_rate, value=0.1) ], outputs=cost, updates=updates, @@ -254,9 +257,9 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate): # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] - n_valid_batches /= batch_size + n_valid_batches //= batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] - n_test_batches /= batch_size + n_test_batches //= batch_size index = T.lscalar('index') # index to a 
[mini]batch @@ -314,11 +317,11 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate): # Create a function that scans the entire validation set def valid_score(): - return [valid_score_i(i) for i in xrange(n_valid_batches)] + return [valid_score_i(i) for i in range(n_valid_batches)] # Create a function that scans the entire test set def test_score(): - return [test_score_i(i) for i in xrange(n_test_batches)] + return [test_score_i(i) for i in range(n_test_batches)] return train_fn, valid_score, test_score @@ -357,12 +360,12 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15, # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] - n_train_batches /= batch_size + n_train_batches //= batch_size # numpy random generator # start-snippet-3 numpy_rng = numpy.random.RandomState(89677) - print '... building the model' + print('... building the model') # construct the stacked denoising autoencoder class sda = SdA( numpy_rng=numpy_rng, @@ -374,52 +377,51 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15, ######################### # PRETRAINING THE MODEL # ######################### - print '... getting the pretraining functions' + print('... getting the pretraining functions') pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size) - print '... pre-training the model' + print('... pre-training the model') start_time = timeit.default_timer() ## Pre-train layer-wise corruption_levels = [.1, .2, .3] - for i in xrange(sda.n_layers): + for i in range(sda.n_layers): # go through pretraining epochs - for epoch in xrange(pretraining_epochs): + for epoch in range(pretraining_epochs): # go through the training set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr)) - print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), - print numpy.mean(c) + print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c, dtype='float64'))) end_time = timeit.default_timer() - print >> sys.stderr, ('The pretraining code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) / 60.)) + print(('The pretraining code for file ' + + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr) # end-snippet-4 ######################## # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model - print '... getting the finetuning functions' + print('... getting the finetuning functions') train_fn, validate_model, test_model = sda.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr ) - print '... finetunning the model' + print('... finetunning the model') # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2. 
# wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min(n_train_batches, patience / 2) + validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatche before checking the network # on the validation set; in this case we @@ -434,13 +436,13 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15, while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() - this_validation_loss = numpy.mean(validation_losses) + this_validation_loss = numpy.mean(validation_losses, dtype='float64') print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) @@ -461,7 +463,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15, # test it on the test set test_losses = test_model() - test_score = numpy.mean(test_losses) + test_score = numpy.mean(test_losses, dtype='float64') print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, @@ -480,9 +482,9 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15, ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.) ) - print >> sys.stderr, ('The training code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) / 60.)) + print(('The training code for file ' + + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr) if __name__ == '__main__': diff --git a/code/cA.py b/code/cA.py index e26a1ddf..8dc5d8b6 100644 --- a/code/cA.py +++ b/code/cA.py @@ -28,6 +28,9 @@ Systems 19, 2007 """ + +from __future__ import print_function + import os import sys import timeit @@ -205,7 +208,7 @@ def get_cost_updates(self, contraction_level, learning_rate): axis=1) # Compute the jacobian and average over the number of samples/minibatch - self.L_jacob = T.sum(J ** 2) / self.n_batchsize + self.L_jacob = T.sum(J ** 2) // self.n_batchsize # note : L is now a vector, where each element is the # cross-entropy cost of the reconstruction of the @@ -246,7 +249,7 @@ def test_cA(learning_rate=0.01, training_epochs=20, train_set_x, train_set_y = datasets[0] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch @@ -283,22 +286,22 @@ def test_cA(learning_rate=0.01, training_epochs=20, ############ # go through training epochs - for epoch in xrange(training_epochs): + for epoch in range(training_epochs): # go through trainng set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(train_ca(batch_index)) c_array = numpy.vstack(c) - print 'Training epoch %d, reconstruction cost ' % epoch, numpy.mean( - c_array[0]), ' jacobian norm ', numpy.mean(numpy.sqrt(c_array[1])) + print('Training epoch %d, reconstruction cost ' % epoch, numpy.mean( + c_array[0]), ' jacobian norm ', numpy.mean(numpy.sqrt(c_array[1]))) end_time = 
timeit.default_timer() training_time = (end_time - start_time) - print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + - ' ran for %.2fm' % ((training_time) / 60.)) + print(('The code for file ' + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((training_time) / 60.)), file=sys.stderr) image = Image.fromarray(tile_raster_images( X=ca.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py index 64bf5e69..6bbb47a1 100644 --- a/code/convolutional_mlp.py +++ b/code/convolutional_mlp.py @@ -21,6 +21,9 @@ http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf """ + +from __future__ import print_function + import os import sys import timeit @@ -29,8 +32,8 @@ import theano import theano.tensor as T -from theano.tensor.signal import downsample -from theano.tensor.nnet import conv +from theano.tensor.signal import pool +from theano.tensor.nnet import conv2d from logistic_sgd import LogisticRegression, load_data from mlp import HiddenLayer @@ -70,7 +73,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)): # each unit in the lower layer receives a gradient from: # "num output feature maps * filter height * filter width" / # pooling size - fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / + fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) // numpy.prod(poolsize)) # initialize weights with random weights W_bound = numpy.sqrt(6. / (fan_in + fan_out)) @@ -87,15 +90,15 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)): self.b = theano.shared(value=b_values, borrow=True) # convolve input feature maps with filters - conv_out = conv.conv2d( + conv_out = conv2d( input=input, filters=self.W, filter_shape=filter_shape, - image_shape=image_shape + input_shape=image_shape ) - # downsample each feature map individually, using maxpooling - pooled_out = downsample.max_pool_2d( + # pool each feature map individually, using maxpooling + pooled_out = pool.pool_2d( input=conv_out, ds=poolsize, ignore_border=True @@ -145,9 +148,9 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] - n_train_batches /= batch_size - n_valid_batches /= batch_size - n_test_batches /= batch_size + n_train_batches //= batch_size + n_valid_batches //= batch_size + n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch @@ -160,7 +163,7 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, ###################### # BUILD ACTUAL MODEL # ###################### - print '... building the model' + print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) # to a 4D tensor, compatible with our LeNetConvPoolLayer @@ -261,14 +264,14 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, ############### # TRAIN MODEL # ############### - print '... training' + print('... 
training') # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min(n_train_batches, patience / 2) + validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatche before checking the network # on the validation set; in this case we @@ -284,19 +287,19 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: - print 'training @ iter = ', iter + print('training @ iter = ', iter) cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i - in xrange(n_valid_batches)] + in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, @@ -317,7 +320,7 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, # test it on the test set test_losses = [ test_model(i) - for i in xrange(n_test_batches) + for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' @@ -334,9 +337,9 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, print('Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) - print >> sys.stderr, ('The code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) / 60.)) + print(('The code for file ' + + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr) if __name__ == '__main__': evaluate_lenet5() diff --git a/code/dA.py b/code/dA.py index 8ea94e33..7d054b20 100644 --- a/code/dA.py +++ b/code/dA.py @@ -30,6 +30,8 @@ """ +from __future__ import print_function + import os import sys import timeit @@ -38,7 +40,7 @@ import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from logistic_sgd import load_data from utils import tile_raster_images @@ -193,7 +195,7 @@ def __init__( def get_corrupted_input(self, input, corruption_level): """This function keeps ``1-corruption_level`` entries of the inputs the - same and zero-out randomly selected subset of size ``coruption_level`` + same and zero-out randomly selected subset of size ``corruption_level`` Note : first argument of theano.rng.binomial is the shape(size) of random numbers that it should produce second argument is the number of trials @@ -280,7 +282,7 @@ def test_dA(learning_rate=0.1, training_epochs=15, train_set_x, train_set_y = datasets[0] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size # start-snippet-2 # allocate symbolic variables for the data @@ -328,21 +330,21 @@ def test_dA(learning_rate=0.1, training_epochs=15, ############ # go through training epochs - for epoch in 
xrange(training_epochs): + for epoch in range(training_epochs): # go through trainng set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(train_da(batch_index)) - print 'Training epoch %d, cost ' % epoch, numpy.mean(c) + print('Training epoch %d, cost ' % epoch, numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() training_time = (end_time - start_time) - print >> sys.stderr, ('The no corruption code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((training_time) / 60.)) + print(('The no corruption code for file ' + + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((training_time) / 60.)), file=sys.stderr) image = Image.fromarray( tile_raster_images(X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), @@ -386,21 +388,21 @@ def test_dA(learning_rate=0.1, training_epochs=15, ############ # go through training epochs - for epoch in xrange(training_epochs): + for epoch in range(training_epochs): # go through trainng set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(train_da(batch_index)) - print 'Training epoch %d, cost ' % epoch, numpy.mean(c) + print('Training epoch %d, cost ' % epoch, numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() training_time = (end_time - start_time) - print >> sys.stderr, ('The 30% corruption code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % (training_time / 60.)) + print(('The 30% corruption code for file ' + + os.path.split(__file__)[1] + + ' ran for %.2fm' % (training_time / 60.)), file=sys.stderr) # end-snippet-3 # start-snippet-4 diff --git a/code/hmc/hmc.py b/code/hmc/hmc.py index b9c872f0..cf4d20a1 100644 --- a/code/hmc/hmc.py +++ b/code/hmc/hmc.py @@ -7,6 +7,7 @@ from theano import function, shared from theano import tensor as TT import theano +import theano.sandbox.rng_mrg sharedX = (lambda X, name: shared(numpy.asarray(X, dtype=theano.config.floatX), name=name)) @@ -128,14 +129,14 @@ def leapfrog(pos, vel, step): rval2: dictionary Dictionary of updates for the Scan Op """ - # from pos(t) and vel(t-stepsize/2), compute vel(t+stepsize/2) + # from pos(t) and vel(t-stepsize//2), compute vel(t+stepsize//2) dE_dpos = TT.grad(energy_fn(pos).sum(), pos) new_vel = vel - step * dE_dpos - # from vel(t+stepsize/2) compute pos(t+stepsize) + # from vel(t+stepsize//2) compute pos(t+stepsize) new_pos = pos + step * new_vel return [new_pos, new_vel], {} - # compute velocity at time-step: t + stepsize/2 + # compute velocity at time-step: t + stepsize//2 initial_energy = energy_fn(initial_pos) dE_dpos = TT.grad(initial_energy.sum(), initial_pos) vel_half_step = initial_vel - 0.5 * stepsize * dE_dpos @@ -275,14 +276,14 @@ def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept, """ - ## POSITION UPDATES ## + # POSITION UPDATES # # broadcast `accept` scalar to tensor with the same dimensions as # final_pos. accept_matrix = accept.dimshuffle(0, *(('x',) * (final_pos.ndim - 1))) # if accept is True, update to `final_pos` else stay put new_positions = TT.switch(accept_matrix, final_pos, positions) # end-snippet-5 start-snippet-7 - ## STEPSIZE UPDATES ## + # STEPSIZE UPDATES # # if acceptance rate is too low, our sampler is too "noisy" and we reduce # the stepsize. If it is too high, our sampler is too conservative, we can # get away with a larger stepsize (resulting in better mixing). 
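
The comments above describe the adaptive part of the HMC sampler: shrink the stepsize when the acceptance rate falls below the target, grow it otherwise, and track the rate with an exponential moving average. A rough NumPy sketch of that update rule, with illustrative constants rather than the ones hmc.py actually uses:

    import numpy as np

    def update_stepsize(stepsize, avg_acceptance_rate, accept,
                        target_rate=0.9, inc=1.02, dec=0.98,
                        stepsize_min=0.001, stepsize_max=0.25, avg_decay=0.9):
        # grow the stepsize when we accept too often, shrink it otherwise
        factor = inc if avg_acceptance_rate > target_rate else dec
        new_stepsize = np.clip(stepsize * factor, stepsize_min, stepsize_max)
        # exponential moving average of the per-chain accept indicator
        new_rate = avg_decay * avg_acceptance_rate + (1. - avg_decay) * np.mean(accept)
        return new_stepsize, new_rate
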
@@ -292,7 +293,7 @@ def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept, new_stepsize = TT.clip(_new_stepsize, stepsize_min, stepsize_max) # end-snippet-7 start-snippet-6 - ## ACCEPT RATE UPDATES ## + # ACCEPT RATE UPDATES # # perform exponential moving average mean_dtype = theano.scalar.upcast(accept.dtype, avg_acceptance_rate.dtype) new_acceptance_rate = TT.add( @@ -358,7 +359,7 @@ def new_from_shared_positions( stepsize = sharedX(initial_stepsize, 'hmc_stepsize') avg_acceptance_rate = sharedX(target_acceptance_rate, 'avg_acceptance_rate') - s_rng = TT.shared_randomstreams.RandomStreams(seed) + s_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(seed) # define graph for an `n_steps` HMC simulation accept, final_pos = hmc_move( diff --git a/code/hmc/test_hmc.py b/code/hmc/test_hmc.py index 0a70190a..42dbc3a7 100644 --- a/code/hmc/test_hmc.py +++ b/code/hmc/test_hmc.py @@ -1,8 +1,15 @@ + +from __future__ import print_function + import numpy -from scipy import linalg import theano -from hmc import HMC_sampler +try: + from hmc import HMC_sampler +except ImportError as e: + # python 3 compatibility + # http://stackoverflow.com/questions/3073259/python-nose-import-error + from hmc.hmc import HMC_sampler def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10): @@ -15,7 +22,7 @@ def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10): cov = numpy.array(rng.rand(dim, dim), dtype=theano.config.floatX) cov = (cov + cov.T) / 2. cov[numpy.arange(dim), numpy.arange(dim)] = 1.0 - cov_inv = linalg.inv(cov) + cov_inv = numpy.linalg.inv(cov) # Define energy function for a multi-variate Gaussian def gaussian_energy(x): @@ -31,24 +38,24 @@ def gaussian_energy(x): initial_stepsize=1e-3, stepsize_max=0.5) # Start with a burn-in process - garbage = [sampler.draw() for r in xrange(burnin)] # burn-in Draw + garbage = [sampler.draw() for r in range(burnin)] # burn-in Draw # `n_samples`: result is a 3D tensor of dim [n_samples, batchsize, # dim] - _samples = numpy.asarray([sampler.draw() for r in xrange(n_samples)]) + _samples = numpy.asarray([sampler.draw() for r in range(n_samples)]) # Flatten to [n_samples * batchsize, dim] samples = _samples.T.reshape(dim, -1).T - print '****** TARGET VALUES ******' - print 'target mean:', mu - print 'target cov:\n', cov + print('****** TARGET VALUES ******') + print('target mean:', mu) + print('target cov:\n', cov) - print '****** EMPIRICAL MEAN/COV USING HMC ******' - print 'empirical mean: ', samples.mean(axis=0) - print 'empirical_cov:\n', numpy.cov(samples.T) + print('****** EMPIRICAL MEAN/COV USING HMC ******') + print('empirical mean: ', samples.mean(axis=0)) + print('empirical_cov:\n', numpy.cov(samples.T)) - print '****** HMC INTERNALS ******' - print 'final stepsize', sampler.stepsize.get_value() - print 'final acceptance_rate', sampler.avg_acceptance_rate.get_value() + print('****** HMC INTERNALS ******') + print('final stepsize', sampler.stepsize.get_value()) + print('final acceptance_rate', sampler.avg_acceptance_rate.get_value()) return sampler diff --git a/code/imdb.py b/code/imdb.py index 21e0e376..341be231 100644 --- a/code/imdb.py +++ b/code/imdb.py @@ -1,4 +1,7 @@ -import cPickle +from __future__ import print_function +from six.moves import xrange +import six.moves.cPickle as pickle + import gzip import os @@ -68,9 +71,11 @@ def get_dataset_file(dataset, default_dataset, origin): dataset = new_path if (not os.path.isfile(dataset)) and data_file == default_dataset: - import urllib - print 'Downloading data from %s' % 
origin - urllib.urlretrieve(origin, dataset) + from six.moves import urllib + print('Downloading data from %s' % origin) + urllib.request.urlretrieve(origin, dataset) + + return dataset @@ -110,8 +115,8 @@ def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen=None, else: f = open(path, 'rb') - train_set = cPickle.load(f) - test_set = cPickle.load(f) + train_set = pickle.load(f) + test_set = pickle.load(f) f.close() if maxlen: new_train_set_x = [] diff --git a/code/imdb_preprocess.py b/code/imdb_preprocess.py index c20b37b6..62ebb556 100644 --- a/code/imdb_preprocess.py +++ b/code/imdb_preprocess.py @@ -8,7 +8,7 @@ 3) Then run this script. """ - +from __future__ import print_function dataset_path='/Tmp/bastienf/aclImdb/' import numpy @@ -27,12 +27,12 @@ def tokenize(sentences): - print 'Tokenizing..', + print('Tokenizing..', end=' ') text = "\n".join(sentences) tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE) tok_text, _ = tokenizer.communicate(text) toks = tok_text.split('\n')[:-1] - print 'Done' + print('Done') return toks @@ -52,7 +52,7 @@ def build_dict(path): sentences = tokenize(sentences) - print 'Building dictionary..', + print('Building dictionary..', end=' ') wordcount = dict() for ss in sentences: words = ss.strip().lower().split() @@ -72,7 +72,7 @@ def build_dict(path): for idx, ss in enumerate(sorted_idx): worddict[keys[ss]] = idx+2 # leave 0 and 1 (UNK) - print numpy.sum(counts), ' total words ', len(keys), ' unique words' + print(numpy.sum(counts), ' total words ', len(keys), ' unique words') return worddict diff --git a/code/logistic_cg.py b/code/logistic_cg.py index db9822ef..c2970d51 100644 --- a/code/logistic_cg.py +++ b/code/logistic_cg.py @@ -33,6 +33,7 @@ """ +from __future__ import print_function, division __docformat__ = 'restructedtext en' @@ -165,9 +166,9 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): batch_size = 600 # size of the minibatch - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size - n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size - n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size + n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size + n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size n_in = 28 * 28 # number of input units n_out = 10 # number of output units @@ -175,7 +176,7 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): ###################### # BUILD ACTUAL MODEL # ###################### - print '... building the model' + print('... 
building the model') # allocate symbolic variables for the data minibatch_offset = T.lscalar() # offset to the start of a [mini]batch @@ -239,7 +240,7 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): def train_fn(theta_value): classifier.theta.set_value(theta_value, borrow=True) train_losses = [batch_cost(i * batch_size) - for i in xrange(n_train_batches)] + for i in range(n_train_batches)] return numpy.mean(train_losses) # creates a function that computes the average gradient of cost with @@ -247,7 +248,7 @@ def train_fn(theta_value): def train_fn_grad(theta_value): classifier.theta.set_value(theta_value, borrow=True) grad = batch_grad(0) - for i in xrange(1, n_train_batches): + for i in range(1, n_train_batches): grad += batch_grad(i * batch_size) return grad / n_train_batches @@ -258,9 +259,9 @@ def callback(theta_value): classifier.theta.set_value(theta_value, borrow=True) #compute the validation loss validation_losses = [validate_model(i * batch_size) - for i in xrange(n_valid_batches)] + for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) - print('validation error %f %%' % (this_validation_loss * 100.,)) + print(('validation error %f %%' % (this_validation_loss * 100.,))) # check if it is better then best validation score got until now if this_validation_loss < validation_scores[0]: @@ -268,7 +269,7 @@ def callback(theta_value): # testing dataset validation_scores[0] = this_validation_loss test_losses = [test_model(i * batch_size) - for i in xrange(n_test_batches)] + for i in range(n_test_batches)] validation_scores[1] = numpy.mean(test_losses) ############### @@ -288,17 +289,13 @@ def callback(theta_value): maxiter=n_epochs ) end_time = timeit.default_timer() - print( - ( - 'Optimization complete with best validation score of %f %%, with ' - 'test performance %f %%' - ) - % (validation_scores[0] * 100., validation_scores[1] * 100.) + print(('Optimization complete with best validation score of %f %%, with ' + 'test performance %f %%' + ) % (validation_scores[0] * 100., validation_scores[1] * 100.) ) - print >> sys.stderr, ('The code for file ' + - os.path.split(__file__)[1] + - ' ran for %.1fs' % ((end_time - start_time))) + print('The code for file ' + os.path.split(__file__)[1] + + ' ran for %.1fs' % (end_time - start_time), file=sys.stderr) if __name__ == '__main__': diff --git a/code/logistic_sgd.py b/code/logistic_sgd.py index c944f8b3..9f4427e7 100644 --- a/code/logistic_sgd.py +++ b/code/logistic_sgd.py @@ -32,9 +32,12 @@ Christopher M. Bishop, section 4.3.2 """ + +from __future__ import print_function + __docformat__ = 'restructedtext en' -import cPickle +import six.moves.cPickle as pickle import gzip import os import sys @@ -194,25 +197,27 @@ def load_data(dataset): dataset = new_path if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz': - import urllib + from six.moves import urllib origin = ( 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz' ) - print 'Downloading data from %s' % origin - urllib.urlretrieve(origin, dataset) + print('Downloading data from %s' % origin) + urllib.request.urlretrieve(origin, dataset) - print '... loading data' + print('... loading data') # Load the dataset - f = gzip.open(dataset, 'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - #train_set, valid_set, test_set format: tuple(input, target) - #input is an numpy.ndarray of 2 dimensions (a matrix) - #witch row's correspond to an example. 
target is a - #numpy.ndarray of 1 dimensions (vector)) that have the same length as - #the number of rows in the input. It should give the target - #target to the example with the same index in the input. + with gzip.open(dataset, 'rb') as f: + try: + train_set, valid_set, test_set = pickle.load(f, encoding='latin1') + except: + train_set, valid_set, test_set = pickle.load(f) + # train_set, valid_set, test_set format: tuple(input, target) + # input is a numpy.ndarray of 2 dimensions (a matrix) + # where each row corresponds to an example. target is a + # numpy.ndarray of 1 dimension (vector) that has the same length as + # the number of rows in the input. It should give the target + # to the example with the same index in the input. def shared_dataset(data_xy, borrow=True): """ Function that loads the dataset into shared variables @@ -276,14 +281,14 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size - n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size - n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size + n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size + n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size ###################### # BUILD ACTUAL MODEL # ###################### - print '... building the model' + print('... building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch @@ -348,14 +353,14 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, ############### # TRAIN MODEL # ############### - print '... training the model' + print('... 
training the model') # early-stopping parameters patience = 5000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min(n_train_batches, patience / 2) + validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatche before checking the network # on the validation set; in this case we @@ -369,7 +374,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # iteration number @@ -378,7 +383,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) - for i in xrange(n_valid_batches)] + for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print( @@ -402,7 +407,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, # test it on the test set test_losses = [test_model(i) - for i in xrange(n_test_batches)] + for i in range(n_test_batches)] test_score = numpy.mean(test_losses) print( @@ -419,8 +424,8 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, ) # save the best model - with open('best_model.pkl', 'w') as f: - cPickle.dump(classifier, f) + with open('best_model.pkl', 'wb') as f: + pickle.dump(classifier, f) if patience <= iter: done_looping = True @@ -434,11 +439,11 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, ) % (best_validation_loss * 100., test_score * 100.) ) - print 'The code run for %d epochs, with %f epochs/sec' % ( - epoch, 1. * epoch / (end_time - start_time)) - print >> sys.stderr, ('The code for file ' + - os.path.split(__file__)[1] + - ' ran for %.1fs' % ((end_time - start_time))) + print('The code run for %d epochs, with %f epochs/sec' % ( + epoch, 1. * epoch / (end_time - start_time))) + print(('The code for file ' + + os.path.split(__file__)[1] + + ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr) def predict(): @@ -448,7 +453,7 @@ def predict(): """ # load the saved model - classifier = cPickle.load(open('best_model.pkl')) + classifier = pickle.load(open('best_model.pkl')) # compile a predictor function predict_model = theano.function( @@ -462,8 +467,8 @@ def predict(): test_set_x = test_set_x.get_value() predicted_values = predict_model(test_set_x[:10]) - print ("Predicted values for the first 10 examples in test set:") - print predicted_values + print("Predicted values for the first 10 examples in test set:") + print(predicted_values) if __name__ == '__main__': diff --git a/code/lstm.py b/code/lstm.py index b64970fb..a3010a9f 100644 --- a/code/lstm.py +++ b/code/lstm.py @@ -1,8 +1,11 @@ ''' Build a tweet sentiment analyzer ''' + +from __future__ import print_function +import six.moves.cPickle as pickle + from collections import OrderedDict -import cPickle as pkl import sys import time @@ -56,7 +59,7 @@ def zipp(params, tparams): """ When we reload the model. Needed for the GPU stuff. """ - for kk, vv in params.iteritems(): + for kk, vv in params.items(): tparams[kk].set_value(vv) @@ -65,7 +68,7 @@ def unzip(zipped): When we pickle the model. Needed for the GPU stuff. 
""" new_params = OrderedDict() - for kk, vv in zipped.iteritems(): + for kk, vv in zipped.items(): new_params[kk] = vv.get_value() return new_params @@ -106,7 +109,7 @@ def init_params(options): def load_params(path, params): pp = numpy.load(path) - for kk, vv in params.iteritems(): + for kk, vv in params.items(): if kk not in pp: raise Warning('%s is not in the archive' % kk) params[kk] = pp[kk] @@ -116,7 +119,7 @@ def load_params(path, params): def init_tparams(params): tparams = OrderedDict() - for kk, pp in params.iteritems(): + for kk, pp in params.items(): tparams[kk] = theano.shared(params[kk], name=kk) return tparams @@ -217,7 +220,7 @@ def sgd(lr, tparams, grads, x, mask, y, cost): # New set of shared variable that will contain the gradient # for a mini-batch. gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] gsup = [(gs, g) for gs, g in zip(gshared, grads)] # Function that computes gradients for a mini-batch, but do not @@ -266,13 +269,13 @@ def adadelta(lr, tparams, grads, x, mask, y, cost): zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) @@ -329,13 +332,13 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost): zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] running_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] @@ -348,7 +351,7 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost): updir = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_updir' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, running_grads2)] @@ -418,7 +421,7 @@ def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False): n_done += len(valid_index) if verbose: - print '%d/%d samples classified' % (n_done, n_samples) + print('%d/%d samples classified' % (n_done, n_samples)) return probs @@ -470,11 +473,11 @@ def train_lstm( # Model options model_options = locals().copy() - print "model options", model_options + print("model options", model_options) load_data, prepare_data = get_dataset(dataset) - print 'Loading data' + print('Loading data') train, valid, test = load_data(n_words=n_words, valid_portion=0.05, maxlen=maxlen) if test_size > 0: @@ -490,7 +493,7 @@ def train_lstm( model_options['ydim'] = ydim - print 'Building model' + print('Building model') # This create the initial parameters as numpy ndarrays. 
# Dict name (string) -> numpy ndarray params = init_params(model_options) @@ -516,36 +519,36 @@ def train_lstm( f_cost = theano.function([x, mask, y], cost, name='f_cost') - grads = tensor.grad(cost, wrt=tparams.values()) + grads = tensor.grad(cost, wrt=list(tparams.values())) f_grad = theano.function([x, mask, y], grads, name='f_grad') lr = tensor.scalar(name='lr') f_grad_shared, f_update = optimizer(lr, tparams, grads, x, mask, y, cost) - print 'Optimization' + print('Optimization') kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size) kf_test = get_minibatches_idx(len(test[0]), valid_batch_size) - print "%d train examples" % len(train[0]) - print "%d valid examples" % len(valid[0]) - print "%d test examples" % len(test[0]) + print("%d train examples" % len(train[0])) + print("%d valid examples" % len(valid[0])) + print("%d test examples" % len(test[0])) history_errs = [] best_p = None bad_count = 0 if validFreq == -1: - validFreq = len(train[0]) / batch_size + validFreq = len(train[0]) // batch_size if saveFreq == -1: - saveFreq = len(train[0]) / batch_size + saveFreq = len(train[0]) // batch_size uidx = 0 # the number of update done estop = False # early stop start_time = time.time() try: - for eidx in xrange(max_epochs): + for eidx in range(max_epochs): n_samples = 0 # Get new shuffled index for the training set. @@ -569,22 +572,22 @@ def train_lstm( f_update(lrate) if numpy.isnan(cost) or numpy.isinf(cost): - print 'NaN detected' + print('bad cost detected: ', cost) return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: - print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost + print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost) if saveto and numpy.mod(uidx, saveFreq) == 0: - print 'Saving...', + print('Saving...') if best_p is not None: params = best_p else: params = unzip(tparams) numpy.savez(saveto, history_errs=history_errs, **params) - pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) - print 'Done' + pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) + print('Done') if numpy.mod(uidx, validFreq) == 0: use_noise.set_value(0.) @@ -595,14 +598,14 @@ def train_lstm( history_errs.append([valid_err, test_err]) - if (uidx == 0 or + if (best_p is None or valid_err <= numpy.array(history_errs)[:, 0].min()): best_p = unzip(tparams) bad_counter = 0 - print ('Train ', train_err, 'Valid ', valid_err, + print('Train ', train_err, 'Valid ', valid_err, 'Test ', test_err) if (len(history_errs) > patience and @@ -610,17 +613,17 @@ def train_lstm( 0].min()): bad_counter += 1 if bad_counter > patience: - print 'Early Stop!' + print('Early Stop!') estop = True break - print 'Seen %d samples' % n_samples + print('Seen %d samples' % n_samples) if estop: break except KeyboardInterrupt: - print "Training interupted" + print("Training interupted") end_time = time.time() if best_p is not None: @@ -634,15 +637,15 @@ def train_lstm( valid_err = pred_error(f_pred, prepare_data, valid, kf_valid) test_err = pred_error(f_pred, prepare_data, test, kf_test) - print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err + print( 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err ) if saveto: numpy.savez(saveto, train_err=train_err, valid_err=valid_err, test_err=test_err, history_errs=history_errs, **best_p) - print 'The code run for %d epochs, with %f sec/epochs' % ( - (eidx + 1), (end_time - start_time) / (1. 
* (eidx + 1))) - print >> sys.stderr, ('Training took %.1fs' % - (end_time - start_time)) + print('The code run for %d epochs, with %f sec/epochs' % ( + (eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))) + print( ('Training took %.1fs' % + (end_time - start_time)), file=sys.stderr) return train_err, valid_err, test_err diff --git a/code/mlp.py b/code/mlp.py index 18f34e7c..e865bc8f 100644 --- a/code/mlp.py +++ b/code/mlp.py @@ -18,6 +18,9 @@ Christopher M. Bishop, section 5 """ + +from __future__ import print_function + __docformat__ = 'restructedtext en' @@ -231,14 +234,14 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size - n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size - n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size + n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size + n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size ###################### # BUILD ACTUAL MODEL # ###################### - print '... building the model' + print('... building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch @@ -289,7 +292,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, ) # start-snippet-5 - # compute the gradient of cost with respect to theta (sotred in params) + # compute the gradient of cost with respect to theta (sorted in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] @@ -322,7 +325,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, ############### # TRAIN MODEL # ############### - print '... training' + print('... 
training') # early-stopping parameters patience = 10000 # look as this many examples regardless @@ -330,7 +333,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min(n_train_batches, patience / 2) + validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatche before checking the network # on the validation set; in this case we @@ -346,7 +349,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # iteration number @@ -355,7 +358,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i - in xrange(n_valid_batches)] + in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print( @@ -382,7 +385,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, # test it on the test set test_losses = [test_model(i) for i - in xrange(n_test_batches)] + in range(n_test_batches)] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' @@ -398,9 +401,9 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) - print >> sys.stderr, ('The code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) / 60.)) + print(('The code for file ' + + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr) if __name__ == '__main__': diff --git a/code/rbm.py b/code/rbm.py index 1ba4c86d..6e4f1012 100644 --- a/code/rbm.py +++ b/code/rbm.py @@ -4,6 +4,9 @@ contain hidden variables. Restricted Boltzmann Machines further restrict BMs to those without visible-visible and hidden-hidden connections. 
""" + +from __future__ import print_function + import timeit try: @@ -17,7 +20,7 @@ import theano.tensor as T import os -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from utils import tile_raster_images from logistic_sgd import load_data @@ -254,7 +257,8 @@ def get_cost_updates(self, lr=0.1, persistent=None, k=1): # chain_start is the initial state corresponding to the # 6th output outputs_info=[None, None, None, None, None, chain_start], - n_steps=k + n_steps=k, + name="gibbs_hvh" ) # start-snippet-3 # determine gradients on RBM parameters @@ -384,7 +388,7 @@ def test_rbm(learning_rate=0.1, training_epochs=15, test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch @@ -431,14 +435,14 @@ def test_rbm(learning_rate=0.1, training_epochs=15, start_time = timeit.default_timer() # go through training epochs - for epoch in xrange(training_epochs): + for epoch in range(training_epochs): # go through the training set mean_cost = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): mean_cost += [train_rbm(batch_index)] - print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost) + print('Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)) # Plot filters after each training epoch plotting_start = timeit.default_timer() @@ -493,7 +497,8 @@ def test_rbm(learning_rate=0.1, training_epochs=15, ) = theano.scan( rbm.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], - n_steps=plot_every + n_steps=plot_every, + name="gibbs_vhv" ) # add to updates the shared variable that takes care of our persistent @@ -518,11 +523,11 @@ def test_rbm(learning_rate=0.1, training_epochs=15, (29 * n_samples + 1, 29 * n_chains - 1), dtype='uint8' ) - for idx in xrange(n_samples): + for idx in range(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() - print ' ... plotting sample ', idx + print(' ... plotting sample %d' % idx) image_data[29 * idx:29 * idx + 28, :] = tile_raster_images( X=vis_mf, img_shape=(28, 28), diff --git a/code/rnnrbm.py b/code/rnnrbm.py index e1f40b5a..900ffdc6 100644 --- a/code/rnnrbm.py +++ b/code/rnnrbm.py @@ -3,6 +3,8 @@ # RNN-RBM deep learning tutorial # More information at http://deeplearning.net/tutorial/rnnrbm.html +from __future__ import print_function + import glob import os import sys @@ -11,15 +13,13 @@ try: import pylab except ImportError: - print ( - "pylab isn't available. If you use its functionality, it will crash." - ) - print "It can be installed with 'pip install -q Pillow'" + print ("pylab isn't available. If you use its functionality, it will crash.") + print("It can be installed with 'pip install -q Pillow'") from midi.utils import midiread, midiwrite import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams #Don't use a python long as this don't work on 32 bits computers. 
numpy.random.seed(0xbeef) @@ -248,21 +248,21 @@ def train(self, files, batch_size=100, num_epochs=200): for f in files] try: - for epoch in xrange(num_epochs): + for epoch in range(num_epochs): numpy.random.shuffle(dataset) costs = [] for s, sequence in enumerate(dataset): - for i in xrange(0, len(sequence), batch_size): + for i in range(0, len(sequence), batch_size): cost = self.train_function(sequence[i:i + batch_size]) costs.append(cost) - print 'Epoch %i/%i' % (epoch + 1, num_epochs), - print numpy.mean(costs) + print('Epoch %i/%i' % (epoch + 1, num_epochs)) + print(numpy.mean(costs)) sys.stdout.flush() except KeyboardInterrupt: - print 'Interrupted by user.' + print('Interrupted by user.') def generate(self, filename, show=True): '''Generate a sample sequence, plot the resulting piano-roll and save diff --git a/code/rnnslu.py b/code/rnnslu.py index fad14db5..3c620178 100644 --- a/code/rnnslu.py +++ b/code/rnnslu.py @@ -1,6 +1,9 @@ + +from __future__ import print_function +import six.moves.cPickle as pickle + from collections import OrderedDict import copy -import cPickle import gzip import os import urllib @@ -66,7 +69,10 @@ def atisfold(fold): assert fold in range(5) filename = os.path.join(PREFIX, 'atis.fold'+str(fold)+'.pkl.gz') f = gzip.open(filename, 'rb') - train_set, valid_set, test_set, dicts = cPickle.load(f) + try: + train_set, valid_set, test_set, dicts = pickle.load(f, encoding='latin1') + except: + train_set, valid_set, test_set, dicts = pickle.load(f) return train_set, valid_set, test_set, dicts @@ -107,7 +113,7 @@ def download(origin, destination): download the corresponding atis file from http://www-etud.iro.umontreal.ca/~mesnilgr/atis/ ''' - print 'Downloading data from %s' % origin + print('Downloading data from %s' % origin) urllib.urlretrieve(origin, destination) @@ -125,12 +131,17 @@ def get_perf(filename, folder): stdin=subprocess.PIPE, stdout=subprocess.PIPE) - stdout, _ = proc.communicate(''.join(open(filename).readlines())) + stdout, _ = proc.communicate(''.join(open(filename).readlines()).encode('utf-8')) + stdout = stdout.decode('utf-8') + out = None + for line in stdout.split('\n'): if 'accuracy' in line: out = line.split() break - + # To help debug + if out is None: + print(stdout.split('\n')) precision = float(out[6][:-2]) recall = float(out[8][:-2]) f1score = float(out[10]) @@ -234,7 +245,7 @@ def recurrence(x_t, h_tm1): def train(self, x, y, window_size, learning_rate): cwords = contextwin(x, window_size) - words = map(lambda x: numpy.asarray(x).astype('int32'), cwords) + words = list(map(lambda x: numpy.asarray(x).astype('int32'), cwords)) labels = y self.sentence_train(words, labels, learning_rate) @@ -271,7 +282,7 @@ def main(param=None): 'nepochs': 60, # 60 is recommended 'savemodel': False} - print param + print(param) folder_name = os.path.basename(__file__).split('.')[0] folder = os.path.join(os.path.dirname(__file__), folder_name) @@ -281,17 +292,15 @@ def main(param=None): # load the dataset train_set, valid_set, test_set, dic = atisfold(param['fold']) - idx2label = dict((k, v) for v, k in dic['labels2idx'].iteritems()) - idx2word = dict((k, v) for v, k in dic['words2idx'].iteritems()) + idx2label = dict((k, v) for v, k in dic['labels2idx'].items()) + idx2word = dict((k, v) for v, k in dic['words2idx'].items()) train_lex, train_ne, train_y = train_set valid_lex, valid_ne, valid_y = valid_set test_lex, test_ne, test_y = test_set - vocsize = len(set(reduce(lambda x, y: list(x) + list(y), - train_lex + valid_lex + test_lex))) - nclasses = 
len(set(reduce(lambda x, y: list(x)+list(y), - train_y + test_y + valid_y))) + vocsize = len(dic['words2idx']) + nclasses = len(dic['labels2idx']) nsentences = len(train_lex) groundtruth_valid = [map(lambda x: idx2label[x], y) for y in valid_y] @@ -312,7 +321,7 @@ def main(param=None): # train with early stopping on validation set best_f1 = -numpy.inf param['clr'] = param['lr'] - for e in xrange(param['nepochs']): + for e in range(param['nepochs']): # shuffle shuffle([train_lex, train_ne, train_y], param['seed']) @@ -322,9 +331,9 @@ def main(param=None): for i, (x, y) in enumerate(zip(train_lex, train_y)): rnn.train(x, y, param['win'], param['clr']) - print '[learning] epoch %i >> %2.2f%%' % ( - e, (i + 1) * 100. / nsentences), - print 'completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic), + print('[learning] epoch %i >> %2.2f%%' % ( + e, (i + 1) * 100. / nsentences), end=' ') + print('completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic), end='') sys.stdout.flush() # evaluation // back into the real world : idx -> words @@ -373,7 +382,7 @@ def main(param=None): folder + '/best.valid.txt']) else: if param['verbose']: - print '' + print('') # learning rate decay if no improvement in 10 epochs if param['decay'] and abs(param['be']-param['ce']) >= 10: @@ -384,9 +393,9 @@ def main(param=None): break print('BEST RESULT: epoch', param['be'], - 'valid F1', param['vf1'], - 'best test F1', param['tf1'], - 'with the model', folder) + 'valid F1', param['vf1'], + 'best test F1', param['tf1'], + 'with the model', folder) if __name__ == '__main__': diff --git a/code/test.py b/code/test.py index cf226b42..8768d8c1 100644 --- a/code/test.py +++ b/code/test.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import, print_function, division import sys import numpy @@ -15,10 +16,6 @@ import lstm -def test_rnnslu(): - rnnslu.main() - - def test_logistic_sgd(): logistic_sgd.sgd_optimization_mnist(n_epochs=10) @@ -62,6 +59,26 @@ def test_rnnrbm(): rnnrbm.test_rnnrbm(num_epochs=1) +def test_rnnslu(): + s = {'fold': 3, + # 5 folds 0,1,2,3,4 + 'data': 'atis', + 'lr': 0.0970806646812754, + 'verbose': 1, + 'decay': True, + # decay on the learning rate if improvement stops + 'win': 7, + # number of words in the context window + 'nhidden': 200, + # number of hidden units + 'seed': 345, + 'emb_dimension': 50, + # dimension of word embedding + 'nepochs': 1, # CHANGED + 'savemodel': False} + rnnslu.main(s) + + def test_lstm(): lstm.train_lstm(max_epochs=1, test_size=1000, saveto='') @@ -81,52 +98,16 @@ def speed(): do_gpu = True algo_executed = [s for idx, s in enumerate(algo) if to_exec[idx]] - #Timming expected are from the buildbot that have an i7-920 @ - # 2.67GHz with hyperthread enabled for the cpu, 12G of ram. An GeForce GTX - # 580 for the GPU. OS=Fedora 14, gcc=4.5.1, python/BLAS from EPD - # 7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread. - - expected_times_64 = numpy.asarray([9.8, 22.0, 76.1, 73.7, 116.4, - 346.9, 355.0, 558.1, 130.4, 50.8, 113.6]) - expected_times_32 = numpy.asarray([8.1, 17.9, 42.5, 66.5, 71, - 191.2, 199.0, 432.8, 119.5, 36.9, 78.0]) - - # Number with just 1 decimal are new value that are faster with - # the Theano version 0.5rc2 Other number are older. They are not - # updated, as we where faster in the past! - # TODO: find why and fix this! 
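The try/except added around pickle.load() in atisfold() above is the usual pattern for reading pickles written by Python 2 under either interpreter. A self-contained sketch of the same idea, with the bare except narrowed to TypeError and a hypothetical file name:

.. code-block:: python

    from __future__ import print_function

    import gzip
    import six.moves.cPickle as pickle

    def load_python2_pickle(filename):
        """Load a gzipped pickle that may have been written by Python 2."""
        with gzip.open(filename, 'rb') as f:
            try:
                # Python 3: byte strings pickled by Python 2 need an explicit encoding
                return pickle.load(f, encoding='latin1')
            except TypeError:
                # Python 2: load() does not accept an `encoding` argument
                return pickle.load(f)

    # Example (any of the downloaded ATIS folds would work here):
    # train_set, valid_set, test_set, dicts = load_python2_pickle('atis.fold3.pkl.gz')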
- -# Here is the value for the buildbot on February 3th 2012 with a GTX 285 -# sgd, cg mlp conv da -# sda dbn rbm -# gpu times[3.72957802, 9.94316864, 29.1772666, 9.13857198, 25.91144657, -# 18.30802011, 53.38651466, 285.41386175] -# expected [3.076634879, 7.555234910, 18.99226785, 9.58915591, 24.130070450, -# 24.77524018, 92.66246653, 322.340329170] -# sgd, cg mlp conv da -# sda dbn rbm -#expected/get [0.82492841, 0.75984178, 0.65092691, 1.04930573, 0.93125138 -# 1.35324519 1.7356905 1.12937868] - - expected_times_gpu = numpy.asarray([3.0, 7.55523491, 18.99226785, - 5.8, 20.0, - 11.8, 18.2, 280.1, 132.8, 38.8, 10.5]) - expected_times_64 = [s for idx, s in enumerate(expected_times_64) - if to_exec[idx]] - expected_times_32 = [s for idx, s in enumerate(expected_times_32) - if to_exec[idx]] - expected_times_gpu = [s for idx, s in enumerate(expected_times_gpu) - if to_exec[idx]] - + def time_test(m, l, idx, f, **kwargs): if not to_exec[idx]: return - print algo[idx] + print(algo[idx]) ts = m.call_time try: f(**kwargs) - except Exception, e: - print >> sys.stderr, 'test', algo[idx], 'FAILED', e + except Exception as e: + print('test', algo[idx], 'FAILED', e, file=sys.stderr) l.append(numpy.nan) return te = m.call_time @@ -171,114 +152,88 @@ def do_tests(): saveto='') return numpy.asarray(l) + # Initialize test count and results dictionnary + test_total = 0 + times_dic = {} + #test in float64 in FAST_RUN mode on the cpu import theano if do_float64: theano.config.floatX = 'float64' theano.config.mode = 'FAST_RUN' float64_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) + times_dic['float64'] = float64_times + test_total += numpy.size(float64_times) + print(algo_executed, file=sys.stderr) + print('float64 times', float64_times, file=sys.stderr) #test in float32 in FAST_RUN mode on the cpu theano.config.floatX = 'float32' if do_float32: float32_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) + times_dic['float32'] = float32_times + test_total += numpy.size(float32_times) + print(algo_executed, file=sys.stderr) + print('float32 times', float32_times, file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - print >> sys.stderr - print >> sys.stderr, ('Duplicate the timing to have everything ' - 'in one place') - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) - - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - print >> sys.stderr, 'expected float64/float32', ( - expected_times_64 / float32_times) + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) + print(file=sys.stderr) + print(('Duplicate the timing to have everything ' + 'in one place'), file=sys.stderr) + print(algo_executed, 
file=sys.stderr) + print('float64 times', float64_times, file=sys.stderr) + print('float32 times', float32_times, file=sys.stderr) + + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) #test in float32 in FAST_RUN mode on the gpu - import theano.sandbox.cuda + import theano.gpuarray if do_gpu: - theano.sandbox.cuda.use('gpu') + theano.gpuarray.use('cuda') gpu_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'gpu times', gpu_times - print >> sys.stderr, 'gpu expected', expected_times_gpu - print >> sys.stderr, 'gpu % expected/get', ( - expected_times_gpu / gpu_times) + times_dic['gpu'] = gpu_times + test_total += numpy.size(gpu_times) + print(algo_executed, file=sys.stderr) + print('gpu times', gpu_times, file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64/gpu', float64_times / gpu_times + print('float64/gpu', float64_times / gpu_times, file=sys.stderr) if (do_float64 + do_float32 + do_gpu) > 1: - print >> sys.stderr - print >> sys.stderr, ('Duplicate the timing to have everything ' - 'in one place') - print >> sys.stderr, algo_executed + print(file=sys.stderr) + print(('Duplicate the timing to have everything ' + 'in one place'), file=sys.stderr) + print(algo_executed, file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) + print('float64 times', float64_times, file=sys.stderr) if do_float32: - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) + print('float32 times', float32_times, file=sys.stderr) if do_gpu: - print >> sys.stderr, 'gpu times', gpu_times - print >> sys.stderr, 'gpu expected', expected_times_gpu - print >> sys.stderr, 'gpu % expected/get', ( - expected_times_gpu / gpu_times) + print('gpu times', gpu_times, file=sys.stderr) - print + print() if do_float64 and do_float32: - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - print >> sys.stderr, 'expected float64/float32', ( - expected_times_64 / float32_times) + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) if do_float64 and do_gpu: - print >> sys.stderr, 'float64/gpu', float64_times / gpu_times - print >> sys.stderr, 'expected float64/gpu', ( - expected_times_64 / gpu_times) + print('float64/gpu', float64_times / gpu_times, file=sys.stderr) if do_float32 and do_gpu: - print >> sys.stderr, 'float32/gpu', float32_times / gpu_times - print >> sys.stderr, 'expected float32/gpu', ( - expected_times_32 / gpu_times) - - def compare(x, y): - ratio = x / y - # If there is more then 5% difference between the expected - # time and the real time, we consider this an error. 
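The speed test now selects the new libgpuarray backend with theano.gpuarray.use('cuda') instead of theano.sandbox.cuda.use('gpu'). A minimal standalone check that the backend is active might look like the sketch below; it assumes pygpu and a CUDA device are available, and ordinarily the device would be chosen with THEANO_FLAGS=device=cuda rather than at runtime:

.. code-block:: python

    from __future__ import print_function

    import numpy
    import theano
    import theano.gpuarray

    theano.gpuarray.use('cuda')    # runtime equivalent of THEANO_FLAGS=device=cuda

    x = theano.shared(numpy.ones(10, dtype='float32'))
    f = theano.function([], x * 2)
    # With the gpuarray backend active, the compiled graph should contain Gpu* ops.
    print(f.maker.fgraph.toposort())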
- return sum((ratio < 0.95) + (ratio > 1.05)) + print('float32/gpu', float32_times / gpu_times, file=sys.stderr) + + # Generate JUnit performance report + for label, times in times_dic.items(): + with open('speedtests_{label}.xml'.format(label=label), 'w') as f: + f.write('\n') + f.write('\n' + .format(label=label, ntests=test_total/len(times_dic))) + for algo, time in zip(algo_executed, times): + f.write(' ' + .format(label=label, algo=algo, time=time)) + f.write(' \n') + f.write('\n') - print - if do_float64: - err = compare(expected_times_64, float64_times) - print >> sys.stderr, 'speed_failure_float64=' + str(err) - if do_float32: - err = compare(expected_times_32, float32_times) - print >> sys.stderr, 'speed_failure_float32=' + str(err) if do_gpu: - err = compare(expected_times_gpu, gpu_times) - print >> sys.stderr, 'speed_failure_gpu=' + str(err) - assert not numpy.isnan(gpu_times).any() diff --git a/code/utils.py b/code/utils.py index 3b50019c..ff772ad4 100644 --- a/code/utils.py +++ b/code/utils.py @@ -6,7 +6,6 @@ image from a set of samples or weights. """ - import numpy @@ -85,7 +84,7 @@ def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), else: channel_defaults = [0., 0., 0., 1.] - for i in xrange(4): + for i in range(4): if X[i] is None: # if channel is None, fill it with zeros of the correct # dtype @@ -115,8 +114,8 @@ def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), dt = 'uint8' out_array = numpy.zeros(out_shape, dtype=dt) - for tile_row in xrange(tile_shape[0]): - for tile_col in xrange(tile_shape[1]): + for tile_row in range(tile_shape[0]): + for tile_col in range(tile_shape[1]): if tile_row * tile_shape[1] + tile_col < X.shape[0]: this_x = X[tile_row * tile_shape[1] + tile_col] if scale_rows_to_unit_interval: diff --git a/data/download.sh b/data/download.sh index 92ef3d3c..67c5c057 100755 --- a/data/download.sh +++ b/data/download.sh @@ -5,7 +5,7 @@ WGET=$? which curl >/dev/null 2>&1 CURL=$? 
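In this copy of the patch the XML literals inside the f.write() calls of the new JUnit report in code/test.py appear to have been stripped, leaving only '\n'. The sketch below shows one plausible shape for that report; the element and attribute names are assumptions, and only the loop structure and format fields come from the patch itself:

.. code-block:: python

    # Plausible reconstruction only: the XML element/attribute names are guesses.
    times_dic = {'float64': [9.8, 22.0]}            # hypothetical timings
    algo_executed = ['logistic_sgd', 'logistic_cg']
    test_total = 2

    for label, times in times_dic.items():
        with open('speedtests_{label}.xml'.format(label=label), 'w') as f:
            f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
            f.write('<testsuite name="speedtests_{label}" tests="{ntests}">\n'
                    .format(label=label, ntests=test_total // len(times_dic)))
            for algo, time in zip(algo_executed, times):
                f.write(('  <testcase classname="speedtests_{label}" '
                         'name="{algo}" time="{time}"/>\n')
                        .format(label=label, algo=algo, time=time))
            f.write('</testsuite>\n')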
if [ "$WGET" -eq 0 ]; then - DL_CMD="wget -c" + DL_CMD="wget --no-verbose -c" elif [ "$CURL" -eq 0 ]; then DL_CMD="curl -C - -O" else @@ -15,12 +15,12 @@ fi $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist_py3k.pkl.gz -$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz && gunzip imdb.pkl.gz -$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.dict.pkl.gz && gunzip imdb.dict.pkl.gz +$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz && gunzip -f imdb.pkl.gz +$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.dict.pkl.gz && gunzip -f imdb.dict.pkl.gz $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/Nottingham.zip && unzip -u Nottingham.zip $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/midi.zip && unzip -u midi.zip -d ../code && echo "extracted Modified Python MIDI package (GPL)" -$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold0.pkl.gz -$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold1.pkl.gz -$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold2.pkl.gz -$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold3.pkl.gz -$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold4.pkl.gz +$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold0.pkl.gz +$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold1.pkl.gz +$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold2.pkl.gz +$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold3.pkl.gz +$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold4.pkl.gz diff --git a/doc/DBN.txt b/doc/DBN.txt index bb0571eb..be7bfbdc 100644 --- a/doc/DBN.txt +++ b/doc/DBN.txt @@ -6,7 +6,7 @@ Deep Belief Networks .. note:: This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp` and :doc:`rbm`. Additionally it uses the following Theano - functions and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic + functions and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. @@ -210,7 +210,7 @@ obtained over these sets. Putting it all together +++++++++++++++++++++++ -The few lines of code below constructs the deep belief network : +The few lines of code below constructs the deep belief network: .. literalinclude:: ../code/DBN.py :start-after: # numpy random generator diff --git a/doc/SdA.txt b/doc/SdA.txt index 289a8b0a..6d9ba0da 100644 --- a/doc/SdA.txt +++ b/doc/SdA.txt @@ -6,7 +6,7 @@ Stacked Denoising Autoencoders (SdA) .. note:: This section assumes you have already read through :doc:`logreg` and :doc:`mlp`. Additionally it uses the following Theano functions - and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. + and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh diff --git a/doc/dA.txt b/doc/dA.txt index 8ff26354..dd05acdf 100644 --- a/doc/dA.txt +++ b/doc/dA.txt @@ -6,7 +6,7 @@ Denoising Autoencoders (dA) .. 
note:: This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp`. Additionally it uses the following Theano functions - and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. + and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh @@ -126,7 +126,7 @@ signal: :pyobject: dA.get_reconstructed_input And using these functions we can compute the cost and the updates of -one stochastic gradient descent step : +one stochastic gradient descent step: .. literalinclude:: ../code/dA.py :pyobject: dA.get_cost_updates @@ -209,7 +209,7 @@ need to do is to add a stochastic corruption step operating on the input. The in corrupted in many ways, but in this tutorial we will stick to the original corruption mechanism of randomly masking entries of the input by making them zero. The code below -does just that : +does just that: .. literalinclude:: ../code/dA.py :pyobject: dA.get_corrupted_input @@ -221,7 +221,7 @@ For this reason, the constructor of the ``dA`` also gets Theano variables pointing to the shared parameters. If those parameters are left to ``None``, new ones will be constructed. -The final denoising autoencoder class becomes : +The final denoising autoencoder class becomes: .. literalinclude:: ../code/dA.py :pyobject: dA @@ -254,7 +254,7 @@ constant (weights are converted to values between 0 and 1). To plot our filters we will need the help of ``tile_raster_images`` (see :ref:`how-to-plot`) so we urge the reader to study it. Also using the help of the Python Image Library, the following lines of code will -save the filters as an image : +save the filters as an image: .. literalinclude:: ../code/dA.py :start-after: start-snippet-4 @@ -264,20 +264,20 @@ save the filters as an image : Running the Code ++++++++++++++++ -To run the code : +To run the code: .. code-block:: bash python dA.py -The resulted filters when we do not use any noise are : +The resulted filters when we do not use any noise are: .. figure:: images/filters_corruption_0.png :align: center -The filters for 30 percent noise : +The filters for 30 percent noise: .. figure:: images/filters_corruption_30.png diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index 5800889d..99c7f054 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -22,6 +22,11 @@ On each learning algorithm page, you will be able to download the corresponding git clone https://github.com/lisa-lab/DeepLearningTutorials.git +On Linux or Mac systems, after cloning, all datasets can be downloaded at once with: + + cd DeepLearningTutorials/data + ./download.sh + .. _datasets: @@ -85,7 +90,7 @@ MNIST Dataset variables and access it based on the minibatch index, given a fixed and known batch size. The reason behind shared variables is related to using the GPU. There is a large overhead when copying data - into the GPU memory. If you would copy data on request ( each minibatch + into the GPU memory. If you would copy data on request (each minibatch individually when needed) as the code will do if you do not use shared variables, due to this overhead, the GPU code will not be much faster then the CPU code (maybe even slower). 
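The shared-variable loading described in this part of doc/gettingstarted.txt corresponds to a helper along the lines of the sketch below (inputs and labels stored as floatX, labels cast back to int32 when used as indices); it is shown here only for reference and is not part of the patch:

.. code-block:: python

    import numpy
    import theano
    import theano.tensor as T

    def shared_dataset(data_xy, borrow=True):
        """Place an (inputs, labels) pair into shared variables so that
        minibatches can be sliced without copying data to the GPU each time."""
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                                 borrow=borrow)
        # Labels are also stored as floatX (the dtype the GPU stores efficiently)
        # and cast back to int32 where integer indices are needed.
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')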
If you have your data in @@ -147,7 +152,7 @@ MNIST Dataset The data has to be stored as floats on the GPU ( the right ``dtype`` for storing on the GPU is given by ``theano.config.floatX``). -To get around this shortcomming for the labels, we store them as float, +To get around this shortcoming for the labels, we store them as float, and then cast it to int. .. note:: @@ -286,7 +291,7 @@ In this tutorial, :math:`f` is defined as: f(x) = {\rm argmax}_k P(Y=k | x, \theta) -In python, using Theano this can be written as : +In python, using Theano this can be written as: .. code-block:: python @@ -316,7 +321,7 @@ The likelihood of the correct class is not the same as the number of right predictions, but from the point of view of a randomly initialized classifier they are pretty similar. Remember that likelihood and zero-one loss are different objectives; -you should see that they are corralated on the validation set but +you should see that they are correlated on the validation set but sometimes one will rise while the other falls, or vice-versa. Since we usually speak in terms of minimizing a loss function, learning will @@ -331,7 +336,7 @@ The NLL of our classifier is a differentiable surrogate for the zero-one loss, and we use the gradient of this function over our training data as a supervised learning signal for deep learning of a classifier. -This can be computed using the following line of code : +This can be computed using the following line of code: .. code-block:: python @@ -357,7 +362,7 @@ algorithm in which we repeatedly make small steps downward on an error surface defined by a loss function of some parameters. For the purpose of ordinary gradient descent we consider that the training data is rolled into the loss function. Then the pseudocode of this -algorithm can be described as : +algorithm can be described as: .. code-block:: python @@ -389,7 +394,7 @@ form, we estimate the gradient from just a single example at a time. The variant that we recommend for deep learning is a further twist on stochastic gradient descent using so-called "minibatches". -Minibatch SGD works identically to SGD, except that we use more than +Minibatch SGD (MSGD) works identically to SGD, except that we use more than one training example to make each estimate of the gradient. This technique reduces variance in the estimate of the gradient, and often makes better use of the hierarchical memory organization in modern computers. @@ -421,11 +426,11 @@ but this choice is almost arbitrary (though harmless). because it controls the number of updates done to your parameters. Training the same model for 10 epochs using a batch size of 1 yields completely different results compared to training for the same 10 epochs but with a batchsize of 20. Keep this in mind when - switching between batch sizes and be prepared to tweak all the other parameters acording + switching between batch sizes and be prepared to tweak all the other parameters according to the batch size used. All code-blocks above show pseudocode of how the algorithm looks like. Implementing such -algorithm in Theano can be done as follows : +algorithm in Theano can be done as follows: .. 
code-block:: python @@ -525,7 +530,7 @@ L2 regularization term weighted by :math:`\lambda_2` L1 = T.sum(abs(param)) # symbolic Theano variable that represents the squared L2 term - L2_sqr = T.sum(param ** 2) + L2 = T.sum(param ** 2) # the loss loss = NLL + lambda_1 * L1 + lambda_2 * L2 @@ -578,7 +583,7 @@ of a strategy based on a geometrically increasing amount of patience. while (epoch < n_epochs) and (not done_looping): # Report "1" for first epoch, "n_epochs" for last epoch epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): d_loss_wrt_params = ... # compute gradient params -= learning_rate * d_loss_wrt_params # gradient descent diff --git a/doc/index.txt b/doc/index.txt index 7c6605bf..e01e79fc 100644 --- a/doc/index.txt +++ b/doc/index.txt @@ -25,7 +25,9 @@ training them on a GPU. The algorithm tutorials have some prerequisites. You should know some python, and be familiar with numpy. Since this tutorial is about using Theano, you should read over the `Theano basic tutorial`_ first. Once you've done that, -read through our :ref:`gettingstarted` chapter -- it introduces the notation, and [downloadable] datasets used in the algorithm tutorials, and the way we do optimization by stochastic gradient descent. +read through our :ref:`gettingstarted` chapter -- it introduces the notation, and downloadable datasets used in the algorithm tutorials, and the way we do optimization by stochastic gradient descent. + +The code is available on the `Deep Learning Tutorial repositories `_. The purely supervised learning algorithms are meant to be read in order: @@ -63,3 +65,6 @@ Energy-based recurrent neural network (RNN-RBM): .. _Theano basic tutorial: http://deeplearning.net/software/theano/tutorial .. _Contractive auto-encoders: https://github.com/lisa-lab/DeepLearningTutorials/blob/master/code/cA.py + +Note that the tutorials here are all compatible with Python 2 and 3, with the exception of :ref:`rnnrbm` which is only available for Python 2. + diff --git a/doc/lenet.txt b/doc/lenet.txt index 117dfdab..84b7c3be 100644 --- a/doc/lenet.txt +++ b/doc/lenet.txt @@ -7,7 +7,7 @@ Convolutional Neural Networks (LeNet) This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp`. Additionally, it uses the following new Theano functions and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, - `floatX`_, `downsample`_ , `conv2d`_, `dimshuffle`_. If you intend to run the + `floatX`_, `pool`_ , `conv2d`_, `dimshuffle`_. If you intend to run the code on GPU also read `GPU`_. To run this example on a GPU, you need a good GPU. It needs @@ -35,7 +35,7 @@ Convolutional Neural Networks (LeNet) .. _GPU: http://deeplearning.net/software/theano/tutorial/using_gpu.html -.. _downsample: http://deeplearning.net/software/theano/library/tensor/signal/downsample.html +.. _pool: http://deeplearning.net/software/theano/library/tensor/signal/pool.html .. _conv2d: http://deeplearning.net/software/theano/library/tensor/signal/conv.html#module-conv @@ -196,7 +196,7 @@ one of Figure 1. The input consists of 3 features maps (an RGB color image) of s import theano from theano import tensor as T - from theano.tensor.nnet import conv + from theano.tensor.nnet import conv2d import numpy @@ -226,7 +226,7 @@ one of Figure 1. 
The input consists of 3 features maps (an RGB color image) of s dtype=input.dtype), name ='b') # build symbolic expression that computes the convolution of input with filters in w - conv_out = conv.conv2d(input, W) + conv_out = conv2d(input, W) # build symbolic expression to add bias and apply activation function, i.e. produce neural net layer output # A few words on ``dimshuffle`` : @@ -320,7 +320,7 @@ Max-pooling is useful in vision for two reasons: "smart" way of reducing the dimensionality of intermediate representations. Max-pooling is done in Theano by way of -``theano.tensor.signal.downsample.max_pool_2d``. This function takes as input +``theano.tensor.signal.pool.pool_2d``. This function takes as input an N dimensional tensor (where N >= 2) and a downscaling factor and performs max-pooling over the 2 trailing dimensions of the tensor. @@ -328,11 +328,11 @@ An example is worth a thousand words: .. code-block:: python - from theano.tensor.signal import downsample + from theano.tensor.signal import pool input = T.dtensor4('input') maxpool_shape = (2, 2) - pool_out = downsample.max_pool_2d(input, maxpool_shape, ignore_border=True) + pool_out = pool.pool_2d(input, maxpool_shape, ignore_border=True) f = theano.function([input],pool_out) invals = numpy.random.RandomState(1).rand(3, 2, 5, 5) @@ -340,7 +340,7 @@ An example is worth a thousand words: print 'invals[0, 0, :, :] =\n', invals[0, 0, :, :] print 'output[0, 0, :, :] =\n', f(invals)[0, 0, :, :] - pool_out = downsample.max_pool_2d(input, maxpool_shape, ignore_border=False) + pool_out = pool.pool_2d(input, maxpool_shape, ignore_border=False) f = theano.function([input],pool_out) print 'With ignore_border set to False:' print 'invals[1, 0, :, :] =\n ', invals[1, 0, :, :] @@ -404,7 +404,7 @@ to be compatible with our previous MLP implementation. Note that the term "convolution" could corresponds to different mathematical operations: 1. `theano.tensor.nnet.conv2d - `_, + `_, which is the most common one in almost all of the recent published convolutional models. In this operation, each output feature map is connected to each @@ -543,7 +543,7 @@ the task. Filter Shape ************ -Common filter shapes found in the litterature vary greatly, usually based on +Common filter shapes found in the literature vary greatly, usually based on the dataset. Best results on MNIST-sized images (28x28) are usually in the 5x5 range on the first layer, while natural image datasets (often with hundreds of pixels in each dimension) tend to use larger first-layer filters of shape 12x12 or 15x15. diff --git a/doc/logreg.txt b/doc/logreg.txt index c2979e63..b582acd4 100644 --- a/doc/logreg.txt +++ b/doc/logreg.txt @@ -246,7 +246,7 @@ within the DeepLearningTutorials folder: python code/logistic_sgd.py -The output one should expect is of the form : +The output one should expect is of the form: .. code-block:: bash diff --git a/doc/lstm.txt b/doc/lstm.txt index 828fd694..aec230ab 100644 --- a/doc/lstm.txt +++ b/doc/lstm.txt @@ -75,10 +75,10 @@ previous state, as needed. .. figure:: images/lstm_memorycell.png :align: center - **Figure 1** : Illustration of an LSTM memory cell. + **Figure 1**: Illustration of an LSTM memory cell. The equations below describe how a layer of memory cells is updated at every -timestep :math:`t`. In these equations : +timestep :math:`t`. In these equations: * :math:`x_t` is the input to the memory cell layer at time :math:`t` * :math:`W_i`, :math:`W_f`, :math:`W_c`, :math:`W_o`, :math:`U_i`, @@ -89,7 +89,7 @@ timestep :math:`t`. 
In these equations : First, we compute the values for :math:`i_t`, the input gate, and :math:`\widetilde{C_t}` the candidate value for the states of the memory -cells at time :math:`t` : +cells at time :math:`t`: .. math:: :label: 1 @@ -102,7 +102,7 @@ cells at time :math:`t` : \widetilde{C_t} = tanh(W_c x_t + U_c h_{t-1} + b_c) Second, we compute the value for :math:`f_t`, the activation of the memory -cells' forget gates at time :math:`t` : +cells' forget gates at time :math:`t`: .. math:: :label: 3 @@ -111,7 +111,7 @@ cells' forget gates at time :math:`t` : Given the value of the input gate activation :math:`i_t`, the forget gate activation :math:`f_t` and the candidate state value :math:`\widetilde{C_t}`, -we can compute :math:`C_t` the memory cells' new state at time :math:`t` : +we can compute :math:`C_t` the memory cells' new state at time :math:`t`: .. math:: :label: 4 @@ -119,7 +119,7 @@ we can compute :math:`C_t` the memory cells' new state at time :math:`t` : C_t = i_t * \widetilde{C_t} + f_t * C_{t-1} With the new state of the memory cells, we can compute the value of their -output gates and, subsequently, their outputs : +output gates and, subsequently, their outputs: .. math:: :label: 5 @@ -139,7 +139,7 @@ In this variant, the activation of a cell’s output gate does not depend on the memory cell’s state :math:`C_t`. This allows us to perform part of the computation more efficiently (see the implementation note, below, for details). This means that, in the variant we have implemented, there is no -matrix :math:`V_o` and equation :eq:`5` is replaced by equation :eq:`5-alt` : +matrix :math:`V_o` and equation :eq:`5` is replaced by equation :eq:`5-alt`: .. math:: :label: 5-alt @@ -170,11 +170,11 @@ concatenating the four matrices :math:`W_*` into a single weight matrix :math:`W` and performing the same concatenation on the weight matrices :math:`U_*` to produce the matrix :math:`U` and the bias vectors :math:`b_*` to produce the vector :math:`b`. Then, the pre-nonlinearity activations can -be computed with : +be computed with: .. math:: - z = \sigma(W x_t + U h_{t-1} + b) + z = W x_t + U h_{t-1} + b The result is then sliced to obtain the pre-nonlinearity activations for :math:`i`, :math:`f`, :math:`\widetilde{C_t}`, and :math:`o` and the @@ -187,11 +187,11 @@ Code - Citations - Contact Code ==== -The LSTM implementation can be found in the two following files : +The LSTM implementation can be found in the two following files: -* `lstm.py `_ : Main script. Defines and train the model. +* `lstm.py `_: Main script. Defines and train the model. -* `imdb.py `_ : Secondary script. Handles the loading and preprocessing of the IMDB dataset. +* `imdb.py `_: Secondary script. Handles the loading and preprocessing of the IMDB dataset. After downloading both scripts and putting both in the same folder, the user can run the code by calling: @@ -202,7 +202,7 @@ can run the code by calling: The script will automatically download the data and decompress it. -**Note** : The provided code supports the Stochastic Gradient Descent (SGD), +**Note**: The provided code supports the Stochastic Gradient Descent (SGD), AdaDelta and RMSProp optimization methods. You are advised to use AdaDelta or RMSProp because SGD appears to performs poorly on this task with this particular model. diff --git a/doc/mlp.txt b/doc/mlp.txt index 2a74aaad..9e59ffbf 100644 --- a/doc/mlp.txt +++ b/doc/mlp.txt @@ -90,8 +90,8 @@ The set of parameters to learn is the set :math:`\theta = \{W^{(2)},b^{(2)},W^{(1)},b^{(1)}\}`. 
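The implementation note in doc/lstm.txt above concatenates the four gate weight matrices so that a single product per step yields all pre-nonlinearity activations, which are then sliced. A small numpy sketch of that idea with toy dimensions (the sizes and the slice order are illustrative, not taken from lstm.py):

.. code-block:: python

    import numpy

    def _slice(z, n, dim):
        """Return the n-th of the four stacked gate pre-activations."""
        return z[n * dim:(n + 1) * dim]

    def sigmoid(v):
        return 1.0 / (1.0 + numpy.exp(-v))

    dim, in_dim = 4, 3                      # toy sizes, not the tutorial's
    rng = numpy.random.RandomState(0)
    W = rng.randn(in_dim, 4 * dim)          # the four W_* matrices, concatenated
    U = rng.randn(dim, 4 * dim)             # the four U_* matrices, concatenated
    b = numpy.zeros(4 * dim)                # the four b_* vectors, concatenated

    x_t = rng.randn(in_dim)
    h_tm1 = numpy.zeros(dim)
    c_tm1 = numpy.zeros(dim)

    z = x_t.dot(W) + h_tm1.dot(U) + b       # one product per weight matrix per step

    i_t = sigmoid(_slice(z, 0, dim))        # input gate
    f_t = sigmoid(_slice(z, 1, dim))        # forget gate
    c_hat = numpy.tanh(_slice(z, 2, dim))   # candidate cell state
    o_t = sigmoid(_slice(z, 3, dim))        # output gate

    c_t = i_t * c_hat + f_t * c_tm1         # equation (4)
    h_t = o_t * numpy.tanh(c_t)             # equation (6)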
Obtaining the gradients :math:`\partial{\ell}/\partial{\theta}` can be achieved through the **backpropagation algorithm** (a special case of the chain-rule of derivation). -Thankfully, since Theano performs automatic differentation, we will not need to -cover this in the tutorial ! +Thankfully, since Theano performs automatic differentiation, we will not need to +cover this in the tutorial! Going from logistic regression to MLP @@ -178,13 +178,13 @@ The code below shows how this can be done, in a way which is analogous to our pr .. literalinclude:: ../code/mlp.py -The user can then run the code by calling : +The user can then run the code by calling: .. code-block:: bash python code/mlp.py -The output one should expect is of the form : +The output one should expect is of the form: .. code-block:: bash diff --git a/doc/rbm.txt b/doc/rbm.txt index a8079012..7a052cc6 100644 --- a/doc/rbm.txt +++ b/doc/rbm.txt @@ -7,7 +7,7 @@ Restricted Boltzmann Machines (RBM) .. note:: This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp`. Additionally it uses the following Theano functions - and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_ and `scan`_. If you intend to run the code on GPU also read `GPU`_. + and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_ and `scan`_. If you intend to run the code on GPU also read `GPU`_. .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh @@ -573,7 +573,7 @@ The output was the following: ... plotting sample 8 ... plotting sample 9 -The pictures below show the filters after 15 epochs : +The pictures below show the filters after 15 epochs: .. figure:: images/filters_at_epoch_14.png :align: center diff --git a/doc/rnnrbm.txt b/doc/rnnrbm.txt index d64a0c4a..75e681f8 100644 --- a/doc/rnnrbm.txt +++ b/doc/rnnrbm.txt @@ -17,7 +17,7 @@ Modeling and generating sequences of polyphonic music with the RNN-RBM The script also assumes that the content of the `Nottingham Database of folk tunes `_ has been extracted in the ``../data`` directory. Alternative MIDI datasets are available `here `_. - Note that both dependencies above can be setup automatically by running the ``download.sh`` script in the ``../data`` directory. + Note that both dependencies above can be setup automatically by running the `download.sh `_ script in the ``../data`` directory of the `Deep Learning Tutorials repository `_. .. caution:: Need Theano 0.6 or more recent. diff --git a/doc/rnnslu.txt b/doc/rnnslu.txt index bb294c33..7fef1683 100644 --- a/doc/rnnslu.txt +++ b/doc/rnnslu.txt @@ -42,8 +42,9 @@ Thank you! Contact ======= -Please email to `Grégoire Mesnil `_ for any -problem report or feedback. We will be glad to hear from you. +Please email to +``Grégoire Mesnil (first-add-a-dot-last-add-at-gmail-add-a-dot-com)`` +for any problem report or feedback. We will be glad to hear from you. Task ++++ diff --git a/doc/utilities.txt b/doc/utilities.txt index 0367127c..eb982ec2 100644 --- a/doc/utilities.txt +++ b/doc/utilities.txt @@ -112,7 +112,7 @@ Tiling minibatches together is done for us by the else: channel_defaults = [0., 0., 0., 1.] 
- for i in xrange(4): + for i in range(4): if X[i] is None: # if channel is None, fill it with zeros of the correct # dtype @@ -134,8 +134,8 @@ Tiling minibatches together is done for us by the out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype) - for tile_row in xrange(tile_shape[0]): - for tile_col in xrange(tile_shape[1]): + for tile_row in range(tile_shape[0]): + for tile_col in range(tile_shape[1]): if tile_row * tile_shape[1] + tile_col < X.shape[0]: if scale_rows_to_unit_interval: # if we should scale values to be between 0 and 1 diff --git a/misc/do_nightly_build b/misc/do_nightly_build index bd703f04..ef2b8319 100755 --- a/misc/do_nightly_build +++ b/misc/do_nightly_build @@ -1,9 +1,18 @@ #!/bin/bash -#we set the compiledir to the /Tmp dir to make the test faster by bypassing the nfs network. + +# If not jenkins, set workspace to local Tmp +if [ -v $WORKSPACE ]; then + if [ -v $TMPDIR ]; then + TMPDIR=/tmp + fi + WORKSPACE=$TMPDIR +fi + date -ROOT_CWD=/Tmp/nightly_build -COMPILEDIR=/Tmp/lisa_theano_compile_dir_deeplearning +ROOT_CWD=$WORKSPACE/nightly_build +COMPILEDIR=$WORKSPACE/compile/lisa_theano_compile_dir_deeplearning NOSETESTS=${ROOT_CWD}/Theano/bin/theano-nose +XUNIT="--with-xunit --xunit-file=" FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR} export PYTHONPATH=${ROOT_CWD}/Theano:${ROOT_CWD}/Pylearn:$PYTHONPATH @@ -19,14 +28,17 @@ echo "git version:" `git rev-parse HEAD` #echo "executing nosetests with mode=FAST_COMPILE" #THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} echo "executing nosetests speed with mode=FAST_RUN" -THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} test.py:speed +FILE=${ROOT_CWD}/dlt_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} test.py:speed #echo "executing nosetests speed with mode=FAST_RUN and OMP_NUM_THREADS=2" #OMP_NUM_THREADS=2 THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} test.py:speed echo "executing nosetests with mode=FAST_RUN,floatX=float32" -THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} +FILE=${ROOT_CWD}/dlt_float32_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} #we change the seed and record it everyday to test different combination. We record it to be able to reproduce bug caused by different seed. We don't want multiple test in DEBUG_MODE each day as this take too long. #seed=$RANDOM #echo "executing nosetests with mode=DEBUG_MODE with seed of the day $seed" -#THEANO_DEBUGMODE_CHECK_STRIDES=0 THEANO_DEBUGMODE_PATIENCE=3 THEANO_COMPILEDIR=/Tmp/lisa_theano_compile_dir_deeplearning THEANO_UNITTEST_SEED=$seed THEANO_DEFAULT_MODE=DEBUG_MODE ${NOSETESTS} +#FILE=${ROOT_CWD}/'dlt_debug_tests.xml' +#THEANO_DEBUGMODE_CHECK_STRIDES=0 THEANO_DEBUGMODE_PATIENCE=3 THEANO_COMPILEDIR=$WORKSPACE/lisa_theano_compile_dir_deeplearning THEANO_UNITTEST_SEED=$seed THEANO_DEFAULT_MODE=DEBUG_MODE ${NOSETESTS} ${XUNIT}${FILE}
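For reference, the tile_raster_images helper touched in code/utils.py and doc/utilities.txt above is typically used as in the sketch below; it assumes the code/ directory is on PYTHONPATH and Pillow is installed, and the sample batch is illustrative rather than taken from the patch:

.. code-block:: python

    import numpy
    from utils import tile_raster_images   # code/utils.py

    try:
        import PIL.Image as Image
    except ImportError:
        import Image

    rng = numpy.random.RandomState(0)
    samples = rng.rand(100, 28 * 28)        # illustrative batch of flattened 28x28 images

    # Tile the 100 samples into a 10x10 grid with 1-pixel spacing and save it.
    image = Image.fromarray(
        tile_raster_images(X=samples,
                           img_shape=(28, 28),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('tiled_samples.png')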