From e4cad43f96b52589e7d95a66156f698963c33e09 Mon Sep 17 00:00:00 2001
From: stray-leone
Date: Sun, 13 Sep 2015 01:44:59 +0900
Subject: [PATCH 01/90] Change the way of getting vocsize and nclasses; the previous way needed many hours when the training data was big.

---
 code/rnnslu.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/code/rnnslu.py b/code/rnnslu.py
index 65363688..a8efda74 100644
--- a/code/rnnslu.py
+++ b/code/rnnslu.py
@@ -288,10 +288,8 @@ def main(param=None):
     valid_lex, valid_ne, valid_y = valid_set
     test_lex, test_ne, test_y = test_set

-    vocsize = len(set(reduce(lambda x, y: list(x) + list(y),
-                             train_lex + valid_lex + test_lex)))
-    nclasses = len(set(reduce(lambda x, y: list(x)+list(y),
-                              train_y + test_y + valid_y)))
+    vocsize = len(dic['words2idx'])
+    nclasses = len(dic['labels2idx'])

     nsentences = len(train_lex)

     groundtruth_valid = [map(lambda x: idx2label[x], y) for y in valid_y]

From 7c1219dabbb24ea62d74ae90b2a39e9ad0c9a090 Mon Sep 17 00:00:00 2001
From: Joakim Skarding
Date: Sat, 24 Oct 2015 20:50:42 +0200
Subject: [PATCH 02/90] Added the MSGD abbreviation where minibatch SGD is introduced

---
 doc/gettingstarted.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt
index 5800889d..63f93597 100644
--- a/doc/gettingstarted.txt
+++ b/doc/gettingstarted.txt
@@ -389,7 +389,7 @@ form, we estimate the gradient from just a single example at a time.
 The variant that we recommend for deep learning is a further twist on
 stochastic gradient descent using so-called "minibatches".
-Minibatch SGD works identically to SGD, except that we use more than
+Minibatch SGD (MSGD) works identically to SGD, except that we use more than
 one training example to make each estimate of the gradient. This
 technique reduces variance in the estimate of the gradient, and often
 makes better use of the hierarchical memory organization in modern computers.
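For readers new to the minibatch SGD (MSGD) idea referenced in patch 02, a minimal sketch of such a loop is shown below. This is an illustration only, not code from this patch series: `grad_fn` and the data arrays are hypothetical stand-ins for a model's gradient function and training set.

    import numpy

    def msgd(params, grad_fn, data_x, data_y, lr=0.13, batch_size=600, n_epochs=10):
        # One gradient estimate per minibatch of `batch_size` examples,
        # instead of per single example as in plain SGD.
        n_batches = len(data_x) // batch_size
        for epoch in range(n_epochs):
            for i in range(n_batches):
                xb = data_x[i * batch_size:(i + 1) * batch_size]
                yb = data_y[i * batch_size:(i + 1) * batch_size]
                params = params - lr * grad_fn(params, xb, yb)
        return params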
From 321a6e1adf3d650e8393f98d65f2faa67ea27f45 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Wed, 28 Oct 2015 10:37:21 -0400
Subject: [PATCH 03/90] Update atis URL

---
 data/download.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/data/download.sh b/data/download.sh
index 92ef3d3c..ed273bbb 100755
--- a/data/download.sh
+++ b/data/download.sh
@@ -19,8 +19,8 @@ $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz && gunzip imdb.p
 $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.dict.pkl.gz && gunzip imdb.dict.pkl.gz
 $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/Nottingham.zip && unzip -u Nottingham.zip
 $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/midi.zip && unzip -u midi.zip -d ../code && echo "extracted Modified Python MIDI package (GPL)"
-$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold0.pkl.gz
-$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold1.pkl.gz
-$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold2.pkl.gz
-$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold3.pkl.gz
-$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold4.pkl.gz
+$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold0.pkl.gz
+$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold1.pkl.gz
+$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold2.pkl.gz
+$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold3.pkl.gz
+$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold4.pkl.gz

From 95cfe74621f3306cc7f6c0d610411054b98e91e6 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Wed, 28 Oct 2015 11:00:21 -0400
Subject: [PATCH 04/90] Fix travis; newer scipy versions in miniconda have problems with gfortran

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 7873dedf..4344a63d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,7 +14,7 @@ before_install:
   - conda update --yes conda

 install:
-  - conda create --yes -q -n pyenv mkl python=2.7 numpy scipy pip nose yaml pyflakes pillow pyparsing=1.5
+  - conda create --yes -q -n pyenv mkl python=2.7 numpy=1.10 scipy=0.16.0 pip nose yaml pyflakes pillow pyparsing=1.5
   - source activate pyenv
   - pip install git+git://github.com/Theano/Theano.git

From b3c4a9e2f7630332f802e9acc2fe1f9452920e49 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Wed, 28 Oct 2015 11:41:19 -0400
Subject: [PATCH 05/90] Use the new travis infrastructure

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 4344a63d..17e75146 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,6 @@
 # After changing this file, check it on:
 #   http://lint.travis-ci.org/
+sudo: false
 language: python

 #python:

From 1f628ff222d7f864559bcb62a892cb0dfcb5cb65 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Wed, 28 Oct 2015 13:31:23 -0400
Subject: [PATCH 06/90] Update Gregoire's email

---
 doc/rnnslu.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/rnnslu.txt b/doc/rnnslu.txt
index bb294c33..7fef1683 100644
--- a/doc/rnnslu.txt
+++ b/doc/rnnslu.txt
@@ -42,8 +42,9 @@ Thank you!
 Contact
 =======

-Please email to `Grégoire Mesnil `_ for any
-problem report or feedback. We will be glad to hear from you.
+Please email to
+``Grégoire Mesnil (first-add-a-dot-last-add-at-gmail-add-a-dot-com)``
+for any problem report or feedback.
 We will be glad to hear from you.

 Task
 ++++
From 5b62a38639200960ba58ad39bcb1dd60370b68b2 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Thu, 29 Oct 2015 16:28:23 -0400
Subject: [PATCH 07/90] Small update to lstm code: uidx has already been incremented. Fixes gh-122.

---
 code/lstm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/lstm.py b/code/lstm.py
index b64970fb..1d87cfb3 100644
--- a/code/lstm.py
+++ b/code/lstm.py
@@ -569,7 +569,7 @@ def train_lstm(
             f_update(lrate)

             if numpy.isnan(cost) or numpy.isinf(cost):
-                print 'NaN detected'
+                print 'bad cost detected: ', cost
                 return 1., 1., 1.

             if numpy.mod(uidx, dispFreq) == 0:
@@ -595,7 +595,7 @@

                 history_errs.append([valid_err, test_err])

-                if (uidx == 0 or
+                if (best_p is None or
                     valid_err <= numpy.array(history_errs)[:, 0].min()):

From 8b1e2b35f6f67017d76a4c0b7600132cf07ce5f8 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Thu, 29 Oct 2015 16:29:46 -0400
Subject: [PATCH 08/90] Update timings after a speedup

---
 code/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/test.py b/code/test.py
index cf226b42..94e03b8d 100644
--- a/code/test.py
+++ b/code/test.py
@@ -87,7 +87,7 @@ def speed():
     #  7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.

     expected_times_64 = numpy.asarray([9.8, 22.0, 76.1, 73.7, 116.4,
-                                       346.9, 355.0, 558.1, 130.4, 50.8, 113.6])
+                                       346.9, 355.0, 558.1, 130.4, 23.2, 106])
     expected_times_32 = numpy.asarray([8.1, 17.9, 42.5, 66.5, 71,
                                        191.2, 199.0, 432.8, 119.5, 36.9, 78.0])
@@ -110,7 +110,7 @@ def speed():

     expected_times_gpu = numpy.asarray([3.0, 7.55523491, 18.99226785,
                                         5.8, 20.0,
-                                        11.8, 18.2, 280.1, 132.8, 38.8, 10.5])
+                                        11.2, 17.2, 257.7, 118.8, 34.2, 8.7])
     expected_times_64 = [s for idx, s in enumerate(expected_times_64)
                          if to_exec[idx]]
     expected_times_32 = [s for idx, s in enumerate(expected_times_32)

From 564af7e8381843b368b3fde86ef2d8ce13152cb2 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Thu, 5 Nov 2015 07:54:38 -0500
Subject: [PATCH 09/90] Fix the miniconda path, which has changed

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 17e75146..8a84a9ea 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,7 +11,7 @@ before_install:
   - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
   - chmod +x miniconda.sh
   - ./miniconda.sh -b
-  - export PATH=/home/travis/miniconda/bin:$PATH
+  - export PATH=/home/travis/miniconda/bin:/home/travis/miniconda2/bin:$PATH
   - conda update --yes conda

 install:

From 74ab26817a1b0a04371a25a7109d7779f7f21b17 Mon Sep 17 00:00:00 2001
From: Ali İskender Turan
Date: Tue, 15 Dec 2015 16:56:10 +0430
Subject: [PATCH 10/90] Update gettingstarted.txt

The L2_sqr variable must be renamed L2 to match the loss function, or the
L2 variable in the loss function must be renamed L2_sqr.
---
 doc/gettingstarted.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt
index 63f93597..e838d706 100644
--- a/doc/gettingstarted.txt
+++ b/doc/gettingstarted.txt
@@ -525,7 +525,7 @@ L2 regularization term weighted by :math:`\lambda_2`
     L1 = T.sum(abs(param))

     # symbolic Theano variable that represents the squared L2 term
-    L2_sqr = T.sum(param ** 2)
+    L2 = T.sum(param ** 2)

     # the loss
     loss = NLL + lambda_1 * L1 + lambda_2 * L2
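To see the naming fix of patch 10 in context, here is a small self-contained sketch of an L1/L2-regularized loss in Theano, with the variable named consistently in its definition and in the loss. `NLL` and the lambda values are illustrative stand-ins, not tutorial code.

    import theano.tensor as T

    param = T.vector('param')
    NLL = T.scalar('NLL')            # stand-in for the negative log-likelihood term
    lambda_1, lambda_2 = 0.001, 0.0001

    L1 = T.sum(abs(param))           # L1 penalty
    L2 = T.sum(param ** 2)           # squared L2 penalty, named to match the loss below
    loss = NLL + lambda_1 * L1 + lambda_2 * L2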
From fb5e394b25d4ce0d7a01a776045dc15b93697bf2 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Tue, 15 Dec 2015 11:31:10 -0500
Subject: [PATCH 11/90] Update timings that are now faster

---
 code/test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/code/test.py b/code/test.py
index 94e03b8d..41749231 100644
--- a/code/test.py
+++ b/code/test.py
@@ -87,9 +87,9 @@ def speed():
     #  7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.

     expected_times_64 = numpy.asarray([9.8, 22.0, 76.1, 73.7, 116.4,
-                                       346.9, 355.0, 558.1, 130.4, 23.2, 106])
-    expected_times_32 = numpy.asarray([8.1, 17.9, 42.5, 66.5, 71,
-                                       191.2, 199.0, 432.8, 119.5, 36.9, 78.0])
+                                       346.9, 355.0, 510.9, 130.4, 23.2, 106])
+    expected_times_32 = numpy.asarray([6.4, 17.9, 42.5, 66.5, 71,
+                                       191.2, 199.0, 400.4, 119.5, 36.9, 67.2])

     # Number with just 1 decimal are new value that are faster with
     # the Theano version 0.5rc2 Other number are older. They are not
@@ -110,7 +110,7 @@ def speed():

     expected_times_gpu = numpy.asarray([3.0, 7.55523491, 18.99226785,
                                         5.8, 20.0,
-                                        11.2, 17.2, 257.7, 118.8, 34.2, 8.7])
+                                        11.2, 17.2, 244.3, 118.8, 34.2, 8.7])
     expected_times_64 = [s for idx, s in enumerate(expected_times_64)
                          if to_exec[idx]]
     expected_times_32 = [s for idx, s in enumerate(expected_times_32)

From e9711aaf2e059ad7beceb2c762b5729ec0de7f68 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Tue, 15 Dec 2015 11:46:34 -0500
Subject: [PATCH 12/90] Try to fix travis

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 8a84a9ea..4528a5fe 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,7 +15,7 @@ before_install:
   - conda update --yes conda

 install:
-  - conda create --yes -q -n pyenv mkl python=2.7 numpy=1.10 scipy=0.16.0 pip nose yaml pyflakes pillow pyparsing=1.5
+  - conda create --yes -q -n pyenv mkl python=2.7 numpy=1.10 scipy=0.16.1 pip nose yaml pyflakes pillow pyparsing=1.5
   - source activate pyenv
   - pip install git+git://github.com/Theano/Theano.git

From e9fc9c395d7fd3b8dafafb5229e4660154727128 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Tue, 15 Dec 2015 12:10:41 -0500
Subject: [PATCH 13/90] Add test_lstm to travis

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 4528a5fe..ae3801c2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -21,7 +21,7 @@ install:

 env:
   - PART="test.py:test_logistic_sgd test.py:test_logistic_cg test.py:test_mlp test.py:test_convolutional_mlp test.py:test_dA"
-  - PART="test.py:test_SdA"
+  - PART="test.py:test_SdA test.py:test_lstm"
   - PART="test.py:test_dbn"
   - PART="test.py:test_rbm test.py:test_rnnrbm"
   - PART="-e test.py"

From 407fd81b1a5bf8630187eddabe3ff1e22afa6ca7 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Tue, 15 Dec 2015 12:10:56 -0500
Subject: [PATCH 14/90] Get rid of scipy dependency by using numpy.

---
 code/hmc/test_hmc.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/code/hmc/test_hmc.py b/code/hmc/test_hmc.py
index 0a70190a..f6c3b522 100644
--- a/code/hmc/test_hmc.py
+++ b/code/hmc/test_hmc.py
@@ -1,5 +1,4 @@
 import numpy
-from scipy import linalg
 import theano

 from hmc import HMC_sampler
@@ -15,7 +14,7 @@ def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10):
     cov = numpy.array(rng.rand(dim, dim), dtype=theano.config.floatX)
     cov = (cov + cov.T) / 2.
     cov[numpy.arange(dim), numpy.arange(dim)] = 1.0
-    cov_inv = linalg.inv(cov)
+    cov_inv = numpy.linalg.inv(cov)

     # Define energy function for a multi-variate Gaussian
     def gaussian_energy(x):
From e1d1e0c826d7d05a84302222d55b45cf3af2a6fa Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Fri, 15 Jan 2016 16:23:24 -0500
Subject: [PATCH 15/90] Update timings for a case that is now faster

---
 code/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/test.py b/code/test.py
index 41749231..76c95b38 100644
--- a/code/test.py
+++ b/code/test.py
@@ -86,9 +86,9 @@ def speed():
     #  580 for the GPU. OS=Fedora 14, gcc=4.5.1, python/BLAS from EPD
     #  7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.

-    expected_times_64 = numpy.asarray([9.8, 22.0, 76.1, 73.7, 116.4,
+    expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4,
                                        346.9, 355.0, 510.9, 130.4, 23.2, 106])
-    expected_times_32 = numpy.asarray([6.4, 17.9, 42.5, 66.5, 71,
+    expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 66.5, 71,
                                        191.2, 199.0, 400.4, 119.5, 36.9, 67.2])

From b3b1783b3fb1ac92f28dbc3a4e64c5ea7cf85731 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Fri, 15 Jan 2016 16:39:06 -0500
Subject: [PATCH 16/90] Add test_rnnslu to travis

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index ae3801c2..258963ee 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -23,7 +23,7 @@ env:
   - PART="test.py:test_logistic_sgd test.py:test_logistic_cg test.py:test_mlp test.py:test_convolutional_mlp test.py:test_dA"
   - PART="test.py:test_SdA test.py:test_lstm"
   - PART="test.py:test_dbn"
-  - PART="test.py:test_rbm test.py:test_rnnrbm"
+  - PART="test.py:test_rbm test.py:test_rnnrbm test.py:test_rnnslu"
   - PART="-e test.py"

 #i7-2600K CPU @ 3.40GHz

From 1a1529261e05fb5d27be973439c5cb4f2ce49d94 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Fri, 15 Jan 2016 16:39:35 -0500
Subject: [PATCH 17/90] Make test_rnnslu faster

---
 code/test.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/code/test.py b/code/test.py
index 76c95b38..ff2ff359 100644
--- a/code/test.py
+++ b/code/test.py
@@ -15,10 +15,6 @@
 import lstm


-def test_rnnslu():
-    rnnslu.main()
-
-
 def test_logistic_sgd():
     logistic_sgd.sgd_optimization_mnist(n_epochs=10)

@@ -62,6 +58,26 @@ def test_rnnrbm():
     rnnrbm.test_rnnrbm(num_epochs=1)


+def test_rnnslu():
+    s = {'fold': 3,
+         # 5 folds 0,1,2,3,4
+         'data': 'atis',
+         'lr': 0.0970806646812754,
+         'verbose': 1,
+         'decay': True,
+         # decay on the learning rate if improvement stops
+         'win': 7,
+         # number of words in the context window
+         'nhidden': 200,
+         # number of hidden units
+         'seed': 345,
+         'emb_dimension': 50,
+         # dimension of word embedding
+         'nepochs': 1,  # CHANGED
+         'savemodel': False}
+    rnnslu.main(s)
+
+
 def test_lstm():
     lstm.train_lstm(max_epochs=1, test_size=1000, saveto='')
From 87b2f9a1d22757681c6c4636bd8d9219ba91cd7d Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Fri, 15 Jan 2016 16:41:35 -0500
Subject: [PATCH 18/90] Small diff to help debugging with a better error message

---
 code/rnnslu.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/code/rnnslu.py b/code/rnnslu.py
index 2ea55978..2251e465 100644
--- a/code/rnnslu.py
+++ b/code/rnnslu.py
@@ -126,11 +126,14 @@ def get_perf(filename, folder):
                             stdout=subprocess.PIPE)

     stdout, _ = proc.communicate(''.join(open(filename).readlines()))
+    out = None
     for line in stdout.split('\n'):
         if 'accuracy' in line:
             out = line.split()
             break
-
+    # To help debug
+    if out is None:
+        print stdout.split('\n')
     precision = float(out[6][:-2])
     recall = float(out[8][:-2])
     f1score = float(out[10])

From b701733044d73681baa8346973229ed8d0537395 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Wed, 27 Jan 2016 21:40:33 -0800
Subject: [PATCH 19/90] Use the In object as Param is deprecated

---
 code/DBN.py | 2 +-
 code/SdA.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/code/DBN.py b/code/DBN.py
index b54ac5bc..ecd563e7 100644
--- a/code/DBN.py
+++ b/code/DBN.py
@@ -174,7 +174,7 @@ def pretraining_functions(self, train_set_x, batch_size, k):

         # compile the theano function
         fn = theano.function(
-            inputs=[index, theano.Param(learning_rate, default=0.1)],
+            inputs=[index, theano.In(learning_rate, value=0.1)],
             outputs=cost,
             updates=updates,
             givens={
diff --git a/code/SdA.py b/code/SdA.py
index 82660e99..c74c2986 100644
--- a/code/SdA.py
+++ b/code/SdA.py
@@ -214,8 +214,8 @@ def pretraining_functions(self, train_set_x, batch_size):
             fn = theano.function(
                 inputs=[
                     index,
-                    theano.Param(corruption_level, default=0.2),
-                    theano.Param(learning_rate, default=0.1)
+                    theano.In(corruption_level, value=0.2),
+                    theano.In(learning_rate, value=0.1)
                 ],
                 outputs=cost,
                 updates=updates,
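For readers following the `theano.Param` to `theano.In` migration above, a minimal standalone sketch of a default-valued input follows; it assumes only that Theano is installed and is not part of the tutorial code itself.

    import theano
    import theano.tensor as T

    x = T.dscalar('x')
    lr = T.dscalar('lr')
    # value= plays the role that the deprecated Param(..., default=...) used to play
    f = theano.function([x, theano.In(lr, value=0.1)], x * lr)

    print(f(2.0))       # lr falls back to the default: 0.2
    print(f(2.0, 0.5))  # the default can still be overridden: 1.0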
From 6e3d61544f2786f7400a98151db99c5409c8bb4e Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron
Date: Tue, 2 Feb 2016 16:41:10 -0500
Subject: [PATCH 20/90] Update convolution to use the updated interface.

---
 code/convolutional_mlp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py
index 64bf5e69..bb6aeaf4 100644
--- a/code/convolutional_mlp.py
+++ b/code/convolutional_mlp.py
@@ -30,7 +30,7 @@ import theano
 import theano.tensor as T
 from theano.tensor.signal import downsample
-from theano.tensor.nnet import conv
+from theano.tensor.nnet import conv2d

 from logistic_sgd import LogisticRegression, load_data
 from mlp import HiddenLayer
@@ -87,7 +87,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
         self.b = theano.shared(value=b_values, borrow=True)

         # convolve input feature maps with filters
-        conv_out = conv.conv2d(
+        conv_out = conv2d(
             input=input,
             filters=self.W,
             filter_shape=filter_shape,
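Similarly, a minimal sketch of the updated `conv2d` interface used above; the shapes are illustrative assumptions, and only `input` and `filters` are required arguments.

    import numpy
    import theano
    import theano.tensor as T
    from theano.tensor.nnet import conv2d

    x = T.tensor4('x')  # (batch, channels, rows, cols)
    w = theano.shared(numpy.random.randn(2, 1, 5, 5).astype(theano.config.floatX))
    f = theano.function([x], conv2d(input=x, filters=w, filter_shape=(2, 1, 5, 5)))

    img = numpy.random.randn(1, 1, 28, 28).astype(theano.config.floatX)
    print(f(img).shape)  # (1, 2, 24, 24) with the default 'valid' border mode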
From 6c5f07bbdbfcc9ed8fb6c8ae05b288a5ce696a03 Mon Sep 17 00:00:00 2001
From: Frédéric Bastien
Date: Wed, 3 Feb 2016 12:01:42 -0500
Subject: [PATCH 21/90] Update timings that got sped up.

---
 code/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/test.py b/code/test.py
index 76c95b38..8b6a515e 100644
--- a/code/test.py
+++ b/code/test.py
@@ -87,7 +87,7 @@ def speed():
     #  7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.

     expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4,
-                                       346.9, 355.0, 510.9, 130.4, 23.2, 106])
+                                       346.9, 355.0, 510.9, 130.4, 23.2, 98.8])
     expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 66.5, 71,
                                        191.2, 199.0, 400.4, 119.5, 36.9, 67.2])
@@ -108,7 +108,7 @@ def speed():
     #expected/get [0.82492841, 0.75984178, 0.65092691, 1.04930573, 0.93125138
     #              1.35324519 1.7356905  1.12937868]
-    expected_times_gpu = numpy.asarray([3.0, 7.55523491, 18.99226785,
+    expected_times_gpu = numpy.asarray([2.9, 7.55523491, 18.99226785,
                                         5.8, 20.0,
                                         11.2, 17.2, 244.3, 118.8, 34.2, 8.7])
     expected_times_64 = [s for idx, s in enumerate(expected_times_64)
                          if to_exec[idx]]
     expected_times_32 = [s for idx, s in enumerate(expected_times_32)

From c26252342c4d1ef1fea9131c7605d4190e52b2c2 Mon Sep 17 00:00:00 2001
From: Benjamin Irving
Date: Wed, 3 Feb 2016 17:56:49 +0000
Subject: [PATCH 22/90] Fix minor typos and formatting

---
 code/logistic_sgd.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/code/logistic_sgd.py b/code/logistic_sgd.py
index c944f8b3..68f26911 100644
--- a/code/logistic_sgd.py
+++ b/code/logistic_sgd.py
@@ -207,12 +207,12 @@ def load_data(dataset):
     f = gzip.open(dataset, 'rb')
     train_set, valid_set, test_set = cPickle.load(f)
     f.close()
-    #train_set, valid_set, test_set format: tuple(input, target)
-    #input is an numpy.ndarray of 2 dimensions (a matrix)
-    #witch row's correspond to an example. target is a
-    #numpy.ndarray of 1 dimensions (vector)) that have the same length as
-    #the number of rows in the input. It should give the target
-    #target to the example with the same index in the input.
+    # train_set, valid_set, test_set format: tuple(input, target)
+    # input is a numpy.ndarray of 2 dimensions (a matrix)
+    # where each row corresponds to an example. target is a
+    # numpy.ndarray of 1 dimension (vector) that has the same length as
+    # the number of rows in the input. It should give the target
+    # to the example with the same index in the input.

     def shared_dataset(data_xy, borrow=True):
         """ Function that loads the dataset into shared variables

From d2764f288b4e58e12bd492953d1c1a0b43d92e21 Mon Sep 17 00:00:00 2001
From: Guillaume Alain
Date: Thu, 21 Jan 2016 11:06:00 -0500
Subject: [PATCH 23/90] Successfully ported logistic_sgd.py

---
 code/logistic_sgd.py | 59 ++++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/code/logistic_sgd.py b/code/logistic_sgd.py
index 68f26911..9f4427e7 100644
--- a/code/logistic_sgd.py
+++ b/code/logistic_sgd.py
@@ -32,9 +32,12 @@
               Christopher M. Bishop, section 4.3.2

 """
+
+from __future__ import print_function
+
 __docformat__ = 'restructedtext en'

-import cPickle
+import six.moves.cPickle as pickle
 import gzip
 import os
 import sys
@@ -194,19 +197,21 @@ def load_data(dataset):
         dataset = new_path

     if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
-        import urllib
+        from six.moves import urllib
        origin = (
             'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
         )
-        print 'Downloading data from %s' % origin
-        urllib.urlretrieve(origin, dataset)
+        print('Downloading data from %s' % origin)
+        urllib.request.urlretrieve(origin, dataset)

-    print '... loading data'
+    print('... loading data')

     # Load the dataset
-    f = gzip.open(dataset, 'rb')
-    train_set, valid_set, test_set = cPickle.load(f)
-    f.close()
+    with gzip.open(dataset, 'rb') as f:
+        try:
+            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
+        except:
+            train_set, valid_set, test_set = pickle.load(f)
     # train_set, valid_set, test_set format: tuple(input, target)
     # input is a numpy.ndarray of 2 dimensions (a matrix)
     # where each row corresponds to an example. target is a
     # numpy.ndarray of 1 dimension (vector) that has the same length as
     # the number of rows in the input. It should give the target
     # to the example with the same index in the input.

     def shared_dataset(data_xy, borrow=True):
         """ Function that loads the dataset into shared variables
@@ -276,14 +281,14 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
     test_set_x, test_set_y = datasets[2]

     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

     ######################
     # BUILD ACTUAL MODEL #
     ######################
-    print '... building the model'
+    print('... building the model')

     # allocate symbolic variables for the data
     index = T.lscalar()  # index to a [mini]batch
@@ -348,14 +353,14 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
     ###############
     # TRAIN MODEL #
     ###############
-    print '... training the model'
+    print('... training the model')
     # early-stopping parameters
     patience = 5000  # look as this many examples regardless
     patience_increase = 2  # wait this much longer when a new best is
                            # found
     improvement_threshold = 0.995  # a relative improvement of this much is
                                    # considered significant
-    validation_frequency = min(n_train_batches, patience / 2)
+    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many
                                   # minibatche before checking the network
                                   # on the validation set; in this case we
@@ -369,7 +374,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
     epoch = 0
     while (epoch < n_epochs) and (not done_looping):
         epoch = epoch + 1
-        for minibatch_index in xrange(n_train_batches):
+        for minibatch_index in range(n_train_batches):

             minibatch_avg_cost = train_model(minibatch_index)
             # iteration number
@@ -378,7 +383,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
             if (iter + 1) % validation_frequency == 0:
                 # compute zero-one loss on validation set
                 validation_losses = [validate_model(i)
-                                     for i in xrange(n_valid_batches)]
+                                     for i in range(n_valid_batches)]
                 this_validation_loss = numpy.mean(validation_losses)

                 print(
@@ -402,7 +407,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                     # test it on the test set
                     test_losses = [test_model(i)
-                                   for i in xrange(n_test_batches)]
+                                   for i in range(n_test_batches)]
                     test_score = numpy.mean(test_losses)

                     print(
@@ -419,8 +424,8 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                     )

                     # save the best model
-                    with open('best_model.pkl', 'w') as f:
-                        cPickle.dump(classifier, f)
+                    with open('best_model.pkl', 'wb') as f:
+                        pickle.dump(classifier, f)

             if patience <= iter:
                 done_looping = True
@@ -434,11 +439,11 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
         )
         % (best_validation_loss * 100., test_score * 100.)
     )
-    print 'The code run for %d epochs, with %f epochs/sec' % (
-        epoch, 1. * epoch / (end_time - start_time))
-    print >> sys.stderr, ('The code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.1fs' % ((end_time - start_time)))
+    print('The code run for %d epochs, with %f epochs/sec' % (
+        epoch, 1. * epoch / (end_time - start_time)))
+    print(('The code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr)


 def predict():
@@ -448,7 +453,7 @@ def predict():
     """

     # load the saved model
-    classifier = cPickle.load(open('best_model.pkl'))
+    classifier = pickle.load(open('best_model.pkl'))

     # compile a predictor function
     predict_model = theano.function(
@@ -462,8 +467,8 @@ def predict():
     test_set_x = test_set_x.get_value()

     predicted_values = predict_model(test_set_x[:10])
-    print ("Predicted values for the first 10 examples in test set:")
-    print predicted_values
+    print("Predicted values for the first 10 examples in test set:")
+    print(predicted_values)


 if __name__ == '__main__':
From 2c022d15401c67538fabeb1b5ae2a7470d5fb2f2 Mon Sep 17 00:00:00 2001
From: Guillaume Alain
Date: Thu, 21 Jan 2016 15:28:29 -0500
Subject: [PATCH 24/90] fixed everything except rnnrbm and rnnslu, partial tests run but not to completion

---
 code/SdA.py               | 51 +++++++++++++-----------
 code/cA.py                | 16 +++++---
 code/convolutional_mlp.py | 31 ++++++++-------
 code/dA.py                | 28 +++++++------
 code/hmc/hmc.py           |  6 +--
 code/hmc/test_hmc.py      | 29 +++++++++-----
 code/imdb.py              | 17 +++++---
 code/lstm.py              | 83 +++++++++++++++++++++------------------
 code/mlp.py               | 27 +++++++------
 code/rbm.py               | 10 +++--
 code/utils.py             |  1 +
 11 files changed, 169 insertions(+), 130 deletions(-)

diff --git a/code/SdA.py b/code/SdA.py
index c74c2986..d639cb54 100644
--- a/code/SdA.py
+++ b/code/SdA.py
@@ -29,6 +29,9 @@
  Systems 19, 2007

 """
+
+from __future__ import print_function
+
 import os
 import sys
 import timeit
@@ -116,7 +119,7 @@ def __init__(
         # stochastich gradient descent on the MLP

         # start-snippet-2
-        for i in xrange(self.n_layers):
+        for i in range(self.n_layers):

             # construct the sigmoidal layer

             # the size of the input is either the number of hidden units of
@@ -254,9 +257,9 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate):

         # compute number of minibatches for training, validation and testing
         n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
-        n_valid_batches /= batch_size
+        n_valid_batches //= batch_size
         n_test_batches = test_set_x.get_value(borrow=True).shape[0]
-        n_test_batches /= batch_size
+        n_test_batches //= batch_size

         index = T.lscalar('index')  # index to a [mini]batch
@@ -314,11 +317,11 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate):

         # Create a function that scans the entire validation set
         def valid_score():
-            return [valid_score_i(i) for i in xrange(n_valid_batches)]
+            return [valid_score_i(i) for i in range(n_valid_batches)]

         # Create a function that scans the entire test set
         def test_score():
-            return [test_score_i(i) for i in xrange(n_test_batches)]
+            return [test_score_i(i) for i in range(n_test_batches)]

         return train_fn, valid_score, test_score
@@ -357,12 +360,12 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,

     # compute number of minibatches for training, validation and testing
     n_train_batches = train_set_x.get_value(borrow=True).shape[0]
-    n_train_batches /= batch_size
+    n_train_batches //= batch_size

     # numpy random generator
     # start-snippet-3
     numpy_rng = numpy.random.RandomState(89677)
-    print '... building the model'
+    print('... building the model')
     # construct the stacked denoising autoencoder class
     sda = SdA(
         numpy_rng=numpy_rng,
@@ -374,52 +377,52 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
     #########################
     # PRETRAINING THE MODEL #
     #########################
-    print '... getting the pretraining functions'
+    print('... getting the pretraining functions')
     pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                 batch_size=batch_size)

-    print '... pre-training the model'
+    print('... pre-training the model')
     start_time = timeit.default_timer()
     ## Pre-train layer-wise
     corruption_levels = [.1, .2, .3]
-    for i in xrange(sda.n_layers):
+    for i in range(sda.n_layers):
         # go through pretraining epochs
-        for epoch in xrange(pretraining_epochs):
+        for epoch in range(pretraining_epochs):
             # go through the training set
             c = []
-            for batch_index in xrange(n_train_batches):
+            for batch_index in range(n_train_batches):
                 c.append(pretraining_fns[i](index=batch_index,
                          corruption=corruption_levels[i],
                          lr=pretrain_lr))
-            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
-            print numpy.mean(c)
+            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch))
+            print(numpy.mean(c))

     end_time = timeit.default_timer()

-    print >> sys.stderr, ('The pretraining code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print(('The pretraining code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
     # end-snippet-4
     ########################
     # FINETUNING THE MODEL #
     ########################

     # get the training, validation and testing function for the model
-    print '... getting the finetuning functions'
+    print('... getting the finetuning functions')
     train_fn, validate_model, test_model = sda.build_finetune_functions(
         datasets=datasets,
         batch_size=batch_size,
         learning_rate=finetune_lr
     )

-    print '... finetunning the model'
+    print('... finetunning the model')
     # early-stopping parameters
     patience = 10 * n_train_batches  # look as this many examples regardless
     patience_increase = 2.  # wait this much longer when a new best is
                             # found
     improvement_threshold = 0.995  # a relative improvement of this much is
                                    # considered significant
-    validation_frequency = min(n_train_batches, patience / 2)
+    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many
                                   # minibatche before checking the network
                                   # on the validation set; in this case we
@@ -434,7 +437,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
     while (epoch < training_epochs) and (not done_looping):
         epoch = epoch + 1
-        for minibatch_index in xrange(n_train_batches):
+        for minibatch_index in range(n_train_batches):
             minibatch_avg_cost = train_fn(minibatch_index)
             iter = (epoch - 1) * n_train_batches + minibatch_index
@@ -480,9 +483,9 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
         )
         % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
     )
-    print >> sys.stderr, ('The training code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print(('The training code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)


 if __name__ == '__main__':
diff --git a/code/cA.py b/code/cA.py
index e26a1ddf..0d563ef2 100644
--- a/code/cA.py
+++ b/code/cA.py
@@ -28,6 +28,10 @@
  Systems 19, 2007

 """
+
+from __future__ import print_function
+from six.moves import xrange
+
 import os
 import sys
 import timeit
@@ -205,7 +209,7 @@ def get_cost_updates(self, contraction_level, learning_rate):
                      axis=1)

         # Compute the jacobian and average over the number of samples/minibatch
-        self.L_jacob = T.sum(J ** 2) / self.n_batchsize
+        self.L_jacob = T.sum(J ** 2) // self.n_batchsize

         # note : L is now a vector, where each element is the
         #        cross-entropy cost of the reconstruction of the
@@ -246,7 +250,7 @@ def test_cA(learning_rate=0.01, training_epochs=20,
     train_set_x, train_set_y = datasets[0]

     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

     # allocate symbolic variables for the data
     index = T.lscalar()    # index to a [mini]batch
@@ -290,15 +294,15 @@ def test_cA(learning_rate=0.01, training_epochs=20,
             c.append(train_ca(batch_index))

         c_array = numpy.vstack(c)
-        print 'Training epoch %d, reconstruction cost ' % epoch, numpy.mean(
-            c_array[0]), ' jacobian norm ', numpy.mean(numpy.sqrt(c_array[1]))
+        print('Training epoch %d, reconstruction cost ' % epoch, numpy.mean(
+            c_array[0]), ' jacobian norm ', numpy.mean(numpy.sqrt(c_array[1])))

     end_time = timeit.default_timer()

     training_time = (end_time - start_time)

-    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((training_time) / 60.))
+    print(('The code for file ' + os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((training_time) / 60.)), file=sys.stderr)
     image = Image.fromarray(tile_raster_images(
         X=ca.W.get_value(borrow=True).T,
         img_shape=(28, 28), tile_shape=(10, 10),
diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py
index bb6aeaf4..a8811bc1 100644
--- a/code/convolutional_mlp.py
+++ b/code/convolutional_mlp.py
@@ -21,6 +21,9 @@
  http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf

 """
+
+from __future__ import print_function
+
 import os
 import sys
 import timeit
@@ -70,7 +73,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
         # each unit in the lower layer receives a gradient from:
         # "num output feature maps * filter height * filter width" /
         #   pooling size
-        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
+        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) //
                    numpy.prod(poolsize))
         # initialize weights with random weights
         W_bound = numpy.sqrt(6. / (fan_in + fan_out))
@@ -145,9 +148,9 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
     n_train_batches = train_set_x.get_value(borrow=True).shape[0]
     n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
     n_test_batches = test_set_x.get_value(borrow=True).shape[0]
-    n_train_batches /= batch_size
-    n_valid_batches /= batch_size
-    n_test_batches /= batch_size
+    n_train_batches //= batch_size
+    n_valid_batches //= batch_size
+    n_test_batches //= batch_size

     # allocate symbolic variables for the data
     index = T.lscalar()  # index to a [mini]batch
@@ -160,7 +163,7 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
     ######################
     # BUILD ACTUAL MODEL #
     ######################
-    print '... building the model'
+    print('... building the model')

     # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
     # to a 4D tensor, compatible with our LeNetConvPoolLayer
@@ -261,14 +264,14 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
     ###############
     # TRAIN MODEL #
     ###############
-    print '... training'
+    print('... training')
     # early-stopping parameters
     patience = 10000  # look as this many examples regardless
     patience_increase = 2  # wait this much longer when a new best is
                            # found
     improvement_threshold = 0.995  # a relative improvement of this much is
                                    # considered significant
-    validation_frequency = min(n_train_batches, patience / 2)
+    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many
                                   # minibatche before checking the network
                                   # on the validation set; in this case we
@@ -284,19 +287,19 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
     while (epoch < n_epochs) and (not done_looping):
         epoch = epoch + 1
-        for minibatch_index in xrange(n_train_batches):
+        for minibatch_index in range(n_train_batches):

             iter = (epoch - 1) * n_train_batches + minibatch_index

             if iter % 100 == 0:
-                print 'training @ iter = ', iter
+                print('training @ iter = ', iter)
             cost_ij = train_model(minibatch_index)

             if (iter + 1) % validation_frequency == 0:

                 # compute zero-one loss on validation set
                 validation_losses = [validate_model(i) for i
-                                     in xrange(n_valid_batches)]
+                                     in range(n_valid_batches)]
                 this_validation_loss = numpy.mean(validation_losses)
                 print('epoch %i, minibatch %i/%i, validation error %f %%' %
                       (epoch, minibatch_index + 1, n_train_batches,
@@ -317,7 +320,7 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                     # test it on the test set
                     test_losses = [
                         test_model(i)
-                        for i in xrange(n_test_batches)
+                        for i in range(n_test_batches)
                     ]
                     test_score = numpy.mean(test_losses)
                     print(('     epoch %i, minibatch %i/%i, test error of '
@@ -334,9 +337,9 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
     print('Best validation score of %f %% obtained at iteration %i, '
           'with test performance %f %%' %
           (best_validation_loss * 100., best_iter + 1, test_score * 100.))
-    print >> sys.stderr, ('The code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print(('The code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)

 if __name__ == '__main__':
     evaluate_lenet5()
diff --git a/code/dA.py b/code/dA.py
index 8ea94e33..0d9efa54 100644
--- a/code/dA.py
+++ b/code/dA.py
@@ -30,6 +30,8 @@

 """

+from __future__ import print_function
+
 import os
 import sys
 import timeit
@@ -280,7 +282,7 @@ def test_dA(learning_rate=0.1, training_epochs=15,
     train_set_x, train_set_y = datasets[0]

     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

     # start-snippet-2
     # allocate symbolic variables for the data
@@ -328,21 +330,21 @@ def test_dA(learning_rate=0.1, training_epochs=15,
     ############

     # go through training epochs
-    for epoch in xrange(training_epochs):
+    for epoch in range(training_epochs):
         # go through trainng set
         c = []
-        for batch_index in xrange(n_train_batches):
+        for batch_index in range(n_train_batches):
             c.append(train_da(batch_index))

-        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
+        print('Training epoch %d, cost ' % epoch, numpy.mean(c))

     end_time = timeit.default_timer()

     training_time = (end_time - start_time)

-    print >> sys.stderr, ('The no corruption code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((training_time) / 60.))
+    print(('The no corruption code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((training_time) / 60.)), file=sys.stderr)
     image = Image.fromarray(
         tile_raster_images(X=da.W.get_value(borrow=True).T,
                            img_shape=(28, 28), tile_shape=(10, 10),
@@ -386,21 +388,21 @@ def test_dA(learning_rate=0.1, training_epochs=15,
     ############

     # go through training epochs
-    for epoch in xrange(training_epochs):
+    for epoch in range(training_epochs):
         # go through trainng set
         c = []
-        for batch_index in xrange(n_train_batches):
+        for batch_index in range(n_train_batches):
             c.append(train_da(batch_index))

-        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
+        print('Training epoch %d, cost ' % epoch, numpy.mean(c))

     end_time = timeit.default_timer()

     training_time = (end_time - start_time)

-    print >> sys.stderr, ('The 30% corruption code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % (training_time / 60.))
+    print(('The 30% corruption code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % (training_time / 60.)), file=sys.stderr)
     # end-snippet-3

     # start-snippet-4
diff --git a/code/hmc/hmc.py b/code/hmc/hmc.py
index b9c872f0..aeb49937 100644
--- a/code/hmc/hmc.py
+++ b/code/hmc/hmc.py
@@ -128,14 +128,14 @@ def leapfrog(pos, vel, step):
         rval2: dictionary
             Dictionary of updates for the Scan Op
         """
-        # from pos(t) and vel(t-stepsize/2), compute vel(t+stepsize/2)
+        # from pos(t) and vel(t-stepsize//2), compute vel(t+stepsize//2)
         dE_dpos = TT.grad(energy_fn(pos).sum(), pos)
         new_vel = vel - step * dE_dpos
-        # from vel(t+stepsize/2) compute pos(t+stepsize)
+        # from vel(t+stepsize//2) compute pos(t+stepsize)
         new_pos = pos + step * new_vel
         return [new_pos, new_vel], {}

-    # compute velocity at time-step: t + stepsize/2
+    # compute velocity at time-step: t + stepsize//2
     initial_energy = energy_fn(initial_pos)
     dE_dpos = TT.grad(initial_energy.sum(), initial_pos)
     vel_half_step = initial_vel - 0.5 * stepsize * dE_dpos
diff --git a/code/hmc/test_hmc.py b/code/hmc/test_hmc.py
index f6c3b522..c3c425e6 100644
--- a/code/hmc/test_hmc.py
+++ b/code/hmc/test_hmc.py
@@ -1,7 +1,16 @@
+
+from __future__ import print_function
+from six.moves import xrange
+
 import numpy
 import theano

-from hmc import HMC_sampler
+try:
+    from hmc import HMC_sampler
+except:
+    # python 3 compatibility
+    # http://stackoverflow.com/questions/3073259/python-nose-import-error
+    from hmc.hmc import HMC_sampler


 def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10):
@@ -37,17 +46,17 @@ def gaussian_energy(x):
     # Flatten to [n_samples * batchsize, dim]
     samples = _samples.T.reshape(dim, -1).T

-    print '****** TARGET VALUES ******'
-    print 'target mean:', mu
-    print 'target cov:\n', cov
+    print('****** TARGET VALUES ******')
+    print('target mean:', mu)
+    print('target cov:\n', cov)

-    print '****** EMPIRICAL MEAN/COV USING HMC ******'
-    print 'empirical mean: ', samples.mean(axis=0)
-    print 'empirical_cov:\n', numpy.cov(samples.T)
+    print('****** EMPIRICAL MEAN/COV USING HMC ******')
+    print('empirical mean: ', samples.mean(axis=0))
+    print('empirical_cov:\n', numpy.cov(samples.T))

-    print '****** HMC INTERNALS ******'
-    print 'final stepsize', sampler.stepsize.get_value()
-    print 'final acceptance_rate', sampler.avg_acceptance_rate.get_value()
+    print('****** HMC INTERNALS ******')
+    print('final stepsize', sampler.stepsize.get_value())
+    print('final acceptance_rate', sampler.avg_acceptance_rate.get_value())

     return sampler
diff --git a/code/imdb.py b/code/imdb.py
index 21e0e376..341be231 100644
--- a/code/imdb.py
+++ b/code/imdb.py
@@ -1,4 +1,7 @@
-import cPickle
+from __future__ import print_function
+from six.moves import xrange
+import six.moves.cPickle as pickle
+
 import gzip
 import os

@@ -68,9 +71,11 @@ def get_dataset_file(dataset, default_dataset, origin):
         dataset = new_path

     if (not os.path.isfile(dataset)) and data_file == default_dataset:
-        import urllib
-        print 'Downloading data from %s' % origin
-        urllib.urlretrieve(origin, dataset)
+        from six.moves import urllib
+        print('Downloading data from %s' % origin)
+        urllib.request.urlretrieve(origin, dataset)
+
+
     return dataset

@@ -110,8 +115,8 @@ def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen=None,
     else:
         f = open(path, 'rb')

-    train_set = cPickle.load(f)
-    test_set = cPickle.load(f)
+    train_set = pickle.load(f)
+    test_set = pickle.load(f)
     f.close()
     if maxlen:
         new_train_set_x = []
diff --git a/code/lstm.py b/code/lstm.py
index 1d87cfb3..b3b89f3e 100644
--- a/code/lstm.py
+++ b/code/lstm.py
@@ -1,8 +1,13 @@
 '''
 Build a tweet sentiment analyzer
 '''
+
+from __future__ import print_function
+from six.moves import xrange
+import six.moves.cPickle as pickle
+
+#from six.moves.collections import OrderedDict
 from collections import OrderedDict
-import cPickle as pkl
 import sys
 import time
@@ -56,7 +61,7 @@ def zipp(params, tparams):
     """
     When we reload the model. Needed for the GPU stuff.
     """
-    for kk, vv in params.iteritems():
+    for kk, vv in params.items():
         tparams[kk].set_value(vv)

@@ -65,7 +70,7 @@ def unzip(zipped):
     When we pickle the model. Needed for the GPU stuff.
     """
     new_params = OrderedDict()
-    for kk, vv in zipped.iteritems():
+    for kk, vv in zipped.items():
         new_params[kk] = vv.get_value()
     return new_params

@@ -106,7 +111,7 @@ def init_params(options):

 def load_params(path, params):
     pp = numpy.load(path)
-    for kk, vv in params.iteritems():
+    for kk, vv in params.items():
         if kk not in pp:
             raise Warning('%s is not in the archive' % kk)
         params[kk] = pp[kk]
@@ -116,7 +121,7 @@ def load_params(path, params):

 def init_tparams(params):
     tparams = OrderedDict()
-    for kk, pp in params.iteritems():
+    for kk, pp in params.items():
         tparams[kk] = theano.shared(params[kk], name=kk)
     return tparams

@@ -217,7 +222,7 @@ def sgd(lr, tparams, grads, x, mask, y, cost):
     # New set of shared variable that will contain the gradient
     # for a mini-batch.
     gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
-               for k, p in tparams.iteritems()]
+               for k, p in tparams.items()]
     gsup = [(gs, g) for gs, g in zip(gshared, grads)]

     # Function that computes gradients for a mini-batch, but do not
@@ -266,13 +271,13 @@ def adadelta(lr, tparams, grads, x, mask, y, cost):

     zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                   name='%s_grad' % k)
-                    for k, p in tparams.iteritems()]
+                    for k, p in tparams.items()]
     running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                  name='%s_rup2' % k)
-                   for k, p in tparams.iteritems()]
+                   for k, p in tparams.items()]
     running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                     name='%s_rgrad2' % k)
-                      for k, p in tparams.iteritems()]
+                      for k, p in tparams.items()]

     zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
     rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
@@ -329,13 +334,13 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost):

     zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                   name='%s_grad' % k)
-                    for k, p in tparams.iteritems()]
+                    for k, p in tparams.items()]
     running_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                    name='%s_rgrad' % k)
-                     for k, p in tparams.iteritems()]
+                     for k, p in tparams.items()]
     running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                     name='%s_rgrad2' % k)
-                      for k, p in tparams.iteritems()]
+                      for k, p in tparams.items()]

     zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
     rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
@@ -348,7 +353,7 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost):

     updir = [theano.shared(p.get_value() * numpy_floatX(0.),
                            name='%s_updir' % k)
-             for k, p in tparams.iteritems()]
+             for k, p in tparams.items()]
     updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4))
                  for ud, zg, rg, rg2 in zip(updir, zipped_grads,
                                             running_grads, running_grads2)]
@@ -418,7 +423,7 @@ def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
         n_done += len(valid_index)

         if verbose:
-            print '%d/%d samples classified' % (n_done, n_samples)
+            print('%d/%d samples classified' % (n_done, n_samples))

     return probs

@@ -470,11 +475,11 @@ def train_lstm(

     # Model options
     model_options = locals().copy()
-    print "model options", model_options
+    print("model options", model_options)

     load_data, prepare_data = get_dataset(dataset)

-    print 'Loading data'
+    print('Loading data')
     train, valid, test = load_data(n_words=n_words, valid_portion=0.05,
                                    maxlen=maxlen)
     if test_size > 0:
@@ -490,7 +495,7 @@ def train_lstm(

     model_options['ydim'] = ydim

-    print 'Building model'
+    print('Building model')
     # This create the initial parameters as numpy ndarrays.
     # Dict name (string) -> numpy ndarray
     params = init_params(model_options)
@@ -516,30 +521,30 @@ def train_lstm(

     f_cost = theano.function([x, mask, y], cost, name='f_cost')

-    grads = tensor.grad(cost, wrt=tparams.values())
+    grads = tensor.grad(cost, wrt=list(tparams.values()))
     f_grad = theano.function([x, mask, y], grads, name='f_grad')

     lr = tensor.scalar(name='lr')
     f_grad_shared, f_update = optimizer(lr, tparams, grads,
                                         x, mask, y, cost)

-    print 'Optimization'
+    print('Optimization')

     kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
     kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

-    print "%d train examples" % len(train[0])
-    print "%d valid examples" % len(valid[0])
-    print "%d test examples" % len(test[0])
+    print("%d train examples" % len(train[0]))
+    print("%d valid examples" % len(valid[0]))
+    print("%d test examples" % len(test[0]))

     history_errs = []
     best_p = None
     bad_count = 0

     if validFreq == -1:
-        validFreq = len(train[0]) / batch_size
+        validFreq = len(train[0]) // batch_size
     if saveFreq == -1:
-        saveFreq = len(train[0]) / batch_size
+        saveFreq = len(train[0]) // batch_size

     uidx = 0  # the number of update done
     estop = False  # early stop
@@ -569,22 +574,22 @@ def train_lstm(
                 f_update(lrate)

                 if numpy.isnan(cost) or numpy.isinf(cost):
-                    print 'bad cost detected: ', cost
+                    print('bad cost detected: ', cost)
                     return 1., 1., 1.

                 if numpy.mod(uidx, dispFreq) == 0:
-                    print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost
+                    print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost)

                 if saveto and numpy.mod(uidx, saveFreq) == 0:
-                    print 'Saving...',
+                    print('Saving...')

                     if best_p is not None:
                         params = best_p
                     else:
                         params = unzip(tparams)
                     numpy.savez(saveto, history_errs=history_errs, **params)
-                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
-                    print 'Done'
+                    pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
+                    print('Done')

                 if numpy.mod(uidx, validFreq) == 0:
                     use_noise.set_value(0.)
@@ -602,25 +607,25 @@ def train_lstm(
                         best_p = unzip(tparams)
                         bad_counter = 0

-                    print ('Train ', train_err, 'Valid ', valid_err,
-                           'Test ', test_err)
+                    print( ('Train ', train_err, 'Valid ', valid_err,
+                           'Test ', test_err) )

                     if (len(history_errs) > patience and
                         valid_err >= numpy.array(history_errs)[:-patience,
                                                                0].min()):
                         bad_counter += 1
                         if bad_counter > patience:
-                            print 'Early Stop!'
+                            print('Early Stop!')
                             estop = True
                             break

-            print 'Seen %d samples' % n_samples
+            print('Seen %d samples' % n_samples)

             if estop:
                 break

     except KeyboardInterrupt:
-        print "Training interupted"
+        print("Training interupted")

     end_time = time.time()
     if best_p is not None:
@@ -634,15 +639,15 @@ def train_lstm(
     valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
     test_err = pred_error(f_pred, prepare_data, test, kf_test)

-    print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err
+    print( 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err )
     if saveto:
         numpy.savez(saveto, train_err=train_err,
                     valid_err=valid_err, test_err=test_err,
                     history_errs=history_errs, **best_p)
-    print 'The code run for %d epochs, with %f sec/epochs' % (
-        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))
-    print >> sys.stderr, ('Training took %.1fs' %
-                          (end_time - start_time))
+    print('The code run for %d epochs, with %f sec/epochs' % (
+        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))))
+    print( ('Training took %.1fs' %
+            (end_time - start_time)), file=sys.stderr)
     return train_err, valid_err, test_err
diff --git a/code/mlp.py b/code/mlp.py
index 18f34e7c..1d463d81 100644
--- a/code/mlp.py
+++ b/code/mlp.py
@@ -18,6 +18,9 @@
     Christopher M. Bishop, section 5

 """
+
+from __future__ import print_function
+
 __docformat__ = 'restructedtext en'


@@ -231,14 +234,14 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
     test_set_x, test_set_y = datasets[2]

     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

     ######################
     # BUILD ACTUAL MODEL #
     ######################
-    print '... building the model'
+    print('... building the model')

     # allocate symbolic variables for the data
     index = T.lscalar()  # index to a [mini]batch
@@ -322,7 +325,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
     ###############
     # TRAIN MODEL #
     ###############
-    print '... training'
+    print('... training')

     # early-stopping parameters
     patience = 10000  # look as this many examples regardless
@@ -330,7 +333,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
                            # found
     improvement_threshold = 0.995  # a relative improvement of this much is
                                    # considered significant
-    validation_frequency = min(n_train_batches, patience / 2)
+    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many
                                   # minibatche before checking the network
                                   # on the validation set; in this case we
@@ -346,7 +349,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
     while (epoch < n_epochs) and (not done_looping):
         epoch = epoch + 1
-        for minibatch_index in xrange(n_train_batches):
+        for minibatch_index in range(n_train_batches):

             minibatch_avg_cost = train_model(minibatch_index)
             # iteration number
@@ -355,7 +358,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             if (iter + 1) % validation_frequency == 0:
                 # compute zero-one loss on validation set
                 validation_losses = [validate_model(i) for i
-                                     in xrange(n_valid_batches)]
+                                     in range(n_valid_batches)]
                 this_validation_loss = numpy.mean(validation_losses)

                 print(
@@ -382,7 +385,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,

                     # test it on the test set
                     test_losses = [test_model(i) for i
-                                   in xrange(n_test_batches)]
+                                   in range(n_test_batches)]
                     test_score = numpy.mean(test_losses)

                     print(('     epoch %i, minibatch %i/%i, test error of '
@@ -398,9 +401,9 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
     print(('Optimization complete. Best validation score of %f %% '
            'obtained at iteration %i, with test performance %f %%') %
           (best_validation_loss * 100., best_iter + 1, test_score * 100.))
-    print >> sys.stderr, ('The code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print(('The code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)


 if __name__ == '__main__':
diff --git a/code/rbm.py b/code/rbm.py
index 1ba4c86d..0a947963 100644
--- a/code/rbm.py
+++ b/code/rbm.py
@@ -4,6 +4,10 @@
 contain hidden variables. Restricted Boltzmann Machines further restrict BMs
 to those without visible-visible and hidden-hidden connections.
 """
+
+from __future__ import print_function
+from six.moves import xrange
+
 import timeit

 try:
@@ -384,7 +388,7 @@ def test_rbm(learning_rate=0.1, training_epochs=15,
     test_set_x, test_set_y = datasets[2]

     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

     # allocate symbolic variables for the data
     index = T.lscalar()    # index to a [mini]batch
@@ -438,7 +442,7 @@ def test_rbm(learning_rate=0.1, training_epochs=15,
         for batch_index in xrange(n_train_batches):
             mean_cost += [train_rbm(batch_index)]

-        print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)
+        print('Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost))

         # Plot filters after each training epoch
         plotting_start = timeit.default_timer()
@@ -522,7 +526,7 @@ def test_rbm(learning_rate=0.1, training_epochs=15,
         # generate `plot_every` intermediate samples that we discard,
         # because successive samples in the chain are too correlated
         vis_mf, vis_sample = sample_fn()
-        print ' ... plotting sample ', idx
+        print(' ... plotting sample %d' % idx)
         image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
             X=vis_mf,
             img_shape=(28, 28),
diff --git a/code/utils.py b/code/utils.py
index 3b50019c..fa4e4d96 100644
--- a/code/utils.py
+++ b/code/utils.py
@@ -7,6 +7,7 @@

 """

+from six.moves import xrange
 import numpy

From 53f246dc2cd743131fd918542b7f24936e2decce Mon Sep 17 00:00:00 2001
From: Guillaume Alain
Date: Thu, 21 Jan 2016 16:02:02 -0500
Subject: [PATCH 25/90] partly fixed rnnrbm, but we will need to do some magic with the midi module to make it compatible with python 3

---
 code/rnnrbm.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/code/rnnrbm.py b/code/rnnrbm.py
index e1f40b5a..e5027083 100644
--- a/code/rnnrbm.py
+++ b/code/rnnrbm.py
@@ -3,6 +3,9 @@
 # RNN-RBM deep learning tutorial
 # More information at http://deeplearning.net/tutorial/rnnrbm.html

+from __future__ import print_function
+from six.moves import xrange
+
 import glob
 import os
 import sys
@@ -11,10 +14,8 @@
 try:
     import pylab
 except ImportError:
-    print (
-        "pylab isn't available. If you use its functionality, it will crash."
-    )
-    print "It can be installed with 'pip install -q Pillow'"
+    print ("pylab isn't available. If you use its functionality, it will crash.")
+    print("It can be installed with 'pip install -q Pillow'")

 from midi.utils import midiread, midiwrite
 import theano
@@ -257,12 +258,12 @@ def train(self, files, batch_size=100, num_epochs=200):
                     cost = self.train_function(sequence[i:i + batch_size])
                     costs.append(cost)

-                print 'Epoch %i/%i' % (epoch + 1, num_epochs),
-                print numpy.mean(costs)
+                print('Epoch %i/%i' % (epoch + 1, num_epochs))
+                print(numpy.mean(costs))
                 sys.stdout.flush()

         except KeyboardInterrupt:
-            print 'Interrupted by user.'
+            print('Interrupted by user.')

From 2c610d38168a38fbd0aa8fc032579114ff660cf2 Mon Sep 17 00:00:00 2001
From: Guillaume Alain
Date: Thu, 28 Jan 2016 16:26:55 -0500
Subject: [PATCH 26/90] made rnnslu compatible with python 3. tested on cpu for many epochs, but not to completion

---
 code/rnnslu.py | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/code/rnnslu.py b/code/rnnslu.py
index 2251e465..110029f4 100644
--- a/code/rnnslu.py
+++ b/code/rnnslu.py
@@ -1,6 +1,10 @@
+
+from __future__ import print_function
+from six.moves import xrange
+import six.moves.cPickle as pickle
+
 from collections import OrderedDict
 import copy
-import cPickle
 import gzip
 import os
 import urllib
@@ -66,7 +70,10 @@ def atisfold(fold):
     assert fold in range(5)
     filename = os.path.join(PREFIX, 'atis.fold'+str(fold)+'.pkl.gz')
     f = gzip.open(filename, 'rb')
-    train_set, valid_set, test_set, dicts = cPickle.load(f)
+    try:
+        train_set, valid_set, test_set, dicts = pickle.load(f, encoding='latin1')
+    except:
+        train_set, valid_set, test_set, dicts = pickle.load(f)
     return train_set, valid_set, test_set, dicts

@@ -107,7 +114,7 @@ def download(origin, destination):
     download the corresponding atis file
     from http://www-etud.iro.umontreal.ca/~mesnilgr/atis/
     '''
-    print 'Downloading data from %s' % origin
+    print('Downloading data from %s' % origin)
     urllib.urlretrieve(origin, destination)

@@ -125,8 +132,10 @@ def get_perf(filename, folder):
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)

-    stdout, _ = proc.communicate(''.join(open(filename).readlines()))
+    stdout, _ = proc.communicate(''.join(open(filename).readlines()).encode('utf-8'))
+    stdout = stdout.decode('utf-8')
     out = None
+
     for line in stdout.split('\n'):
         if 'accuracy' in line:
             out = line.split()
@@ -237,7 +246,7 @@ def recurrence(x_t, h_tm1):

     def train(self, x, y, window_size, learning_rate):
         cwords = contextwin(x, window_size)
-        words = map(lambda x: numpy.asarray(x).astype('int32'), cwords)
+        words = list(map(lambda x: numpy.asarray(x).astype('int32'), cwords))
         labels = y

         self.sentence_train(words, labels, learning_rate)
@@ -274,7 +283,7 @@ def main(param=None):
             'nepochs': 60,
             # 60 is recommended
             'savemodel': False}
-    print param
+    print(param)

     folder_name = os.path.basename(__file__).split('.')[0]
     folder = os.path.join(os.path.dirname(__file__), folder_name)
@@ -284,8 +293,8 @@ def main(param=None):
     # load the dataset
     train_set, valid_set, test_set, dic = atisfold(param['fold'])

-    idx2label = dict((k, v) for v, k in dic['labels2idx'].iteritems())
-    idx2word = dict((k, v) for v, k in dic['words2idx'].iteritems())
+    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
+    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

     train_lex, train_ne, train_y = train_set
     valid_lex, valid_ne, valid_y = valid_set
@@ -323,9 +332,9 @@ def main(param=None):
     for i, (x,
y) in enumerate(zip(train_lex, train_y)): rnn.train(x, y, param['win'], param['clr']) - print '[learning] epoch %i >> %2.2f%%' % ( - e, (i + 1) * 100. / nsentences), - print 'completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic), + print('[learning] epoch %i >> %2.2f%%' % ( + e, (i + 1) * 100. / nsentences),) + print('completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic),) sys.stdout.flush() # evaluation // back into the real world : idx -> words @@ -374,7 +383,7 @@ def main(param=None): folder + '/best.valid.txt']) else: if param['verbose']: - print '' + print('') # learning rate decay if no improvement in 10 epochs if param['decay'] and abs(param['be']-param['ce']) >= 10: @@ -384,10 +393,10 @@ def main(param=None): if param['clr'] < 1e-5: break - print('BEST RESULT: epoch', param['be'], - 'valid F1', param['vf1'], - 'best test F1', param['tf1'], - 'with the model', folder) + print(('BEST RESULT: epoch', param['be'], + 'valid F1', param['vf1'], + 'best test F1', param['tf1'], + 'with the model', folder)) if __name__ == '__main__': From 226729f96785a96b22a937de199abab62e830de4 Mon Sep 17 00:00:00 2001 From: Guillaume Alain Date: Fri, 29 Jan 2016 15:55:43 -0500 Subject: [PATCH 27/90] all fixes suggested by Pascal, plus update for the doc --- code/SdA.py | 3 +-- code/lstm.py | 1 - code/rnnslu.py | 8 ++++---- doc/index.txt | 3 +++ 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/code/SdA.py b/code/SdA.py index d639cb54..25e306c7 100644 --- a/code/SdA.py +++ b/code/SdA.py @@ -394,8 +394,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15, c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr)) - print('Pre-training layer %i, epoch %d, cost ' % (i, epoch)) - print(numpy.mean(c)) + print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c))) end_time = timeit.default_timer() diff --git a/code/lstm.py b/code/lstm.py index b3b89f3e..9c19e1ad 100644 --- a/code/lstm.py +++ b/code/lstm.py @@ -6,7 +6,6 @@ from six.moves import xrange import six.moves.cPickle as pickle -#from six.moves.collections import OrderedDict from collections import OrderedDict import sys import time diff --git a/code/rnnslu.py b/code/rnnslu.py index 110029f4..45aaf3a6 100644 --- a/code/rnnslu.py +++ b/code/rnnslu.py @@ -333,8 +333,8 @@ def main(param=None): for i, (x, y) in enumerate(zip(train_lex, train_y)): rnn.train(x, y, param['win'], param['clr']) print('[learning] epoch %i >> %2.2f%%' % ( - e, (i + 1) * 100. / nsentences),) - print('completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic),) + e, (i + 1) * 100. / nsentences), end=' ') + print('completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic), end='') sys.stdout.flush() # evaluation // back into the real world : idx -> words @@ -393,10 +393,10 @@ def main(param=None): if param['clr'] < 1e-5: break - print(('BEST RESULT: epoch', param['be'], + print('BEST RESULT: epoch', param['be'], 'valid F1', param['vf1'], 'best test F1', param['tf1'], - 'with the model', folder)) + 'with the model', folder) if __name__ == '__main__': diff --git a/doc/index.txt b/doc/index.txt index 7c6605bf..68a18ec5 100644 --- a/doc/index.txt +++ b/doc/index.txt @@ -63,3 +63,6 @@ Energy-based recurrent neural network (RNN-RBM): .. _Theano basic tutorial: http://deeplearning.net/software/theano/tutorial .. 
_Contractive auto-encoders: https://github.com/lisa-lab/DeepLearningTutorials/blob/master/code/cA.py + +Note that the tutorials here are all compatible with Python 2 and 3, with the exception of :ref:`rnnrbm` which is only available for Python 2. + From 90b925b2c716f29b26209375fc28b1e32fad6f22 Mon Sep 17 00:00:00 2001 From: Guillaume Alain Date: Mon, 1 Feb 2016 13:36:43 -0500 Subject: [PATCH 28/90] travis python version update as suggested by Fred --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 258963ee..e2f2d530 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,8 +4,8 @@ sudo: false language: python #python: -# - "2.7" -# - "3.2" +# - "2.6" +# - "3.3" # command to install dependencies before_install: - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh From 4c0858de1073660842f3f9b8f53c162ca3107653 Mon Sep 17 00:00:00 2001 From: Guillaume Alain Date: Mon, 1 Feb 2016 15:51:10 -0500 Subject: [PATCH 29/90] got rid of all the xrange --- code/DBN.py | 14 +++++++------- code/cA.py | 5 ++--- code/hmc/test_hmc.py | 5 ++--- code/logistic_cg.py | 8 ++++---- code/lstm.py | 3 +-- code/rbm.py | 7 +++---- code/rnnrbm.py | 5 ++--- code/rnnslu.py | 3 +-- code/utils.py | 8 +++----- doc/gettingstarted.txt | 2 +- doc/utilities.txt | 6 +++--- 11 files changed, 29 insertions(+), 37 deletions(-) diff --git a/code/DBN.py b/code/DBN.py index ecd563e7..6ca88603 100644 --- a/code/DBN.py +++ b/code/DBN.py @@ -75,7 +75,7 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784, # training the DBN by doing stochastic gradient descent on the # MLP. - for i in xrange(self.n_layers): + for i in range(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden @@ -267,11 +267,11 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate): # Create a function that scans the entire validation set def valid_score(): - return [valid_score_i(i) for i in xrange(n_valid_batches)] + return [valid_score_i(i) for i in range(n_valid_batches)] # Create a function that scans the entire test set def test_score(): - return [test_score_i(i) for i in xrange(n_test_batches)] + return [test_score_i(i) for i in range(n_test_batches)] return train_fn, valid_score, test_score @@ -329,12 +329,12 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, print '... 
pre-training the model' start_time = timeit.default_timer() ## Pre-train layer-wise - for i in xrange(dbn.n_layers): + for i in range(dbn.n_layers): # go through pretraining epochs - for epoch in xrange(pretraining_epochs): + for epoch in range(pretraining_epochs): # go through the training set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), @@ -379,7 +379,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index diff --git a/code/cA.py b/code/cA.py index 0d563ef2..8dc5d8b6 100644 --- a/code/cA.py +++ b/code/cA.py @@ -30,7 +30,6 @@ """ from __future__ import print_function -from six.moves import xrange import os import sys @@ -287,10 +286,10 @@ def test_cA(learning_rate=0.01, training_epochs=20, ############ # go through training epochs - for epoch in xrange(training_epochs): + for epoch in range(training_epochs): # go through trainng set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(train_ca(batch_index)) c_array = numpy.vstack(c) diff --git a/code/hmc/test_hmc.py b/code/hmc/test_hmc.py index c3c425e6..be1a1ac6 100644 --- a/code/hmc/test_hmc.py +++ b/code/hmc/test_hmc.py @@ -1,6 +1,5 @@ from __future__ import print_function -from six.moves import xrange import numpy import theano @@ -39,10 +38,10 @@ def gaussian_energy(x): initial_stepsize=1e-3, stepsize_max=0.5) # Start with a burn-in process - garbage = [sampler.draw() for r in xrange(burnin)] # burn-in Draw + garbage = [sampler.draw() for r in range(burnin)] # burn-in Draw # `n_samples`: result is a 3D tensor of dim [n_samples, batchsize, # dim] - _samples = numpy.asarray([sampler.draw() for r in xrange(n_samples)]) + _samples = numpy.asarray([sampler.draw() for r in range(n_samples)]) # Flatten to [n_samples * batchsize, dim] samples = _samples.T.reshape(dim, -1).T diff --git a/code/logistic_cg.py b/code/logistic_cg.py index db9822ef..40c72c2f 100644 --- a/code/logistic_cg.py +++ b/code/logistic_cg.py @@ -239,7 +239,7 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): def train_fn(theta_value): classifier.theta.set_value(theta_value, borrow=True) train_losses = [batch_cost(i * batch_size) - for i in xrange(n_train_batches)] + for i in range(n_train_batches)] return numpy.mean(train_losses) # creates a function that computes the average gradient of cost with @@ -247,7 +247,7 @@ def train_fn(theta_value): def train_fn_grad(theta_value): classifier.theta.set_value(theta_value, borrow=True) grad = batch_grad(0) - for i in xrange(1, n_train_batches): + for i in range(1, n_train_batches): grad += batch_grad(i * batch_size) return grad / n_train_batches @@ -258,7 +258,7 @@ def callback(theta_value): classifier.theta.set_value(theta_value, borrow=True) #compute the validation loss validation_losses = [validate_model(i * batch_size) - for i in xrange(n_valid_batches)] + for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print('validation error %f %%' % (this_validation_loss * 100.,)) @@ -268,7 +268,7 @@ def callback(theta_value): # testing dataset validation_scores[0] = 
this_validation_loss test_losses = [test_model(i * batch_size) - for i in xrange(n_test_batches)] + for i in range(n_test_batches)] validation_scores[1] = numpy.mean(test_losses) ############### diff --git a/code/lstm.py b/code/lstm.py index 9c19e1ad..1c285928 100644 --- a/code/lstm.py +++ b/code/lstm.py @@ -3,7 +3,6 @@ ''' from __future__ import print_function -from six.moves import xrange import six.moves.cPickle as pickle from collections import OrderedDict @@ -549,7 +548,7 @@ def train_lstm( estop = False # early stop start_time = time.time() try: - for eidx in xrange(max_epochs): + for eidx in range(max_epochs): n_samples = 0 # Get new shuffled index for the training set. diff --git a/code/rbm.py b/code/rbm.py index 0a947963..901b5870 100644 --- a/code/rbm.py +++ b/code/rbm.py @@ -6,7 +6,6 @@ """ from __future__ import print_function -from six.moves import xrange import timeit @@ -435,11 +434,11 @@ def test_rbm(learning_rate=0.1, training_epochs=15, start_time = timeit.default_timer() # go through training epochs - for epoch in xrange(training_epochs): + for epoch in range(training_epochs): # go through the training set mean_cost = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): mean_cost += [train_rbm(batch_index)] print('Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)) @@ -522,7 +521,7 @@ def test_rbm(learning_rate=0.1, training_epochs=15, (29 * n_samples + 1, 29 * n_chains - 1), dtype='uint8' ) - for idx in xrange(n_samples): + for idx in range(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() diff --git a/code/rnnrbm.py b/code/rnnrbm.py index e5027083..b8420b9b 100644 --- a/code/rnnrbm.py +++ b/code/rnnrbm.py @@ -4,7 +4,6 @@ # More information at http://deeplearning.net/tutorial/rnnrbm.html from __future__ import print_function -from six.moves import xrange import glob import os @@ -249,12 +248,12 @@ def train(self, files, batch_size=100, num_epochs=200): for f in files] try: - for epoch in xrange(num_epochs): + for epoch in range(num_epochs): numpy.random.shuffle(dataset) costs = [] for s, sequence in enumerate(dataset): - for i in xrange(0, len(sequence), batch_size): + for i in range(0, len(sequence), batch_size): cost = self.train_function(sequence[i:i + batch_size]) costs.append(cost) diff --git a/code/rnnslu.py b/code/rnnslu.py index 45aaf3a6..0413ee63 100644 --- a/code/rnnslu.py +++ b/code/rnnslu.py @@ -1,6 +1,5 @@ from __future__ import print_function -from six.moves import xrange import six.moves.cPickle as pickle from collections import OrderedDict @@ -322,7 +321,7 @@ def main(param=None): # train with early stopping on validation set best_f1 = -numpy.inf param['clr'] = param['lr'] - for e in xrange(param['nepochs']): + for e in range(param['nepochs']): # shuffle shuffle([train_lex, train_ne, train_y], param['seed']) diff --git a/code/utils.py b/code/utils.py index fa4e4d96..ff772ad4 100644 --- a/code/utils.py +++ b/code/utils.py @@ -6,8 +6,6 @@ image from a set of samples or weights. """ - -from six.moves import xrange import numpy @@ -86,7 +84,7 @@ def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), else: channel_defaults = [0., 0., 0., 1.] 
- for i in xrange(4): + for i in range(4): if X[i] is None: # if channel is None, fill it with zeros of the correct # dtype @@ -116,8 +114,8 @@ def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), dt = 'uint8' out_array = numpy.zeros(out_shape, dtype=dt) - for tile_row in xrange(tile_shape[0]): - for tile_col in xrange(tile_shape[1]): + for tile_row in range(tile_shape[0]): + for tile_col in range(tile_shape[1]): if tile_row * tile_shape[1] + tile_col < X.shape[0]: this_x = X[tile_row * tile_shape[1] + tile_col] if scale_rows_to_unit_interval: diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index e838d706..d765f14a 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -578,7 +578,7 @@ of a strategy based on a geometrically increasing amount of patience. while (epoch < n_epochs) and (not done_looping): # Report "1" for first epoch, "n_epochs" for last epoch epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): d_loss_wrt_params = ... # compute gradient params -= learning_rate * d_loss_wrt_params # gradient descent diff --git a/doc/utilities.txt b/doc/utilities.txt index 0367127c..eb982ec2 100644 --- a/doc/utilities.txt +++ b/doc/utilities.txt @@ -112,7 +112,7 @@ Tiling minibatches together is done for us by the else: channel_defaults = [0., 0., 0., 1.] - for i in xrange(4): + for i in range(4): if X[i] is None: # if channel is None, fill it with zeros of the correct # dtype @@ -134,8 +134,8 @@ Tiling minibatches together is done for us by the out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype) - for tile_row in xrange(tile_shape[0]): - for tile_col in xrange(tile_shape[1]): + for tile_row in range(tile_shape[0]): + for tile_col in range(tile_shape[1]): if tile_row * tile_shape[1] + tile_col < X.shape[0]: if scale_rows_to_unit_interval: # if we should scale values to be between 0 and 1 From dcfe518dba2e346268ac88884578db5ce4fbebf4 Mon Sep 17 00:00:00 2001 From: Guillaume Alain Date: Wed, 3 Feb 2016 16:37:27 -0500 Subject: [PATCH 30/90] minor edit to respond to Pascal's suggestion --- code/hmc/test_hmc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/hmc/test_hmc.py b/code/hmc/test_hmc.py index be1a1ac6..42dbc3a7 100644 --- a/code/hmc/test_hmc.py +++ b/code/hmc/test_hmc.py @@ -6,7 +6,7 @@ try: from hmc import HMC_sampler -except: +except ImportError as e: # python 3 compatibility # http://stackoverflow.com/questions/3073259/python-nose-import-error from hmc.hmc import HMC_sampler From 8ca9239cbd9ad4472241bad638c4b283818295da Mon Sep 17 00:00:00 2001 From: Guillaume Alain Date: Tue, 9 Feb 2016 11:31:59 -0500 Subject: [PATCH 31/90] missed one print statement --- code/rnnslu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/rnnslu.py b/code/rnnslu.py index 0413ee63..3c620178 100644 --- a/code/rnnslu.py +++ b/code/rnnslu.py @@ -141,7 +141,7 @@ def get_perf(filename, folder): break # To help debug if out is None: - print stdout.split('\n') + print(stdout.split('\n')) precision = float(out[6][:-2]) recall = float(out[8][:-2]) f1score = float(out[10]) From 0054116a1cadc27fe6353f14ee48479e681c0b19 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Wed, 17 Feb 2016 09:02:43 -0500 Subject: [PATCH 32/90] Update timing due to speed up. 
(lowered the number of random number generators)
---
 code/test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/code/test.py b/code/test.py
index 39d0ab4c..b08f39a3 100644
--- a/code/test.py
+++ b/code/test.py
@@ -103,9 +103,9 @@ def speed():
     # 7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.
 
     expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4,
-                                       346.9, 355.0, 510.9, 130.4, 23.2, 98.8])
+                                       346.9, 355.0, 268.2, 130.4, 23.2, 98.8])
     expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 66.5, 71,
-                                       191.2, 199.0, 400.4, 119.5, 36.9, 67.2])
+                                       191.2, 199.0, 201.9, 119.5, 36.9, 67.2])
 
     # Number with just 1 decimal are new value that are faster with
     # the Theano version 0.5rc2 Other number are older. They are not
@@ -125,8 +125,8 @@
     #    1.35324519 1.7356905 1.12937868]
 
     expected_times_gpu = numpy.asarray([2.9, 7.55523491, 18.99226785,
-                                        5.8, 20.0,
-                                        11.2, 17.2, 244.3, 118.8, 34.2, 8.7])
+                                        5.8, 19.2,
+                                        11.2, 17.2, 122, 112.5, 31.1, 8.7])
     expected_times_64 = [s for idx, s in enumerate(expected_times_64)
                          if to_exec[idx]]
     expected_times_32 = [s for idx, s in enumerate(expected_times_32)

From 0ef0b4dd4d9ebcacb21c8079595637bc1742e588 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Wed, 17 Feb 2016 10:24:53 -0500
Subject: [PATCH 33/90] Make DLT compatible with Theano 0.7

---
 code/DBN.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/DBN.py b/code/DBN.py
index 6ca88603..b8e35fad 100644
--- a/code/DBN.py
+++ b/code/DBN.py
@@ -174,7 +174,7 @@ def pretraining_functions(self, train_set_x, batch_size, k):
 
             # compile the theano function
             fn = theano.function(
-                inputs=[index, theano.In(learning_rate, value=0.1)],
+                inputs=[index, theano.Param(learning_rate, default=0.1)],
                 outputs=cost,
                 updates=updates,
                 givens={

From 0c8507bc469e0a99027350c526372b8c8dd8a75d Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Wed, 24 Feb 2016 16:29:05 -0500
Subject: [PATCH 34/90] Update speed test to faster speed

---
 code/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/test.py b/code/test.py
index b08f39a3..250e4d7e 100644
--- a/code/test.py
+++ b/code/test.py
@@ -104,7 +104,7 @@ def speed():
 
     expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4,
                                        346.9, 355.0, 268.2, 130.4, 23.2, 98.8])
-    expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 66.5, 71,
+    expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 63.1, 71,
                                        191.2, 199.0, 201.9, 119.5, 36.9, 67.2])
 
     # Number with just 1 decimal are new value that are faster with

From cdfcde08e4667d794db3907ae19437c352baab85 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Wed, 2 Mar 2016 09:13:43 -0500
Subject: [PATCH 35/90] Speed up 8 expected benchmark speed

---
 code/test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/code/test.py b/code/test.py
index 250e4d7e..e034c63b 100644
--- a/code/test.py
+++ b/code/test.py
@@ -103,9 +103,9 @@ def speed():
     # 7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.
 
     expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4,
-                                       346.9, 355.0, 268.2, 130.4, 23.2, 98.8])
+                                       346.9, 355.0, 268.2, 115.8, 16.8, 91.6])
     expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 63.1, 71,
-                                       191.2, 199.0, 201.9, 119.5, 36.9, 67.2])
+                                       191.2, 199.0, 201.9, 107, 12.6, 61.3])
 
     # Number with just 1 decimal are new value that are faster with
     # the Theano version 0.5rc2 Other number are older. They are not
@@ -126,7 +126,7 @@
 
     expected_times_gpu = numpy.asarray([2.9, 7.55523491, 18.99226785,
                                         5.8, 19.2,
-                                        11.2, 17.2, 122, 112.5, 31.1, 8.7])
+                                        11.2, 7.8, 122, 112.5, 31.1, 8.3])
     expected_times_64 = [s for idx, s in enumerate(expected_times_64)
                          if to_exec[idx]]
     expected_times_32 = [s for idx, s in enumerate(expected_times_32)

From bba82fbe92447b7e346a941847581199c05e4eeb Mon Sep 17 00:00:00 2001
From: Jamie White
Date: Wed, 9 Mar 2016 22:12:06 -0500
Subject: [PATCH 36/90] Update mlp.py

Fixed misspelling of "sorted"
---
 code/mlp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/mlp.py b/code/mlp.py
index 1d463d81..e865bc8f 100644
--- a/code/mlp.py
+++ b/code/mlp.py
@@ -292,7 +292,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
     )
 
     # start-snippet-5
-    # compute the gradient of cost with respect to theta (sotred in params)
+    # compute the gradient of cost with respect to theta (sorted in params)
    # the resulting gradients will be stored in a list gparams
     gparams = [T.grad(cost, param) for param in classifier.params]

From 06a9d877642ed22ceccaf913edfb746a013e9184 Mon Sep 17 00:00:00 2001
From: Kyunghyun Cho
Date: Fri, 18 Mar 2016 10:15:24 -0400
Subject: [PATCH 37/90] no nonlinearity in z

---
 doc/lstm.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/lstm.txt b/doc/lstm.txt
index 828fd694..bde70bd8 100644
--- a/doc/lstm.txt
+++ b/doc/lstm.txt
@@ -174,7 +174,7 @@ be computed with :
 
 .. math::
 
-    z = \sigma(W x_t + U h_{t-1} + b)
+    z = W x_t + U h_{t-1} + b
 
 The result is then sliced to obtain the pre-nonlinearity activations for
 :math:`i`, :math:`f`, :math:`\widetilde{C_t}`, and :math:`o` and the

From 146eb2a3680658cca971d2aa3c3f1ab1471075b0 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Wed, 9 Mar 2016 09:27:38 -0500
Subject: [PATCH 38/90] Don't be too verbose when downloading. Make buildbot
 output smaller

---
 data/download.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/download.sh b/data/download.sh
index ed273bbb..160b0986 100755
--- a/data/download.sh
+++ b/data/download.sh
@@ -5,7 +5,7 @@ WGET=$?
 which curl >/dev/null 2>&1
 CURL=$?
if [ "$WGET" -eq 0 ]; then - DL_CMD="wget -c" + DL_CMD="wget --no-verbose -c" elif [ "$CURL" -eq 0 ]; then DL_CMD="curl -C - -O" else From 57a80fd2bb51b171b81db05cbd33bcfaf68e322f Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Wed, 9 Mar 2016 09:27:59 -0500 Subject: [PATCH 39/90] Give name to theano function --- code/rbm.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/code/rbm.py b/code/rbm.py index 901b5870..3800cca7 100644 --- a/code/rbm.py +++ b/code/rbm.py @@ -257,7 +257,8 @@ def get_cost_updates(self, lr=0.1, persistent=None, k=1): # chain_start is the initial state corresponding to the # 6th output outputs_info=[None, None, None, None, None, chain_start], - n_steps=k + n_steps=k, + name="gibbs_hvh" ) # start-snippet-3 # determine gradients on RBM parameters @@ -496,7 +497,8 @@ def test_rbm(learning_rate=0.1, training_epochs=15, ) = theano.scan( rbm.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], - n_steps=plot_every + n_steps=plot_every, + name="gibbs_vhv" ) # add to updates the shared variable that takes care of our persistent From ff6939b7bcdb70c7acbd9ed4020eacbb0a65c6d0 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Tue, 22 Mar 2016 11:05:15 -0400 Subject: [PATCH 40/90] Finish passing to new conv2d interface --- code/convolutional_mlp.py | 2 +- doc/lenet.txt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py index a8811bc1..62845c99 100644 --- a/code/convolutional_mlp.py +++ b/code/convolutional_mlp.py @@ -94,7 +94,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)): input=input, filters=self.W, filter_shape=filter_shape, - image_shape=image_shape + input_shape=image_shape ) # downsample each feature map individually, using maxpooling diff --git a/doc/lenet.txt b/doc/lenet.txt index 117dfdab..76614106 100644 --- a/doc/lenet.txt +++ b/doc/lenet.txt @@ -196,7 +196,7 @@ one of Figure 1. The input consists of 3 features maps (an RGB color image) of s import theano from theano import tensor as T - from theano.tensor.nnet import conv + from theano.tensor.nnet import conv2d import numpy @@ -226,7 +226,7 @@ one of Figure 1. The input consists of 3 features maps (an RGB color image) of s dtype=input.dtype), name ='b') # build symbolic expression that computes the convolution of input with filters in w - conv_out = conv.conv2d(input, W) + conv_out = conv2d(input, W) # build symbolic expression to add bias and apply activation function, i.e. produce neural net layer output # A few words on ``dimshuffle`` : @@ -404,7 +404,7 @@ to be compatible with our previous MLP implementation. Note that the term "convolution" could corresponds to different mathematical operations: 1. `theano.tensor.nnet.conv2d - `_, + `_, which is the most common one in almost all of the recent published convolutional models. 
In this operation, each output feature map is connected to each From ee5c0cb9a5e873d51c25dc60203e828dd1793889 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Tue, 22 Mar 2016 11:07:09 -0400 Subject: [PATCH 41/90] Use the new Interface --- code/DBN.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/DBN.py b/code/DBN.py index b8e35fad..6ca88603 100644 --- a/code/DBN.py +++ b/code/DBN.py @@ -174,7 +174,7 @@ def pretraining_functions(self, train_set_x, batch_size, k): # compile the theano function fn = theano.function( - inputs=[index, theano.Param(learning_rate, default=0.1)], + inputs=[index, theano.In(learning_rate, value=0.1)], outputs=cost, updates=updates, givens={ From 797342acc73b94854964e682ec5babbc5735bdfc Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Fri, 1 Apr 2016 13:34:19 -0400 Subject: [PATCH 42/90] Use the new pool interface --- code/convolutional_mlp.py | 6 +++--- doc/lenet.txt | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py index 62845c99..6bbb47a1 100644 --- a/code/convolutional_mlp.py +++ b/code/convolutional_mlp.py @@ -32,7 +32,7 @@ import theano import theano.tensor as T -from theano.tensor.signal import downsample +from theano.tensor.signal import pool from theano.tensor.nnet import conv2d from logistic_sgd import LogisticRegression, load_data @@ -97,8 +97,8 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)): input_shape=image_shape ) - # downsample each feature map individually, using maxpooling - pooled_out = downsample.max_pool_2d( + # pool each feature map individually, using maxpooling + pooled_out = pool.pool_2d( input=conv_out, ds=poolsize, ignore_border=True diff --git a/doc/lenet.txt b/doc/lenet.txt index 76614106..09f50be6 100644 --- a/doc/lenet.txt +++ b/doc/lenet.txt @@ -7,7 +7,7 @@ Convolutional Neural Networks (LeNet) This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp`. Additionally, it uses the following new Theano functions and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, - `floatX`_, `downsample`_ , `conv2d`_, `dimshuffle`_. If you intend to run the + `floatX`_, `pool`_ , `conv2d`_, `dimshuffle`_. If you intend to run the code on GPU also read `GPU`_. To run this example on a GPU, you need a good GPU. It needs @@ -35,7 +35,7 @@ Convolutional Neural Networks (LeNet) .. _GPU: http://deeplearning.net/software/theano/tutorial/using_gpu.html -.. _downsample: http://deeplearning.net/software/theano/library/tensor/signal/downsample.html +.. _pool: http://deeplearning.net/software/theano/library/tensor/signal/pool.html .. _conv2d: http://deeplearning.net/software/theano/library/tensor/signal/conv.html#module-conv @@ -320,7 +320,7 @@ Max-pooling is useful in vision for two reasons: "smart" way of reducing the dimensionality of intermediate representations. Max-pooling is done in Theano by way of -``theano.tensor.signal.downsample.max_pool_2d``. This function takes as input +``theano.tensor.signal.pool.pool_2d``. This function takes as input an N dimensional tensor (where N >= 2) and a downscaling factor and performs max-pooling over the 2 trailing dimensions of the tensor. @@ -328,11 +328,11 @@ An example is worth a thousand words: .. 
code-block:: python - from theano.tensor.signal import downsample + from theano.tensor.signal import pool input = T.dtensor4('input') maxpool_shape = (2, 2) - pool_out = downsample.max_pool_2d(input, maxpool_shape, ignore_border=True) + pool_out = pool.pool_2d(input, maxpool_shape, ignore_border=True) f = theano.function([input],pool_out) invals = numpy.random.RandomState(1).rand(3, 2, 5, 5) @@ -340,7 +340,7 @@ An example is worth a thousand words: print 'invals[0, 0, :, :] =\n', invals[0, 0, :, :] print 'output[0, 0, :, :] =\n', f(invals)[0, 0, :, :] - pool_out = downsample.max_pool_2d(input, maxpool_shape, ignore_border=False) + pool_out = pool.pool_2d(input, maxpool_shape, ignore_border=False) f = theano.function([input],pool_out) print 'With ignore_border set to False:' print 'invals[1, 0, :, :] =\n ', invals[1, 0, :, :] From aad4f16662edb643926a38b661f469b6026a6a72 Mon Sep 17 00:00:00 2001 From: "lorenzo.ritter" Date: Wed, 27 Apr 2016 19:10:25 +0200 Subject: [PATCH 43/90] fixed typo in SdA.py --- code/SdA.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/SdA.py b/code/SdA.py index 25e306c7..3d9589ac 100644 --- a/code/SdA.py +++ b/code/SdA.py @@ -81,8 +81,8 @@ def __init__( :type n_ins: int :param n_ins: dimension of the input to the sdA - :type n_layers_sizes: list of ints - :param n_layers_sizes: intermediate layers size, must contain + :type hidden_layers_sizes: list of ints + :param hidden_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int From de99c6eb17d802549bf08fc7ed5ed4f287f967c2 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Sun, 8 May 2016 19:50:03 -0400 Subject: [PATCH 44/90] Commit a small speed up. --- code/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/test.py b/code/test.py index e034c63b..6aee1084 100644 --- a/code/test.py +++ b/code/test.py @@ -126,7 +126,7 @@ def speed(): expected_times_gpu = numpy.asarray([2.9, 7.55523491, 18.99226785, 5.8, 19.2, - 11.2, 7.8, 122, 112.5, 31.1, 8.3]) + 11.2, 7.3, 122, 112.5, 31.1, 8.3]) expected_times_64 = [s for idx, s in enumerate(expected_times_64) if to_exec[idx]] expected_times_32 = [s for idx, s in enumerate(expected_times_32) From 75cbba67b4fdc271bae5b7020a2a3fc69b70328d Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Wed, 13 Jul 2016 14:03:47 -0400 Subject: [PATCH 45/90] Python 3 + flake8 fixes. --- code/DBN.py | 101 +++++++++++++++++++--------------------- code/imdb_preprocess.py | 10 ++-- code/logistic_cg.py | 25 +++++----- code/test.py | 11 +++-- 4 files changed, 70 insertions(+), 77 deletions(-) diff --git a/code/DBN.py b/code/DBN.py index 6ca88603..3b2bd230 100644 --- a/code/DBN.py +++ b/code/DBN.py @@ -1,5 +1,6 @@ """ """ +from __future__ import print_function, division import os import sys import timeit @@ -61,9 +62,12 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784, theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data - self.x = T.matrix('x') # the data is presented as rasterized images - self.y = T.ivector('y') # the labels are presented as 1D vector - # of [int] labels + + # the data is presented as rasterized images + self.x = T.matrix('x') + + # the labels are presented as 1D vector of [int] labels + self.y = T.ivector('y') # end-snippet-1 # The DBN is an MLP, for which all weights of intermediate # layers are shared with a different RBM. 
We will first @@ -156,8 +160,6 @@ def pretraining_functions(self, train_set_x, batch_size, k): index = T.lscalar('index') # index to a minibatch learning_rate = T.scalar('lr') # learning rate to use - # number of batches - n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # begining of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` @@ -211,9 +213,9 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate): # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] - n_valid_batches /= batch_size + n_valid_batches //= batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] - n_test_batches /= batch_size + n_test_batches //= batch_size index = T.lscalar('index') # index to a [mini]batch @@ -307,11 +309,11 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size # numpy random generator numpy_rng = numpy.random.RandomState(123) - print '... building the model' + print('... building the model') # construct the Deep Belief Network dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000], @@ -321,14 +323,14 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, ######################### # PRETRAINING THE MODEL # ######################### - print '... getting the pretraining functions' + print('... getting the pretraining functions') pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, k=k) - print '... pre-training the model' + print('... pre-training the model') start_time = timeit.default_timer() - ## Pre-train layer-wise + # Pre-train layer-wise for i in range(dbn.n_layers): # go through pretraining epochs for epoch in range(pretraining_epochs): @@ -337,38 +339,40 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, for batch_index in range(n_train_batches): c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) - print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), - print numpy.mean(c) + print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ') + print(numpy.mean(c)) end_time = timeit.default_timer() # end-snippet-2 - print >> sys.stderr, ('The pretraining code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) / 60.)) + print('The pretraining code for file ' + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr) ######################## # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model - print '... getting the finetuning functions' + print('... getting the finetuning functions') train_fn, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr ) - print '... finetuning the model' + print('... finetuning the model') # early-stopping parameters - patience = 4 * n_train_batches # look as this many examples regardless - patience_increase = 2. 
# wait this much longer when a new best is - # found - improvement_threshold = 0.995 # a relative improvement of this much is - # considered significant + + # look as this many examples regardless + patience = 4 * n_train_batches + + # wait this much longer when a new best is found + patience_increase = 2. + + # a relative improvement of this much is considered significant + improvement_threshold = 0.995 + + # go through this many minibatches before checking the network on + # the validation set; in this case we check every epoch validation_frequency = min(n_train_batches, patience / 2) - # go through this many - # minibatches before checking the network - # on the validation set; in this case we - # check every epoch best_validation_loss = numpy.inf test_score = 0. @@ -381,31 +385,27 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, epoch = epoch + 1 for minibatch_index in range(n_train_batches): - minibatch_avg_cost = train_fn(minibatch_index) + train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) - print( - 'epoch %i, minibatch %i/%i, validation error %f %%' - % ( - epoch, - minibatch_index + 1, - n_train_batches, - this_validation_loss * 100. + print('epoch %i, minibatch %i/%i, validation error %f %%' % ( + epoch, + minibatch_index + 1, + n_train_batches, + this_validation_loss * 100. ) ) # if we got the best validation score until now if this_validation_loss < best_validation_loss: - #improve patience if loss improvement is good enough - if ( - this_validation_loss < best_validation_loss * - improvement_threshold - ): + # improve patience if loss improvement is good enough + if (this_validation_loss < best_validation_loss * + improvement_threshold): patience = max(patience, iter * patience_increase) # save best validation score and iteration number @@ -418,24 +418,19 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, - test_score * 100.)) + test_score * 100.)) if patience <= iter: done_looping = True break end_time = timeit.default_timer() - print( - ( - 'Optimization complete with best validation score of %f %%, ' - 'obtained at iteration %i, ' - 'with test performance %f %%' - ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.) - ) - print >> sys.stderr, ('The fine tuning code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) - / 60.)) + print(('Optimization complete with best validation score of %f %%, ' + 'obtained at iteration %i, ' + 'with test performance %f %%' + ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) + print('The fine tuning code for file ' + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr) if __name__ == '__main__': diff --git a/code/imdb_preprocess.py b/code/imdb_preprocess.py index c20b37b6..62ebb556 100644 --- a/code/imdb_preprocess.py +++ b/code/imdb_preprocess.py @@ -8,7 +8,7 @@ 3) Then run this script. 
""" - +from __future__ import print_function dataset_path='/Tmp/bastienf/aclImdb/' import numpy @@ -27,12 +27,12 @@ def tokenize(sentences): - print 'Tokenizing..', + print('Tokenizing..', end=' ') text = "\n".join(sentences) tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE) tok_text, _ = tokenizer.communicate(text) toks = tok_text.split('\n')[:-1] - print 'Done' + print('Done') return toks @@ -52,7 +52,7 @@ def build_dict(path): sentences = tokenize(sentences) - print 'Building dictionary..', + print('Building dictionary..', end=' ') wordcount = dict() for ss in sentences: words = ss.strip().lower().split() @@ -72,7 +72,7 @@ def build_dict(path): for idx, ss in enumerate(sorted_idx): worddict[keys[ss]] = idx+2 # leave 0 and 1 (UNK) - print numpy.sum(counts), ' total words ', len(keys), ' unique words' + print(numpy.sum(counts), ' total words ', len(keys), ' unique words') return worddict diff --git a/code/logistic_cg.py b/code/logistic_cg.py index 40c72c2f..c2970d51 100644 --- a/code/logistic_cg.py +++ b/code/logistic_cg.py @@ -33,6 +33,7 @@ """ +from __future__ import print_function, division __docformat__ = 'restructedtext en' @@ -165,9 +166,9 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): batch_size = 600 # size of the minibatch - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size - n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size - n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size + n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size + n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size n_in = 28 * 28 # number of input units n_out = 10 # number of output units @@ -175,7 +176,7 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): ###################### # BUILD ACTUAL MODEL # ###################### - print '... building the model' + print('... building the model') # allocate symbolic variables for the data minibatch_offset = T.lscalar() # offset to the start of a [mini]batch @@ -260,7 +261,7 @@ def callback(theta_value): validation_losses = [validate_model(i * batch_size) for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) - print('validation error %f %%' % (this_validation_loss * 100.,)) + print(('validation error %f %%' % (this_validation_loss * 100.,))) # check if it is better then best validation score got until now if this_validation_loss < validation_scores[0]: @@ -288,17 +289,13 @@ def callback(theta_value): maxiter=n_epochs ) end_time = timeit.default_timer() - print( - ( - 'Optimization complete with best validation score of %f %%, with ' - 'test performance %f %%' - ) - % (validation_scores[0] * 100., validation_scores[1] * 100.) + print(('Optimization complete with best validation score of %f %%, with ' + 'test performance %f %%' + ) % (validation_scores[0] * 100., validation_scores[1] * 100.) 
) - print >> sys.stderr, ('The code for file ' + - os.path.split(__file__)[1] + - ' ran for %.1fs' % ((end_time - start_time))) + print('The code for file ' + os.path.split(__file__)[1] + + ' ran for %.1fs' % (end_time - start_time), file=sys.stderr) if __name__ == '__main__': diff --git a/code/test.py b/code/test.py index 6aee1084..926cae7b 100644 --- a/code/test.py +++ b/code/test.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import, print_function, division import sys import numpy @@ -137,12 +138,12 @@ def speed(): def time_test(m, l, idx, f, **kwargs): if not to_exec[idx]: return - print algo[idx] + print(algo[idx]) ts = m.call_time try: f(**kwargs) - except Exception, e: - print >> sys.stderr, 'test', algo[idx], 'FAILED', e + except Exception as e: + print('test', algo[idx], 'FAILED', e, file=sys.stderr) l.append(numpy.nan) return te = m.call_time @@ -265,7 +266,7 @@ def do_tests(): print >> sys.stderr, 'gpu % expected/get', ( expected_times_gpu / gpu_times) - print + print() if do_float64 and do_float32: print >> sys.stderr, 'float64/float32', ( float64_times / float32_times) @@ -286,7 +287,7 @@ def compare(x, y): # time and the real time, we consider this an error. return sum((ratio < 0.95) + (ratio > 1.05)) - print + print() if do_float64: err = compare(expected_times_64, float64_times) print >> sys.stderr, 'speed_failure_float64=' + str(err) From 4faede82e900555a063d6c7c385d0c3e59c04699 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Thu, 28 Jul 2016 10:29:44 -0400 Subject: [PATCH 46/90] python3 --- code/test.py | 130 +++++++++++++++++++++++++-------------------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/code/test.py b/code/test.py index 926cae7b..4332e8b0 100644 --- a/code/test.py +++ b/code/test.py @@ -194,92 +194,92 @@ def do_tests(): theano.config.floatX = 'float64' theano.config.mode = 'FAST_RUN' float64_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) + print(algo_executed, file=sys.stderr) + print('float64 times', float64_times, file=sys.stderr) + print('float64 expected', expected_times_64, file=sys.stderr) + print('float64 % expected/get', ( + expected_times_64 / float64_times), file=sys.stderr) #test in float32 in FAST_RUN mode on the cpu theano.config.floatX = 'float32' if do_float32: float32_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) + print(algo_executed, file=sys.stderr) + print('float32 times', float32_times, file=sys.stderr) + print('float32 expected', expected_times_32, file=sys.stderr) + print('float32 % expected/get', ( + expected_times_32 / float32_times), file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - print >> sys.stderr - print >> sys.stderr, ('Duplicate the timing to have everything ' - 'in one place') - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', 
expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) - - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - print >> sys.stderr, 'expected float64/float32', ( - expected_times_64 / float32_times) + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) + print(file=sys.stderr) + print(('Duplicate the timing to have everything ' + 'in one place'), file=sys.stderr) + print(algo_executed, file=sys.stderr) + print('float64 times', float64_times, file=sys.stderr) + print('float64 expected', expected_times_64, file=sys.stderr) + print('float64 % expected/get', ( + expected_times_64 / float64_times), file=sys.stderr) + print('float32 times', float32_times, file=sys.stderr) + print('float32 expected', expected_times_32, file=sys.stderr) + print('float32 % expected/get', ( + expected_times_32 / float32_times), file=sys.stderr) + + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) + print('expected float64/float32', ( + expected_times_64 / float32_times), file=sys.stderr) #test in float32 in FAST_RUN mode on the gpu import theano.sandbox.cuda if do_gpu: theano.sandbox.cuda.use('gpu') gpu_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'gpu times', gpu_times - print >> sys.stderr, 'gpu expected', expected_times_gpu - print >> sys.stderr, 'gpu % expected/get', ( - expected_times_gpu / gpu_times) + print(algo_executed, file=sys.stderr) + print('gpu times', gpu_times, file=sys.stderr) + print('gpu expected', expected_times_gpu, file=sys.stderr) + print('gpu % expected/get', ( + expected_times_gpu / gpu_times), file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64/gpu', float64_times / gpu_times + print('float64/gpu', float64_times / gpu_times, file=sys.stderr) if (do_float64 + do_float32 + do_gpu) > 1: - print >> sys.stderr - print >> sys.stderr, ('Duplicate the timing to have everything ' - 'in one place') - print >> sys.stderr, algo_executed + print(file=sys.stderr) + print(('Duplicate the timing to have everything ' + 'in one place'), file=sys.stderr) + print(algo_executed, file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) + print('float64 times', float64_times, file=sys.stderr) + print('float64 expected', expected_times_64, file=sys.stderr) + print('float64 % expected/get', ( + expected_times_64 / float64_times), file=sys.stderr) if do_float32: - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) + print('float32 times', float32_times, file=sys.stderr) + print('float32 expected', expected_times_32, file=sys.stderr) + print('float32 % expected/get', ( + expected_times_32 / float32_times), file=sys.stderr) if do_gpu: - print >> sys.stderr, 'gpu times', gpu_times - print >> sys.stderr, 'gpu expected', expected_times_gpu - print >> sys.stderr, 'gpu % expected/get', ( - expected_times_gpu / gpu_times) + print('gpu times', gpu_times, file=sys.stderr) + print('gpu expected', expected_times_gpu, file=sys.stderr) + print('gpu % expected/get', ( + expected_times_gpu / gpu_times), file=sys.stderr) print() if do_float64 and do_float32: - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - 
print >> sys.stderr, 'expected float64/float32', ( - expected_times_64 / float32_times) + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) + print('expected float64/float32', ( + expected_times_64 / float32_times), file=sys.stderr) if do_float64 and do_gpu: - print >> sys.stderr, 'float64/gpu', float64_times / gpu_times - print >> sys.stderr, 'expected float64/gpu', ( - expected_times_64 / gpu_times) + print('float64/gpu', float64_times / gpu_times, file=sys.stderr) + print('expected float64/gpu', ( + expected_times_64 / gpu_times), file=sys.stderr) if do_float32 and do_gpu: - print >> sys.stderr, 'float32/gpu', float32_times / gpu_times - print >> sys.stderr, 'expected float32/gpu', ( - expected_times_32 / gpu_times) + print('float32/gpu', float32_times / gpu_times, file=sys.stderr) + print('expected float32/gpu', ( + expected_times_32 / gpu_times), file=sys.stderr) def compare(x, y): ratio = x / y @@ -287,15 +287,15 @@ def compare(x, y): # time and the real time, we consider this an error. return sum((ratio < 0.95) + (ratio > 1.05)) - print() + print(file=sys.stderr) if do_float64: err = compare(expected_times_64, float64_times) - print >> sys.stderr, 'speed_failure_float64=' + str(err) + print('speed_failure_float64=' + str(err), file=sys.stderr) if do_float32: err = compare(expected_times_32, float32_times) - print >> sys.stderr, 'speed_failure_float32=' + str(err) + print('speed_failure_float32=' + str(err), file=sys.stderr) if do_gpu: err = compare(expected_times_gpu, gpu_times) - print >> sys.stderr, 'speed_failure_gpu=' + str(err) + print('speed_failure_gpu=' + str(err), file=sys.stderr) assert not numpy.isnan(gpu_times).any() From ac029111f94c67c480746ebd23229af099fd2570 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Wed, 31 Aug 2016 11:18:18 -0400 Subject: [PATCH 47/90] unzip -f to avoid prompt in data download --- data/download.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/download.sh b/data/download.sh index 160b0986..67c5c057 100755 --- a/data/download.sh +++ b/data/download.sh @@ -15,8 +15,8 @@ fi $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist_py3k.pkl.gz -$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz && gunzip imdb.pkl.gz -$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.dict.pkl.gz && gunzip imdb.dict.pkl.gz +$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz && gunzip -f imdb.pkl.gz +$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.dict.pkl.gz && gunzip -f imdb.dict.pkl.gz $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/Nottingham.zip && unzip -u Nottingham.zip $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/midi.zip && unzip -u midi.zip -d ../code && echo "extracted Modified Python MIDI package (GPL)" $DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold0.pkl.gz From f6db4f12f191a421f7a0f948d68cce36290fb617 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Wed, 7 Sep 2016 10:25:51 -0400 Subject: [PATCH 48/90] change compiledir and add xunit for jenkins --- misc/do_nightly_build | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/misc/do_nightly_build b/misc/do_nightly_build index bd703f04..cafab51c 100755 --- a/misc/do_nightly_build +++ b/misc/do_nightly_build @@ -1,9 +1,15 @@ #!/bin/bash -#we set the compiledir to the /Tmp dir to make the test faster by bypassing the nfs network. 
+ +# If not jenkins, set workspace to local Tmp +if [ -v $WORKSPACE ]; then + WORKSPACE=/Tmp +fi + date -ROOT_CWD=/Tmp/nightly_build -COMPILEDIR=/Tmp/lisa_theano_compile_dir_deeplearning +ROOT_CWD=$WORKSPACE/nightly_build +COMPILEDIR=$WORKSPACE/lisa_theano_compile_dir_deeplearning NOSETESTS=${ROOT_CWD}/Theano/bin/theano-nose +XUNIT="--with-xunit --xunit-file=" FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR} export PYTHONPATH=${ROOT_CWD}/Theano:${ROOT_CWD}/Pylearn:$PYTHONPATH @@ -19,14 +25,17 @@ echo "git version:" `git rev-parse HEAD` #echo "executing nosetests with mode=FAST_COMPILE" #THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} echo "executing nosetests speed with mode=FAST_RUN" -THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} test.py:speed +FILE=${ROOT_CWD}/dlt_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} test.py:speed #echo "executing nosetests speed with mode=FAST_RUN and OMP_NUM_THREADS=2" #OMP_NUM_THREADS=2 THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} test.py:speed echo "executing nosetests with mode=FAST_RUN,floatX=float32" -THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} +FILE=${ROOT_CWD}/dlt_32bit_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} #we change the seed and record it everyday to test different combination. We record it to be able to reproduce bug caused by different seed. We don't want multiple test in DEBUG_MODE each day as this take too long. #seed=$RANDOM #echo "executing nosetests with mode=DEBUG_MODE with seed of the day $seed" -#THEANO_DEBUGMODE_CHECK_STRIDES=0 THEANO_DEBUGMODE_PATIENCE=3 THEANO_COMPILEDIR=/Tmp/lisa_theano_compile_dir_deeplearning THEANO_UNITTEST_SEED=$seed THEANO_DEFAULT_MODE=DEBUG_MODE ${NOSETESTS} +#FILE=${ROOT_CWD}/'dlt_debug_tests.xml' +#THEANO_DEBUGMODE_CHECK_STRIDES=0 THEANO_DEBUGMODE_PATIENCE=3 THEANO_COMPILEDIR=$WORKSPACE/lisa_theano_compile_dir_deeplearning THEANO_UNITTEST_SEED=$seed THEANO_DEFAULT_MODE=DEBUG_MODE ${NOSETESTS} ${XUNIT}${FILE} From a0362806a029f20d7ed920868ded79d1b388d741 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Wed, 7 Sep 2016 19:43:00 -0400 Subject: [PATCH 49/90] dtl compiledir --- misc/do_nightly_build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/do_nightly_build b/misc/do_nightly_build index cafab51c..a8ee32cf 100755 --- a/misc/do_nightly_build +++ b/misc/do_nightly_build @@ -7,7 +7,7 @@ fi date ROOT_CWD=$WORKSPACE/nightly_build -COMPILEDIR=$WORKSPACE/lisa_theano_compile_dir_deeplearning +COMPILEDIR=$WORKSPACE/compile/lisa_theano_compile_dir_deeplearning NOSETESTS=${ROOT_CWD}/Theano/bin/theano-nose XUNIT="--with-xunit --xunit-file=" From 31e194d4a844db9455cbb72a91b0e717084f84ed Mon Sep 17 00:00:00 2001 From: slefrancois Date: Fri, 9 Sep 2016 15:45:53 -0400 Subject: [PATCH 50/90] use TMPDIR for buildbot --- misc/do_nightly_build | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/misc/do_nightly_build b/misc/do_nightly_build index a8ee32cf..29281050 100755 --- a/misc/do_nightly_build +++ b/misc/do_nightly_build @@ -2,7 +2,10 @@ # If not jenkins, set workspace to local Tmp if [ -v $WORKSPACE ]; then - WORKSPACE=/Tmp + if [ -v $TMPDIR ]; then + TMPDIR=/tmp + fi + WORKSPACE=$TMPDIR fi date From 80b969171df5bb341788864a46e433aa06858ccb Mon Sep 17 00:00:00 2001 From: slefrancois Date: Mon, 12 Sep 2016 09:36:55 -0400 Subject: [PATCH 51/90] test file name to float32 --- misc/do_nightly_build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/misc/do_nightly_build b/misc/do_nightly_build index 29281050..ef2b8319 100755 --- a/misc/do_nightly_build +++ b/misc/do_nightly_build @@ -33,7 +33,7 @@ THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} test.py:speed #echo "executing nosetests speed with mode=FAST_RUN and OMP_NUM_THREADS=2" #OMP_NUM_THREADS=2 THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} test.py:speed echo "executing nosetests with mode=FAST_RUN,floatX=float32" -FILE=${ROOT_CWD}/dlt_32bit_tests.xml +FILE=${ROOT_CWD}/dlt_float32_tests.xml THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} #we change the seed and record it everyday to test different combination. We record it to be able to reproduce bug caused by different seed. We don't want multiple test in DEBUG_MODE each day as this take too long. From 793d6181bc70b45a5d7521131822c62d78d9a418 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Mon, 19 Sep 2016 12:06:05 -0400 Subject: [PATCH 52/90] add jenkins buildbot script --- .jenkins/jenkins_buildbot_dlt.sh | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 .jenkins/jenkins_buildbot_dlt.sh diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh new file mode 100755 index 00000000..0d2e49f2 --- /dev/null +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +BUILDBOT_DIR=$WORKSPACE/nightly_build +source $HOME/.bashrc + +mkdir -p ${BUILDBOT_DIR} + +date +COMPILEDIR=$WORKSPACE/compile/lisa_theano_compile_dir_deeplearning +NOSETESTS=${BUILDBOT_DIR}/Theano/bin/theano-nose +XUNIT="--with-xunit --xunit-file=" + +FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR} +export PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/Pylearn:$PYTHONPATH + +cd ${BUILDBOT_DIR} +if [ ! -d ${BUILDBOT_DIR}/Theano ]; then + git clone git://github.com/Theano/Theano.git +fi +# update repo +cd ${BUILDBOT_DIR}/Theano; git pull + +${WORKSPACE}/data/download.sh + +cd ${BUILDBOT_DIR}/Theano +echo "git version for Theano:" `git rev-parse HEAD` +cd ${WORKSPACE}/code +echo "git version:" `git rev-parse HEAD` + +echo "executing nosetests speed with mode=FAST_RUN" +FILE=${BUILDBOT_DIR}/dlt_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} test.py:speed +echo "executing nosetests with mode=FAST_RUN,floatX=float32" +FILE=${BUILDBOT_DIR}/dlt_float32_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} From 12fb33662170918912f473e75360434b4680c7b9 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Mon, 19 Sep 2016 14:33:42 -0400 Subject: [PATCH 53/90] midi --- .jenkins/jenkins_buildbot_dlt.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh index 0d2e49f2..2cd41d5e 100755 --- a/.jenkins/jenkins_buildbot_dlt.sh +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -20,7 +20,8 @@ fi # update repo cd ${BUILDBOT_DIR}/Theano; git pull -${WORKSPACE}/data/download.sh +cd ${WORKSPACE}/data +./download.sh cd ${BUILDBOT_DIR}/Theano echo "git version for Theano:" `git rev-parse HEAD` From 93c9a3642d8952f7816273cddfc55a5a9f64077b Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Mon, 3 Oct 2016 12:24:53 -0400 Subject: [PATCH 54/90] Use MRG_RandomStreams instead for shared_randomstreams for GPU compat. 
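
The MRG generator exposes the same calls these scripts rely on (e.g.
``binomial`` for the corruption masks, ``normal`` in hmc), so the swap
is meant as a drop-in replacement.  A minimal sketch of the idea
(illustrative only, not part of this patch; names and shapes are made
up):

    import theano
    # MRG_RandomStreams also runs on the GPU back-end, unlike
    # theano.tensor.shared_randomstreams.RandomStreams.
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    srng = RandomStreams(seed=123)
    # e.g. a dA-style corruption mask, same call as before:
    mask = srng.binomial(size=(2, 4), n=1, p=0.9,
                         dtype=theano.config.floatX)
    f = theano.function([], mask)
    print(f())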
--- code/SdA.py | 2 +- code/dA.py | 2 +- code/hmc/hmc.py | 2 +- code/rbm.py | 2 +- code/rnnrbm.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/code/SdA.py b/code/SdA.py index 3d9589ac..eb7b7357 100644 --- a/code/SdA.py +++ b/code/SdA.py @@ -40,7 +40,7 @@ import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from logistic_sgd import LogisticRegression, load_data from mlp import HiddenLayer diff --git a/code/dA.py b/code/dA.py index 0d9efa54..aad3d454 100644 --- a/code/dA.py +++ b/code/dA.py @@ -40,7 +40,7 @@ import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from logistic_sgd import load_data from utils import tile_raster_images diff --git a/code/hmc/hmc.py b/code/hmc/hmc.py index aeb49937..f16a50c1 100644 --- a/code/hmc/hmc.py +++ b/code/hmc/hmc.py @@ -358,7 +358,7 @@ def new_from_shared_positions( stepsize = sharedX(initial_stepsize, 'hmc_stepsize') avg_acceptance_rate = sharedX(target_acceptance_rate, 'avg_acceptance_rate') - s_rng = TT.shared_randomstreams.RandomStreams(seed) + s_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(seed) # define graph for an `n_steps` HMC simulation accept, final_pos = hmc_move( diff --git a/code/rbm.py b/code/rbm.py index 3800cca7..6e4f1012 100644 --- a/code/rbm.py +++ b/code/rbm.py @@ -20,7 +20,7 @@ import theano.tensor as T import os -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from utils import tile_raster_images from logistic_sgd import load_data diff --git a/code/rnnrbm.py b/code/rnnrbm.py index b8420b9b..900ffdc6 100644 --- a/code/rnnrbm.py +++ b/code/rnnrbm.py @@ -19,7 +19,7 @@ from midi.utils import midiread, midiwrite import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams #Don't use a python long as this don't work on 32 bits computers. 
numpy.random.seed(0xbeef) From 4f251cd72dac2754c173c0a850f215b73fdb19f5 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Thu, 6 Oct 2016 12:01:01 -0400 Subject: [PATCH 55/90] add testsuites names --- .jenkins/jenkins_buildbot_dlt.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh index 2cd41d5e..a4e4b1e4 100755 --- a/.jenkins/jenkins_buildbot_dlt.sh +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -9,6 +9,8 @@ date COMPILEDIR=$WORKSPACE/compile/lisa_theano_compile_dir_deeplearning NOSETESTS=${BUILDBOT_DIR}/Theano/bin/theano-nose XUNIT="--with-xunit --xunit-file=" +# name test suites +SUITE="--xunit-prefix-with-testsuite-name --xunit-testsuite-name=" FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR} export PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/Pylearn:$PYTHONPATH @@ -29,8 +31,10 @@ cd ${WORKSPACE}/code echo "git version:" `git rev-parse HEAD` echo "executing nosetests speed with mode=FAST_RUN" -FILE=${BUILDBOT_DIR}/dlt_tests.xml -THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} test.py:speed +NAME=dlt_speed +FILE=${BUILDBOT_DIR}/${NAME}_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} ${SUITE}${NAME} test.py:speed echo "executing nosetests with mode=FAST_RUN,floatX=float32" -FILE=${BUILDBOT_DIR}/dlt_float32_tests.xml -THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} +NAME=dlt_float32 +FILE=${BUILDBOT_DIR}/${NAME}_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} ${SUITE}${NAME} From 82c85e2a6f74a92736c2afdd805710dedfcc4f4f Mon Sep 17 00:00:00 2001 From: slefrancois Date: Thu, 6 Oct 2016 14:36:46 -0400 Subject: [PATCH 56/90] Add JUnit writer for speed tests, remove hardcoded reference times --- code/test.py | 117 +++++++++++++-------------------------------------- 1 file changed, 29 insertions(+), 88 deletions(-) diff --git a/code/test.py b/code/test.py index 4332e8b0..60c0af02 100644 --- a/code/test.py +++ b/code/test.py @@ -98,43 +98,7 @@ def speed(): do_gpu = True algo_executed = [s for idx, s in enumerate(algo) if to_exec[idx]] - #Timming expected are from the buildbot that have an i7-920 @ - # 2.67GHz with hyperthread enabled for the cpu, 12G of ram. An GeForce GTX - # 580 for the GPU. OS=Fedora 14, gcc=4.5.1, python/BLAS from EPD - # 7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread. - - expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4, - 346.9, 355.0, 268.2, 115.8, 16.8, 91.6]) - expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 63.1, 71, - 191.2, 199.0, 201.9, 107, 12.6, 61.3]) - - # Number with just 1 decimal are new value that are faster with - # the Theano version 0.5rc2 Other number are older. They are not - # updated, as we where faster in the past! - # TODO: find why and fix this! 
- -# Here is the value for the buildbot on February 3th 2012 with a GTX 285 -# sgd, cg mlp conv da -# sda dbn rbm -# gpu times[3.72957802, 9.94316864, 29.1772666, 9.13857198, 25.91144657, -# 18.30802011, 53.38651466, 285.41386175] -# expected [3.076634879, 7.555234910, 18.99226785, 9.58915591, 24.130070450, -# 24.77524018, 92.66246653, 322.340329170] -# sgd, cg mlp conv da -# sda dbn rbm -#expected/get [0.82492841, 0.75984178, 0.65092691, 1.04930573, 0.93125138 -# 1.35324519 1.7356905 1.12937868] - - expected_times_gpu = numpy.asarray([2.9, 7.55523491, 18.99226785, - 5.8, 19.2, - 11.2, 7.3, 122, 112.5, 31.1, 8.3]) - expected_times_64 = [s for idx, s in enumerate(expected_times_64) - if to_exec[idx]] - expected_times_32 = [s for idx, s in enumerate(expected_times_32) - if to_exec[idx]] - expected_times_gpu = [s for idx, s in enumerate(expected_times_gpu) - if to_exec[idx]] - + def time_test(m, l, idx, f, **kwargs): if not to_exec[idx]: return @@ -196,9 +160,6 @@ def do_tests(): float64_times = do_tests() print(algo_executed, file=sys.stderr) print('float64 times', float64_times, file=sys.stderr) - print('float64 expected', expected_times_64, file=sys.stderr) - print('float64 % expected/get', ( - expected_times_64 / float64_times), file=sys.stderr) #test in float32 in FAST_RUN mode on the cpu theano.config.floatX = 'float32' @@ -206,9 +167,6 @@ def do_tests(): float32_times = do_tests() print(algo_executed, file=sys.stderr) print('float32 times', float32_times, file=sys.stderr) - print('float32 expected', expected_times_32, file=sys.stderr) - print('float32 % expected/get', ( - expected_times_32 / float32_times), file=sys.stderr) if do_float64: print('float64/float32', ( @@ -218,18 +176,10 @@ def do_tests(): 'in one place'), file=sys.stderr) print(algo_executed, file=sys.stderr) print('float64 times', float64_times, file=sys.stderr) - print('float64 expected', expected_times_64, file=sys.stderr) - print('float64 % expected/get', ( - expected_times_64 / float64_times), file=sys.stderr) print('float32 times', float32_times, file=sys.stderr) - print('float32 expected', expected_times_32, file=sys.stderr) - print('float32 % expected/get', ( - expected_times_32 / float32_times), file=sys.stderr) print('float64/float32', ( float64_times / float32_times), file=sys.stderr) - print('expected float64/float32', ( - expected_times_64 / float32_times), file=sys.stderr) #test in float32 in FAST_RUN mode on the gpu import theano.sandbox.cuda @@ -238,9 +188,6 @@ def do_tests(): gpu_times = do_tests() print(algo_executed, file=sys.stderr) print('gpu times', gpu_times, file=sys.stderr) - print('gpu expected', expected_times_gpu, file=sys.stderr) - print('gpu % expected/get', ( - expected_times_gpu / gpu_times), file=sys.stderr) if do_float64: print('float64/gpu', float64_times / gpu_times, file=sys.stderr) @@ -252,50 +199,44 @@ def do_tests(): print(algo_executed, file=sys.stderr) if do_float64: print('float64 times', float64_times, file=sys.stderr) - print('float64 expected', expected_times_64, file=sys.stderr) - print('float64 % expected/get', ( - expected_times_64 / float64_times), file=sys.stderr) if do_float32: print('float32 times', float32_times, file=sys.stderr) - print('float32 expected', expected_times_32, file=sys.stderr) - print('float32 % expected/get', ( - expected_times_32 / float32_times), file=sys.stderr) if do_gpu: print('gpu times', gpu_times, file=sys.stderr) - print('gpu expected', expected_times_gpu, file=sys.stderr) - print('gpu % expected/get', ( - expected_times_gpu / gpu_times), 
file=sys.stderr)

     print()
     if do_float64 and do_float32:
         print('float64/float32', (
             float64_times / float32_times), file=sys.stderr)
-        print('expected float64/float32', (
-            expected_times_64 / float32_times), file=sys.stderr)
     if do_float64 and do_gpu:
         print('float64/gpu', float64_times / gpu_times, file=sys.stderr)
-        print('expected float64/gpu', (
-            expected_times_64 / gpu_times), file=sys.stderr)
     if do_float32 and do_gpu:
         print('float32/gpu', float32_times / gpu_times, file=sys.stderr)
-        print('expected float32/gpu', (
-            expected_times_32 / gpu_times), file=sys.stderr)
-
-    def compare(x, y):
-        ratio = x / y
-        # If there is more then 5% difference between the expected
-        # time and the real time, we consider this an error.
-        return sum((ratio < 0.95) + (ratio > 1.05))
-
-    print(file=sys.stderr)
-    if do_float64:
-        err = compare(expected_times_64, float64_times)
-        print('speed_failure_float64=' + str(err), file=sys.stderr)
-    if do_float32:
-        err = compare(expected_times_32, float32_times)
-        print('speed_failure_float32=' + str(err), file=sys.stderr)
-    if do_gpu:
-        err = compare(expected_times_gpu, gpu_times)
-        print('speed_failure_gpu=' + str(err), file=sys.stderr)
-
-    assert not numpy.isnan(gpu_times).any()
+
+    # Write JUnit xml for speed test performance report
+
+    speed_file = 'speedtests_time.xml'
+
+    # Define speed test file write method
+    def write_junit(filename, algos, times, label):
+        with open(filename, 'a') as f:
+            for algo, time in zip(algos, times):
+                f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
+                        .format(label=label, algo=algo, time=time))
+                f.write('    </testcase>\n')
+
+    test_total = numpy.size(float64_times) \
+        + numpy.size(float32_times) \
+        + numpy.size(gpu_times)
+
+    with open(speed_file, 'w') as f:
+        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+        f.write('<testsuite name="dlt_speedtests" tests="{ntests}">\n'
+                .format(ntests=numpy.size(test_total)))
+
+    write_junit(speed_file, algo_executed, float64_times, label='float64')
+    write_junit(speed_file, algo_executed, float32_times, label='float32')
+    write_junit(speed_file, algo_executed, gpu_times, label='gpu')
+
+    with open(speed_file, 'a') as f:
+        f.write('</testsuite>\n')

From 9918b7a9d377af71ac1323187913861651b26ce8 Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Fri, 7 Oct 2016 09:04:14 -0400
Subject: [PATCH 57/90] remove testsuite prefix option

---
 .jenkins/jenkins_buildbot_dlt.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh
index a4e4b1e4..846cf7fc 100755
--- a/.jenkins/jenkins_buildbot_dlt.sh
+++ b/.jenkins/jenkins_buildbot_dlt.sh
@@ -10,7 +10,7 @@ COMPILEDIR=$WORKSPACE/compile/lisa_theano_compile_dir_deeplearning
 NOSETESTS=${BUILDBOT_DIR}/Theano/bin/theano-nose
 XUNIT="--with-xunit --xunit-file="
 # name test suites
-SUITE="--xunit-prefix-with-testsuite-name --xunit-testsuite-name="
+SUITE="--xunit-testsuite-name="

 FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR}
 export PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/Pylearn:$PYTHONPATH

From f14107d31d5cb05f192129a95d3f272acf4dbc09 Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Fri, 7 Oct 2016 17:28:37 -0400
Subject: [PATCH 58/90] single performance file open; only access times
 variables if tests ran

---
 code/test.py | 48 +++++++++++++++++++++++-------------------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/code/test.py b/code/test.py
index 60c0af02..22a59655 100644
--- a/code/test.py
+++ b/code/test.py
@@ -152,12 +152,18 @@ def do_tests():
                 saveto='')
     return numpy.asarray(l)

+    # Initialize test count and results dictionary
+    test_total = 0
+    times_dic = {}
+
     #test in
float64 in FAST_RUN mode on the cpu
     import theano
     if do_float64:
         theano.config.floatX = 'float64'
         theano.config.mode = 'FAST_RUN'
         float64_times = do_tests()
+        times_dic['float64'] = float64_times
+        test_total += numpy.size(float64_times)
         print(algo_executed, file=sys.stderr)
         print('float64 times', float64_times, file=sys.stderr)

@@ -165,6 +171,8 @@ def do_tests():
     theano.config.floatX = 'float32'
     if do_float32:
         float32_times = do_tests()
+        times_dic['float32'] = float32_times
+        test_total += numpy.size(float32_times)
         print(algo_executed, file=sys.stderr)
         print('float32 times', float32_times, file=sys.stderr)

@@ -186,6 +194,8 @@ def do_tests():
     if do_gpu:
         theano.sandbox.cuda.use('gpu')
         gpu_times = do_tests()
+        times_dic['gpu'] = gpu_times
+        test_total += numpy.size(gpu_times)
         print(algo_executed, file=sys.stderr)
         print('gpu times', gpu_times, file=sys.stderr)

@@ -213,30 +223,18 @@ def do_tests():
     if do_float32 and do_gpu:
         print('float32/gpu', float32_times / gpu_times, file=sys.stderr)

-    # Write JUnit xml for speed test performance report
-
-    speed_file = 'speedtests_time.xml'
-
-    # Define speed test file write method
-    def write_junit(filename, algos, times, label):
-        with open(filename, 'a') as f:
-            for algo, time in zip(algos, times):
-                f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
-                        .format(label=label, algo=algo, time=time))
-            f.write('    </testcase>\n')
-
-    test_total = numpy.size(float64_times) \
-        + numpy.size(float32_times) \
-        + numpy.size(gpu_times)
-
-    with open(speed_file, 'w') as f:
+    # Generate JUnit performance report
+    # Define speedtest file write method
+    def write_junit(f, algos, times, label):
+        for algo, time in zip(algos, times):
+            f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
+                    .format(label=label, algo=algo, time=time))
+            f.write('    </testcase>\n')
+
+    with open('speedtests_time.xml', 'w') as f:
         f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
-        f.write('<testsuite name="dlt_speedtests" tests="{ntests}">\n'
-            .format(ntests=numpy.size(test_total)))
-
-    write_junit(speed_file, algo_executed, float64_times, label='float64')
-    write_junit(speed_file, algo_executed, float32_times, label='float32')
-    write_junit(speed_file, algo_executed, gpu_times, label='gpu')
-
-    with open(speed_file, 'a') as f:
+        f.write('<testsuite name="dlt_speedtests" tests="{ntests}">\n'
+                .format(ntests=test_total))
+        for label, times in times_dic.items():
+            write_junit(f, algo_executed, times, label)
         f.write('</testsuite>\n')

From f724c2c6054c736c548196d2a7a000ec307e0b0d Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Fri, 7 Oct 2016 17:36:44 -0400
Subject: [PATCH 59/90] move assert gpu_times not nan

---
 code/test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/code/test.py b/code/test.py
index 22a59655..b3077b7c 100644
--- a/code/test.py
+++ b/code/test.py
@@ -238,3 +238,6 @@ def write_junit(f, algos, times, label):
         for label, times in times_dic.items():
             write_junit(f, algo_executed, times, label)
         f.write('</testsuite>\n')
+
+    if do_gpu:
+        assert not numpy.isnan(gpu_times).any()

From 85f56c22ebf46e260e38215d32e7f893e95fdcc3 Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Thu, 13 Oct 2016 13:14:11 -0400
Subject: [PATCH 60/90] add explicit CUDA path to buildbot

---
 .jenkins/jenkins_buildbot_dlt.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh
index 846cf7fc..283eb933 100755
--- a/.jenkins/jenkins_buildbot_dlt.sh
+++ b/.jenkins/jenkins_buildbot_dlt.sh
@@ -1,7 +1,11 @@
 #!/bin/bash

+# CUDA
+export PATH=/usr/local/cuda/bin:$PATH
+export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+export LIBRARY_PATH=/usr/local/cuda/lib64:$LIBRARY_PATH
+
 BUILDBOT_DIR=$WORKSPACE/nightly_build
-source $HOME/.bashrc

 mkdir -p ${BUILDBOT_DIR}

From
d4035919fe2342ba83f104e34d13a8962203c1e6 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Thu, 20 Oct 2016 14:22:16 -0400 Subject: [PATCH 61/90] Compute mean in higher precision to avoid overflow. --- code/DBN.py | 6 +++--- code/dA.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/code/DBN.py b/code/DBN.py index 3b2bd230..e1bb66df 100644 --- a/code/DBN.py +++ b/code/DBN.py @@ -340,7 +340,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ') - print(numpy.mean(c)) + print(numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() # end-snippet-2 @@ -391,7 +391,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() - this_validation_loss = numpy.mean(validation_losses) + this_validation_loss = numpy.mean(validation_losses, dtype='float64') print('epoch %i, minibatch %i/%i, validation error %f %%' % ( epoch, minibatch_index + 1, @@ -414,7 +414,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, # test it on the test set test_losses = test_model() - test_score = numpy.mean(test_losses) + test_score = numpy.mean(test_losses, dtype='float64') print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, diff --git a/code/dA.py b/code/dA.py index aad3d454..93a696f8 100644 --- a/code/dA.py +++ b/code/dA.py @@ -336,7 +336,7 @@ def test_dA(learning_rate=0.1, training_epochs=15, for batch_index in range(n_train_batches): c.append(train_da(batch_index)) - print('Training epoch %d, cost ' % epoch, numpy.mean(c)) + print('Training epoch %d, cost ' % epoch, numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() @@ -394,7 +394,7 @@ def test_dA(learning_rate=0.1, training_epochs=15, for batch_index in range(n_train_batches): c.append(train_da(batch_index)) - print('Training epoch %d, cost ' % epoch, numpy.mean(c)) + print('Training epoch %d, cost ' % epoch, numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() From 5a13d9869587a84018b939f83f5fd85293c9a8a1 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Thu, 20 Oct 2016 16:18:46 -0400 Subject: [PATCH 62/90] Fix import of sandbox. --- code/hmc/hmc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/code/hmc/hmc.py b/code/hmc/hmc.py index f16a50c1..cf4d20a1 100644 --- a/code/hmc/hmc.py +++ b/code/hmc/hmc.py @@ -7,6 +7,7 @@ from theano import function, shared from theano import tensor as TT import theano +import theano.sandbox.rng_mrg sharedX = (lambda X, name: shared(numpy.asarray(X, dtype=theano.config.floatX), name=name)) @@ -275,14 +276,14 @@ def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept, """ - ## POSITION UPDATES ## + # POSITION UPDATES # # broadcast `accept` scalar to tensor with the same dimensions as # final_pos. accept_matrix = accept.dimshuffle(0, *(('x',) * (final_pos.ndim - 1))) # if accept is True, update to `final_pos` else stay put new_positions = TT.switch(accept_matrix, final_pos, positions) # end-snippet-5 start-snippet-7 - ## STEPSIZE UPDATES ## + # STEPSIZE UPDATES # # if acceptance rate is too low, our sampler is too "noisy" and we reduce # the stepsize. If it is too high, our sampler is too conservative, we can # get away with a larger stepsize (resulting in better mixing). 
@@ -292,7 +293,7 @@ def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept,
     new_stepsize = TT.clip(_new_stepsize, stepsize_min, stepsize_max)

     # end-snippet-7 start-snippet-6
-    ## ACCEPT RATE UPDATES ##
+    # ACCEPT RATE UPDATES #
     # perform exponential moving average
     mean_dtype = theano.scalar.upcast(accept.dtype, avg_acceptance_rate.dtype)
     new_acceptance_rate = TT.add(

From 93837e03aeeff6917d2b3a121e05341b663fa890 Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron
Date: Mon, 24 Oct 2016 16:33:18 -0400
Subject: [PATCH 63/90] Fix printout in lstm.py.

---
 code/lstm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/lstm.py b/code/lstm.py
index 1c285928..a3010a9f 100644
--- a/code/lstm.py
+++ b/code/lstm.py
@@ -605,8 +605,8 @@ def train_lstm(
                     best_p = unzip(tparams)
                     bad_counter = 0

-                print( ('Train ', train_err, 'Valid ', valid_err,
-                       'Test ', test_err) )
+                print('Train ', train_err, 'Valid ', valid_err,
+                      'Test ', test_err)

                 if (len(history_errs) > patience and
                     valid_err >= numpy.array(history_errs)[:-patience,

From 780cecc9abbe6181e8fe37f9bda386bdc01fe2ec Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron
Date: Wed, 26 Oct 2016 14:46:32 -0400
Subject: [PATCH 64/90] Adjust mean dtypes for scores in SdA too.

---
 code/SdA.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/code/SdA.py b/code/SdA.py
index eb7b7357..8da74797 100644
--- a/code/SdA.py
+++ b/code/SdA.py
@@ -394,7 +394,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
                 c.append(pretraining_fns[i](index=batch_index,
                          corruption=corruption_levels[i],
                          lr=pretrain_lr))
-            print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c)))
+            print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c, dtype='float64')))

     end_time = timeit.default_timer()

@@ -442,7 +442,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
             if (iter + 1) % validation_frequency == 0:
                 validation_losses = validate_model()
-                this_validation_loss = numpy.mean(validation_losses)
+                this_validation_loss = numpy.mean(validation_losses, dtype='float64')
                 print('epoch %i, minibatch %i/%i, validation error %f %%' %
                       (epoch, minibatch_index + 1, n_train_batches,
                        this_validation_loss * 100.))
@@ -463,7 +463,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
                     # test it on the test set
                     test_losses = test_model()
-                    test_score = numpy.mean(test_losses)
+                    test_score = numpy.mean(test_losses, dtype='float64')
                     print(('     epoch %i, minibatch %i/%i, test error of '
                            'best model %f %%') %
                           (epoch, minibatch_index + 1, n_train_batches,

From cd462eccb4f351cec6915c4294b0197fd2aa51d9 Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Thu, 3 Nov 2016 15:20:16 -0400
Subject: [PATCH 65/90] split performance report file

---
 code/test.py | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/code/test.py b/code/test.py
index b3077b7c..c2ad68bc 100644
--- a/code/test.py
+++ b/code/test.py
@@ -224,20 +224,16 @@ def do_tests():
         print('float32/gpu', float32_times / gpu_times, file=sys.stderr)

     # Generate JUnit performance report
-    # Define speedtest file write method
-    def write_junit(f, algos, times, label):
-        for algo, time in zip(algos, times):
-            f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
-                    .format(label=label, algo=algo, time=time))
-            f.write('    </testcase>\n')
-
-    with open('speedtests_time.xml', 'w') as f:
-        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
-        f.write('<testsuite name="dlt_speedtests" tests="{ntests}">\n'
-                .format(ntests=test_total))
-        for label, times in
times_dic.items():
-            write_junit(f, algo_executed, times, label)
-        f.write('</testsuite>\n')
+    for label, times in times_dic.items():
+        with open('speedtests_{label}.xml'.format(label=label), 'w') as f:
+            f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+            f.write('<testsuite name="dlt_speedtests_{label}" tests="{ntests}">\n'
+                    .format(label=label, ntests=test_total/len(times_dic)))
+            for algo, time in zip(algo_executed, times):
+                f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
+                        .format(label=label, algo=algo, time=time))
+                f.write('    </testcase>\n')
+            f.write('</testsuite>\n')

From fd5cb65460df2dee9cfa250e3e7fbc864720bd86 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Fri, 13 Jan 2017 15:20:23 -0500
Subject: [PATCH 66/90] Do the speed test on the new gpu back-end.

---
 code/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/test.py b/code/test.py
index c2ad68bc..5053b8c4 100644
--- a/code/test.py
+++ b/code/test.py
@@ -190,9 +190,9 @@ def do_tests():
             float64_times / float32_times), file=sys.stderr)

     #test in float32 in FAST_RUN mode on the gpu
-    import theano.sandbox.cuda
+    import theano.gpuarray
     if do_gpu:
-        theano.sandbox.cuda.use('gpu')
+        theano.gpuarray.use('cuda')
         gpu_times = do_tests()
         times_dic['gpu'] = gpu_times
         test_total += numpy.size(gpu_times)

From e481d33b2492e37274c2db8389f3b5452767dd68 Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Mon, 16 Jan 2017 10:05:22 -0500
Subject: [PATCH 67/90] install libgpuarray for dlt speed tests

---
 .jenkins/jenkins_buildbot_dlt.sh | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh
index 283eb933..243cd4ef 100755
--- a/.jenkins/jenkins_buildbot_dlt.sh
+++ b/.jenkins/jenkins_buildbot_dlt.sh
@@ -19,6 +19,38 @@ SUITE="--xunit-testsuite-name="
 FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR}
 export PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/Pylearn:$PYTHONPATH

+# Install libgpuarray and pygpu
+cd ${BUILDBOT_DIR}
+
+# Make fresh clone (with no history since we don't need it)
+rm -rf libgpuarray
+git clone --depth 1 "https://github.com/Theano/libgpuarray.git"
+
+(cd libgpuarray && echo "libgpuarray commit" && git rev-parse HEAD)
+
+# Clean up previous installs (to make sure no old files are left)
+rm -rf local
+mkdir local
+
+# Build libgpuarray and run C tests
+mkdir libgpuarray/build
+(cd libgpuarray/build && cmake .. -DCMAKE_BUILD_TYPE=${GPUARRAY_CONFIG} -DCMAKE_INSTALL_PREFIX=${BUILDBOT_DIR}/local && make)
+
+# Finally install
+(cd libgpuarray/build && make install)
+export LD_LIBRARY_PATH=${BUILDBOT_DIR}/local/lib:${LD_LIBRARY_PATH}
+export LIBRARY_PATH=${BUILDBOT_DIR}/local/lib:${LIBRARY_PATH}
+export CPATH=${BUILDBOT_DIR}/local/include:${CPATH}
+
+# Build the pygpu modules
+(cd libgpuarray && python setup.py build_ext --inplace -I${BUILDBOT_DIR}/local/include -L${BUILDBOT_DIR}/local/lib)
+
+mkdir ${BUILDBOT_DIR}/local/lib/python
+export PYTHONPATH=${PYTHONPATH}:${BUILDBOT_DIR}/local/lib/python
+# Then install
+(cd libgpuarray && python setup.py install --home=${BUILDBOT_DIR}/local)
+
+# Install Theano
 cd ${BUILDBOT_DIR}
 if [ !
-d ${BUILDBOT_DIR}/Theano ]; then
     git clone git://github.com/Theano/Theano.git

From 73e621d37ae6bb7f0747e831822f39435e61bab1 Mon Sep 17 00:00:00 2001
From: Simon Lefrancois
Date: Tue, 18 Apr 2017 09:59:43 -0400
Subject: [PATCH 68/90] move speedtest cache outside workspace

---
 .DS_Store                        | Bin 0 -> 6148 bytes
 .jenkins/jenkins_buildbot_dlt.sh |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..3cd979e05c0d9c2d21079f88c5fedc75d991437e
GIT binary patch
literal 6148
zcmeHKF=_)r43v^93~5}Z+%Mz@i*a7y57@*ZO|Zcvsjter{4~!
Date: Wed, 19 Apr 2017 14:52:56 -0400
Subject: [PATCH 69/90] add label to speedtest class

---
 code/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/test.py b/code/test.py
index 5053b8c4..8768d8c1 100644
--- a/code/test.py
+++ b/code/test.py
@@ -230,7 +230,7 @@ def do_tests():
             f.write('<testsuite name="dlt_speedtests_{label}" tests="{ntests}">\n'
                     .format(label=label, ntests=test_total/len(times_dic)))
             for algo, time in zip(algo_executed, times):
-                f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
+                f.write('    <testcase classname="speedtests_{label}" name="speedtest_{algo}" time="{time}">\n'
                         .format(label=label, algo=algo, time=time))
                 f.write('    </testcase>\n')
             f.write('</testsuite>\n')

From e7b2dc866d8a460cf5e0f20805fa4155649f1840 Mon Sep 17 00:00:00 2001
From: Simon Lefrancois
Date: Fri, 28 Apr 2017 14:10:43 -0400
Subject: [PATCH 70/90] buildbot includes theano.gpuarray

---
 .DS_Store                        | Bin 6148 -> 0 bytes
 .jenkins/jenkins_buildbot_dlt.sh |  10 ++++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)
 delete mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 3cd979e05c0d9c2d21079f88c5fedc75d991437e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKF=_)r43v^93~5}Z+%Mz@i*a7y57@*ZO|Zcvsjter{4~!
Date: Tue, 6 Jun 2017 14:56:35 -0400
Subject: [PATCH 71/90] fix typos/spelling

---
 doc/gettingstarted.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt
index d765f14a..85111d11 100644
--- a/doc/gettingstarted.txt
+++ b/doc/gettingstarted.txt
@@ -147,7 +147,7 @@ MNIST Dataset
     The data has to be stored as floats on the GPU ( the right
     ``dtype`` for storing on the GPU is given by ``theano.config.floatX``).
-To get around this shortcomming for the labels, we store them as float,
+To get around this shortcoming for the labels, we store them as float,
 and then cast it to int.

 .. note::
@@ -316,7 +316,7 @@ The likelihood of the correct class is not the same as the
 number of right predictions, but from the point of view of a randomly
 initialized classifier they are pretty similar.
 Remember that likelihood and zero-one loss are different objectives;
-you should see that they are corralated on the validation set but
+you should see that they are correlated on the validation set but
 sometimes one will rise while the other falls, or vice-versa.

 Since we usually speak in terms of minimizing a loss function, learning will
@@ -421,7 +421,7 @@ but this choice is almost arbitrary (though harmless).
       because it controls the number of updates done to your parameters. Training the same model
       for 10 epochs using a batch size of 1 yields completely different results compared
      to training for the same 10 epochs but with a batchsize of 20. Keep this in mind when
-      switching between batch sizes and be prepared to tweak all the other parameters acording
+      switching between batch sizes and be prepared to tweak all the other parameters according
       to the batch size used.
All code-blocks above show pseudocode of how the algorithm looks like. Implementing such From 8819681562c539054c97097f6100d1a69bcbe75d Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 14:59:57 -0400 Subject: [PATCH 72/90] remove extra space --- doc/gettingstarted.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index 85111d11..06e2e88e 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -85,7 +85,7 @@ MNIST Dataset variables and access it based on the minibatch index, given a fixed and known batch size. The reason behind shared variables is related to using the GPU. There is a large overhead when copying data - into the GPU memory. If you would copy data on request ( each minibatch + into the GPU memory. If you would copy data on request (each minibatch individually when needed) as the code will do if you do not use shared variables, due to this overhead, the GPU code will not be much faster then the CPU code (maybe even slower). If you have your data in From 59667bd502e4ee05a5221293e4c2370bb065be52 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:01:42 -0400 Subject: [PATCH 73/90] remove extra space --- doc/gettingstarted.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index 06e2e88e..256ee07d 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -286,7 +286,7 @@ In this tutorial, :math:`f` is defined as: f(x) = {\rm argmax}_k P(Y=k | x, \theta) -In python, using Theano this can be written as : +In python, using Theano this can be written as: .. code-block:: python From 37048765dadf7146c3aafc4994cf8721cb7518b3 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:06:50 -0400 Subject: [PATCH 74/90] remove more spaces --- doc/gettingstarted.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index 256ee07d..0019c3c6 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -331,7 +331,7 @@ The NLL of our classifier is a differentiable surrogate for the zero-one loss, and we use the gradient of this function over our training data as a supervised learning signal for deep learning of a classifier. -This can be computed using the following line of code : +This can be computed using the following line of code: .. code-block:: python @@ -357,7 +357,7 @@ algorithm in which we repeatedly make small steps downward on an error surface defined by a loss function of some parameters. For the purpose of ordinary gradient descent we consider that the training data is rolled into the loss function. Then the pseudocode of this -algorithm can be described as : +algorithm can be described as: .. code-block:: python @@ -425,7 +425,7 @@ but this choice is almost arbitrary (though harmless). to the batch size used. All code-blocks above show pseudocode of how the algorithm looks like. Implementing such -algorithm in Theano can be done as follows : +algorithm in Theano can be done as follows: .. 
code-block:: python From f78ba92c513edc177f1ff88eb34fb4a78310e652 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:14:38 -0400 Subject: [PATCH 75/90] extra space in logreg --- doc/logreg.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/logreg.txt b/doc/logreg.txt index c2979e63..b582acd4 100644 --- a/doc/logreg.txt +++ b/doc/logreg.txt @@ -246,7 +246,7 @@ within the DeepLearningTutorials folder: python code/logistic_sgd.py -The output one should expect is of the form : +The output one should expect is of the form: .. code-block:: bash From 1867a4e5a3f10730a6a844a91dc425962ab94fa5 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:16:05 -0400 Subject: [PATCH 76/90] remove spaces in mlp page --- doc/mlp.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/mlp.txt b/doc/mlp.txt index 2a74aaad..0ecc7a89 100644 --- a/doc/mlp.txt +++ b/doc/mlp.txt @@ -178,13 +178,13 @@ The code below shows how this can be done, in a way which is analogous to our pr .. literalinclude:: ../code/mlp.py -The user can then run the code by calling : +The user can then run the code by calling: .. code-block:: bash python code/mlp.py -The output one should expect is of the form : +The output one should expect is of the form: .. code-block:: bash From d8294003cff53ea56f1d34c574f708e35ab63085 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:19:26 -0400 Subject: [PATCH 77/90] remove spaces in dA page --- doc/dA.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/dA.txt b/doc/dA.txt index 8ff26354..dd05acdf 100644 --- a/doc/dA.txt +++ b/doc/dA.txt @@ -6,7 +6,7 @@ Denoising Autoencoders (dA) .. note:: This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp`. Additionally it uses the following Theano functions - and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. + and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh @@ -126,7 +126,7 @@ signal: :pyobject: dA.get_reconstructed_input And using these functions we can compute the cost and the updates of -one stochastic gradient descent step : +one stochastic gradient descent step: .. literalinclude:: ../code/dA.py :pyobject: dA.get_cost_updates @@ -209,7 +209,7 @@ need to do is to add a stochastic corruption step operating on the input. The in corrupted in many ways, but in this tutorial we will stick to the original corruption mechanism of randomly masking entries of the input by making them zero. The code below -does just that : +does just that: .. literalinclude:: ../code/dA.py :pyobject: dA.get_corrupted_input @@ -221,7 +221,7 @@ For this reason, the constructor of the ``dA`` also gets Theano variables pointing to the shared parameters. If those parameters are left to ``None``, new ones will be constructed. -The final denoising autoencoder class becomes : +The final denoising autoencoder class becomes: .. literalinclude:: ../code/dA.py :pyobject: dA @@ -254,7 +254,7 @@ constant (weights are converted to values between 0 and 1). To plot our filters we will need the help of ``tile_raster_images`` (see :ref:`how-to-plot`) so we urge the reader to study it. 
Also using the help of the Python Image Library, the following lines of code will -save the filters as an image : +save the filters as an image: .. literalinclude:: ../code/dA.py :start-after: start-snippet-4 @@ -264,20 +264,20 @@ save the filters as an image : Running the Code ++++++++++++++++ -To run the code : +To run the code: .. code-block:: bash python dA.py -The resulted filters when we do not use any noise are : +The resulted filters when we do not use any noise are: .. figure:: images/filters_corruption_0.png :align: center -The filters for 30 percent noise : +The filters for 30 percent noise: .. figure:: images/filters_corruption_30.png From 738b641bacd23511d0efdc87e9494f2ec8c1426e Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:21:31 -0400 Subject: [PATCH 78/90] remove space in rbm page --- doc/SdA.txt | 2 +- doc/rbm.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/SdA.txt b/doc/SdA.txt index 289a8b0a..6d9ba0da 100644 --- a/doc/SdA.txt +++ b/doc/SdA.txt @@ -6,7 +6,7 @@ Stacked Denoising Autoencoders (SdA) .. note:: This section assumes you have already read through :doc:`logreg` and :doc:`mlp`. Additionally it uses the following Theano functions - and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. + and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh diff --git a/doc/rbm.txt b/doc/rbm.txt index a8079012..7a052cc6 100644 --- a/doc/rbm.txt +++ b/doc/rbm.txt @@ -7,7 +7,7 @@ Restricted Boltzmann Machines (RBM) .. note:: This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp`. Additionally it uses the following Theano functions - and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_ and `scan`_. If you intend to run the code on GPU also read `GPU`_. + and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_ and `scan`_. If you intend to run the code on GPU also read `GPU`_. .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh @@ -573,7 +573,7 @@ The output was the following: ... plotting sample 8 ... plotting sample 9 -The pictures below show the filters after 15 epochs : +The pictures below show the filters after 15 epochs: .. figure:: images/filters_at_epoch_14.png :align: center From ec4855a6a5eabdb5fdd0e8daf69218a21b2e5c17 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:23:30 -0400 Subject: [PATCH 79/90] spaces on DBN page --- doc/DBN.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/DBN.txt b/doc/DBN.txt index bb0571eb..be7bfbdc 100644 --- a/doc/DBN.txt +++ b/doc/DBN.txt @@ -6,7 +6,7 @@ Deep Belief Networks .. note:: This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp` and :doc:`rbm`. Additionally it uses the following Theano - functions and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic + functions and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. @@ -210,7 +210,7 @@ obtained over these sets. 
Putting it all together +++++++++++++++++++++++ -The few lines of code below constructs the deep belief network : +The few lines of code below constructs the deep belief network: .. literalinclude:: ../code/DBN.py :start-after: # numpy random generator From 85962ee63ae990e267e0875517de153e47cf777a Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:51:15 -0400 Subject: [PATCH 80/90] spaces on lstm page --- doc/lstm.txt | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/lstm.txt b/doc/lstm.txt index bde70bd8..aec230ab 100644 --- a/doc/lstm.txt +++ b/doc/lstm.txt @@ -75,10 +75,10 @@ previous state, as needed. .. figure:: images/lstm_memorycell.png :align: center - **Figure 1** : Illustration of an LSTM memory cell. + **Figure 1**: Illustration of an LSTM memory cell. The equations below describe how a layer of memory cells is updated at every -timestep :math:`t`. In these equations : +timestep :math:`t`. In these equations: * :math:`x_t` is the input to the memory cell layer at time :math:`t` * :math:`W_i`, :math:`W_f`, :math:`W_c`, :math:`W_o`, :math:`U_i`, @@ -89,7 +89,7 @@ timestep :math:`t`. In these equations : First, we compute the values for :math:`i_t`, the input gate, and :math:`\widetilde{C_t}` the candidate value for the states of the memory -cells at time :math:`t` : +cells at time :math:`t`: .. math:: :label: 1 @@ -102,7 +102,7 @@ cells at time :math:`t` : \widetilde{C_t} = tanh(W_c x_t + U_c h_{t-1} + b_c) Second, we compute the value for :math:`f_t`, the activation of the memory -cells' forget gates at time :math:`t` : +cells' forget gates at time :math:`t`: .. math:: :label: 3 @@ -111,7 +111,7 @@ cells' forget gates at time :math:`t` : Given the value of the input gate activation :math:`i_t`, the forget gate activation :math:`f_t` and the candidate state value :math:`\widetilde{C_t}`, -we can compute :math:`C_t` the memory cells' new state at time :math:`t` : +we can compute :math:`C_t` the memory cells' new state at time :math:`t`: .. math:: :label: 4 @@ -119,7 +119,7 @@ we can compute :math:`C_t` the memory cells' new state at time :math:`t` : C_t = i_t * \widetilde{C_t} + f_t * C_{t-1} With the new state of the memory cells, we can compute the value of their -output gates and, subsequently, their outputs : +output gates and, subsequently, their outputs: .. math:: :label: 5 @@ -139,7 +139,7 @@ In this variant, the activation of a cell’s output gate does not depend on the memory cell’s state :math:`C_t`. This allows us to perform part of the computation more efficiently (see the implementation note, below, for details). This means that, in the variant we have implemented, there is no -matrix :math:`V_o` and equation :eq:`5` is replaced by equation :eq:`5-alt` : +matrix :math:`V_o` and equation :eq:`5` is replaced by equation :eq:`5-alt`: .. math:: :label: 5-alt @@ -170,7 +170,7 @@ concatenating the four matrices :math:`W_*` into a single weight matrix :math:`W` and performing the same concatenation on the weight matrices :math:`U_*` to produce the matrix :math:`U` and the bias vectors :math:`b_*` to produce the vector :math:`b`. Then, the pre-nonlinearity activations can -be computed with : +be computed with: .. math:: @@ -187,11 +187,11 @@ Code - Citations - Contact Code ==== -The LSTM implementation can be found in the two following files : +The LSTM implementation can be found in the two following files: -* `lstm.py `_ : Main script. Defines and train the model. +* `lstm.py `_: Main script. 
Defines and train the model. -* `imdb.py `_ : Secondary script. Handles the loading and preprocessing of the IMDB dataset. +* `imdb.py `_: Secondary script. Handles the loading and preprocessing of the IMDB dataset. After downloading both scripts and putting both in the same folder, the user can run the code by calling: @@ -202,7 +202,7 @@ can run the code by calling: The script will automatically download the data and decompress it. -**Note** : The provided code supports the Stochastic Gradient Descent (SGD), +**Note**: The provided code supports the Stochastic Gradient Descent (SGD), AdaDelta and RMSProp optimization methods. You are advised to use AdaDelta or RMSProp because SGD appears to performs poorly on this task with this particular model. From bb2aa41171de24c48315578fd41f682e07284eca Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Wed, 7 Jun 2017 14:05:28 -0400 Subject: [PATCH 81/90] typo and space fix --- doc/mlp.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/mlp.txt b/doc/mlp.txt index 0ecc7a89..9e59ffbf 100644 --- a/doc/mlp.txt +++ b/doc/mlp.txt @@ -90,8 +90,8 @@ The set of parameters to learn is the set :math:`\theta = \{W^{(2)},b^{(2)},W^{(1)},b^{(1)}\}`. Obtaining the gradients :math:`\partial{\ell}/\partial{\theta}` can be achieved through the **backpropagation algorithm** (a special case of the chain-rule of derivation). -Thankfully, since Theano performs automatic differentation, we will not need to -cover this in the tutorial ! +Thankfully, since Theano performs automatic differentiation, we will not need to +cover this in the tutorial! Going from logistic regression to MLP From 8eb21daf92d48c020bfc7fe9b3ef680403e812ae Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Wed, 7 Jun 2017 14:09:03 -0400 Subject: [PATCH 82/90] typo on lenet page --- doc/lenet.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/lenet.txt b/doc/lenet.txt index 09f50be6..84b7c3be 100644 --- a/doc/lenet.txt +++ b/doc/lenet.txt @@ -543,7 +543,7 @@ the task. Filter Shape ************ -Common filter shapes found in the litterature vary greatly, usually based on +Common filter shapes found in the literature vary greatly, usually based on the dataset. Best results on MNIST-sized images (28x28) are usually in the 5x5 range on the first layer, while natural image datasets (often with hundreds of pixels in each dimension) tend to use larger first-layer filters of shape 12x12 or 15x15. 
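As a companion to the filter-shape passage patched above: the shapes it
discusses enter the code through the ``filter_shape`` argument of Theano's
``conv2d``. A self-contained sketch (the shapes and names below are
illustrative only, not taken from the tutorial code):

    import numpy
    import theano
    import theano.tensor as T
    from theano.tensor.nnet import conv2d

    rng = numpy.random.RandomState(1234)
    x = T.tensor4('x')            # (batch, channels, height, width)
    w_shape = (20, 1, 5, 5)       # twenty 5x5 filters, the first-layer range
                                  # suggested for MNIST-sized (28x28) inputs
    W = theano.shared(
        numpy.asarray(rng.uniform(-0.1, 0.1, w_shape),
                      dtype=theano.config.floatX), name='W')
    out = conv2d(x, W, filter_shape=w_shape)
    f = theano.function([x], out)
    img = rng.uniform(size=(2, 1, 28, 28)).astype(theano.config.floatX)
    print(f(img).shape)           # (2, 20, 24, 24), since 28 - 5 + 1 = 24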
From 147cb2e9a9374d8f5b4673370c12ce6457b53cce Mon Sep 17 00:00:00 2001
From: Philip Kirkbride
Date: Wed, 7 Jun 2017 14:12:57 -0400
Subject: [PATCH 83/90] typo/inconsistency in spelling of corruption

---
 code/dA.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/dA.py b/code/dA.py
index 93a696f8..7d054b20 100644
--- a/code/dA.py
+++ b/code/dA.py
@@ -195,7 +195,7 @@ def __init__(

     def get_corrupted_input(self, input, corruption_level):
         """This function keeps ``1-corruption_level`` entries of the inputs the
-        same and zero-out randomly selected subset of size ``coruption_level``
+        same and zero-out randomly selected subset of size ``corruption_level``
         Note : first argument of theano.rng.binomial is the shape(size) of
                random numbers that it should produce
                second argument is the number of trials

From 534e91585ebddd8238bf59d9cb9ba7fef2e6949c Mon Sep 17 00:00:00 2001
From: Philip Kirkbride
Date: Tue, 20 Jun 2017 11:34:42 -0400
Subject: [PATCH 84/90] Add small note on easy download script

I'm not sure the existence/option of downloading all the datasets via
bash script will be obvious to people approaching the repo via the
written tutorial.
---
 doc/gettingstarted.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt
index 0019c3c6..f290305f 100644
--- a/doc/gettingstarted.txt
+++ b/doc/gettingstarted.txt
@@ -22,6 +22,11 @@ On each learning algorithm page, you will be able to download the corresponding

     git clone https://github.com/lisa-lab/DeepLearningTutorials.git

+On linux systems, after cloning, all datasets can be downloaded at once with:
+
+    cd DeeepLearningTutorials/data
+    ./download.sh
+

 .. _datasets:

From cb4261c830b39936aea224620c678480338ef272 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Bastien?=
Date: Tue, 20 Jun 2017 11:41:46 -0400
Subject: [PATCH 85/90] Tell that it works on Mac.

---
 doc/gettingstarted.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt
index f290305f..7b1974ea 100644
--- a/doc/gettingstarted.txt
+++ b/doc/gettingstarted.txt
@@ -22,7 +22,7 @@ On each learning algorithm page, you will be able to download the corresponding

     git clone https://github.com/lisa-lab/DeepLearningTutorials.git

-On linux systems, after cloning, all datasets can be downloaded at once with:
+On Linux or Mac systems, after cloning, all datasets can be downloaded at once with:

     cd DeeepLearningTutorials/data
     ./download.sh

From 36ec511d60746bde1d0e3905944760c92068675d Mon Sep 17 00:00:00 2001
From: Simon Lefrancois
Date: Tue, 11 Jul 2017 10:30:57 -0400
Subject: [PATCH 86/90] add link to github

---
 doc/index.txt  | 4 +++-
 doc/rnnrbm.txt | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/index.txt b/doc/index.txt
index 68a18ec5..e01e79fc 100644
--- a/doc/index.txt
+++ b/doc/index.txt
@@ -25,7 +25,9 @@ training them on a GPU.
 The algorithm tutorials have some prerequisites. You should know some python,
 and be familiar with numpy. Since this tutorial is about using Theano, you
 should read over the `Theano basic tutorial`_ first. Once you've done that,
-read through our :ref:`gettingstarted` chapter -- it introduces the notation, and [downloadable] datasets used in the algorithm tutorials, and the way we do optimization by stochastic gradient descent.
+read through our :ref:`gettingstarted` chapter -- it introduces the notation, and downloadable datasets used in the algorithm tutorials, and the way we do optimization by stochastic gradient descent. + +The code is available on the `Deep Learning Tutorial repositories `_. The purely supervised learning algorithms are meant to be read in order: diff --git a/doc/rnnrbm.txt b/doc/rnnrbm.txt index d64a0c4a..75e681f8 100644 --- a/doc/rnnrbm.txt +++ b/doc/rnnrbm.txt @@ -17,7 +17,7 @@ Modeling and generating sequences of polyphonic music with the RNN-RBM The script also assumes that the content of the `Nottingham Database of folk tunes `_ has been extracted in the ``../data`` directory. Alternative MIDI datasets are available `here `_. - Note that both dependencies above can be setup automatically by running the ``download.sh`` script in the ``../data`` directory. + Note that both dependencies above can be setup automatically by running the `download.sh `_ script in the ``../data`` directory of the `Deep Learning Tutorials repository `_. .. caution:: Need Theano 0.6 or more recent. From 81f257524079efc2c553beba0829c8a23d1a33d3 Mon Sep 17 00:00:00 2001 From: Simon Lefrancois Date: Tue, 11 Jul 2017 10:31:26 -0400 Subject: [PATCH 87/90] typo --- doc/gettingstarted.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index 7b1974ea..99c7f054 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -24,7 +24,7 @@ On each learning algorithm page, you will be able to download the corresponding On Linux or Mac systems, after cloning, all datasets can be downloaded at once with: - cd DeeepLearningTutorials/data + cd DeepLearningTutorials/data ./download.sh From ebb8c21df3a3d073003e1323fead2150ada56ce1 Mon Sep 17 00:00:00 2001 From: Simon Lefrancois Date: Wed, 26 Jul 2017 08:29:51 -0400 Subject: [PATCH 88/90] update nosetests command --- .jenkins/jenkins_buildbot_dlt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh index c8be22b4..eb43d91c 100755 --- a/.jenkins/jenkins_buildbot_dlt.sh +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -79,4 +79,4 @@ THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} echo "==== Executing nosetests with mode=FAST_RUN,floatX=float32,device=cuda" NAME=dlt_float32_cuda FILE=${BUILDBOT_DIR}/${NAME}_tests.xml -PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/DeepLearningTutorials/code:${PYTHONPATH} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32,device=cuda nosetests-2.7 test.py ${XUNIT}${FILE} ${SUITE}${NAME} +PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/DeepLearningTutorials/code:${PYTHONPATH} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32,device=cuda ${NOSETESTS} test.py ${XUNIT}${FILE} ${SUITE}${NAME} From 8d25f1a91a656c5a0c67fe2434a5d37d89983665 Mon Sep 17 00:00:00 2001 From: Simon Lefrancois Date: Wed, 26 Jul 2017 10:41:07 -0400 Subject: [PATCH 89/90] use nosetests directly for gpu --- .jenkins/jenkins_buildbot_dlt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh index eb43d91c..8b57a1bc 100755 --- a/.jenkins/jenkins_buildbot_dlt.sh +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -79,4 +79,4 @@ THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} echo "==== Executing nosetests with mode=FAST_RUN,floatX=float32,device=cuda" NAME=dlt_float32_cuda 
FILE=${BUILDBOT_DIR}/${NAME}_tests.xml -PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/DeepLearningTutorials/code:${PYTHONPATH} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32,device=cuda ${NOSETESTS} test.py ${XUNIT}${FILE} ${SUITE}${NAME} +PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/DeepLearningTutorials/code:${PYTHONPATH} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32,device=cuda nosetests test.py ${XUNIT}${FILE} ${SUITE}${NAME} From 764cd4cdf5dc157a121a2fbffc2dec91c03f2ed9 Mon Sep 17 00:00:00 2001 From: Simon Lefrancois Date: Thu, 7 Sep 2017 16:46:18 -0400 Subject: [PATCH 90/90] libgpuarray full checkout --- .jenkins/jenkins_buildbot_dlt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh index 8b57a1bc..15da288b 100755 --- a/.jenkins/jenkins_buildbot_dlt.sh +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -24,7 +24,7 @@ cd ${BUILDBOT_DIR} # Make fresh clone (with no history since we don't need it) rm -rf libgpuarray -git clone --depth 1 "https://github.com/Theano/libgpuarray.git" +git clone "https://github.com/Theano/libgpuarray.git" (cd libgpuarray && echo "libgpuarray commit" && git rev-parse HEAD)
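
After a libgpuarray/pygpu install like the one scripted above, a quick way
to confirm that pygpu is importable and can reach a device is a check along
these lines (hypothetical snippet, not part of the repository; the device
string depends on the machine):

    import pygpu

    print(pygpu.__version__)
    # 'cuda0' selects the first CUDA device; an OpenCL build would use a
    # string like 'opencl0:0' instead.
    ctx = pygpu.init('cuda0')
    print(ctx.devname)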