From e4cad43f96b52589e7d95a66156f698963c33e09 Mon Sep 17 00:00:00 2001
From: stray-leone
Date: Sun, 13 Sep 2015 01:44:59 +0900
Subject: [PATCH 01/90] Change the way of getting vocsize and nclasses; the previous way needed many hours when the training data was big.

---
 code/rnnslu.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/code/rnnslu.py b/code/rnnslu.py
index 65363688..a8efda74 100644
--- a/code/rnnslu.py
+++ b/code/rnnslu.py
@@ -288,10 +288,8 @@ def main(param=None):
     valid_lex, valid_ne, valid_y = valid_set
     test_lex, test_ne, test_y = test_set

-    vocsize = len(set(reduce(lambda x, y: list(x) + list(y),
-                             train_lex + valid_lex + test_lex)))
-    nclasses = len(set(reduce(lambda x, y: list(x)+list(y),
-                              train_y + test_y + valid_y)))
+    vocsize = len(dic['words2idx'])
+    nclasses = len(dic['labels2idx'])

     nsentences = len(train_lex)

     groundtruth_valid = [map(lambda x: idx2label[x], y) for y in valid_y]

From 7c1219dabbb24ea62d74ae90b2a39e9ad0c9a090 Mon Sep 17 00:00:00 2001
From: Joakim Skarding
Date: Sat, 24 Oct 2015 20:50:42 +0200
Subject: [PATCH 02/90] Added the MSGD abbreviation where minibatch SGD is introduced

---
 doc/gettingstarted.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt
index 5800889d..63f93597 100644
--- a/doc/gettingstarted.txt
+++ b/doc/gettingstarted.txt
@@ -389,7 +389,7 @@ form, we estimate the gradient from just a single example at a time.
 The variant that we recommend for deep learning is a further twist on
 stochastic gradient descent using so-called "minibatches".
-Minibatch SGD works identically to SGD, except that we use more than
+Minibatch SGD (MSGD) works identically to SGD, except that we use more than
 one training example to make each estimate of the gradient. This
 technique reduces variance in the estimate of the gradient, and often
 makes better use of the hierarchical memory organization in modern computers.
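For readers new to the minibatch SGD (MSGD) idea referenced in patch 02, a minimal sketch of such a loop is shown below. This is an illustration only, not code from this patch series: `grad_fn` and the data arrays are hypothetical stand-ins for a model's gradient function and training set.

    import numpy

    def msgd(params, grad_fn, data_x, data_y, lr=0.13, batch_size=600, n_epochs=10):
        # One gradient estimate per minibatch of `batch_size` examples,
        # instead of per single example as in plain SGD.
        n_batches = len(data_x) // batch_size
        for epoch in range(n_epochs):
            for i in range(n_batches):
                xb = data_x[i * batch_size:(i + 1) * batch_size]
                yb = data_y[i * batch_size:(i + 1) * batch_size]
                params = params - lr * grad_fn(params, xb, yb)
        return params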
From 321a6e1adf3d650e8393f98d65f2faa67ea27f45 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Wed, 28 Oct 2015 10:37:21 -0400
Subject: [PATCH 03/90] Update atis URL

---
 data/download.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/data/download.sh b/data/download.sh
index 92ef3d3c..ed273bbb 100755
--- a/data/download.sh
+++ b/data/download.sh
@@ -19,8 +19,8 @@ $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz && gunzip imdb.p
 $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.dict.pkl.gz && gunzip imdb.dict.pkl.gz
 $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/Nottingham.zip && unzip -u Nottingham.zip
 $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/midi.zip && unzip -u midi.zip -d ../code && echo "extracted Modified Python MIDI package (GPL)"
-$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold0.pkl.gz
-$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold1.pkl.gz
-$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold2.pkl.gz
-$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold3.pkl.gz
-$DL_CMD http://www-etud.iro.umontreal.ca/~mesnilgr/atis/atis.fold4.pkl.gz
+$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold0.pkl.gz
+$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold1.pkl.gz
+$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold2.pkl.gz
+$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold3.pkl.gz
+$DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold4.pkl.gz

From 95cfe74621f3306cc7f6c0d610411054b98e91e6 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Wed, 28 Oct 2015 11:00:21 -0400
Subject: [PATCH 04/90] Fix travis; newer scipy versions in miniconda have problems with gfortran

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 7873dedf..4344a63d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,7 +14,7 @@ before_install:
   - conda update --yes conda

 install:
-  - conda create --yes -q -n pyenv mkl python=2.7 numpy scipy pip nose yaml pyflakes pillow pyparsing=1.5
+  - conda create --yes -q -n pyenv mkl python=2.7 numpy=1.10 scipy=0.16.0 pip nose yaml pyflakes pillow pyparsing=1.5
   - source activate pyenv
   - pip install git+git://github.com/Theano/Theano.git

From b3c4a9e2f7630332f802e9acc2fe1f9452920e49 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Wed, 28 Oct 2015 11:41:19 -0400
Subject: [PATCH 05/90] Use the new travis infrastructure

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 4344a63d..17e75146 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,6 @@
 # After changing this file, check it on:
 #   http://lint.travis-ci.org/
+sudo: false
 language: python

 #python:

From 1f628ff222d7f864559bcb62a892cb0dfcb5cb65 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Wed, 28 Oct 2015 13:31:23 -0400
Subject: [PATCH 06/90] Update Gregoire's email

---
 doc/rnnslu.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/rnnslu.txt b/doc/rnnslu.txt
index bb294c33..7fef1683 100644
--- a/doc/rnnslu.txt
+++ b/doc/rnnslu.txt
@@ -42,8 +42,9 @@ Thank you!
 Contact
 =======

-Please email to `Grégoire Mesnil `_ for any
-problem report or feedback. We will be glad to hear from you.
+Please email to
+``Grégoire Mesnil (first-add-a-dot-last-add-at-gmail-add-a-dot-com)``
+for any problem report or feedback.
 We will be glad to hear from you.

 Task
 ++++
From 5b62a38639200960ba58ad39bcb1dd60370b68b2 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Thu, 29 Oct 2015 16:28:23 -0400
Subject: [PATCH 07/90] Small update to lstm code: uidx has already been incremented. Fixes gh-122.

---
 code/lstm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/lstm.py b/code/lstm.py
index b64970fb..1d87cfb3 100644
--- a/code/lstm.py
+++ b/code/lstm.py
@@ -569,7 +569,7 @@ def train_lstm(
             f_update(lrate)

             if numpy.isnan(cost) or numpy.isinf(cost):
-                print 'NaN detected'
+                print 'bad cost detected: ', cost
                 return 1., 1., 1.

             if numpy.mod(uidx, dispFreq) == 0:
@@ -595,7 +595,7 @@

                 history_errs.append([valid_err, test_err])

-                if (uidx == 0 or
+                if (best_p is None or
                     valid_err <= numpy.array(history_errs)[:, 0].min()):

From 8b1e2b35f6f67017d76a4c0b7600132cf07ce5f8 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Thu, 29 Oct 2015 16:29:46 -0400
Subject: [PATCH 08/90] Update timings after a speedup

---
 code/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/test.py b/code/test.py
index cf226b42..94e03b8d 100644
--- a/code/test.py
+++ b/code/test.py
@@ -87,7 +87,7 @@ def speed():
     #  7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.

     expected_times_64 = numpy.asarray([9.8, 22.0, 76.1, 73.7, 116.4,
-                                       346.9, 355.0, 558.1, 130.4, 50.8, 113.6])
+                                       346.9, 355.0, 558.1, 130.4, 23.2, 106])
     expected_times_32 = numpy.asarray([8.1, 17.9, 42.5, 66.5, 71,
                                        191.2, 199.0, 432.8, 119.5, 36.9, 78.0])
@@ -110,7 +110,7 @@ def speed():

     expected_times_gpu = numpy.asarray([3.0, 7.55523491, 18.99226785,
                                         5.8, 20.0,
-                                        11.8, 18.2, 280.1, 132.8, 38.8, 10.5])
+                                        11.2, 17.2, 257.7, 118.8, 34.2, 8.7])
     expected_times_64 = [s for idx, s in enumerate(expected_times_64)
                          if to_exec[idx]]
     expected_times_32 = [s for idx, s in enumerate(expected_times_32)

From 564af7e8381843b368b3fde86ef2d8ce13152cb2 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Thu, 5 Nov 2015 07:54:38 -0500
Subject: [PATCH 09/90] Fix the miniconda path, which has changed

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 17e75146..8a84a9ea 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,7 +11,7 @@ before_install:
   - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
   - chmod +x miniconda.sh
   - ./miniconda.sh -b
-  - export PATH=/home/travis/miniconda/bin:$PATH
+  - export PATH=/home/travis/miniconda/bin:/home/travis/miniconda2/bin:$PATH
   - conda update --yes conda

 install:

From 74ab26817a1b0a04371a25a7109d7779f7f21b17 Mon Sep 17 00:00:00 2001
From: Ali İskender Turan
Date: Tue, 15 Dec 2015 16:56:10 +0430
Subject: [PATCH 10/90] Update gettingstarted.txt

The L2_sqr variable must be renamed L2 to match the loss function, or the
L2 variable in the loss function must be renamed L2_sqr.
---
 doc/gettingstarted.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt
index 63f93597..e838d706 100644
--- a/doc/gettingstarted.txt
+++ b/doc/gettingstarted.txt
@@ -525,7 +525,7 @@ L2 regularization term weighted by :math:`\lambda_2`
     L1 = T.sum(abs(param))

     # symbolic Theano variable that represents the squared L2 term
-    L2_sqr = T.sum(param ** 2)
+    L2 = T.sum(param ** 2)

     # the loss
     loss = NLL + lambda_1 * L1 + lambda_2 * L2
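To see the naming fix of patch 10 in context, here is a small self-contained sketch of an L1/L2-regularized loss in Theano, with the variable named consistently in its definition and in the loss. `NLL` and the lambda values are illustrative stand-ins, not tutorial code.

    import theano.tensor as T

    param = T.vector('param')
    NLL = T.scalar('NLL')            # stand-in for the negative log-likelihood term
    lambda_1, lambda_2 = 0.001, 0.0001

    L1 = T.sum(abs(param))           # L1 penalty
    L2 = T.sum(param ** 2)           # squared L2 penalty, named to match the loss below
    loss = NLL + lambda_1 * L1 + lambda_2 * L2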
From fb5e394b25d4ce0d7a01a776045dc15b93697bf2 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Tue, 15 Dec 2015 11:31:10 -0500
Subject: [PATCH 11/90] Update timings that are now faster

---
 code/test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/code/test.py b/code/test.py
index 94e03b8d..41749231 100644
--- a/code/test.py
+++ b/code/test.py
@@ -87,9 +87,9 @@ def speed():
     #  7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.

     expected_times_64 = numpy.asarray([9.8, 22.0, 76.1, 73.7, 116.4,
-                                       346.9, 355.0, 558.1, 130.4, 23.2, 106])
-    expected_times_32 = numpy.asarray([8.1, 17.9, 42.5, 66.5, 71,
-                                       191.2, 199.0, 432.8, 119.5, 36.9, 78.0])
+                                       346.9, 355.0, 510.9, 130.4, 23.2, 106])
+    expected_times_32 = numpy.asarray([6.4, 17.9, 42.5, 66.5, 71,
+                                       191.2, 199.0, 400.4, 119.5, 36.9, 67.2])

     # Number with just 1 decimal are new value that are faster with
     # the Theano version 0.5rc2 Other number are older. They are not
@@ -110,7 +110,7 @@ def speed():

     expected_times_gpu = numpy.asarray([3.0, 7.55523491, 18.99226785,
                                         5.8, 20.0,
-                                        11.2, 17.2, 257.7, 118.8, 34.2, 8.7])
+                                        11.2, 17.2, 244.3, 118.8, 34.2, 8.7])
     expected_times_64 = [s for idx, s in enumerate(expected_times_64)
                          if to_exec[idx]]
     expected_times_32 = [s for idx, s in enumerate(expected_times_32)

From e9711aaf2e059ad7beceb2c762b5729ec0de7f68 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Tue, 15 Dec 2015 11:46:34 -0500
Subject: [PATCH 12/90] Try to fix travis

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 8a84a9ea..4528a5fe 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,7 +15,7 @@ before_install:
   - conda update --yes conda

 install:
-  - conda create --yes -q -n pyenv mkl python=2.7 numpy=1.10 scipy=0.16.0 pip nose yaml pyflakes pillow pyparsing=1.5
+  - conda create --yes -q -n pyenv mkl python=2.7 numpy=1.10 scipy=0.16.1 pip nose yaml pyflakes pillow pyparsing=1.5
   - source activate pyenv
   - pip install git+git://github.com/Theano/Theano.git

From e9fc9c395d7fd3b8dafafb5229e4660154727128 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Tue, 15 Dec 2015 12:10:41 -0500
Subject: [PATCH 13/90] Add test_lstm to travis

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 4528a5fe..ae3801c2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -21,7 +21,7 @@ install:

 env:
   - PART="test.py:test_logistic_sgd test.py:test_logistic_cg test.py:test_mlp test.py:test_convolutional_mlp test.py:test_dA"
-  - PART="test.py:test_SdA"
+  - PART="test.py:test_SdA test.py:test_lstm"
   - PART="test.py:test_dbn"
   - PART="test.py:test_rbm test.py:test_rnnrbm"
   - PART="-e test.py"

From 407fd81b1a5bf8630187eddabe3ff1e22afa6ca7 Mon Sep 17 00:00:00 2001
From: Frederic
Date: Tue, 15 Dec 2015 12:10:56 -0500
Subject: [PATCH 14/90] Get rid of scipy dependency by using numpy.

---
 code/hmc/test_hmc.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/code/hmc/test_hmc.py b/code/hmc/test_hmc.py
index 0a70190a..f6c3b522 100644
--- a/code/hmc/test_hmc.py
+++ b/code/hmc/test_hmc.py
@@ -1,5 +1,4 @@
 import numpy
-from scipy import linalg
 import theano

 from hmc import HMC_sampler
@@ -15,7 +14,7 @@ def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10):
     cov = numpy.array(rng.rand(dim, dim), dtype=theano.config.floatX)
     cov = (cov + cov.T) / 2.
     cov[numpy.arange(dim), numpy.arange(dim)] = 1.0
-    cov_inv = linalg.inv(cov)
+    cov_inv = numpy.linalg.inv(cov)

     # Define energy function for a multi-variate Gaussian
     def gaussian_energy(x):
From e1d1e0c826d7d05a84302222d55b45cf3af2a6fa Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Fri, 15 Jan 2016 16:23:24 -0500
Subject: [PATCH 15/90] Update timings for a case that is now faster

---
 code/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/test.py b/code/test.py
index 41749231..76c95b38 100644
--- a/code/test.py
+++ b/code/test.py
@@ -86,9 +86,9 @@ def speed():
     #  580 for the GPU. OS=Fedora 14, gcc=4.5.1, python/BLAS from EPD
     #  7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.

-    expected_times_64 = numpy.asarray([9.8, 22.0, 76.1, 73.7, 116.4,
+    expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4,
                                        346.9, 355.0, 510.9, 130.4, 23.2, 106])
-    expected_times_32 = numpy.asarray([6.4, 17.9, 42.5, 66.5, 71,
+    expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 66.5, 71,
                                        191.2, 199.0, 400.4, 119.5, 36.9, 67.2])

From b3b1783b3fb1ac92f28dbc3a4e64c5ea7cf85731 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Fri, 15 Jan 2016 16:39:06 -0500
Subject: [PATCH 16/90] Add test_rnnslu to travis

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index ae3801c2..258963ee 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -23,7 +23,7 @@ env:
   - PART="test.py:test_logistic_sgd test.py:test_logistic_cg test.py:test_mlp test.py:test_convolutional_mlp test.py:test_dA"
   - PART="test.py:test_SdA test.py:test_lstm"
   - PART="test.py:test_dbn"
-  - PART="test.py:test_rbm test.py:test_rnnrbm"
+  - PART="test.py:test_rbm test.py:test_rnnrbm test.py:test_rnnslu"
   - PART="-e test.py"

 #i7-2600K CPU @ 3.40GHz

From 1a1529261e05fb5d27be973439c5cb4f2ce49d94 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Fri, 15 Jan 2016 16:39:35 -0500
Subject: [PATCH 17/90] Make test_rnnslu faster

---
 code/test.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/code/test.py b/code/test.py
index 76c95b38..ff2ff359 100644
--- a/code/test.py
+++ b/code/test.py
@@ -15,10 +15,6 @@
 import lstm


-def test_rnnslu():
-    rnnslu.main()
-
-
 def test_logistic_sgd():
     logistic_sgd.sgd_optimization_mnist(n_epochs=10)

@@ -62,6 +58,26 @@ def test_rnnrbm():
     rnnrbm.test_rnnrbm(num_epochs=1)


+def test_rnnslu():
+    s = {'fold': 3,
+         # 5 folds 0,1,2,3,4
+         'data': 'atis',
+         'lr': 0.0970806646812754,
+         'verbose': 1,
+         'decay': True,
+         # decay on the learning rate if improvement stops
+         'win': 7,
+         # number of words in the context window
+         'nhidden': 200,
+         # number of hidden units
+         'seed': 345,
+         'emb_dimension': 50,
+         # dimension of word embedding
+         'nepochs': 1,  # CHANGED
+         'savemodel': False}
+    rnnslu.main(s)
+
+
 def test_lstm():
     lstm.train_lstm(max_epochs=1, test_size=1000, saveto='')
From 87b2f9a1d22757681c6c4636bd8d9219ba91cd7d Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Fri, 15 Jan 2016 16:41:35 -0500
Subject: [PATCH 18/90] Small diff to help debugging with a better error message

---
 code/rnnslu.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/code/rnnslu.py b/code/rnnslu.py
index 2ea55978..2251e465 100644
--- a/code/rnnslu.py
+++ b/code/rnnslu.py
@@ -126,11 +126,14 @@ def get_perf(filename, folder):
                             stdout=subprocess.PIPE)

     stdout, _ = proc.communicate(''.join(open(filename).readlines()))
+    out = None
     for line in stdout.split('\n'):
         if 'accuracy' in line:
             out = line.split()
             break
-
+    # To help debug
+    if out is None:
+        print stdout.split('\n')
     precision = float(out[6][:-2])
     recall = float(out[8][:-2])
     f1score = float(out[10])

From b701733044d73681baa8346973229ed8d0537395 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Wed, 27 Jan 2016 21:40:33 -0800
Subject: [PATCH 19/90] Use the In object as Param is deprecated

---
 code/DBN.py | 2 +-
 code/SdA.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/code/DBN.py b/code/DBN.py
index b54ac5bc..ecd563e7 100644
--- a/code/DBN.py
+++ b/code/DBN.py
@@ -174,7 +174,7 @@ def pretraining_functions(self, train_set_x, batch_size, k):

         # compile the theano function
         fn = theano.function(
-            inputs=[index, theano.Param(learning_rate, default=0.1)],
+            inputs=[index, theano.In(learning_rate, value=0.1)],
             outputs=cost,
             updates=updates,
             givens={
diff --git a/code/SdA.py b/code/SdA.py
index 82660e99..c74c2986 100644
--- a/code/SdA.py
+++ b/code/SdA.py
@@ -214,8 +214,8 @@ def pretraining_functions(self, train_set_x, batch_size):
             fn = theano.function(
                 inputs=[
                     index,
-                    theano.Param(corruption_level, default=0.2),
-                    theano.Param(learning_rate, default=0.1)
+                    theano.In(corruption_level, value=0.2),
+                    theano.In(learning_rate, value=0.1)
                 ],
                 outputs=cost,
                 updates=updates,
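For readers following the `theano.Param` to `theano.In` migration above, a minimal standalone sketch of a default-valued input follows; it assumes only that Theano is installed and is not part of the tutorial code itself.

    import theano
    import theano.tensor as T

    x = T.dscalar('x')
    lr = T.dscalar('lr')
    # value= plays the role that the deprecated Param(..., default=...) used to play
    f = theano.function([x, theano.In(lr, value=0.1)], x * lr)

    print(f(2.0))       # lr falls back to the default: 0.2
    print(f(2.0, 0.5))  # the default can still be overridden: 1.0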
From 6e3d61544f2786f7400a98151db99c5409c8bb4e Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron
Date: Tue, 2 Feb 2016 16:41:10 -0500
Subject: [PATCH 20/90] Update convolution to use the updated interface.

---
 code/convolutional_mlp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py
index 64bf5e69..bb6aeaf4 100644
--- a/code/convolutional_mlp.py
+++ b/code/convolutional_mlp.py
@@ -30,7 +30,7 @@ import theano
 import theano.tensor as T
 from theano.tensor.signal import downsample
-from theano.tensor.nnet import conv
+from theano.tensor.nnet import conv2d

 from logistic_sgd import LogisticRegression, load_data
 from mlp import HiddenLayer
@@ -87,7 +87,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
         self.b = theano.shared(value=b_values, borrow=True)

         # convolve input feature maps with filters
-        conv_out = conv.conv2d(
+        conv_out = conv2d(
             input=input,
             filters=self.W,
             filter_shape=filter_shape,
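Similarly, a minimal sketch of the updated `conv2d` interface used above; the shapes are illustrative assumptions, and only `input` and `filters` are required arguments.

    import numpy
    import theano
    import theano.tensor as T
    from theano.tensor.nnet import conv2d

    x = T.tensor4('x')  # (batch, channels, rows, cols)
    w = theano.shared(numpy.random.randn(2, 1, 5, 5).astype(theano.config.floatX))
    f = theano.function([x], conv2d(input=x, filters=w, filter_shape=(2, 1, 5, 5)))

    img = numpy.random.randn(1, 1, 28, 28).astype(theano.config.floatX)
    print(f(img).shape)  # (1, 2, 24, 24) with the default 'valid' border mode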
From 6c5f07bbdbfcc9ed8fb6c8ae05b288a5ce696a03 Mon Sep 17 00:00:00 2001
From: Frédéric Bastien
Date: Wed, 3 Feb 2016 12:01:42 -0500
Subject: [PATCH 21/90] Update timings that got sped up.

---
 code/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/test.py b/code/test.py
index 76c95b38..8b6a515e 100644
--- a/code/test.py
+++ b/code/test.py
@@ -87,7 +87,7 @@ def speed():
     #  7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.

     expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4,
-                                       346.9, 355.0, 510.9, 130.4, 23.2, 106])
+                                       346.9, 355.0, 510.9, 130.4, 23.2, 98.8])
     expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 66.5, 71,
                                        191.2, 199.0, 400.4, 119.5, 36.9, 67.2])
@@ -108,7 +108,7 @@ def speed():
     #expected/get [0.82492841, 0.75984178, 0.65092691, 1.04930573, 0.93125138
     #              1.35324519 1.7356905  1.12937868]
-    expected_times_gpu = numpy.asarray([3.0, 7.55523491, 18.99226785,
+    expected_times_gpu = numpy.asarray([2.9, 7.55523491, 18.99226785,
                                         5.8, 20.0,
                                         11.2, 17.2, 244.3, 118.8, 34.2, 8.7])
     expected_times_64 = [s for idx, s in enumerate(expected_times_64)
                          if to_exec[idx]]
     expected_times_32 = [s for idx, s in enumerate(expected_times_32)

From c26252342c4d1ef1fea9131c7605d4190e52b2c2 Mon Sep 17 00:00:00 2001
From: Benjamin Irving
Date: Wed, 3 Feb 2016 17:56:49 +0000
Subject: [PATCH 22/90] Fix minor typos and formatting

---
 code/logistic_sgd.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/code/logistic_sgd.py b/code/logistic_sgd.py
index c944f8b3..68f26911 100644
--- a/code/logistic_sgd.py
+++ b/code/logistic_sgd.py
@@ -207,12 +207,12 @@ def load_data(dataset):
     f = gzip.open(dataset, 'rb')
     train_set, valid_set, test_set = cPickle.load(f)
     f.close()
-    #train_set, valid_set, test_set format: tuple(input, target)
-    #input is an numpy.ndarray of 2 dimensions (a matrix)
-    #witch row's correspond to an example. target is a
-    #numpy.ndarray of 1 dimensions (vector)) that have the same length as
-    #the number of rows in the input. It should give the target
-    #target to the example with the same index in the input.
+    # train_set, valid_set, test_set format: tuple(input, target)
+    # input is a numpy.ndarray of 2 dimensions (a matrix)
+    # where each row corresponds to an example. target is a
+    # numpy.ndarray of 1 dimension (vector) that has the same length as
+    # the number of rows in the input. It should give the target
+    # to the example with the same index in the input.

     def shared_dataset(data_xy, borrow=True):
         """ Function that loads the dataset into shared variables

From d2764f288b4e58e12bd492953d1c1a0b43d92e21 Mon Sep 17 00:00:00 2001
From: Guillaume Alain
Date: Thu, 21 Jan 2016 11:06:00 -0500
Subject: [PATCH 23/90] Successfully ported logistic_sgd.py

---
 code/logistic_sgd.py | 59 ++++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/code/logistic_sgd.py b/code/logistic_sgd.py
index 68f26911..9f4427e7 100644
--- a/code/logistic_sgd.py
+++ b/code/logistic_sgd.py
@@ -32,9 +32,12 @@
               Christopher M. Bishop, section 4.3.2

 """
+
+from __future__ import print_function
+
 __docformat__ = 'restructedtext en'

-import cPickle
+import six.moves.cPickle as pickle
 import gzip
 import os
 import sys
@@ -194,19 +197,21 @@ def load_data(dataset):
         dataset = new_path

     if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
-        import urllib
+        from six.moves import urllib
        origin = (
             'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
         )
-        print 'Downloading data from %s' % origin
-        urllib.urlretrieve(origin, dataset)
+        print('Downloading data from %s' % origin)
+        urllib.request.urlretrieve(origin, dataset)

-    print '... loading data'
+    print('... loading data')

     # Load the dataset
-    f = gzip.open(dataset, 'rb')
-    train_set, valid_set, test_set = cPickle.load(f)
-    f.close()
+    with gzip.open(dataset, 'rb') as f:
+        try:
+            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
+        except:
+            train_set, valid_set, test_set = pickle.load(f)
     # train_set, valid_set, test_set format: tuple(input, target)
     # input is a numpy.ndarray of 2 dimensions (a matrix)
     # where each row corresponds to an example. target is a
     # numpy.ndarray of 1 dimension (vector) that has the same length as
     # the number of rows in the input. It should give the target
     # to the example with the same index in the input.

     def shared_dataset(data_xy, borrow=True):
         """ Function that loads the dataset into shared variables
@@ -276,14 +281,14 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
     test_set_x, test_set_y = datasets[2]

     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

     ######################
     # BUILD ACTUAL MODEL #
     ######################
-    print '... building the model'
+    print('... building the model')

     # allocate symbolic variables for the data
     index = T.lscalar()  # index to a [mini]batch
@@ -348,14 +353,14 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
     ###############
     # TRAIN MODEL #
     ###############
-    print '... training the model'
+    print('... training the model')
     # early-stopping parameters
     patience = 5000  # look as this many examples regardless
     patience_increase = 2  # wait this much longer when a new best is
                            # found
     improvement_threshold = 0.995  # a relative improvement of this much is
                                    # considered significant
-    validation_frequency = min(n_train_batches, patience / 2)
+    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many
                                   # minibatche before checking the network
                                   # on the validation set; in this case we
@@ -369,7 +374,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
     epoch = 0
     while (epoch < n_epochs) and (not done_looping):
         epoch = epoch + 1
-        for minibatch_index in xrange(n_train_batches):
+        for minibatch_index in range(n_train_batches):

             minibatch_avg_cost = train_model(minibatch_index)
             # iteration number
@@ -378,7 +383,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
             if (iter + 1) % validation_frequency == 0:
                 # compute zero-one loss on validation set
                 validation_losses = [validate_model(i)
-                                     for i in xrange(n_valid_batches)]
+                                     for i in range(n_valid_batches)]
                 this_validation_loss = numpy.mean(validation_losses)

                 print(
@@ -402,7 +407,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                     # test it on the test set
                     test_losses = [test_model(i)
-                                   for i in xrange(n_test_batches)]
+                                   for i in range(n_test_batches)]
                     test_score = numpy.mean(test_losses)

                     print(
@@ -419,8 +424,8 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                     )

                     # save the best model
-                    with open('best_model.pkl', 'w') as f:
-                        cPickle.dump(classifier, f)
+                    with open('best_model.pkl', 'wb') as f:
+                        pickle.dump(classifier, f)

             if patience <= iter:
                 done_looping = True
@@ -434,11 +439,11 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
         )
         % (best_validation_loss * 100., test_score * 100.)
     )
-    print 'The code run for %d epochs, with %f epochs/sec' % (
-        epoch, 1. * epoch / (end_time - start_time))
-    print >> sys.stderr, ('The code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.1fs' % ((end_time - start_time)))
+    print('The code run for %d epochs, with %f epochs/sec' % (
+        epoch, 1. * epoch / (end_time - start_time)))
+    print(('The code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr)


 def predict():
@@ -448,7 +453,7 @@ def predict():
     """

     # load the saved model
-    classifier = cPickle.load(open('best_model.pkl'))
+    classifier = pickle.load(open('best_model.pkl'))

     # compile a predictor function
     predict_model = theano.function(
@@ -462,8 +467,8 @@ def predict():
     test_set_x = test_set_x.get_value()

     predicted_values = predict_model(test_set_x[:10])
-    print ("Predicted values for the first 10 examples in test set:")
-    print predicted_values
+    print("Predicted values for the first 10 examples in test set:")
+    print(predicted_values)


 if __name__ == '__main__':
From 2c022d15401c67538fabeb1b5ae2a7470d5fb2f2 Mon Sep 17 00:00:00 2001
From: Guillaume Alain
Date: Thu, 21 Jan 2016 15:28:29 -0500
Subject: [PATCH 24/90] fixed everything except rnnrbm and rnnslu, partial tests run but not to completion

---
 code/SdA.py               | 51 +++++++++++++-----------
 code/cA.py                | 16 +++++---
 code/convolutional_mlp.py | 31 ++++++++-------
 code/dA.py                | 28 +++++++------
 code/hmc/hmc.py           |  6 +--
 code/hmc/test_hmc.py      | 29 +++++++++-----
 code/imdb.py              | 17 +++++---
 code/lstm.py              | 83 +++++++++++++++++++++------------------
 code/mlp.py               | 27 +++++++------
 code/rbm.py               | 10 +++--
 code/utils.py             |  1 +
 11 files changed, 169 insertions(+), 130 deletions(-)

diff --git a/code/SdA.py b/code/SdA.py
index c74c2986..d639cb54 100644
--- a/code/SdA.py
+++ b/code/SdA.py
@@ -29,6 +29,9 @@
  Systems 19, 2007

 """
+
+from __future__ import print_function
+
 import os
 import sys
 import timeit
@@ -116,7 +119,7 @@ def __init__(
         # stochastich gradient descent on the MLP

         # start-snippet-2
-        for i in xrange(self.n_layers):
+        for i in range(self.n_layers):

             # construct the sigmoidal layer

             # the size of the input is either the number of hidden units of
@@ -254,9 +257,9 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate):

         # compute number of minibatches for training, validation and testing
         n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
-        n_valid_batches /= batch_size
+        n_valid_batches //= batch_size
         n_test_batches = test_set_x.get_value(borrow=True).shape[0]
-        n_test_batches /= batch_size
+        n_test_batches //= batch_size

         index = T.lscalar('index')  # index to a [mini]batch
@@ -314,11 +317,11 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate):

         # Create a function that scans the entire validation set
         def valid_score():
-            return [valid_score_i(i) for i in xrange(n_valid_batches)]
+            return [valid_score_i(i) for i in range(n_valid_batches)]

         # Create a function that scans the entire test set
         def test_score():
-            return [test_score_i(i) for i in xrange(n_test_batches)]
+            return [test_score_i(i) for i in range(n_test_batches)]

         return train_fn, valid_score, test_score
@@ -357,12 +360,12 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,

     # compute number of minibatches for training, validation and testing
     n_train_batches = train_set_x.get_value(borrow=True).shape[0]
-    n_train_batches /= batch_size
+    n_train_batches //= batch_size

     # numpy random generator
     # start-snippet-3
     numpy_rng = numpy.random.RandomState(89677)
-    print '... building the model'
+    print('... building the model')
     # construct the stacked denoising autoencoder class
     sda = SdA(
         numpy_rng=numpy_rng,
@@ -374,52 +377,52 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
     #########################
     # PRETRAINING THE MODEL #
     #########################
-    print '... getting the pretraining functions'
+    print('... getting the pretraining functions')
     pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                 batch_size=batch_size)

-    print '... pre-training the model'
+    print('... pre-training the model')
     start_time = timeit.default_timer()
     ## Pre-train layer-wise
     corruption_levels = [.1, .2, .3]
-    for i in xrange(sda.n_layers):
+    for i in range(sda.n_layers):
         # go through pretraining epochs
-        for epoch in xrange(pretraining_epochs):
+        for epoch in range(pretraining_epochs):
             # go through the training set
             c = []
-            for batch_index in xrange(n_train_batches):
+            for batch_index in range(n_train_batches):
                 c.append(pretraining_fns[i](index=batch_index,
                          corruption=corruption_levels[i],
                          lr=pretrain_lr))
-            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
-            print numpy.mean(c)
+            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch))
+            print(numpy.mean(c))

     end_time = timeit.default_timer()

-    print >> sys.stderr, ('The pretraining code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print(('The pretraining code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
     # end-snippet-4
     ########################
     # FINETUNING THE MODEL #
     ########################

     # get the training, validation and testing function for the model
-    print '... getting the finetuning functions'
+    print('... getting the finetuning functions')
     train_fn, validate_model, test_model = sda.build_finetune_functions(
         datasets=datasets,
         batch_size=batch_size,
         learning_rate=finetune_lr
     )

-    print '... finetunning the model'
+    print('... finetunning the model')
     # early-stopping parameters
     patience = 10 * n_train_batches  # look as this many examples regardless
     patience_increase = 2.  # wait this much longer when a new best is
                             # found
     improvement_threshold = 0.995  # a relative improvement of this much is
                                    # considered significant
-    validation_frequency = min(n_train_batches, patience / 2)
+    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many
                                   # minibatche before checking the network
                                   # on the validation set; in this case we
@@ -434,7 +437,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
     while (epoch < training_epochs) and (not done_looping):
         epoch = epoch + 1
-        for minibatch_index in xrange(n_train_batches):
+        for minibatch_index in range(n_train_batches):
             minibatch_avg_cost = train_fn(minibatch_index)
             iter = (epoch - 1) * n_train_batches + minibatch_index
@@ -480,9 +483,9 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
         )
         % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
     )
-    print >> sys.stderr, ('The training code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print(('The training code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)


 if __name__ == '__main__':
diff --git a/code/cA.py b/code/cA.py
index e26a1ddf..0d563ef2 100644
--- a/code/cA.py
+++ b/code/cA.py
@@ -28,6 +28,10 @@
  Systems 19, 2007

 """
+
+from __future__ import print_function
+from six.moves import xrange
+
 import os
 import sys
 import timeit
@@ -205,7 +209,7 @@ def get_cost_updates(self, contraction_level, learning_rate):
                      axis=1)

         # Compute the jacobian and average over the number of samples/minibatch
-        self.L_jacob = T.sum(J ** 2) / self.n_batchsize
+        self.L_jacob = T.sum(J ** 2) // self.n_batchsize

         # note : L is now a vector, where each element is the
         #        cross-entropy cost of the reconstruction of the
@@ -246,7 +250,7 @@ def test_cA(learning_rate=0.01, training_epochs=20,
     train_set_x, train_set_y = datasets[0]

     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

     # allocate symbolic variables for the data
     index = T.lscalar()    # index to a [mini]batch
@@ -290,15 +294,15 @@ def test_cA(learning_rate=0.01, training_epochs=20,
             c.append(train_ca(batch_index))

         c_array = numpy.vstack(c)
-        print 'Training epoch %d, reconstruction cost ' % epoch, numpy.mean(
-            c_array[0]), ' jacobian norm ', numpy.mean(numpy.sqrt(c_array[1]))
+        print('Training epoch %d, reconstruction cost ' % epoch, numpy.mean(
+            c_array[0]), ' jacobian norm ', numpy.mean(numpy.sqrt(c_array[1])))

     end_time = timeit.default_timer()

     training_time = (end_time - start_time)

-    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((training_time) / 60.))
+    print(('The code for file ' + os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((training_time) / 60.)), file=sys.stderr)
     image = Image.fromarray(tile_raster_images(
         X=ca.W.get_value(borrow=True).T,
         img_shape=(28, 28), tile_shape=(10, 10),
diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py
index bb6aeaf4..a8811bc1 100644
--- a/code/convolutional_mlp.py
+++ b/code/convolutional_mlp.py
@@ -21,6 +21,9 @@
  http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf

 """
+
+from __future__ import print_function
+
 import os
 import sys
 import timeit
@@ -70,7 +73,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
         # each unit in the lower layer receives a gradient from:
         # "num output feature maps * filter height * filter width" /
         #   pooling size
-        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
+        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) //
                    numpy.prod(poolsize))
         # initialize weights with random weights
         W_bound = numpy.sqrt(6. / (fan_in + fan_out))
@@ -145,9 +148,9 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
     n_train_batches = train_set_x.get_value(borrow=True).shape[0]
     n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
     n_test_batches = test_set_x.get_value(borrow=True).shape[0]
-    n_train_batches /= batch_size
-    n_valid_batches /= batch_size
-    n_test_batches /= batch_size
+    n_train_batches //= batch_size
+    n_valid_batches //= batch_size
+    n_test_batches //= batch_size

     # allocate symbolic variables for the data
     index = T.lscalar()  # index to a [mini]batch
@@ -160,7 +163,7 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
     ######################
     # BUILD ACTUAL MODEL #
     ######################
-    print '... building the model'
+    print('... building the model')

     # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
     # to a 4D tensor, compatible with our LeNetConvPoolLayer
@@ -261,14 +264,14 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
     ###############
     # TRAIN MODEL #
     ###############
-    print '... training'
+    print('... training')
     # early-stopping parameters
     patience = 10000  # look as this many examples regardless
     patience_increase = 2  # wait this much longer when a new best is
                            # found
     improvement_threshold = 0.995  # a relative improvement of this much is
                                    # considered significant
-    validation_frequency = min(n_train_batches, patience / 2)
+    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many
                                   # minibatche before checking the network
                                   # on the validation set; in this case we
@@ -284,19 +287,19 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
     while (epoch < n_epochs) and (not done_looping):
         epoch = epoch + 1
-        for minibatch_index in xrange(n_train_batches):
+        for minibatch_index in range(n_train_batches):

             iter = (epoch - 1) * n_train_batches + minibatch_index

             if iter % 100 == 0:
-                print 'training @ iter = ', iter
+                print('training @ iter = ', iter)
             cost_ij = train_model(minibatch_index)

             if (iter + 1) % validation_frequency == 0:

                 # compute zero-one loss on validation set
                 validation_losses = [validate_model(i) for i
-                                     in xrange(n_valid_batches)]
+                                     in range(n_valid_batches)]
                 this_validation_loss = numpy.mean(validation_losses)
                 print('epoch %i, minibatch %i/%i, validation error %f %%' %
                       (epoch, minibatch_index + 1, n_train_batches,
@@ -317,7 +320,7 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                     # test it on the test set
                     test_losses = [
                         test_model(i)
-                        for i in xrange(n_test_batches)
+                        for i in range(n_test_batches)
                     ]
                     test_score = numpy.mean(test_losses)
                     print(('     epoch %i, minibatch %i/%i, test error of '
@@ -334,9 +337,9 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
     print('Best validation score of %f %% obtained at iteration %i, '
           'with test performance %f %%' %
           (best_validation_loss * 100., best_iter + 1, test_score * 100.))
-    print >> sys.stderr, ('The code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print(('The code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)

 if __name__ == '__main__':
     evaluate_lenet5()
diff --git a/code/dA.py b/code/dA.py
index 8ea94e33..0d9efa54 100644
--- a/code/dA.py
+++ b/code/dA.py
@@ -30,6 +30,8 @@

 """

+from __future__ import print_function
+
 import os
 import sys
 import timeit
@@ -280,7 +282,7 @@ def test_dA(learning_rate=0.1, training_epochs=15,
     train_set_x, train_set_y = datasets[0]

     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

     # start-snippet-2
     # allocate symbolic variables for the data
@@ -328,21 +330,21 @@ def test_dA(learning_rate=0.1, training_epochs=15,
     ############

     # go through training epochs
-    for epoch in xrange(training_epochs):
+    for epoch in range(training_epochs):
         # go through trainng set
         c = []
-        for batch_index in xrange(n_train_batches):
+        for batch_index in range(n_train_batches):
             c.append(train_da(batch_index))

-        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
+        print('Training epoch %d, cost ' % epoch, numpy.mean(c))

     end_time = timeit.default_timer()

     training_time = (end_time - start_time)

-    print >> sys.stderr, ('The no corruption code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((training_time) / 60.))
+    print(('The no corruption code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((training_time) / 60.)), file=sys.stderr)
     image = Image.fromarray(
         tile_raster_images(X=da.W.get_value(borrow=True).T,
                            img_shape=(28, 28), tile_shape=(10, 10),
@@ -386,21 +388,21 @@ def test_dA(learning_rate=0.1, training_epochs=15,
     ############

     # go through training epochs
-    for epoch in xrange(training_epochs):
+    for epoch in range(training_epochs):
         # go through trainng set
         c = []
-        for batch_index in xrange(n_train_batches):
+        for batch_index in range(n_train_batches):
             c.append(train_da(batch_index))

-        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
+        print('Training epoch %d, cost ' % epoch, numpy.mean(c))

     end_time = timeit.default_timer()

     training_time = (end_time - start_time)

-    print >> sys.stderr, ('The 30% corruption code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % (training_time / 60.))
+    print(('The 30% corruption code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % (training_time / 60.)), file=sys.stderr)
     # end-snippet-3

     # start-snippet-4
diff --git a/code/hmc/hmc.py b/code/hmc/hmc.py
index b9c872f0..aeb49937 100644
--- a/code/hmc/hmc.py
+++ b/code/hmc/hmc.py
@@ -128,14 +128,14 @@ def leapfrog(pos, vel, step):
         rval2: dictionary
             Dictionary of updates for the Scan Op
         """
-        # from pos(t) and vel(t-stepsize/2), compute vel(t+stepsize/2)
+        # from pos(t) and vel(t-stepsize//2), compute vel(t+stepsize//2)
         dE_dpos = TT.grad(energy_fn(pos).sum(), pos)
         new_vel = vel - step * dE_dpos
-        # from vel(t+stepsize/2) compute pos(t+stepsize)
+        # from vel(t+stepsize//2) compute pos(t+stepsize)
         new_pos = pos + step * new_vel
         return [new_pos, new_vel], {}

-    # compute velocity at time-step: t + stepsize/2
+    # compute velocity at time-step: t + stepsize//2
     initial_energy = energy_fn(initial_pos)
     dE_dpos = TT.grad(initial_energy.sum(), initial_pos)
     vel_half_step = initial_vel - 0.5 * stepsize * dE_dpos
diff --git a/code/hmc/test_hmc.py b/code/hmc/test_hmc.py
index f6c3b522..c3c425e6 100644
--- a/code/hmc/test_hmc.py
+++ b/code/hmc/test_hmc.py
@@ -1,7 +1,16 @@
+
+from __future__ import print_function
+from six.moves import xrange
+
 import numpy
 import theano

-from hmc import HMC_sampler
+try:
+    from hmc import HMC_sampler
+except:
+    # python 3 compatibility
+    # http://stackoverflow.com/questions/3073259/python-nose-import-error
+    from hmc.hmc import HMC_sampler


 def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10):
@@ -37,17 +46,17 @@ def gaussian_energy(x):
     # Flatten to [n_samples * batchsize, dim]
     samples = _samples.T.reshape(dim, -1).T

-    print '****** TARGET VALUES ******'
-    print 'target mean:', mu
-    print 'target cov:\n', cov
+    print('****** TARGET VALUES ******')
+    print('target mean:', mu)
+    print('target cov:\n', cov)

-    print '****** EMPIRICAL MEAN/COV USING HMC ******'
-    print 'empirical mean: ', samples.mean(axis=0)
-    print 'empirical_cov:\n', numpy.cov(samples.T)
+    print('****** EMPIRICAL MEAN/COV USING HMC ******')
+    print('empirical mean: ', samples.mean(axis=0))
+    print('empirical_cov:\n', numpy.cov(samples.T))

-    print '****** HMC INTERNALS ******'
-    print 'final stepsize', sampler.stepsize.get_value()
-    print 'final acceptance_rate', sampler.avg_acceptance_rate.get_value()
+    print('****** HMC INTERNALS ******')
+    print('final stepsize', sampler.stepsize.get_value())
+    print('final acceptance_rate', sampler.avg_acceptance_rate.get_value())

     return sampler
diff --git a/code/imdb.py b/code/imdb.py
index 21e0e376..341be231 100644
--- a/code/imdb.py
+++ b/code/imdb.py
@@ -1,4 +1,7 @@
-import cPickle
+from __future__ import print_function
+from six.moves import xrange
+import six.moves.cPickle as pickle
+
 import gzip
 import os

@@ -68,9 +71,11 @@ def get_dataset_file(dataset, default_dataset, origin):
         dataset = new_path

     if (not os.path.isfile(dataset)) and data_file == default_dataset:
-        import urllib
-        print 'Downloading data from %s' % origin
-        urllib.urlretrieve(origin, dataset)
+        from six.moves import urllib
+        print('Downloading data from %s' % origin)
+        urllib.request.urlretrieve(origin, dataset)
+
+
     return dataset

@@ -110,8 +115,8 @@ def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen=None,
     else:
         f = open(path, 'rb')

-    train_set = cPickle.load(f)
-    test_set = cPickle.load(f)
+    train_set = pickle.load(f)
+    test_set = pickle.load(f)
     f.close()
     if maxlen:
         new_train_set_x = []
diff --git a/code/lstm.py b/code/lstm.py
index 1d87cfb3..b3b89f3e 100644
--- a/code/lstm.py
+++ b/code/lstm.py
@@ -1,8 +1,13 @@
 '''
 Build a tweet sentiment analyzer
 '''
+
+from __future__ import print_function
+from six.moves import xrange
+import six.moves.cPickle as pickle
+
+#from six.moves.collections import OrderedDict
 from collections import OrderedDict
-import cPickle as pkl
 import sys
 import time
@@ -56,7 +61,7 @@ def zipp(params, tparams):
     """
     When we reload the model. Needed for the GPU stuff.
     """
-    for kk, vv in params.iteritems():
+    for kk, vv in params.items():
         tparams[kk].set_value(vv)

@@ -65,7 +70,7 @@ def unzip(zipped):
     When we pickle the model. Needed for the GPU stuff.
     """
     new_params = OrderedDict()
-    for kk, vv in zipped.iteritems():
+    for kk, vv in zipped.items():
         new_params[kk] = vv.get_value()
     return new_params

@@ -106,7 +111,7 @@ def init_params(options):

 def load_params(path, params):
     pp = numpy.load(path)
-    for kk, vv in params.iteritems():
+    for kk, vv in params.items():
         if kk not in pp:
             raise Warning('%s is not in the archive' % kk)
         params[kk] = pp[kk]
@@ -116,7 +121,7 @@ def load_params(path, params):

 def init_tparams(params):
     tparams = OrderedDict()
-    for kk, pp in params.iteritems():
+    for kk, pp in params.items():
         tparams[kk] = theano.shared(params[kk], name=kk)
     return tparams

@@ -217,7 +222,7 @@ def sgd(lr, tparams, grads, x, mask, y, cost):
     # New set of shared variable that will contain the gradient
     # for a mini-batch.
     gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
-               for k, p in tparams.iteritems()]
+               for k, p in tparams.items()]
     gsup = [(gs, g) for gs, g in zip(gshared, grads)]

     # Function that computes gradients for a mini-batch, but do not
@@ -266,13 +271,13 @@ def adadelta(lr, tparams, grads, x, mask, y, cost):

     zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                   name='%s_grad' % k)
-                    for k, p in tparams.iteritems()]
+                    for k, p in tparams.items()]
     running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                  name='%s_rup2' % k)
-                   for k, p in tparams.iteritems()]
+                   for k, p in tparams.items()]
     running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                     name='%s_rgrad2' % k)
-                      for k, p in tparams.iteritems()]
+                      for k, p in tparams.items()]

     zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
     rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
@@ -329,13 +334,13 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost):

     zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                   name='%s_grad' % k)
-                    for k, p in tparams.iteritems()]
+                    for k, p in tparams.items()]
     running_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                    name='%s_rgrad' % k)
-                     for k, p in tparams.iteritems()]
+                     for k, p in tparams.items()]
     running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                     name='%s_rgrad2' % k)
-                      for k, p in tparams.iteritems()]
+                      for k, p in tparams.items()]

     zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
     rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
@@ -348,7 +353,7 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost):

     updir = [theano.shared(p.get_value() * numpy_floatX(0.),
                            name='%s_updir' % k)
-             for k, p in tparams.iteritems()]
+             for k, p in tparams.items()]
     updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4))
                  for ud, zg, rg, rg2 in zip(updir, zipped_grads,
                                             running_grads, running_grads2)]
@@ -418,7 +423,7 @@ def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
         n_done += len(valid_index)

         if verbose:
-            print '%d/%d samples classified' % (n_done, n_samples)
+            print('%d/%d samples classified' % (n_done, n_samples))

     return probs

@@ -470,11 +475,11 @@ def train_lstm(

     # Model options
     model_options = locals().copy()
-    print "model options", model_options
+    print("model options", model_options)

     load_data, prepare_data = get_dataset(dataset)

-    print 'Loading data'
+    print('Loading data')
     train, valid, test = load_data(n_words=n_words, valid_portion=0.05,
                                    maxlen=maxlen)
     if test_size > 0:
@@ -490,7 +495,7 @@ def train_lstm(

     model_options['ydim'] = ydim

-    print 'Building model'
+    print('Building model')
     # This create the initial parameters as numpy ndarrays.
     # Dict name (string) -> numpy ndarray
     params = init_params(model_options)
@@ -516,30 +521,30 @@ def train_lstm(

     f_cost = theano.function([x, mask, y], cost, name='f_cost')

-    grads = tensor.grad(cost, wrt=tparams.values())
+    grads = tensor.grad(cost, wrt=list(tparams.values()))
     f_grad = theano.function([x, mask, y], grads, name='f_grad')

     lr = tensor.scalar(name='lr')
     f_grad_shared, f_update = optimizer(lr, tparams, grads,
                                         x, mask, y, cost)

-    print 'Optimization'
+    print('Optimization')

     kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
     kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

-    print "%d train examples" % len(train[0])
-    print "%d valid examples" % len(valid[0])
-    print "%d test examples" % len(test[0])
+    print("%d train examples" % len(train[0]))
+    print("%d valid examples" % len(valid[0]))
+    print("%d test examples" % len(test[0]))

     history_errs = []
     best_p = None
     bad_count = 0

     if validFreq == -1:
-        validFreq = len(train[0]) / batch_size
+        validFreq = len(train[0]) // batch_size
     if saveFreq == -1:
-        saveFreq = len(train[0]) / batch_size
+        saveFreq = len(train[0]) // batch_size

     uidx = 0  # the number of update done
     estop = False  # early stop
@@ -569,22 +574,22 @@ def train_lstm(
                 f_update(lrate)

                 if numpy.isnan(cost) or numpy.isinf(cost):
-                    print 'bad cost detected: ', cost
+                    print('bad cost detected: ', cost)
                     return 1., 1., 1.

                 if numpy.mod(uidx, dispFreq) == 0:
-                    print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost
+                    print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost)

                 if saveto and numpy.mod(uidx, saveFreq) == 0:
-                    print 'Saving...',
+                    print('Saving...')

                     if best_p is not None:
                         params = best_p
                     else:
                         params = unzip(tparams)
                     numpy.savez(saveto, history_errs=history_errs, **params)
-                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
-                    print 'Done'
+                    pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
+                    print('Done')

                 if numpy.mod(uidx, validFreq) == 0:
                     use_noise.set_value(0.)
@@ -602,25 +607,25 @@ def train_lstm(
                         best_p = unzip(tparams)
                         bad_counter = 0

-                    print ('Train ', train_err, 'Valid ', valid_err,
-                           'Test ', test_err)
+                    print( ('Train ', train_err, 'Valid ', valid_err,
+                           'Test ', test_err) )

                     if (len(history_errs) > patience and
                         valid_err >= numpy.array(history_errs)[:-patience,
                                                                0].min()):
                         bad_counter += 1
                         if bad_counter > patience:
-                            print 'Early Stop!'
+                            print('Early Stop!')
                             estop = True
                             break

-            print 'Seen %d samples' % n_samples
+            print('Seen %d samples' % n_samples)

             if estop:
                 break

     except KeyboardInterrupt:
-        print "Training interupted"
+        print("Training interupted")

     end_time = time.time()
     if best_p is not None:
@@ -634,15 +639,15 @@ def train_lstm(
     valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
     test_err = pred_error(f_pred, prepare_data, test, kf_test)

-    print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err
+    print( 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err )
     if saveto:
         numpy.savez(saveto, train_err=train_err,
                     valid_err=valid_err, test_err=test_err,
                     history_errs=history_errs, **best_p)
-    print 'The code run for %d epochs, with %f sec/epochs' % (
-        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))
-    print >> sys.stderr, ('Training took %.1fs' %
-                          (end_time - start_time))
+    print('The code run for %d epochs, with %f sec/epochs' % (
+        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))))
+    print( ('Training took %.1fs' %
+            (end_time - start_time)), file=sys.stderr)
     return train_err, valid_err, test_err
diff --git a/code/mlp.py b/code/mlp.py
index 18f34e7c..1d463d81 100644
--- a/code/mlp.py
+++ b/code/mlp.py
@@ -18,6 +18,9 @@
     Christopher M. Bishop, section 5

 """
+
+from __future__ import print_function
+
 __docformat__ = 'restructedtext en'


@@ -231,14 +234,14 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
     test_set_x, test_set_y = datasets[2]

     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
-    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

     ######################
     # BUILD ACTUAL MODEL #
     ######################
-    print '... building the model'
+    print('... building the model')

     # allocate symbolic variables for the data
     index = T.lscalar()  # index to a [mini]batch
@@ -322,7 +325,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
     ###############
     # TRAIN MODEL #
     ###############
-    print '... training'
+    print('... training')

     # early-stopping parameters
     patience = 10000  # look as this many examples regardless
@@ -330,7 +333,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
                            # found
     improvement_threshold = 0.995  # a relative improvement of this much is
                                    # considered significant
-    validation_frequency = min(n_train_batches, patience / 2)
+    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many
                                   # minibatche before checking the network
                                   # on the validation set; in this case we
@@ -346,7 +349,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
     while (epoch < n_epochs) and (not done_looping):
         epoch = epoch + 1
-        for minibatch_index in xrange(n_train_batches):
+        for minibatch_index in range(n_train_batches):

             minibatch_avg_cost = train_model(minibatch_index)
             # iteration number
@@ -355,7 +358,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             if (iter + 1) % validation_frequency == 0:
                 # compute zero-one loss on validation set
                 validation_losses = [validate_model(i) for i
-                                     in xrange(n_valid_batches)]
+                                     in range(n_valid_batches)]
                 this_validation_loss = numpy.mean(validation_losses)

                 print(
@@ -382,7 +385,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,

                     # test it on the test set
                     test_losses = [test_model(i) for i
-                                   in xrange(n_test_batches)]
+                                   in range(n_test_batches)]
                     test_score = numpy.mean(test_losses)

                     print(('     epoch %i, minibatch %i/%i, test error of '
@@ -398,9 +401,9 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
     print(('Optimization complete. Best validation score of %f %% '
            'obtained at iteration %i, with test performance %f %%') %
           (best_validation_loss * 100., best_iter + 1, test_score * 100.))
-    print >> sys.stderr, ('The code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print(('The code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)


 if __name__ == '__main__':
diff --git a/code/rbm.py b/code/rbm.py
index 1ba4c86d..0a947963 100644
--- a/code/rbm.py
+++ b/code/rbm.py
@@ -4,6 +4,10 @@
 contain hidden variables. Restricted Boltzmann Machines further restrict BMs
 to those without visible-visible and hidden-hidden connections.
 """
+
+from __future__ import print_function
+from six.moves import xrange
+
 import timeit

 try:
@@ -384,7 +388,7 @@ def test_rbm(learning_rate=0.1, training_epochs=15,
     test_set_x, test_set_y = datasets[2]

     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

     # allocate symbolic variables for the data
     index = T.lscalar()    # index to a [mini]batch
@@ -438,7 +442,7 @@ def test_rbm(learning_rate=0.1, training_epochs=15,
         for batch_index in xrange(n_train_batches):
             mean_cost += [train_rbm(batch_index)]

-        print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)
+        print('Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost))

         # Plot filters after each training epoch
         plotting_start = timeit.default_timer()
@@ -522,7 +526,7 @@ def test_rbm(learning_rate=0.1, training_epochs=15,
         # generate `plot_every` intermediate samples that we discard,
         # because successive samples in the chain are too correlated
         vis_mf, vis_sample = sample_fn()
-        print ' ... plotting sample ', idx
+        print(' ... plotting sample %d' % idx)
         image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
             X=vis_mf,
             img_shape=(28, 28),
diff --git a/code/utils.py b/code/utils.py
index 3b50019c..fa4e4d96 100644
--- a/code/utils.py
+++ b/code/utils.py
@@ -7,6 +7,7 @@

 """

+from six.moves import xrange
 import numpy

From 53f246dc2cd743131fd918542b7f24936e2decce Mon Sep 17 00:00:00 2001
From: Guillaume Alain
Date: Thu, 21 Jan 2016 16:02:02 -0500
Subject: [PATCH 25/90] partly fixed rnnrbm, but we will need to do some magic with the midi module to make it compatible with python 3

---
 code/rnnrbm.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/code/rnnrbm.py b/code/rnnrbm.py
index e1f40b5a..e5027083 100644
--- a/code/rnnrbm.py
+++ b/code/rnnrbm.py
@@ -3,6 +3,9 @@
 # RNN-RBM deep learning tutorial
 # More information at http://deeplearning.net/tutorial/rnnrbm.html

+from __future__ import print_function
+from six.moves import xrange
+
 import glob
 import os
 import sys
@@ -11,10 +14,8 @@
 try:
     import pylab
 except ImportError:
-    print (
-        "pylab isn't available. If you use its functionality, it will crash."
-    )
-    print "It can be installed with 'pip install -q Pillow'"
+    print ("pylab isn't available. If you use its functionality, it will crash.")
+    print("It can be installed with 'pip install -q Pillow'")

 from midi.utils import midiread, midiwrite
 import theano
@@ -257,12 +258,12 @@ def train(self, files, batch_size=100, num_epochs=200):
                     cost = self.train_function(sequence[i:i + batch_size])
                     costs.append(cost)

-                print 'Epoch %i/%i' % (epoch + 1, num_epochs),
-                print numpy.mean(costs)
+                print('Epoch %i/%i' % (epoch + 1, num_epochs))
+                print(numpy.mean(costs))
                 sys.stdout.flush()

         except KeyboardInterrupt:
-            print 'Interrupted by user.'
+            print('Interrupted by user.')

From 2c610d38168a38fbd0aa8fc032579114ff660cf2 Mon Sep 17 00:00:00 2001
From: Guillaume Alain
Date: Thu, 28 Jan 2016 16:26:55 -0500
Subject: [PATCH 26/90] made rnnslu compatible with python 3. tested on cpu for many epochs, but not to completion

---
 code/rnnslu.py | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/code/rnnslu.py b/code/rnnslu.py
index 2251e465..110029f4 100644
--- a/code/rnnslu.py
+++ b/code/rnnslu.py
@@ -1,6 +1,10 @@
+
+from __future__ import print_function
+from six.moves import xrange
+import six.moves.cPickle as pickle
+
 from collections import OrderedDict
 import copy
-import cPickle
 import gzip
 import os
 import urllib
@@ -66,7 +70,10 @@ def atisfold(fold):
     assert fold in range(5)
     filename = os.path.join(PREFIX, 'atis.fold'+str(fold)+'.pkl.gz')
     f = gzip.open(filename, 'rb')
-    train_set, valid_set, test_set, dicts = cPickle.load(f)
+    try:
+        train_set, valid_set, test_set, dicts = pickle.load(f, encoding='latin1')
+    except:
+        train_set, valid_set, test_set, dicts = pickle.load(f)
     return train_set, valid_set, test_set, dicts

@@ -107,7 +114,7 @@ def download(origin, destination):
     download the corresponding atis file
     from http://www-etud.iro.umontreal.ca/~mesnilgr/atis/
     '''
-    print 'Downloading data from %s' % origin
+    print('Downloading data from %s' % origin)
     urllib.urlretrieve(origin, destination)

@@ -125,8 +132,10 @@ def get_perf(filename, folder):
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)

-    stdout, _ = proc.communicate(''.join(open(filename).readlines()))
+    stdout, _ = proc.communicate(''.join(open(filename).readlines()).encode('utf-8'))
+    stdout = stdout.decode('utf-8')
     out = None
+
     for line in stdout.split('\n'):
         if 'accuracy' in line:
             out = line.split()
@@ -237,7 +246,7 @@ def recurrence(x_t, h_tm1):

     def train(self, x, y, window_size, learning_rate):
         cwords = contextwin(x, window_size)
-        words = map(lambda x: numpy.asarray(x).astype('int32'), cwords)
+        words = list(map(lambda x: numpy.asarray(x).astype('int32'), cwords))
         labels = y

         self.sentence_train(words, labels, learning_rate)
@@ -274,7 +283,7 @@ def main(param=None):
             'nepochs': 60,
             # 60 is recommended
             'savemodel': False}
-    print param
+    print(param)

     folder_name = os.path.basename(__file__).split('.')[0]
     folder = os.path.join(os.path.dirname(__file__), folder_name)
@@ -284,8 +293,8 @@ def main(param=None):
     # load the dataset
     train_set, valid_set, test_set, dic = atisfold(param['fold'])

-    idx2label = dict((k, v) for v, k in dic['labels2idx'].iteritems())
-    idx2word = dict((k, v) for v, k in dic['words2idx'].iteritems())
+    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
+    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

     train_lex, train_ne, train_y = train_set
     valid_lex, valid_ne, valid_y = valid_set
@@ -323,9 +332,9 @@ def main(param=None):
     for i, (x,
y) in enumerate(zip(train_lex, train_y)): rnn.train(x, y, param['win'], param['clr']) - print '[learning] epoch %i >> %2.2f%%' % ( - e, (i + 1) * 100. / nsentences), - print 'completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic), + print('[learning] epoch %i >> %2.2f%%' % ( + e, (i + 1) * 100. / nsentences),) + print('completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic),) sys.stdout.flush() # evaluation // back into the real world : idx -> words @@ -374,7 +383,7 @@ def main(param=None): folder + '/best.valid.txt']) else: if param['verbose']: - print '' + print('') # learning rate decay if no improvement in 10 epochs if param['decay'] and abs(param['be']-param['ce']) >= 10: @@ -384,10 +393,10 @@ def main(param=None): if param['clr'] < 1e-5: break - print('BEST RESULT: epoch', param['be'], - 'valid F1', param['vf1'], - 'best test F1', param['tf1'], - 'with the model', folder) + print(('BEST RESULT: epoch', param['be'], + 'valid F1', param['vf1'], + 'best test F1', param['tf1'], + 'with the model', folder)) if __name__ == '__main__': From 226729f96785a96b22a937de199abab62e830de4 Mon Sep 17 00:00:00 2001 From: Guillaume Alain Date: Fri, 29 Jan 2016 15:55:43 -0500 Subject: [PATCH 27/90] all fixes suggested by Pascal, plus update for the doc --- code/SdA.py | 3 +-- code/lstm.py | 1 - code/rnnslu.py | 8 ++++---- doc/index.txt | 3 +++ 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/code/SdA.py b/code/SdA.py index d639cb54..25e306c7 100644 --- a/code/SdA.py +++ b/code/SdA.py @@ -394,8 +394,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15, c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr)) - print('Pre-training layer %i, epoch %d, cost ' % (i, epoch)) - print(numpy.mean(c)) + print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c))) end_time = timeit.default_timer() diff --git a/code/lstm.py b/code/lstm.py index b3b89f3e..9c19e1ad 100644 --- a/code/lstm.py +++ b/code/lstm.py @@ -6,7 +6,6 @@ from six.moves import xrange import six.moves.cPickle as pickle -#from six.moves.collections import OrderedDict from collections import OrderedDict import sys import time diff --git a/code/rnnslu.py b/code/rnnslu.py index 110029f4..45aaf3a6 100644 --- a/code/rnnslu.py +++ b/code/rnnslu.py @@ -333,8 +333,8 @@ def main(param=None): for i, (x, y) in enumerate(zip(train_lex, train_y)): rnn.train(x, y, param['win'], param['clr']) print('[learning] epoch %i >> %2.2f%%' % ( - e, (i + 1) * 100. / nsentences),) - print('completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic),) + e, (i + 1) * 100. / nsentences), end=' ') + print('completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic), end='') sys.stdout.flush() # evaluation // back into the real world : idx -> words @@ -393,10 +393,10 @@ def main(param=None): if param['clr'] < 1e-5: break - print(('BEST RESULT: epoch', param['be'], + print('BEST RESULT: epoch', param['be'], 'valid F1', param['vf1'], 'best test F1', param['tf1'], - 'with the model', folder)) + 'with the model', folder) if __name__ == '__main__': diff --git a/doc/index.txt b/doc/index.txt index 7c6605bf..68a18ec5 100644 --- a/doc/index.txt +++ b/doc/index.txt @@ -63,3 +63,6 @@ Energy-based recurrent neural network (RNN-RBM): .. _Theano basic tutorial: http://deeplearning.net/software/theano/tutorial .. 
_Contractive auto-encoders: https://github.com/lisa-lab/DeepLearningTutorials/blob/master/code/cA.py + +Note that the tutorials here are all compatible with Python 2 and 3, with the exception of :ref:`rnnrbm` which is only available for Python 2. + From 90b925b2c716f29b26209375fc28b1e32fad6f22 Mon Sep 17 00:00:00 2001 From: Guillaume Alain Date: Mon, 1 Feb 2016 13:36:43 -0500 Subject: [PATCH 28/90] travis python version update as suggested by Fred --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 258963ee..e2f2d530 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,8 +4,8 @@ sudo: false language: python #python: -# - "2.7" -# - "3.2" +# - "2.6" +# - "3.3" # command to install dependencies before_install: - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh From 4c0858de1073660842f3f9b8f53c162ca3107653 Mon Sep 17 00:00:00 2001 From: Guillaume Alain Date: Mon, 1 Feb 2016 15:51:10 -0500 Subject: [PATCH 29/90] got rid of all the xrange --- code/DBN.py | 14 +++++++------- code/cA.py | 5 ++--- code/hmc/test_hmc.py | 5 ++--- code/logistic_cg.py | 8 ++++---- code/lstm.py | 3 +-- code/rbm.py | 7 +++---- code/rnnrbm.py | 5 ++--- code/rnnslu.py | 3 +-- code/utils.py | 8 +++----- doc/gettingstarted.txt | 2 +- doc/utilities.txt | 6 +++--- 11 files changed, 29 insertions(+), 37 deletions(-) diff --git a/code/DBN.py b/code/DBN.py index ecd563e7..6ca88603 100644 --- a/code/DBN.py +++ b/code/DBN.py @@ -75,7 +75,7 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784, # training the DBN by doing stochastic gradient descent on the # MLP. - for i in xrange(self.n_layers): + for i in range(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden @@ -267,11 +267,11 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate): # Create a function that scans the entire validation set def valid_score(): - return [valid_score_i(i) for i in xrange(n_valid_batches)] + return [valid_score_i(i) for i in range(n_valid_batches)] # Create a function that scans the entire test set def test_score(): - return [test_score_i(i) for i in xrange(n_test_batches)] + return [test_score_i(i) for i in range(n_test_batches)] return train_fn, valid_score, test_score @@ -329,12 +329,12 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, print '... 
pre-training the model' start_time = timeit.default_timer() ## Pre-train layer-wise - for i in xrange(dbn.n_layers): + for i in range(dbn.n_layers): # go through pretraining epochs - for epoch in xrange(pretraining_epochs): + for epoch in range(pretraining_epochs): # go through the training set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), @@ -379,7 +379,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index diff --git a/code/cA.py b/code/cA.py index 0d563ef2..8dc5d8b6 100644 --- a/code/cA.py +++ b/code/cA.py @@ -30,7 +30,6 @@ """ from __future__ import print_function -from six.moves import xrange import os import sys @@ -287,10 +286,10 @@ def test_cA(learning_rate=0.01, training_epochs=20, ############ # go through training epochs - for epoch in xrange(training_epochs): + for epoch in range(training_epochs): # go through trainng set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(train_ca(batch_index)) c_array = numpy.vstack(c) diff --git a/code/hmc/test_hmc.py b/code/hmc/test_hmc.py index c3c425e6..be1a1ac6 100644 --- a/code/hmc/test_hmc.py +++ b/code/hmc/test_hmc.py @@ -1,6 +1,5 @@ from __future__ import print_function -from six.moves import xrange import numpy import theano @@ -39,10 +38,10 @@ def gaussian_energy(x): initial_stepsize=1e-3, stepsize_max=0.5) # Start with a burn-in process - garbage = [sampler.draw() for r in xrange(burnin)] # burn-in Draw + garbage = [sampler.draw() for r in range(burnin)] # burn-in Draw # `n_samples`: result is a 3D tensor of dim [n_samples, batchsize, # dim] - _samples = numpy.asarray([sampler.draw() for r in xrange(n_samples)]) + _samples = numpy.asarray([sampler.draw() for r in range(n_samples)]) # Flatten to [n_samples * batchsize, dim] samples = _samples.T.reshape(dim, -1).T diff --git a/code/logistic_cg.py b/code/logistic_cg.py index db9822ef..40c72c2f 100644 --- a/code/logistic_cg.py +++ b/code/logistic_cg.py @@ -239,7 +239,7 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): def train_fn(theta_value): classifier.theta.set_value(theta_value, borrow=True) train_losses = [batch_cost(i * batch_size) - for i in xrange(n_train_batches)] + for i in range(n_train_batches)] return numpy.mean(train_losses) # creates a function that computes the average gradient of cost with @@ -247,7 +247,7 @@ def train_fn(theta_value): def train_fn_grad(theta_value): classifier.theta.set_value(theta_value, borrow=True) grad = batch_grad(0) - for i in xrange(1, n_train_batches): + for i in range(1, n_train_batches): grad += batch_grad(i * batch_size) return grad / n_train_batches @@ -258,7 +258,7 @@ def callback(theta_value): classifier.theta.set_value(theta_value, borrow=True) #compute the validation loss validation_losses = [validate_model(i * batch_size) - for i in xrange(n_valid_batches)] + for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print('validation error %f %%' % (this_validation_loss * 100.,)) @@ -268,7 +268,7 @@ def callback(theta_value): # testing dataset validation_scores[0] = 
this_validation_loss test_losses = [test_model(i * batch_size) - for i in xrange(n_test_batches)] + for i in range(n_test_batches)] validation_scores[1] = numpy.mean(test_losses) ############### diff --git a/code/lstm.py b/code/lstm.py index 9c19e1ad..1c285928 100644 --- a/code/lstm.py +++ b/code/lstm.py @@ -3,7 +3,6 @@ ''' from __future__ import print_function -from six.moves import xrange import six.moves.cPickle as pickle from collections import OrderedDict @@ -549,7 +548,7 @@ def train_lstm( estop = False # early stop start_time = time.time() try: - for eidx in xrange(max_epochs): + for eidx in range(max_epochs): n_samples = 0 # Get new shuffled index for the training set. diff --git a/code/rbm.py b/code/rbm.py index 0a947963..901b5870 100644 --- a/code/rbm.py +++ b/code/rbm.py @@ -6,7 +6,6 @@ """ from __future__ import print_function -from six.moves import xrange import timeit @@ -435,11 +434,11 @@ def test_rbm(learning_rate=0.1, training_epochs=15, start_time = timeit.default_timer() # go through training epochs - for epoch in xrange(training_epochs): + for epoch in range(training_epochs): # go through the training set mean_cost = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): mean_cost += [train_rbm(batch_index)] print('Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)) @@ -522,7 +521,7 @@ def test_rbm(learning_rate=0.1, training_epochs=15, (29 * n_samples + 1, 29 * n_chains - 1), dtype='uint8' ) - for idx in xrange(n_samples): + for idx in range(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() diff --git a/code/rnnrbm.py b/code/rnnrbm.py index e5027083..b8420b9b 100644 --- a/code/rnnrbm.py +++ b/code/rnnrbm.py @@ -4,7 +4,6 @@ # More information at http://deeplearning.net/tutorial/rnnrbm.html from __future__ import print_function -from six.moves import xrange import glob import os @@ -249,12 +248,12 @@ def train(self, files, batch_size=100, num_epochs=200): for f in files] try: - for epoch in xrange(num_epochs): + for epoch in range(num_epochs): numpy.random.shuffle(dataset) costs = [] for s, sequence in enumerate(dataset): - for i in xrange(0, len(sequence), batch_size): + for i in range(0, len(sequence), batch_size): cost = self.train_function(sequence[i:i + batch_size]) costs.append(cost) diff --git a/code/rnnslu.py b/code/rnnslu.py index 45aaf3a6..0413ee63 100644 --- a/code/rnnslu.py +++ b/code/rnnslu.py @@ -1,6 +1,5 @@ from __future__ import print_function -from six.moves import xrange import six.moves.cPickle as pickle from collections import OrderedDict @@ -322,7 +321,7 @@ def main(param=None): # train with early stopping on validation set best_f1 = -numpy.inf param['clr'] = param['lr'] - for e in xrange(param['nepochs']): + for e in range(param['nepochs']): # shuffle shuffle([train_lex, train_ne, train_y], param['seed']) diff --git a/code/utils.py b/code/utils.py index fa4e4d96..ff772ad4 100644 --- a/code/utils.py +++ b/code/utils.py @@ -6,8 +6,6 @@ image from a set of samples or weights. """ - -from six.moves import xrange import numpy @@ -86,7 +84,7 @@ def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), else: channel_defaults = [0., 0., 0., 1.] 
- for i in xrange(4): + for i in range(4): if X[i] is None: # if channel is None, fill it with zeros of the correct # dtype @@ -116,8 +114,8 @@ def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), dt = 'uint8' out_array = numpy.zeros(out_shape, dtype=dt) - for tile_row in xrange(tile_shape[0]): - for tile_col in xrange(tile_shape[1]): + for tile_row in range(tile_shape[0]): + for tile_col in range(tile_shape[1]): if tile_row * tile_shape[1] + tile_col < X.shape[0]: this_x = X[tile_row * tile_shape[1] + tile_col] if scale_rows_to_unit_interval: diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index e838d706..d765f14a 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -578,7 +578,7 @@ of a strategy based on a geometrically increasing amount of patience. while (epoch < n_epochs) and (not done_looping): # Report "1" for first epoch, "n_epochs" for last epoch epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): d_loss_wrt_params = ... # compute gradient params -= learning_rate * d_loss_wrt_params # gradient descent diff --git a/doc/utilities.txt b/doc/utilities.txt index 0367127c..eb982ec2 100644 --- a/doc/utilities.txt +++ b/doc/utilities.txt @@ -112,7 +112,7 @@ Tiling minibatches together is done for us by the else: channel_defaults = [0., 0., 0., 1.] - for i in xrange(4): + for i in range(4): if X[i] is None: # if channel is None, fill it with zeros of the correct # dtype @@ -134,8 +134,8 @@ Tiling minibatches together is done for us by the out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype) - for tile_row in xrange(tile_shape[0]): - for tile_col in xrange(tile_shape[1]): + for tile_row in range(tile_shape[0]): + for tile_col in range(tile_shape[1]): if tile_row * tile_shape[1] + tile_col < X.shape[0]: if scale_rows_to_unit_interval: # if we should scale values to be between 0 and 1 From dcfe518dba2e346268ac88884578db5ce4fbebf4 Mon Sep 17 00:00:00 2001 From: Guillaume Alain Date: Wed, 3 Feb 2016 16:37:27 -0500 Subject: [PATCH 30/90] minor edit to respond to Pascal's suggestion --- code/hmc/test_hmc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/hmc/test_hmc.py b/code/hmc/test_hmc.py index be1a1ac6..42dbc3a7 100644 --- a/code/hmc/test_hmc.py +++ b/code/hmc/test_hmc.py @@ -6,7 +6,7 @@ try: from hmc import HMC_sampler -except: +except ImportError as e: # python 3 compatibility # http://stackoverflow.com/questions/3073259/python-nose-import-error from hmc.hmc import HMC_sampler From 8ca9239cbd9ad4472241bad638c4b283818295da Mon Sep 17 00:00:00 2001 From: Guillaume Alain Date: Tue, 9 Feb 2016 11:31:59 -0500 Subject: [PATCH 31/90] missed one print statement --- code/rnnslu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/rnnslu.py b/code/rnnslu.py index 0413ee63..3c620178 100644 --- a/code/rnnslu.py +++ b/code/rnnslu.py @@ -141,7 +141,7 @@ def get_perf(filename, folder): break # To help debug if out is None: - print stdout.split('\n') + print(stdout.split('\n')) precision = float(out[6][:-2]) recall = float(out[8][:-2]) f1score = float(out[10]) From 0054116a1cadc27fe6353f14ee48479e681c0b19 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Wed, 17 Feb 2016 09:02:43 -0500 Subject: [PATCH 32/90] Update timing due to speed up. 
(lowered the number of random number generators)
---
 code/test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/code/test.py b/code/test.py
index 39d0ab4c..b08f39a3 100644
--- a/code/test.py
+++ b/code/test.py
@@ -103,9 +103,9 @@ def speed():
     # 7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.
 
     expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4,
-                                       346.9, 355.0, 510.9, 130.4, 23.2, 98.8])
+                                       346.9, 355.0, 268.2, 130.4, 23.2, 98.8])
     expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 66.5, 71,
-                                       191.2, 199.0, 400.4, 119.5, 36.9, 67.2])
+                                       191.2, 199.0, 201.9, 119.5, 36.9, 67.2])
 
     # Number with just 1 decimal are new value that are faster with
     # the Theano version 0.5rc2 Other number are older. They are not
@@ -125,8 +125,8 @@
     #    1.35324519 1.7356905 1.12937868]
 
     expected_times_gpu = numpy.asarray([2.9, 7.55523491, 18.99226785,
-                                        5.8, 20.0,
-                                        11.2, 17.2, 244.3, 118.8, 34.2, 8.7])
+                                        5.8, 19.2,
+                                        11.2, 17.2, 122, 112.5, 31.1, 8.7])
     expected_times_64 = [s for idx, s in enumerate(expected_times_64)
                          if to_exec[idx]]
     expected_times_32 = [s for idx, s in enumerate(expected_times_32)

From 0ef0b4dd4d9ebcacb21c8079595637bc1742e588 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Wed, 17 Feb 2016 10:24:53 -0500
Subject: [PATCH 33/90] Make DLT compatible with Theano 0.7

---
 code/DBN.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/DBN.py b/code/DBN.py
index 6ca88603..b8e35fad 100644
--- a/code/DBN.py
+++ b/code/DBN.py
@@ -174,7 +174,7 @@ def pretraining_functions(self, train_set_x, batch_size, k):
 
             # compile the theano function
             fn = theano.function(
-                inputs=[index, theano.In(learning_rate, value=0.1)],
+                inputs=[index, theano.Param(learning_rate, default=0.1)],
                 outputs=cost,
                 updates=updates,
                 givens={

From 0c8507bc469e0a99027350c526372b8c8dd8a75d Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Wed, 24 Feb 2016 16:29:05 -0500
Subject: [PATCH 34/90] Update speed test to faster speed

---
 code/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/test.py b/code/test.py
index b08f39a3..250e4d7e 100644
--- a/code/test.py
+++ b/code/test.py
@@ -104,7 +104,7 @@ def speed():
 
     expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4,
                                        346.9, 355.0, 268.2, 130.4, 23.2, 98.8])
-    expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 66.5, 71,
+    expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 63.1, 71,
                                        191.2, 199.0, 201.9, 119.5, 36.9, 67.2])
 
     # Number with just 1 decimal are new value that are faster with

From cdfcde08e4667d794db3907ae19437c352baab85 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Wed, 2 Mar 2016 09:13:43 -0500
Subject: [PATCH 35/90] Speed up 8 expected benchmark speed

---
 code/test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/code/test.py b/code/test.py
index 250e4d7e..e034c63b 100644
--- a/code/test.py
+++ b/code/test.py
@@ -103,9 +103,9 @@ def speed():
     # 7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.
 
     expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4,
-                                       346.9, 355.0, 268.2, 130.4, 23.2, 98.8])
+                                       346.9, 355.0, 268.2, 115.8, 16.8, 91.6])
     expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 63.1, 71,
-                                       191.2, 199.0, 201.9, 119.5, 36.9, 67.2])
+                                       191.2, 199.0, 201.9, 107, 12.6, 61.3])
 
     # Number with just 1 decimal are new value that are faster with
     # the Theano version 0.5rc2 Other number are older. They are not
@@ -126,7 +126,7 @@
 
     expected_times_gpu = numpy.asarray([2.9, 7.55523491, 18.99226785,
                                         5.8, 19.2,
-                                        11.2, 17.2, 122, 112.5, 31.1, 8.7])
+                                        11.2, 7.8, 122, 112.5, 31.1, 8.3])
     expected_times_64 = [s for idx, s in enumerate(expected_times_64)
                          if to_exec[idx]]
     expected_times_32 = [s for idx, s in enumerate(expected_times_32)

From bba82fbe92447b7e346a941847581199c05e4eeb Mon Sep 17 00:00:00 2001
From: Jamie White
Date: Wed, 9 Mar 2016 22:12:06 -0500
Subject: [PATCH 36/90] Update mlp.py

Fixed misspelling of "sorted"
---
 code/mlp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/mlp.py b/code/mlp.py
index 1d463d81..e865bc8f 100644
--- a/code/mlp.py
+++ b/code/mlp.py
@@ -292,7 +292,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
     )
 
     # start-snippet-5
-    # compute the gradient of cost with respect to theta (sotred in params)
+    # compute the gradient of cost with respect to theta (sorted in params)
    # the resulting gradients will be stored in a list gparams
     gparams = [T.grad(cost, param) for param in classifier.params]

From 06a9d877642ed22ceccaf913edfb746a013e9184 Mon Sep 17 00:00:00 2001
From: Kyunghyun Cho
Date: Fri, 18 Mar 2016 10:15:24 -0400
Subject: [PATCH 37/90] no nonlinearity in z

---
 doc/lstm.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/lstm.txt b/doc/lstm.txt
index 828fd694..bde70bd8 100644
--- a/doc/lstm.txt
+++ b/doc/lstm.txt
@@ -174,7 +174,7 @@ be computed with :
 
 .. math::
 
-    z = \sigma(W x_t + U h_{t-1} + b)
+    z = W x_t + U h_{t-1} + b
 
 The result is then sliced to obtain the pre-nonlinearity activations for
 :math:`i`, :math:`f`, :math:`\widetilde{C_t}`, and :math:`o` and the

From 146eb2a3680658cca971d2aa3c3f1ab1471075b0 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Wed, 9 Mar 2016 09:27:38 -0500
Subject: [PATCH 38/90] Don't be too verbose when downloading. Make buildbot
 output smaller

---
 data/download.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/download.sh b/data/download.sh
index ed273bbb..160b0986 100755
--- a/data/download.sh
+++ b/data/download.sh
@@ -5,7 +5,7 @@ WGET=$?
 which curl >/dev/null 2>&1
 CURL=$?
if [ "$WGET" -eq 0 ]; then - DL_CMD="wget -c" + DL_CMD="wget --no-verbose -c" elif [ "$CURL" -eq 0 ]; then DL_CMD="curl -C - -O" else From 57a80fd2bb51b171b81db05cbd33bcfaf68e322f Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Wed, 9 Mar 2016 09:27:59 -0500 Subject: [PATCH 39/90] Give name to theano function --- code/rbm.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/code/rbm.py b/code/rbm.py index 901b5870..3800cca7 100644 --- a/code/rbm.py +++ b/code/rbm.py @@ -257,7 +257,8 @@ def get_cost_updates(self, lr=0.1, persistent=None, k=1): # chain_start is the initial state corresponding to the # 6th output outputs_info=[None, None, None, None, None, chain_start], - n_steps=k + n_steps=k, + name="gibbs_hvh" ) # start-snippet-3 # determine gradients on RBM parameters @@ -496,7 +497,8 @@ def test_rbm(learning_rate=0.1, training_epochs=15, ) = theano.scan( rbm.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], - n_steps=plot_every + n_steps=plot_every, + name="gibbs_vhv" ) # add to updates the shared variable that takes care of our persistent From ff6939b7bcdb70c7acbd9ed4020eacbb0a65c6d0 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Tue, 22 Mar 2016 11:05:15 -0400 Subject: [PATCH 40/90] Finish passing to new conv2d interface --- code/convolutional_mlp.py | 2 +- doc/lenet.txt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py index a8811bc1..62845c99 100644 --- a/code/convolutional_mlp.py +++ b/code/convolutional_mlp.py @@ -94,7 +94,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)): input=input, filters=self.W, filter_shape=filter_shape, - image_shape=image_shape + input_shape=image_shape ) # downsample each feature map individually, using maxpooling diff --git a/doc/lenet.txt b/doc/lenet.txt index 117dfdab..76614106 100644 --- a/doc/lenet.txt +++ b/doc/lenet.txt @@ -196,7 +196,7 @@ one of Figure 1. The input consists of 3 features maps (an RGB color image) of s import theano from theano import tensor as T - from theano.tensor.nnet import conv + from theano.tensor.nnet import conv2d import numpy @@ -226,7 +226,7 @@ one of Figure 1. The input consists of 3 features maps (an RGB color image) of s dtype=input.dtype), name ='b') # build symbolic expression that computes the convolution of input with filters in w - conv_out = conv.conv2d(input, W) + conv_out = conv2d(input, W) # build symbolic expression to add bias and apply activation function, i.e. produce neural net layer output # A few words on ``dimshuffle`` : @@ -404,7 +404,7 @@ to be compatible with our previous MLP implementation. Note that the term "convolution" could corresponds to different mathematical operations: 1. `theano.tensor.nnet.conv2d - `_, + `_, which is the most common one in almost all of the recent published convolutional models. 
In this operation, each output feature map is connected to each From ee5c0cb9a5e873d51c25dc60203e828dd1793889 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Tue, 22 Mar 2016 11:07:09 -0400 Subject: [PATCH 41/90] Use the new Interface --- code/DBN.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/DBN.py b/code/DBN.py index b8e35fad..6ca88603 100644 --- a/code/DBN.py +++ b/code/DBN.py @@ -174,7 +174,7 @@ def pretraining_functions(self, train_set_x, batch_size, k): # compile the theano function fn = theano.function( - inputs=[index, theano.Param(learning_rate, default=0.1)], + inputs=[index, theano.In(learning_rate, value=0.1)], outputs=cost, updates=updates, givens={ From 797342acc73b94854964e682ec5babbc5735bdfc Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Fri, 1 Apr 2016 13:34:19 -0400 Subject: [PATCH 42/90] Use the new pool interface --- code/convolutional_mlp.py | 6 +++--- doc/lenet.txt | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py index 62845c99..6bbb47a1 100644 --- a/code/convolutional_mlp.py +++ b/code/convolutional_mlp.py @@ -32,7 +32,7 @@ import theano import theano.tensor as T -from theano.tensor.signal import downsample +from theano.tensor.signal import pool from theano.tensor.nnet import conv2d from logistic_sgd import LogisticRegression, load_data @@ -97,8 +97,8 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)): input_shape=image_shape ) - # downsample each feature map individually, using maxpooling - pooled_out = downsample.max_pool_2d( + # pool each feature map individually, using maxpooling + pooled_out = pool.pool_2d( input=conv_out, ds=poolsize, ignore_border=True diff --git a/doc/lenet.txt b/doc/lenet.txt index 76614106..09f50be6 100644 --- a/doc/lenet.txt +++ b/doc/lenet.txt @@ -7,7 +7,7 @@ Convolutional Neural Networks (LeNet) This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp`. Additionally, it uses the following new Theano functions and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, - `floatX`_, `downsample`_ , `conv2d`_, `dimshuffle`_. If you intend to run the + `floatX`_, `pool`_ , `conv2d`_, `dimshuffle`_. If you intend to run the code on GPU also read `GPU`_. To run this example on a GPU, you need a good GPU. It needs @@ -35,7 +35,7 @@ Convolutional Neural Networks (LeNet) .. _GPU: http://deeplearning.net/software/theano/tutorial/using_gpu.html -.. _downsample: http://deeplearning.net/software/theano/library/tensor/signal/downsample.html +.. _pool: http://deeplearning.net/software/theano/library/tensor/signal/pool.html .. _conv2d: http://deeplearning.net/software/theano/library/tensor/signal/conv.html#module-conv @@ -320,7 +320,7 @@ Max-pooling is useful in vision for two reasons: "smart" way of reducing the dimensionality of intermediate representations. Max-pooling is done in Theano by way of -``theano.tensor.signal.downsample.max_pool_2d``. This function takes as input +``theano.tensor.signal.pool.pool_2d``. This function takes as input an N dimensional tensor (where N >= 2) and a downscaling factor and performs max-pooling over the 2 trailing dimensions of the tensor. @@ -328,11 +328,11 @@ An example is worth a thousand words: .. 
code-block:: python - from theano.tensor.signal import downsample + from theano.tensor.signal import pool input = T.dtensor4('input') maxpool_shape = (2, 2) - pool_out = downsample.max_pool_2d(input, maxpool_shape, ignore_border=True) + pool_out = pool.pool_2d(input, maxpool_shape, ignore_border=True) f = theano.function([input],pool_out) invals = numpy.random.RandomState(1).rand(3, 2, 5, 5) @@ -340,7 +340,7 @@ An example is worth a thousand words: print 'invals[0, 0, :, :] =\n', invals[0, 0, :, :] print 'output[0, 0, :, :] =\n', f(invals)[0, 0, :, :] - pool_out = downsample.max_pool_2d(input, maxpool_shape, ignore_border=False) + pool_out = pool.pool_2d(input, maxpool_shape, ignore_border=False) f = theano.function([input],pool_out) print 'With ignore_border set to False:' print 'invals[1, 0, :, :] =\n ', invals[1, 0, :, :] From aad4f16662edb643926a38b661f469b6026a6a72 Mon Sep 17 00:00:00 2001 From: "lorenzo.ritter" Date: Wed, 27 Apr 2016 19:10:25 +0200 Subject: [PATCH 43/90] fixed typo in SdA.py --- code/SdA.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/SdA.py b/code/SdA.py index 25e306c7..3d9589ac 100644 --- a/code/SdA.py +++ b/code/SdA.py @@ -81,8 +81,8 @@ def __init__( :type n_ins: int :param n_ins: dimension of the input to the sdA - :type n_layers_sizes: list of ints - :param n_layers_sizes: intermediate layers size, must contain + :type hidden_layers_sizes: list of ints + :param hidden_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int From de99c6eb17d802549bf08fc7ed5ed4f287f967c2 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Sun, 8 May 2016 19:50:03 -0400 Subject: [PATCH 44/90] Commit a small speed up. --- code/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/test.py b/code/test.py index e034c63b..6aee1084 100644 --- a/code/test.py +++ b/code/test.py @@ -126,7 +126,7 @@ def speed(): expected_times_gpu = numpy.asarray([2.9, 7.55523491, 18.99226785, 5.8, 19.2, - 11.2, 7.8, 122, 112.5, 31.1, 8.3]) + 11.2, 7.3, 122, 112.5, 31.1, 8.3]) expected_times_64 = [s for idx, s in enumerate(expected_times_64) if to_exec[idx]] expected_times_32 = [s for idx, s in enumerate(expected_times_32) From 75cbba67b4fdc271bae5b7020a2a3fc69b70328d Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Wed, 13 Jul 2016 14:03:47 -0400 Subject: [PATCH 45/90] Python 3 + flake8 fixes. --- code/DBN.py | 101 +++++++++++++++++++--------------------- code/imdb_preprocess.py | 10 ++-- code/logistic_cg.py | 25 +++++----- code/test.py | 11 +++-- 4 files changed, 70 insertions(+), 77 deletions(-) diff --git a/code/DBN.py b/code/DBN.py index 6ca88603..3b2bd230 100644 --- a/code/DBN.py +++ b/code/DBN.py @@ -1,5 +1,6 @@ """ """ +from __future__ import print_function, division import os import sys import timeit @@ -61,9 +62,12 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784, theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data - self.x = T.matrix('x') # the data is presented as rasterized images - self.y = T.ivector('y') # the labels are presented as 1D vector - # of [int] labels + + # the data is presented as rasterized images + self.x = T.matrix('x') + + # the labels are presented as 1D vector of [int] labels + self.y = T.ivector('y') # end-snippet-1 # The DBN is an MLP, for which all weights of intermediate # layers are shared with a different RBM. 
We will first @@ -156,8 +160,6 @@ def pretraining_functions(self, train_set_x, batch_size, k): index = T.lscalar('index') # index to a minibatch learning_rate = T.scalar('lr') # learning rate to use - # number of batches - n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # begining of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` @@ -211,9 +213,9 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate): # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] - n_valid_batches /= batch_size + n_valid_batches //= batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] - n_test_batches /= batch_size + n_test_batches //= batch_size index = T.lscalar('index') # index to a [mini]batch @@ -307,11 +309,11 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size # numpy random generator numpy_rng = numpy.random.RandomState(123) - print '... building the model' + print('... building the model') # construct the Deep Belief Network dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000], @@ -321,14 +323,14 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, ######################### # PRETRAINING THE MODEL # ######################### - print '... getting the pretraining functions' + print('... getting the pretraining functions') pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, k=k) - print '... pre-training the model' + print('... pre-training the model') start_time = timeit.default_timer() - ## Pre-train layer-wise + # Pre-train layer-wise for i in range(dbn.n_layers): # go through pretraining epochs for epoch in range(pretraining_epochs): @@ -337,38 +339,40 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, for batch_index in range(n_train_batches): c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) - print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), - print numpy.mean(c) + print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ') + print(numpy.mean(c)) end_time = timeit.default_timer() # end-snippet-2 - print >> sys.stderr, ('The pretraining code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) / 60.)) + print('The pretraining code for file ' + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr) ######################## # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model - print '... getting the finetuning functions' + print('... getting the finetuning functions') train_fn, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr ) - print '... finetuning the model' + print('... finetuning the model') # early-stopping parameters - patience = 4 * n_train_batches # look as this many examples regardless - patience_increase = 2. 
# wait this much longer when a new best is - # found - improvement_threshold = 0.995 # a relative improvement of this much is - # considered significant + + # look as this many examples regardless + patience = 4 * n_train_batches + + # wait this much longer when a new best is found + patience_increase = 2. + + # a relative improvement of this much is considered significant + improvement_threshold = 0.995 + + # go through this many minibatches before checking the network on + # the validation set; in this case we check every epoch validation_frequency = min(n_train_batches, patience / 2) - # go through this many - # minibatches before checking the network - # on the validation set; in this case we - # check every epoch best_validation_loss = numpy.inf test_score = 0. @@ -381,31 +385,27 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, epoch = epoch + 1 for minibatch_index in range(n_train_batches): - minibatch_avg_cost = train_fn(minibatch_index) + train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) - print( - 'epoch %i, minibatch %i/%i, validation error %f %%' - % ( - epoch, - minibatch_index + 1, - n_train_batches, - this_validation_loss * 100. + print('epoch %i, minibatch %i/%i, validation error %f %%' % ( + epoch, + minibatch_index + 1, + n_train_batches, + this_validation_loss * 100. ) ) # if we got the best validation score until now if this_validation_loss < best_validation_loss: - #improve patience if loss improvement is good enough - if ( - this_validation_loss < best_validation_loss * - improvement_threshold - ): + # improve patience if loss improvement is good enough + if (this_validation_loss < best_validation_loss * + improvement_threshold): patience = max(patience, iter * patience_increase) # save best validation score and iteration number @@ -418,24 +418,19 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, - test_score * 100.)) + test_score * 100.)) if patience <= iter: done_looping = True break end_time = timeit.default_timer() - print( - ( - 'Optimization complete with best validation score of %f %%, ' - 'obtained at iteration %i, ' - 'with test performance %f %%' - ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.) - ) - print >> sys.stderr, ('The fine tuning code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) - / 60.)) + print(('Optimization complete with best validation score of %f %%, ' + 'obtained at iteration %i, ' + 'with test performance %f %%' + ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) + print('The fine tuning code for file ' + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr) if __name__ == '__main__': diff --git a/code/imdb_preprocess.py b/code/imdb_preprocess.py index c20b37b6..62ebb556 100644 --- a/code/imdb_preprocess.py +++ b/code/imdb_preprocess.py @@ -8,7 +8,7 @@ 3) Then run this script. 
""" - +from __future__ import print_function dataset_path='/Tmp/bastienf/aclImdb/' import numpy @@ -27,12 +27,12 @@ def tokenize(sentences): - print 'Tokenizing..', + print('Tokenizing..', end=' ') text = "\n".join(sentences) tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE) tok_text, _ = tokenizer.communicate(text) toks = tok_text.split('\n')[:-1] - print 'Done' + print('Done') return toks @@ -52,7 +52,7 @@ def build_dict(path): sentences = tokenize(sentences) - print 'Building dictionary..', + print('Building dictionary..', end=' ') wordcount = dict() for ss in sentences: words = ss.strip().lower().split() @@ -72,7 +72,7 @@ def build_dict(path): for idx, ss in enumerate(sorted_idx): worddict[keys[ss]] = idx+2 # leave 0 and 1 (UNK) - print numpy.sum(counts), ' total words ', len(keys), ' unique words' + print(numpy.sum(counts), ' total words ', len(keys), ' unique words') return worddict diff --git a/code/logistic_cg.py b/code/logistic_cg.py index 40c72c2f..c2970d51 100644 --- a/code/logistic_cg.py +++ b/code/logistic_cg.py @@ -33,6 +33,7 @@ """ +from __future__ import print_function, division __docformat__ = 'restructedtext en' @@ -165,9 +166,9 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): batch_size = 600 # size of the minibatch - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size - n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size - n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size + n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size + n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size n_in = 28 * 28 # number of input units n_out = 10 # number of output units @@ -175,7 +176,7 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): ###################### # BUILD ACTUAL MODEL # ###################### - print '... building the model' + print('... building the model') # allocate symbolic variables for the data minibatch_offset = T.lscalar() # offset to the start of a [mini]batch @@ -260,7 +261,7 @@ def callback(theta_value): validation_losses = [validate_model(i * batch_size) for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) - print('validation error %f %%' % (this_validation_loss * 100.,)) + print(('validation error %f %%' % (this_validation_loss * 100.,))) # check if it is better then best validation score got until now if this_validation_loss < validation_scores[0]: @@ -288,17 +289,13 @@ def callback(theta_value): maxiter=n_epochs ) end_time = timeit.default_timer() - print( - ( - 'Optimization complete with best validation score of %f %%, with ' - 'test performance %f %%' - ) - % (validation_scores[0] * 100., validation_scores[1] * 100.) + print(('Optimization complete with best validation score of %f %%, with ' + 'test performance %f %%' + ) % (validation_scores[0] * 100., validation_scores[1] * 100.) 
) - print >> sys.stderr, ('The code for file ' + - os.path.split(__file__)[1] + - ' ran for %.1fs' % ((end_time - start_time))) + print('The code for file ' + os.path.split(__file__)[1] + + ' ran for %.1fs' % (end_time - start_time), file=sys.stderr) if __name__ == '__main__': diff --git a/code/test.py b/code/test.py index 6aee1084..926cae7b 100644 --- a/code/test.py +++ b/code/test.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import, print_function, division import sys import numpy @@ -137,12 +138,12 @@ def speed(): def time_test(m, l, idx, f, **kwargs): if not to_exec[idx]: return - print algo[idx] + print(algo[idx]) ts = m.call_time try: f(**kwargs) - except Exception, e: - print >> sys.stderr, 'test', algo[idx], 'FAILED', e + except Exception as e: + print('test', algo[idx], 'FAILED', e, file=sys.stderr) l.append(numpy.nan) return te = m.call_time @@ -265,7 +266,7 @@ def do_tests(): print >> sys.stderr, 'gpu % expected/get', ( expected_times_gpu / gpu_times) - print + print() if do_float64 and do_float32: print >> sys.stderr, 'float64/float32', ( float64_times / float32_times) @@ -286,7 +287,7 @@ def compare(x, y): # time and the real time, we consider this an error. return sum((ratio < 0.95) + (ratio > 1.05)) - print + print() if do_float64: err = compare(expected_times_64, float64_times) print >> sys.stderr, 'speed_failure_float64=' + str(err) From 4faede82e900555a063d6c7c385d0c3e59c04699 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Thu, 28 Jul 2016 10:29:44 -0400 Subject: [PATCH 46/90] python3 --- code/test.py | 130 +++++++++++++++++++++++++-------------------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/code/test.py b/code/test.py index 926cae7b..4332e8b0 100644 --- a/code/test.py +++ b/code/test.py @@ -194,92 +194,92 @@ def do_tests(): theano.config.floatX = 'float64' theano.config.mode = 'FAST_RUN' float64_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) + print(algo_executed, file=sys.stderr) + print('float64 times', float64_times, file=sys.stderr) + print('float64 expected', expected_times_64, file=sys.stderr) + print('float64 % expected/get', ( + expected_times_64 / float64_times), file=sys.stderr) #test in float32 in FAST_RUN mode on the cpu theano.config.floatX = 'float32' if do_float32: float32_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) + print(algo_executed, file=sys.stderr) + print('float32 times', float32_times, file=sys.stderr) + print('float32 expected', expected_times_32, file=sys.stderr) + print('float32 % expected/get', ( + expected_times_32 / float32_times), file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - print >> sys.stderr - print >> sys.stderr, ('Duplicate the timing to have everything ' - 'in one place') - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', 
expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) - - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - print >> sys.stderr, 'expected float64/float32', ( - expected_times_64 / float32_times) + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) + print(file=sys.stderr) + print(('Duplicate the timing to have everything ' + 'in one place'), file=sys.stderr) + print(algo_executed, file=sys.stderr) + print('float64 times', float64_times, file=sys.stderr) + print('float64 expected', expected_times_64, file=sys.stderr) + print('float64 % expected/get', ( + expected_times_64 / float64_times), file=sys.stderr) + print('float32 times', float32_times, file=sys.stderr) + print('float32 expected', expected_times_32, file=sys.stderr) + print('float32 % expected/get', ( + expected_times_32 / float32_times), file=sys.stderr) + + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) + print('expected float64/float32', ( + expected_times_64 / float32_times), file=sys.stderr) #test in float32 in FAST_RUN mode on the gpu import theano.sandbox.cuda if do_gpu: theano.sandbox.cuda.use('gpu') gpu_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'gpu times', gpu_times - print >> sys.stderr, 'gpu expected', expected_times_gpu - print >> sys.stderr, 'gpu % expected/get', ( - expected_times_gpu / gpu_times) + print(algo_executed, file=sys.stderr) + print('gpu times', gpu_times, file=sys.stderr) + print('gpu expected', expected_times_gpu, file=sys.stderr) + print('gpu % expected/get', ( + expected_times_gpu / gpu_times), file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64/gpu', float64_times / gpu_times + print('float64/gpu', float64_times / gpu_times, file=sys.stderr) if (do_float64 + do_float32 + do_gpu) > 1: - print >> sys.stderr - print >> sys.stderr, ('Duplicate the timing to have everything ' - 'in one place') - print >> sys.stderr, algo_executed + print(file=sys.stderr) + print(('Duplicate the timing to have everything ' + 'in one place'), file=sys.stderr) + print(algo_executed, file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) + print('float64 times', float64_times, file=sys.stderr) + print('float64 expected', expected_times_64, file=sys.stderr) + print('float64 % expected/get', ( + expected_times_64 / float64_times), file=sys.stderr) if do_float32: - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) + print('float32 times', float32_times, file=sys.stderr) + print('float32 expected', expected_times_32, file=sys.stderr) + print('float32 % expected/get', ( + expected_times_32 / float32_times), file=sys.stderr) if do_gpu: - print >> sys.stderr, 'gpu times', gpu_times - print >> sys.stderr, 'gpu expected', expected_times_gpu - print >> sys.stderr, 'gpu % expected/get', ( - expected_times_gpu / gpu_times) + print('gpu times', gpu_times, file=sys.stderr) + print('gpu expected', expected_times_gpu, file=sys.stderr) + print('gpu % expected/get', ( + expected_times_gpu / gpu_times), file=sys.stderr) print() if do_float64 and do_float32: - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - 
print >> sys.stderr, 'expected float64/float32', ( - expected_times_64 / float32_times) + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) + print('expected float64/float32', ( + expected_times_64 / float32_times), file=sys.stderr) if do_float64 and do_gpu: - print >> sys.stderr, 'float64/gpu', float64_times / gpu_times - print >> sys.stderr, 'expected float64/gpu', ( - expected_times_64 / gpu_times) + print('float64/gpu', float64_times / gpu_times, file=sys.stderr) + print('expected float64/gpu', ( + expected_times_64 / gpu_times), file=sys.stderr) if do_float32 and do_gpu: - print >> sys.stderr, 'float32/gpu', float32_times / gpu_times - print >> sys.stderr, 'expected float32/gpu', ( - expected_times_32 / gpu_times) + print('float32/gpu', float32_times / gpu_times, file=sys.stderr) + print('expected float32/gpu', ( + expected_times_32 / gpu_times), file=sys.stderr) def compare(x, y): ratio = x / y @@ -287,15 +287,15 @@ def compare(x, y): # time and the real time, we consider this an error. return sum((ratio < 0.95) + (ratio > 1.05)) - print() + print(file=sys.stderr) if do_float64: err = compare(expected_times_64, float64_times) - print >> sys.stderr, 'speed_failure_float64=' + str(err) + print('speed_failure_float64=' + str(err), file=sys.stderr) if do_float32: err = compare(expected_times_32, float32_times) - print >> sys.stderr, 'speed_failure_float32=' + str(err) + print('speed_failure_float32=' + str(err), file=sys.stderr) if do_gpu: err = compare(expected_times_gpu, gpu_times) - print >> sys.stderr, 'speed_failure_gpu=' + str(err) + print('speed_failure_gpu=' + str(err), file=sys.stderr) assert not numpy.isnan(gpu_times).any() From ac029111f94c67c480746ebd23229af099fd2570 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Wed, 31 Aug 2016 11:18:18 -0400 Subject: [PATCH 47/90] unzip -f to avoid prompt in data download --- data/download.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/download.sh b/data/download.sh index 160b0986..67c5c057 100755 --- a/data/download.sh +++ b/data/download.sh @@ -15,8 +15,8 @@ fi $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist_py3k.pkl.gz -$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz && gunzip imdb.pkl.gz -$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.dict.pkl.gz && gunzip imdb.dict.pkl.gz +$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz && gunzip -f imdb.pkl.gz +$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.dict.pkl.gz && gunzip -f imdb.dict.pkl.gz $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/Nottingham.zip && unzip -u Nottingham.zip $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/midi.zip && unzip -u midi.zip -d ../code && echo "extracted Modified Python MIDI package (GPL)" $DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold0.pkl.gz From f6db4f12f191a421f7a0f948d68cce36290fb617 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Wed, 7 Sep 2016 10:25:51 -0400 Subject: [PATCH 48/90] change compiledir and add xunit for jenkins --- misc/do_nightly_build | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/misc/do_nightly_build b/misc/do_nightly_build index bd703f04..cafab51c 100755 --- a/misc/do_nightly_build +++ b/misc/do_nightly_build @@ -1,9 +1,15 @@ #!/bin/bash -#we set the compiledir to the /Tmp dir to make the test faster by bypassing the nfs network. 
+ +# If not jenkins, set workspace to local Tmp +if [ -v $WORKSPACE ]; then + WORKSPACE=/Tmp +fi + date -ROOT_CWD=/Tmp/nightly_build -COMPILEDIR=/Tmp/lisa_theano_compile_dir_deeplearning +ROOT_CWD=$WORKSPACE/nightly_build +COMPILEDIR=$WORKSPACE/lisa_theano_compile_dir_deeplearning NOSETESTS=${ROOT_CWD}/Theano/bin/theano-nose +XUNIT="--with-xunit --xunit-file=" FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR} export PYTHONPATH=${ROOT_CWD}/Theano:${ROOT_CWD}/Pylearn:$PYTHONPATH @@ -19,14 +25,17 @@ echo "git version:" `git rev-parse HEAD` #echo "executing nosetests with mode=FAST_COMPILE" #THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} echo "executing nosetests speed with mode=FAST_RUN" -THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} test.py:speed +FILE=${ROOT_CWD}/dlt_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} test.py:speed #echo "executing nosetests speed with mode=FAST_RUN and OMP_NUM_THREADS=2" #OMP_NUM_THREADS=2 THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} test.py:speed echo "executing nosetests with mode=FAST_RUN,floatX=float32" -THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} +FILE=${ROOT_CWD}/dlt_32bit_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} #we change the seed and record it everyday to test different combination. We record it to be able to reproduce bug caused by different seed. We don't want multiple test in DEBUG_MODE each day as this take too long. #seed=$RANDOM #echo "executing nosetests with mode=DEBUG_MODE with seed of the day $seed" -#THEANO_DEBUGMODE_CHECK_STRIDES=0 THEANO_DEBUGMODE_PATIENCE=3 THEANO_COMPILEDIR=/Tmp/lisa_theano_compile_dir_deeplearning THEANO_UNITTEST_SEED=$seed THEANO_DEFAULT_MODE=DEBUG_MODE ${NOSETESTS} +#FILE=${ROOT_CWD}/'dlt_debug_tests.xml' +#THEANO_DEBUGMODE_CHECK_STRIDES=0 THEANO_DEBUGMODE_PATIENCE=3 THEANO_COMPILEDIR=$WORKSPACE/lisa_theano_compile_dir_deeplearning THEANO_UNITTEST_SEED=$seed THEANO_DEFAULT_MODE=DEBUG_MODE ${NOSETESTS} ${XUNIT}${FILE} From a0362806a029f20d7ed920868ded79d1b388d741 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Wed, 7 Sep 2016 19:43:00 -0400 Subject: [PATCH 49/90] dtl compiledir --- misc/do_nightly_build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/do_nightly_build b/misc/do_nightly_build index cafab51c..a8ee32cf 100755 --- a/misc/do_nightly_build +++ b/misc/do_nightly_build @@ -7,7 +7,7 @@ fi date ROOT_CWD=$WORKSPACE/nightly_build -COMPILEDIR=$WORKSPACE/lisa_theano_compile_dir_deeplearning +COMPILEDIR=$WORKSPACE/compile/lisa_theano_compile_dir_deeplearning NOSETESTS=${ROOT_CWD}/Theano/bin/theano-nose XUNIT="--with-xunit --xunit-file=" From 31e194d4a844db9455cbb72a91b0e717084f84ed Mon Sep 17 00:00:00 2001 From: slefrancois Date: Fri, 9 Sep 2016 15:45:53 -0400 Subject: [PATCH 50/90] use TMPDIR for buildbot --- misc/do_nightly_build | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/misc/do_nightly_build b/misc/do_nightly_build index a8ee32cf..29281050 100755 --- a/misc/do_nightly_build +++ b/misc/do_nightly_build @@ -2,7 +2,10 @@ # If not jenkins, set workspace to local Tmp if [ -v $WORKSPACE ]; then - WORKSPACE=/Tmp + if [ -v $TMPDIR ]; then + TMPDIR=/tmp + fi + WORKSPACE=$TMPDIR fi date From 80b969171df5bb341788864a46e433aa06858ccb Mon Sep 17 00:00:00 2001 From: slefrancois Date: Mon, 12 Sep 2016 09:36:55 -0400 Subject: [PATCH 51/90] test file name to float32 --- misc/do_nightly_build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/misc/do_nightly_build b/misc/do_nightly_build index 29281050..ef2b8319 100755 --- a/misc/do_nightly_build +++ b/misc/do_nightly_build @@ -33,7 +33,7 @@ THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} test.py:speed #echo "executing nosetests speed with mode=FAST_RUN and OMP_NUM_THREADS=2" #OMP_NUM_THREADS=2 THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} test.py:speed echo "executing nosetests with mode=FAST_RUN,floatX=float32" -FILE=${ROOT_CWD}/dlt_32bit_tests.xml +FILE=${ROOT_CWD}/dlt_float32_tests.xml THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} #we change the seed and record it everyday to test different combination. We record it to be able to reproduce bug caused by different seed. We don't want multiple test in DEBUG_MODE each day as this take too long. From 793d6181bc70b45a5d7521131822c62d78d9a418 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Mon, 19 Sep 2016 12:06:05 -0400 Subject: [PATCH 52/90] add jenkins buildbot script --- .jenkins/jenkins_buildbot_dlt.sh | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 .jenkins/jenkins_buildbot_dlt.sh diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh new file mode 100755 index 00000000..0d2e49f2 --- /dev/null +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +BUILDBOT_DIR=$WORKSPACE/nightly_build +source $HOME/.bashrc + +mkdir -p ${BUILDBOT_DIR} + +date +COMPILEDIR=$WORKSPACE/compile/lisa_theano_compile_dir_deeplearning +NOSETESTS=${BUILDBOT_DIR}/Theano/bin/theano-nose +XUNIT="--with-xunit --xunit-file=" + +FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR} +export PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/Pylearn:$PYTHONPATH + +cd ${BUILDBOT_DIR} +if [ ! -d ${BUILDBOT_DIR}/Theano ]; then + git clone git://github.com/Theano/Theano.git +fi +# update repo +cd ${BUILDBOT_DIR}/Theano; git pull + +${WORKSPACE}/data/download.sh + +cd ${BUILDBOT_DIR}/Theano +echo "git version for Theano:" `git rev-parse HEAD` +cd ${WORKSPACE}/code +echo "git version:" `git rev-parse HEAD` + +echo "executing nosetests speed with mode=FAST_RUN" +FILE=${BUILDBOT_DIR}/dlt_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} test.py:speed +echo "executing nosetests with mode=FAST_RUN,floatX=float32" +FILE=${BUILDBOT_DIR}/dlt_float32_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} From 12fb33662170918912f473e75360434b4680c7b9 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Mon, 19 Sep 2016 14:33:42 -0400 Subject: [PATCH 53/90] midi --- .jenkins/jenkins_buildbot_dlt.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh index 0d2e49f2..2cd41d5e 100755 --- a/.jenkins/jenkins_buildbot_dlt.sh +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -20,7 +20,8 @@ fi # update repo cd ${BUILDBOT_DIR}/Theano; git pull -${WORKSPACE}/data/download.sh +cd ${WORKSPACE}/data +./download.sh cd ${BUILDBOT_DIR}/Theano echo "git version for Theano:" `git rev-parse HEAD` From 93c9a3642d8952f7816273cddfc55a5a9f64077b Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Mon, 3 Oct 2016 12:24:53 -0400 Subject: [PATCH 54/90] Use MRG_RandomStreams instead for shared_randomstreams for GPU compat. 
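
The MRG generator exposes the same calls these scripts rely on (e.g.
``binomial`` for the corruption masks, ``normal`` in hmc), so the swap
is meant as a drop-in replacement.  A minimal sketch of the idea
(illustrative only, not part of this patch; names and shapes are made
up):

    import theano
    # MRG_RandomStreams also runs on the GPU back-end, unlike
    # theano.tensor.shared_randomstreams.RandomStreams.
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    srng = RandomStreams(seed=123)
    # e.g. a dA-style corruption mask, same call as before:
    mask = srng.binomial(size=(2, 4), n=1, p=0.9,
                         dtype=theano.config.floatX)
    f = theano.function([], mask)
    print(f())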
--- code/SdA.py | 2 +- code/dA.py | 2 +- code/hmc/hmc.py | 2 +- code/rbm.py | 2 +- code/rnnrbm.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/code/SdA.py b/code/SdA.py index 3d9589ac..eb7b7357 100644 --- a/code/SdA.py +++ b/code/SdA.py @@ -40,7 +40,7 @@ import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from logistic_sgd import LogisticRegression, load_data from mlp import HiddenLayer diff --git a/code/dA.py b/code/dA.py index 0d9efa54..aad3d454 100644 --- a/code/dA.py +++ b/code/dA.py @@ -40,7 +40,7 @@ import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from logistic_sgd import load_data from utils import tile_raster_images diff --git a/code/hmc/hmc.py b/code/hmc/hmc.py index aeb49937..f16a50c1 100644 --- a/code/hmc/hmc.py +++ b/code/hmc/hmc.py @@ -358,7 +358,7 @@ def new_from_shared_positions( stepsize = sharedX(initial_stepsize, 'hmc_stepsize') avg_acceptance_rate = sharedX(target_acceptance_rate, 'avg_acceptance_rate') - s_rng = TT.shared_randomstreams.RandomStreams(seed) + s_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(seed) # define graph for an `n_steps` HMC simulation accept, final_pos = hmc_move( diff --git a/code/rbm.py b/code/rbm.py index 3800cca7..6e4f1012 100644 --- a/code/rbm.py +++ b/code/rbm.py @@ -20,7 +20,7 @@ import theano.tensor as T import os -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from utils import tile_raster_images from logistic_sgd import load_data diff --git a/code/rnnrbm.py b/code/rnnrbm.py index b8420b9b..900ffdc6 100644 --- a/code/rnnrbm.py +++ b/code/rnnrbm.py @@ -19,7 +19,7 @@ from midi.utils import midiread, midiwrite import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams #Don't use a python long as this don't work on 32 bits computers. 
numpy.random.seed(0xbeef) From 4f251cd72dac2754c173c0a850f215b73fdb19f5 Mon Sep 17 00:00:00 2001 From: slefrancois Date: Thu, 6 Oct 2016 12:01:01 -0400 Subject: [PATCH 55/90] add testsuites names --- .jenkins/jenkins_buildbot_dlt.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh index 2cd41d5e..a4e4b1e4 100755 --- a/.jenkins/jenkins_buildbot_dlt.sh +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -9,6 +9,8 @@ date COMPILEDIR=$WORKSPACE/compile/lisa_theano_compile_dir_deeplearning NOSETESTS=${BUILDBOT_DIR}/Theano/bin/theano-nose XUNIT="--with-xunit --xunit-file=" +# name test suites +SUITE="--xunit-prefix-with-testsuite-name --xunit-testsuite-name=" FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR} export PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/Pylearn:$PYTHONPATH @@ -29,8 +31,10 @@ cd ${WORKSPACE}/code echo "git version:" `git rev-parse HEAD` echo "executing nosetests speed with mode=FAST_RUN" -FILE=${BUILDBOT_DIR}/dlt_tests.xml -THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} test.py:speed +NAME=dlt_speed +FILE=${BUILDBOT_DIR}/${NAME}_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} ${SUITE}${NAME} test.py:speed echo "executing nosetests with mode=FAST_RUN,floatX=float32" -FILE=${BUILDBOT_DIR}/dlt_float32_tests.xml -THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} +NAME=dlt_float32 +FILE=${BUILDBOT_DIR}/${NAME}_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} ${SUITE}${NAME} From 82c85e2a6f74a92736c2afdd805710dedfcc4f4f Mon Sep 17 00:00:00 2001 From: slefrancois Date: Thu, 6 Oct 2016 14:36:46 -0400 Subject: [PATCH 56/90] Add JUnit writer for speed tests, remove hardcoded reference times --- code/test.py | 117 +++++++++++++-------------------------------------- 1 file changed, 29 insertions(+), 88 deletions(-) diff --git a/code/test.py b/code/test.py index 4332e8b0..60c0af02 100644 --- a/code/test.py +++ b/code/test.py @@ -98,43 +98,7 @@ def speed(): do_gpu = True algo_executed = [s for idx, s in enumerate(algo) if to_exec[idx]] - #Timming expected are from the buildbot that have an i7-920 @ - # 2.67GHz with hyperthread enabled for the cpu, 12G of ram. An GeForce GTX - # 580 for the GPU. OS=Fedora 14, gcc=4.5.1, python/BLAS from EPD - # 7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread. - - expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4, - 346.9, 355.0, 268.2, 115.8, 16.8, 91.6]) - expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 63.1, 71, - 191.2, 199.0, 201.9, 107, 12.6, 61.3]) - - # Number with just 1 decimal are new value that are faster with - # the Theano version 0.5rc2 Other number are older. They are not - # updated, as we where faster in the past! - # TODO: find why and fix this! 
- -# Here is the value for the buildbot on February 3th 2012 with a GTX 285 -# sgd, cg mlp conv da -# sda dbn rbm -# gpu times[3.72957802, 9.94316864, 29.1772666, 9.13857198, 25.91144657, -# 18.30802011, 53.38651466, 285.41386175] -# expected [3.076634879, 7.555234910, 18.99226785, 9.58915591, 24.130070450, -# 24.77524018, 92.66246653, 322.340329170] -# sgd, cg mlp conv da -# sda dbn rbm -#expected/get [0.82492841, 0.75984178, 0.65092691, 1.04930573, 0.93125138 -# 1.35324519 1.7356905 1.12937868] - - expected_times_gpu = numpy.asarray([2.9, 7.55523491, 18.99226785, - 5.8, 19.2, - 11.2, 7.3, 122, 112.5, 31.1, 8.3]) - expected_times_64 = [s for idx, s in enumerate(expected_times_64) - if to_exec[idx]] - expected_times_32 = [s for idx, s in enumerate(expected_times_32) - if to_exec[idx]] - expected_times_gpu = [s for idx, s in enumerate(expected_times_gpu) - if to_exec[idx]] - + def time_test(m, l, idx, f, **kwargs): if not to_exec[idx]: return @@ -196,9 +160,6 @@ def do_tests(): float64_times = do_tests() print(algo_executed, file=sys.stderr) print('float64 times', float64_times, file=sys.stderr) - print('float64 expected', expected_times_64, file=sys.stderr) - print('float64 % expected/get', ( - expected_times_64 / float64_times), file=sys.stderr) #test in float32 in FAST_RUN mode on the cpu theano.config.floatX = 'float32' @@ -206,9 +167,6 @@ def do_tests(): float32_times = do_tests() print(algo_executed, file=sys.stderr) print('float32 times', float32_times, file=sys.stderr) - print('float32 expected', expected_times_32, file=sys.stderr) - print('float32 % expected/get', ( - expected_times_32 / float32_times), file=sys.stderr) if do_float64: print('float64/float32', ( @@ -218,18 +176,10 @@ def do_tests(): 'in one place'), file=sys.stderr) print(algo_executed, file=sys.stderr) print('float64 times', float64_times, file=sys.stderr) - print('float64 expected', expected_times_64, file=sys.stderr) - print('float64 % expected/get', ( - expected_times_64 / float64_times), file=sys.stderr) print('float32 times', float32_times, file=sys.stderr) - print('float32 expected', expected_times_32, file=sys.stderr) - print('float32 % expected/get', ( - expected_times_32 / float32_times), file=sys.stderr) print('float64/float32', ( float64_times / float32_times), file=sys.stderr) - print('expected float64/float32', ( - expected_times_64 / float32_times), file=sys.stderr) #test in float32 in FAST_RUN mode on the gpu import theano.sandbox.cuda @@ -238,9 +188,6 @@ def do_tests(): gpu_times = do_tests() print(algo_executed, file=sys.stderr) print('gpu times', gpu_times, file=sys.stderr) - print('gpu expected', expected_times_gpu, file=sys.stderr) - print('gpu % expected/get', ( - expected_times_gpu / gpu_times), file=sys.stderr) if do_float64: print('float64/gpu', float64_times / gpu_times, file=sys.stderr) @@ -252,50 +199,44 @@ def do_tests(): print(algo_executed, file=sys.stderr) if do_float64: print('float64 times', float64_times, file=sys.stderr) - print('float64 expected', expected_times_64, file=sys.stderr) - print('float64 % expected/get', ( - expected_times_64 / float64_times), file=sys.stderr) if do_float32: print('float32 times', float32_times, file=sys.stderr) - print('float32 expected', expected_times_32, file=sys.stderr) - print('float32 % expected/get', ( - expected_times_32 / float32_times), file=sys.stderr) if do_gpu: print('gpu times', gpu_times, file=sys.stderr) - print('gpu expected', expected_times_gpu, file=sys.stderr) - print('gpu % expected/get', ( - expected_times_gpu / gpu_times), 
file=sys.stderr)

     print()
     if do_float64 and do_float32:
         print('float64/float32', (
             float64_times / float32_times), file=sys.stderr)
-        print('expected float64/float32', (
-            expected_times_64 / float32_times), file=sys.stderr)
     if do_float64 and do_gpu:
         print('float64/gpu', float64_times / gpu_times, file=sys.stderr)
-        print('expected float64/gpu', (
-            expected_times_64 / gpu_times), file=sys.stderr)
     if do_float32 and do_gpu:
         print('float32/gpu', float32_times / gpu_times, file=sys.stderr)
-        print('expected float32/gpu', (
-            expected_times_32 / gpu_times), file=sys.stderr)
-
-    def compare(x, y):
-        ratio = x / y
-        # If there is more then 5% difference between the expected
-        # time and the real time, we consider this an error.
-        return sum((ratio < 0.95) + (ratio > 1.05))
-
-    print(file=sys.stderr)
-    if do_float64:
-        err = compare(expected_times_64, float64_times)
-        print('speed_failure_float64=' + str(err), file=sys.stderr)
-    if do_float32:
-        err = compare(expected_times_32, float32_times)
-        print('speed_failure_float32=' + str(err), file=sys.stderr)
-    if do_gpu:
-        err = compare(expected_times_gpu, gpu_times)
-        print('speed_failure_gpu=' + str(err), file=sys.stderr)
-
-    assert not numpy.isnan(gpu_times).any()
+
+    # Write JUnit xml for speed test performance report
+
+    speed_file = 'speedtests_time.xml'
+
+    # Define speed test file write method
+    def write_junit(filename, algos, times, label):
+        with open(filename, 'a') as f:
+            for algo, time in zip(algos, times):
+                f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
+                        .format(label=label, algo=algo, time=time))
+                f.write('    </testcase>\n')
+
+    test_total = numpy.size(float64_times) \
+        + numpy.size(float32_times) \
+        + numpy.size(gpu_times)
+
+    with open(speed_file, 'w') as f:
+        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+        f.write('<testsuite name="dlt_speedtests" tests="{ntests}">\n'
+                .format(ntests=numpy.size(test_total)))
+
+    write_junit(speed_file, algo_executed, float64_times, label='float64')
+    write_junit(speed_file, algo_executed, float32_times, label='float32')
+    write_junit(speed_file, algo_executed, gpu_times, label='gpu')
+
+    with open(speed_file, 'a') as f:
+        f.write('</testsuite>\n')

From 9918b7a9d377af71ac1323187913861651b26ce8 Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Fri, 7 Oct 2016 09:04:14 -0400
Subject: [PATCH 57/90] remove testsuite prefix option

---
 .jenkins/jenkins_buildbot_dlt.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh
index a4e4b1e4..846cf7fc 100755
--- a/.jenkins/jenkins_buildbot_dlt.sh
+++ b/.jenkins/jenkins_buildbot_dlt.sh
@@ -10,7 +10,7 @@ COMPILEDIR=$WORKSPACE/compile/lisa_theano_compile_dir_deeplearning
 NOSETESTS=${BUILDBOT_DIR}/Theano/bin/theano-nose
 XUNIT="--with-xunit --xunit-file="
 # name test suites
-SUITE="--xunit-prefix-with-testsuite-name --xunit-testsuite-name="
+SUITE="--xunit-testsuite-name="

 FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR}
 export PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/Pylearn:$PYTHONPATH

From f14107d31d5cb05f192129a95d3f272acf4dbc09 Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Fri, 7 Oct 2016 17:28:37 -0400
Subject: [PATCH 58/90] single performance file open; only access times
 variables if tests ran

---
 code/test.py | 48 +++++++++++++++++++++++-------------------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/code/test.py b/code/test.py
index 60c0af02..22a59655 100644
--- a/code/test.py
+++ b/code/test.py
@@ -152,12 +152,18 @@ def do_tests():
                 saveto='')
     return numpy.asarray(l)

+    # Initialize test count and results dictionary
+    test_total = 0
+    times_dic = {}
+
     #test in
float64 in FAST_RUN mode on the cpu
     import theano
     if do_float64:
         theano.config.floatX = 'float64'
         theano.config.mode = 'FAST_RUN'
         float64_times = do_tests()
+        times_dic['float64'] = float64_times
+        test_total += numpy.size(float64_times)
         print(algo_executed, file=sys.stderr)
         print('float64 times', float64_times, file=sys.stderr)

@@ -165,6 +171,8 @@ def do_tests():
     theano.config.floatX = 'float32'
     if do_float32:
         float32_times = do_tests()
+        times_dic['float32'] = float32_times
+        test_total += numpy.size(float32_times)
         print(algo_executed, file=sys.stderr)
         print('float32 times', float32_times, file=sys.stderr)

@@ -186,6 +194,8 @@ def do_tests():
     if do_gpu:
         theano.sandbox.cuda.use('gpu')
         gpu_times = do_tests()
+        times_dic['gpu'] = gpu_times
+        test_total += numpy.size(gpu_times)
         print(algo_executed, file=sys.stderr)
         print('gpu times', gpu_times, file=sys.stderr)

@@ -213,30 +223,18 @@ def do_tests():
     if do_float32 and do_gpu:
         print('float32/gpu', float32_times / gpu_times, file=sys.stderr)

-    # Write JUnit xml for speed test performance report
-
-    speed_file = 'speedtests_time.xml'
-
-    # Define speed test file write method
-    def write_junit(filename, algos, times, label):
-        with open(filename, 'a') as f:
-            for algo, time in zip(algos, times):
-                f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
-                        .format(label=label, algo=algo, time=time))
-            f.write('    </testcase>\n')
-
-    test_total = numpy.size(float64_times) \
-        + numpy.size(float32_times) \
-        + numpy.size(gpu_times)
-
-    with open(speed_file, 'w') as f:
+    # Generate JUnit performance report
+    # Define speedtest file write method
+    def write_junit(f, algos, times, label):
+        for algo, time in zip(algos, times):
+            f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
+                    .format(label=label, algo=algo, time=time))
+            f.write('    </testcase>\n')
+
+    with open('speedtests_time.xml', 'w') as f:
         f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
-        f.write('<testsuite name="dlt_speedtests" tests="{ntests}">\n'
-            .format(ntests=numpy.size(test_total)))
-
-    write_junit(speed_file, algo_executed, float64_times, label='float64')
-    write_junit(speed_file, algo_executed, float32_times, label='float32')
-    write_junit(speed_file, algo_executed, gpu_times, label='gpu')
-
-    with open(speed_file, 'a') as f:
+        f.write('<testsuite name="dlt_speedtests" tests="{ntests}">\n'
+                .format(ntests=test_total))
+        for label, times in times_dic.items():
+            write_junit(f, algo_executed, times, label)
         f.write('</testsuite>\n')

From f724c2c6054c736c548196d2a7a000ec307e0b0d Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Fri, 7 Oct 2016 17:36:44 -0400
Subject: [PATCH 59/90] move assert gpu_times not nan

---
 code/test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/code/test.py b/code/test.py
index 22a59655..b3077b7c 100644
--- a/code/test.py
+++ b/code/test.py
@@ -238,3 +238,6 @@ def write_junit(f, algos, times, label):
         for label, times in times_dic.items():
             write_junit(f, algo_executed, times, label)
         f.write('</testsuite>\n')
+
+    if do_gpu:
+        assert not numpy.isnan(gpu_times).any()

From 85f56c22ebf46e260e38215d32e7f893e95fdcc3 Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Thu, 13 Oct 2016 13:14:11 -0400
Subject: [PATCH 60/90] add explicit CUDA path to buildbot

---
 .jenkins/jenkins_buildbot_dlt.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh
index 846cf7fc..283eb933 100755
--- a/.jenkins/jenkins_buildbot_dlt.sh
+++ b/.jenkins/jenkins_buildbot_dlt.sh
@@ -1,7 +1,11 @@
 #!/bin/bash

+# CUDA
+export PATH=/usr/local/cuda/bin:$PATH
+export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+export LIBRARY_PATH=/usr/local/cuda/lib64:$LIBRARY_PATH
+
 BUILDBOT_DIR=$WORKSPACE/nightly_build
-source $HOME/.bashrc

 mkdir -p ${BUILDBOT_DIR}

From
d4035919fe2342ba83f104e34d13a8962203c1e6 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Thu, 20 Oct 2016 14:22:16 -0400 Subject: [PATCH 61/90] Compute mean in higher precision to avoid overflow. --- code/DBN.py | 6 +++--- code/dA.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/code/DBN.py b/code/DBN.py index 3b2bd230..e1bb66df 100644 --- a/code/DBN.py +++ b/code/DBN.py @@ -340,7 +340,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ') - print(numpy.mean(c)) + print(numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() # end-snippet-2 @@ -391,7 +391,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() - this_validation_loss = numpy.mean(validation_losses) + this_validation_loss = numpy.mean(validation_losses, dtype='float64') print('epoch %i, minibatch %i/%i, validation error %f %%' % ( epoch, minibatch_index + 1, @@ -414,7 +414,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, # test it on the test set test_losses = test_model() - test_score = numpy.mean(test_losses) + test_score = numpy.mean(test_losses, dtype='float64') print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, diff --git a/code/dA.py b/code/dA.py index aad3d454..93a696f8 100644 --- a/code/dA.py +++ b/code/dA.py @@ -336,7 +336,7 @@ def test_dA(learning_rate=0.1, training_epochs=15, for batch_index in range(n_train_batches): c.append(train_da(batch_index)) - print('Training epoch %d, cost ' % epoch, numpy.mean(c)) + print('Training epoch %d, cost ' % epoch, numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() @@ -394,7 +394,7 @@ def test_dA(learning_rate=0.1, training_epochs=15, for batch_index in range(n_train_batches): c.append(train_da(batch_index)) - print('Training epoch %d, cost ' % epoch, numpy.mean(c)) + print('Training epoch %d, cost ' % epoch, numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() From 5a13d9869587a84018b939f83f5fd85293c9a8a1 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Thu, 20 Oct 2016 16:18:46 -0400 Subject: [PATCH 62/90] Fix import of sandbox. --- code/hmc/hmc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/code/hmc/hmc.py b/code/hmc/hmc.py index f16a50c1..cf4d20a1 100644 --- a/code/hmc/hmc.py +++ b/code/hmc/hmc.py @@ -7,6 +7,7 @@ from theano import function, shared from theano import tensor as TT import theano +import theano.sandbox.rng_mrg sharedX = (lambda X, name: shared(numpy.asarray(X, dtype=theano.config.floatX), name=name)) @@ -275,14 +276,14 @@ def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept, """ - ## POSITION UPDATES ## + # POSITION UPDATES # # broadcast `accept` scalar to tensor with the same dimensions as # final_pos. accept_matrix = accept.dimshuffle(0, *(('x',) * (final_pos.ndim - 1))) # if accept is True, update to `final_pos` else stay put new_positions = TT.switch(accept_matrix, final_pos, positions) # end-snippet-5 start-snippet-7 - ## STEPSIZE UPDATES ## + # STEPSIZE UPDATES # # if acceptance rate is too low, our sampler is too "noisy" and we reduce # the stepsize. If it is too high, our sampler is too conservative, we can # get away with a larger stepsize (resulting in better mixing). 
@@ -292,7 +293,7 @@ def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept,
     new_stepsize = TT.clip(_new_stepsize, stepsize_min, stepsize_max)

     # end-snippet-7 start-snippet-6
-    ## ACCEPT RATE UPDATES ##
+    # ACCEPT RATE UPDATES #
     # perform exponential moving average
     mean_dtype = theano.scalar.upcast(accept.dtype, avg_acceptance_rate.dtype)
     new_acceptance_rate = TT.add(

From 93837e03aeeff6917d2b3a121e05341b663fa890 Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron
Date: Mon, 24 Oct 2016 16:33:18 -0400
Subject: [PATCH 63/90] Fix printout in lstm.py.

---
 code/lstm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/lstm.py b/code/lstm.py
index 1c285928..a3010a9f 100644
--- a/code/lstm.py
+++ b/code/lstm.py
@@ -605,8 +605,8 @@ def train_lstm(
                     best_p = unzip(tparams)
                     bad_counter = 0

-                print( ('Train ', train_err, 'Valid ', valid_err,
-                       'Test ', test_err) )
+                print('Train ', train_err, 'Valid ', valid_err,
+                      'Test ', test_err)

                 if (len(history_errs) > patience and
                     valid_err >= numpy.array(history_errs)[:-patience,

From 780cecc9abbe6181e8fe37f9bda386bdc01fe2ec Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron
Date: Wed, 26 Oct 2016 14:46:32 -0400
Subject: [PATCH 64/90] Adjust mean dtypes for scores in SdA too.

---
 code/SdA.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/code/SdA.py b/code/SdA.py
index eb7b7357..8da74797 100644
--- a/code/SdA.py
+++ b/code/SdA.py
@@ -394,7 +394,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
                 c.append(pretraining_fns[i](index=batch_index,
                          corruption=corruption_levels[i],
                          lr=pretrain_lr))
-            print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c)))
+            print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c, dtype='float64')))

     end_time = timeit.default_timer()

@@ -442,7 +442,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
             if (iter + 1) % validation_frequency == 0:
                 validation_losses = validate_model()
-                this_validation_loss = numpy.mean(validation_losses)
+                this_validation_loss = numpy.mean(validation_losses, dtype='float64')
                 print('epoch %i, minibatch %i/%i, validation error %f %%' %
                       (epoch, minibatch_index + 1, n_train_batches,
                        this_validation_loss * 100.))
@@ -463,7 +463,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
                     # test it on the test set
                     test_losses = test_model()
-                    test_score = numpy.mean(test_losses)
+                    test_score = numpy.mean(test_losses, dtype='float64')
                     print(('     epoch %i, minibatch %i/%i, test error of '
                            'best model %f %%') %
                           (epoch, minibatch_index + 1, n_train_batches,

From cd462eccb4f351cec6915c4294b0197fd2aa51d9 Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Thu, 3 Nov 2016 15:20:16 -0400
Subject: [PATCH 65/90] split performance report file

---
 code/test.py | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/code/test.py b/code/test.py
index b3077b7c..c2ad68bc 100644
--- a/code/test.py
+++ b/code/test.py
@@ -224,20 +224,16 @@ def do_tests():
         print('float32/gpu', float32_times / gpu_times, file=sys.stderr)

     # Generate JUnit performance report
-    # Define speedtest file write method
-    def write_junit(f, algos, times, label):
-        for algo, time in zip(algos, times):
-            f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
-                    .format(label=label, algo=algo, time=time))
-            f.write('    </testcase>\n')
-
-    with open('speedtests_time.xml', 'w') as f:
-        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
-        f.write('<testsuite name="dlt_speedtests" tests="{ntests}">\n'
-                .format(ntests=test_total))
-        for label, times in
times_dic.items():
-            write_junit(f, algo_executed, times, label)
-        f.write('</testsuite>\n')
+    for label, times in times_dic.items():
+        with open('speedtests_{label}.xml'.format(label=label), 'w') as f:
+            f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+            f.write('<testsuite name="dlt_speedtests_{label}" tests="{ntests}">\n'
+                    .format(label=label, ntests=test_total/len(times_dic)))
+            for algo, time in zip(algo_executed, times):
+                f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
+                        .format(label=label, algo=algo, time=time))
+                f.write('    </testcase>\n')
+            f.write('</testsuite>\n')

From fd5cb65460df2dee9cfa250e3e7fbc864720bd86 Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Fri, 13 Jan 2017 15:20:23 -0500
Subject: [PATCH 66/90] Do the speed test on the new gpu back-end.

---
 code/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/test.py b/code/test.py
index c2ad68bc..5053b8c4 100644
--- a/code/test.py
+++ b/code/test.py
@@ -190,9 +190,9 @@ def do_tests():
             float64_times / float32_times), file=sys.stderr)

     #test in float32 in FAST_RUN mode on the gpu
-    import theano.sandbox.cuda
+    import theano.gpuarray
     if do_gpu:
-        theano.sandbox.cuda.use('gpu')
+        theano.gpuarray.use('cuda')
         gpu_times = do_tests()
         times_dic['gpu'] = gpu_times
         test_total += numpy.size(gpu_times)

From e481d33b2492e37274c2db8389f3b5452767dd68 Mon Sep 17 00:00:00 2001
From: slefrancois
Date: Mon, 16 Jan 2017 10:05:22 -0500
Subject: [PATCH 67/90] install libgpuarray for dlt speed tests

---
 .jenkins/jenkins_buildbot_dlt.sh | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh
index 283eb933..243cd4ef 100755
--- a/.jenkins/jenkins_buildbot_dlt.sh
+++ b/.jenkins/jenkins_buildbot_dlt.sh
@@ -19,6 +19,38 @@ SUITE="--xunit-testsuite-name="
 FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR}
 export PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/Pylearn:$PYTHONPATH

+# Install libgpuarray and pygpu
+cd ${BUILDBOT_DIR}
+
+# Make fresh clone (with no history since we don't need it)
+rm -rf libgpuarray
+git clone --depth 1 "https://github.com/Theano/libgpuarray.git"
+
+(cd libgpuarray && echo "libgpuarray commit" && git rev-parse HEAD)
+
+# Clean up previous installs (to make sure no old files are left)
+rm -rf local
+mkdir local
+
+# Build libgpuarray and run C tests
+mkdir libgpuarray/build
+(cd libgpuarray/build && cmake .. -DCMAKE_BUILD_TYPE=${GPUARRAY_CONFIG} -DCMAKE_INSTALL_PREFIX=${BUILDBOT_DIR}/local && make)
+
+# Finally install
+(cd libgpuarray/build && make install)
+export LD_LIBRARY_PATH=${BUILDBOT_DIR}/local/lib:${LD_LIBRARY_PATH}
+export LIBRARY_PATH=${BUILDBOT_DIR}/local/lib:${LIBRARY_PATH}
+export CPATH=${BUILDBOT_DIR}/local/include:${CPATH}
+
+# Build the pygpu modules
+(cd libgpuarray && python setup.py build_ext --inplace -I${BUILDBOT_DIR}/local/include -L${BUILDBOT_DIR}/local/lib)
+
+mkdir ${BUILDBOT_DIR}/local/lib/python
+export PYTHONPATH=${PYTHONPATH}:${BUILDBOT_DIR}/local/lib/python
+# Then install
+(cd libgpuarray && python setup.py install --home=${BUILDBOT_DIR}/local)
+
+# Install Theano
 cd ${BUILDBOT_DIR}
 if [ !
-d ${BUILDBOT_DIR}/Theano ]; then
     git clone git://github.com/Theano/Theano.git

From 73e621d37ae6bb7f0747e831822f39435e61bab1 Mon Sep 17 00:00:00 2001
From: Simon Lefrancois
Date: Tue, 18 Apr 2017 09:59:43 -0400
Subject: [PATCH 68/90] move speedtest cache outside workspace

---
 .DS_Store                        | Bin 0 -> 6148 bytes
 .jenkins/jenkins_buildbot_dlt.sh |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..3cd979e05c0d9c2d21079f88c5fedc75d991437e
GIT binary patch
literal 6148
zcmeHKF=_)r43v^93~5}Z+%Mz@i*a7y57@*ZO|Zcvsjter{4~!
Date: Wed, 19 Apr 2017 14:52:56 -0400
Subject: [PATCH 69/90] add label to speedtest class

---
 code/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/test.py b/code/test.py
index 5053b8c4..8768d8c1 100644
--- a/code/test.py
+++ b/code/test.py
@@ -230,7 +230,7 @@ def do_tests():
             f.write('<testsuite name="dlt_speedtests_{label}" tests="{ntests}">\n'
                     .format(label=label, ntests=test_total/len(times_dic)))
             for algo, time in zip(algo_executed, times):
-                f.write('    <testcase classname="speedtests" name="speedtest_{algo}" time="{time}">\n'
+                f.write('    <testcase classname="speedtests_{label}" name="speedtest_{algo}" time="{time}">\n'
                         .format(label=label, algo=algo, time=time))
                 f.write('    </testcase>\n')
             f.write('</testsuite>\n')

From e7b2dc866d8a460cf5e0f20805fa4155649f1840 Mon Sep 17 00:00:00 2001
From: Simon Lefrancois
Date: Fri, 28 Apr 2017 14:10:43 -0400
Subject: [PATCH 70/90] buildbot includes theano.gpuarray

---
 .DS_Store                        | Bin 6148 -> 0 bytes
 .jenkins/jenkins_buildbot_dlt.sh |  10 ++++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)
 delete mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 3cd979e05c0d9c2d21079f88c5fedc75d991437e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKF=_)r43v^93~5}Z+%Mz@i*a7y57@*ZO|Zcvsjter{4~!
Date: Tue, 6 Jun 2017 14:56:35 -0400
Subject: [PATCH 71/90] fix typos/spelling

---
 doc/gettingstarted.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt
index d765f14a..85111d11 100644
--- a/doc/gettingstarted.txt
+++ b/doc/gettingstarted.txt
@@ -147,7 +147,7 @@ MNIST Dataset
     The data has to be stored as floats on the GPU ( the right
     ``dtype`` for storing on the GPU is given by ``theano.config.floatX``).
-To get around this shortcomming for the labels, we store them as float,
+To get around this shortcoming for the labels, we store them as float,
 and then cast it to int.

 .. note::
@@ -316,7 +316,7 @@ The likelihood of the correct class is not the same as the
 number of right predictions, but from the point of view of a randomly
 initialized classifier they are pretty similar.
 Remember that likelihood and zero-one loss are different objectives;
-you should see that they are corralated on the validation set but
+you should see that they are correlated on the validation set but
 sometimes one will rise while the other falls, or vice-versa.

 Since we usually speak in terms of minimizing a loss function, learning will
@@ -421,7 +421,7 @@ but this choice is almost arbitrary (though harmless).
       because it controls the number of updates done to your parameters. Training the same model
       for 10 epochs using a batch size of 1 yields completely different results compared
      to training for the same 10 epochs but with a batchsize of 20. Keep this in mind when
-      switching between batch sizes and be prepared to tweak all the other parameters acording
+      switching between batch sizes and be prepared to tweak all the other parameters according
       to the batch size used.
All code-blocks above show pseudocode of how the algorithm looks like. Implementing such From 8819681562c539054c97097f6100d1a69bcbe75d Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 14:59:57 -0400 Subject: [PATCH 72/90] remove extra space --- doc/gettingstarted.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index 85111d11..06e2e88e 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -85,7 +85,7 @@ MNIST Dataset variables and access it based on the minibatch index, given a fixed and known batch size. The reason behind shared variables is related to using the GPU. There is a large overhead when copying data - into the GPU memory. If you would copy data on request ( each minibatch + into the GPU memory. If you would copy data on request (each minibatch individually when needed) as the code will do if you do not use shared variables, due to this overhead, the GPU code will not be much faster then the CPU code (maybe even slower). If you have your data in From 59667bd502e4ee05a5221293e4c2370bb065be52 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:01:42 -0400 Subject: [PATCH 73/90] remove extra space --- doc/gettingstarted.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index 06e2e88e..256ee07d 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -286,7 +286,7 @@ In this tutorial, :math:`f` is defined as: f(x) = {\rm argmax}_k P(Y=k | x, \theta) -In python, using Theano this can be written as : +In python, using Theano this can be written as: .. code-block:: python From 37048765dadf7146c3aafc4994cf8721cb7518b3 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:06:50 -0400 Subject: [PATCH 74/90] remove more spaces --- doc/gettingstarted.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index 256ee07d..0019c3c6 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -331,7 +331,7 @@ The NLL of our classifier is a differentiable surrogate for the zero-one loss, and we use the gradient of this function over our training data as a supervised learning signal for deep learning of a classifier. -This can be computed using the following line of code : +This can be computed using the following line of code: .. code-block:: python @@ -357,7 +357,7 @@ algorithm in which we repeatedly make small steps downward on an error surface defined by a loss function of some parameters. For the purpose of ordinary gradient descent we consider that the training data is rolled into the loss function. Then the pseudocode of this -algorithm can be described as : +algorithm can be described as: .. code-block:: python @@ -425,7 +425,7 @@ but this choice is almost arbitrary (though harmless). to the batch size used. All code-blocks above show pseudocode of how the algorithm looks like. Implementing such -algorithm in Theano can be done as follows : +algorithm in Theano can be done as follows: .. 
code-block:: python From f78ba92c513edc177f1ff88eb34fb4a78310e652 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:14:38 -0400 Subject: [PATCH 75/90] extra space in logreg --- doc/logreg.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/logreg.txt b/doc/logreg.txt index c2979e63..b582acd4 100644 --- a/doc/logreg.txt +++ b/doc/logreg.txt @@ -246,7 +246,7 @@ within the DeepLearningTutorials folder: python code/logistic_sgd.py -The output one should expect is of the form : +The output one should expect is of the form: .. code-block:: bash From 1867a4e5a3f10730a6a844a91dc425962ab94fa5 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:16:05 -0400 Subject: [PATCH 76/90] remove spaces in mlp page --- doc/mlp.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/mlp.txt b/doc/mlp.txt index 2a74aaad..0ecc7a89 100644 --- a/doc/mlp.txt +++ b/doc/mlp.txt @@ -178,13 +178,13 @@ The code below shows how this can be done, in a way which is analogous to our pr .. literalinclude:: ../code/mlp.py -The user can then run the code by calling : +The user can then run the code by calling: .. code-block:: bash python code/mlp.py -The output one should expect is of the form : +The output one should expect is of the form: .. code-block:: bash From d8294003cff53ea56f1d34c574f708e35ab63085 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:19:26 -0400 Subject: [PATCH 77/90] remove spaces in dA page --- doc/dA.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/dA.txt b/doc/dA.txt index 8ff26354..dd05acdf 100644 --- a/doc/dA.txt +++ b/doc/dA.txt @@ -6,7 +6,7 @@ Denoising Autoencoders (dA) .. note:: This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp`. Additionally it uses the following Theano functions - and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. + and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh @@ -126,7 +126,7 @@ signal: :pyobject: dA.get_reconstructed_input And using these functions we can compute the cost and the updates of -one stochastic gradient descent step : +one stochastic gradient descent step: .. literalinclude:: ../code/dA.py :pyobject: dA.get_cost_updates @@ -209,7 +209,7 @@ need to do is to add a stochastic corruption step operating on the input. The in corrupted in many ways, but in this tutorial we will stick to the original corruption mechanism of randomly masking entries of the input by making them zero. The code below -does just that : +does just that: .. literalinclude:: ../code/dA.py :pyobject: dA.get_corrupted_input @@ -221,7 +221,7 @@ For this reason, the constructor of the ``dA`` also gets Theano variables pointing to the shared parameters. If those parameters are left to ``None``, new ones will be constructed. -The final denoising autoencoder class becomes : +The final denoising autoencoder class becomes: .. literalinclude:: ../code/dA.py :pyobject: dA @@ -254,7 +254,7 @@ constant (weights are converted to values between 0 and 1). To plot our filters we will need the help of ``tile_raster_images`` (see :ref:`how-to-plot`) so we urge the reader to study it. 
Also using the help of the Python Image Library, the following lines of code will -save the filters as an image : +save the filters as an image: .. literalinclude:: ../code/dA.py :start-after: start-snippet-4 @@ -264,20 +264,20 @@ save the filters as an image : Running the Code ++++++++++++++++ -To run the code : +To run the code: .. code-block:: bash python dA.py -The resulted filters when we do not use any noise are : +The resulted filters when we do not use any noise are: .. figure:: images/filters_corruption_0.png :align: center -The filters for 30 percent noise : +The filters for 30 percent noise: .. figure:: images/filters_corruption_30.png From 738b641bacd23511d0efdc87e9494f2ec8c1426e Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:21:31 -0400 Subject: [PATCH 78/90] remove space in rbm page --- doc/SdA.txt | 2 +- doc/rbm.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/SdA.txt b/doc/SdA.txt index 289a8b0a..6d9ba0da 100644 --- a/doc/SdA.txt +++ b/doc/SdA.txt @@ -6,7 +6,7 @@ Stacked Denoising Autoencoders (SdA) .. note:: This section assumes you have already read through :doc:`logreg` and :doc:`mlp`. Additionally it uses the following Theano functions - and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. + and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh diff --git a/doc/rbm.txt b/doc/rbm.txt index a8079012..7a052cc6 100644 --- a/doc/rbm.txt +++ b/doc/rbm.txt @@ -7,7 +7,7 @@ Restricted Boltzmann Machines (RBM) .. note:: This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp`. Additionally it uses the following Theano functions - and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_ and `scan`_. If you intend to run the code on GPU also read `GPU`_. + and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_ and `scan`_. If you intend to run the code on GPU also read `GPU`_. .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh @@ -573,7 +573,7 @@ The output was the following: ... plotting sample 8 ... plotting sample 9 -The pictures below show the filters after 15 epochs : +The pictures below show the filters after 15 epochs: .. figure:: images/filters_at_epoch_14.png :align: center From ec4855a6a5eabdb5fdd0e8daf69218a21b2e5c17 Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:23:30 -0400 Subject: [PATCH 79/90] spaces on DBN page --- doc/DBN.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/DBN.txt b/doc/DBN.txt index bb0571eb..be7bfbdc 100644 --- a/doc/DBN.txt +++ b/doc/DBN.txt @@ -6,7 +6,7 @@ Deep Belief Networks .. note:: This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp` and :doc:`rbm`. Additionally it uses the following Theano - functions and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic + functions and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. @@ -210,7 +210,7 @@ obtained over these sets. 
Putting it all together +++++++++++++++++++++++ -The few lines of code below constructs the deep belief network : +The few lines of code below constructs the deep belief network: .. literalinclude:: ../code/DBN.py :start-after: # numpy random generator From 85962ee63ae990e267e0875517de153e47cf777a Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Tue, 6 Jun 2017 15:51:15 -0400 Subject: [PATCH 80/90] spaces on lstm page --- doc/lstm.txt | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/lstm.txt b/doc/lstm.txt index bde70bd8..aec230ab 100644 --- a/doc/lstm.txt +++ b/doc/lstm.txt @@ -75,10 +75,10 @@ previous state, as needed. .. figure:: images/lstm_memorycell.png :align: center - **Figure 1** : Illustration of an LSTM memory cell. + **Figure 1**: Illustration of an LSTM memory cell. The equations below describe how a layer of memory cells is updated at every -timestep :math:`t`. In these equations : +timestep :math:`t`. In these equations: * :math:`x_t` is the input to the memory cell layer at time :math:`t` * :math:`W_i`, :math:`W_f`, :math:`W_c`, :math:`W_o`, :math:`U_i`, @@ -89,7 +89,7 @@ timestep :math:`t`. In these equations : First, we compute the values for :math:`i_t`, the input gate, and :math:`\widetilde{C_t}` the candidate value for the states of the memory -cells at time :math:`t` : +cells at time :math:`t`: .. math:: :label: 1 @@ -102,7 +102,7 @@ cells at time :math:`t` : \widetilde{C_t} = tanh(W_c x_t + U_c h_{t-1} + b_c) Second, we compute the value for :math:`f_t`, the activation of the memory -cells' forget gates at time :math:`t` : +cells' forget gates at time :math:`t`: .. math:: :label: 3 @@ -111,7 +111,7 @@ cells' forget gates at time :math:`t` : Given the value of the input gate activation :math:`i_t`, the forget gate activation :math:`f_t` and the candidate state value :math:`\widetilde{C_t}`, -we can compute :math:`C_t` the memory cells' new state at time :math:`t` : +we can compute :math:`C_t` the memory cells' new state at time :math:`t`: .. math:: :label: 4 @@ -119,7 +119,7 @@ we can compute :math:`C_t` the memory cells' new state at time :math:`t` : C_t = i_t * \widetilde{C_t} + f_t * C_{t-1} With the new state of the memory cells, we can compute the value of their -output gates and, subsequently, their outputs : +output gates and, subsequently, their outputs: .. math:: :label: 5 @@ -139,7 +139,7 @@ In this variant, the activation of a cell’s output gate does not depend on the memory cell’s state :math:`C_t`. This allows us to perform part of the computation more efficiently (see the implementation note, below, for details). This means that, in the variant we have implemented, there is no -matrix :math:`V_o` and equation :eq:`5` is replaced by equation :eq:`5-alt` : +matrix :math:`V_o` and equation :eq:`5` is replaced by equation :eq:`5-alt`: .. math:: :label: 5-alt @@ -170,7 +170,7 @@ concatenating the four matrices :math:`W_*` into a single weight matrix :math:`W` and performing the same concatenation on the weight matrices :math:`U_*` to produce the matrix :math:`U` and the bias vectors :math:`b_*` to produce the vector :math:`b`. Then, the pre-nonlinearity activations can -be computed with : +be computed with: .. math:: @@ -187,11 +187,11 @@ Code - Citations - Contact Code ==== -The LSTM implementation can be found in the two following files : +The LSTM implementation can be found in the two following files: -* `lstm.py `_ : Main script. Defines and train the model. +* `lstm.py `_: Main script. 
Defines and train the model. -* `imdb.py `_ : Secondary script. Handles the loading and preprocessing of the IMDB dataset. +* `imdb.py `_: Secondary script. Handles the loading and preprocessing of the IMDB dataset. After downloading both scripts and putting both in the same folder, the user can run the code by calling: @@ -202,7 +202,7 @@ can run the code by calling: The script will automatically download the data and decompress it. -**Note** : The provided code supports the Stochastic Gradient Descent (SGD), +**Note**: The provided code supports the Stochastic Gradient Descent (SGD), AdaDelta and RMSProp optimization methods. You are advised to use AdaDelta or RMSProp because SGD appears to performs poorly on this task with this particular model. From bb2aa41171de24c48315578fd41f682e07284eca Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Wed, 7 Jun 2017 14:05:28 -0400 Subject: [PATCH 81/90] typo and space fix --- doc/mlp.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/mlp.txt b/doc/mlp.txt index 0ecc7a89..9e59ffbf 100644 --- a/doc/mlp.txt +++ b/doc/mlp.txt @@ -90,8 +90,8 @@ The set of parameters to learn is the set :math:`\theta = \{W^{(2)},b^{(2)},W^{(1)},b^{(1)}\}`. Obtaining the gradients :math:`\partial{\ell}/\partial{\theta}` can be achieved through the **backpropagation algorithm** (a special case of the chain-rule of derivation). -Thankfully, since Theano performs automatic differentation, we will not need to -cover this in the tutorial ! +Thankfully, since Theano performs automatic differentiation, we will not need to +cover this in the tutorial! Going from logistic regression to MLP From 8eb21daf92d48c020bfc7fe9b3ef680403e812ae Mon Sep 17 00:00:00 2001 From: Philip Kirkbride Date: Wed, 7 Jun 2017 14:09:03 -0400 Subject: [PATCH 82/90] typo on lenet page --- doc/lenet.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/lenet.txt b/doc/lenet.txt index 09f50be6..84b7c3be 100644 --- a/doc/lenet.txt +++ b/doc/lenet.txt @@ -543,7 +543,7 @@ the task. Filter Shape ************ -Common filter shapes found in the litterature vary greatly, usually based on +Common filter shapes found in the literature vary greatly, usually based on the dataset. Best results on MNIST-sized images (28x28) are usually in the 5x5 range on the first layer, while natural image datasets (often with hundreds of pixels in each dimension) tend to use larger first-layer filters of shape 12x12 or 15x15. 
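As a companion to the filter-shape passage patched above: the shapes it
discusses enter the code through the ``filter_shape`` argument of Theano's
``conv2d``. A self-contained sketch (the shapes and names below are
illustrative only, not taken from the tutorial code):

    import numpy
    import theano
    import theano.tensor as T
    from theano.tensor.nnet import conv2d

    rng = numpy.random.RandomState(1234)
    x = T.tensor4('x')            # (batch, channels, height, width)
    w_shape = (20, 1, 5, 5)       # twenty 5x5 filters, the first-layer range
                                  # suggested for MNIST-sized (28x28) inputs
    W = theano.shared(
        numpy.asarray(rng.uniform(-0.1, 0.1, w_shape),
                      dtype=theano.config.floatX), name='W')
    out = conv2d(x, W, filter_shape=w_shape)
    f = theano.function([x], out)
    img = rng.uniform(size=(2, 1, 28, 28)).astype(theano.config.floatX)
    print(f(img).shape)           # (2, 20, 24, 24), since 28 - 5 + 1 = 24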
From 147cb2e9a9374d8f5b4673370c12ce6457b53cce Mon Sep 17 00:00:00 2001
From: Philip Kirkbride
Date: Wed, 7 Jun 2017 14:12:57 -0400
Subject: [PATCH 83/90] typo/inconsistency in spelling of corruption

---
 code/dA.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/dA.py b/code/dA.py
index 93a696f8..7d054b20 100644
--- a/code/dA.py
+++ b/code/dA.py
@@ -195,7 +195,7 @@ def __init__(

     def get_corrupted_input(self, input, corruption_level):
         """This function keeps ``1-corruption_level`` entries of the inputs the
-        same and zero-out randomly selected subset of size ``coruption_level``
+        same and zero-out randomly selected subset of size ``corruption_level``
         Note : first argument of theano.rng.binomial is the shape(size) of
                random numbers that it should produce
                second argument is the number of trials

From 534e91585ebddd8238bf59d9cb9ba7fef2e6949c Mon Sep 17 00:00:00 2001
From: Philip Kirkbride
Date: Tue, 20 Jun 2017 11:34:42 -0400
Subject: [PATCH 84/90] Add small note on easy download script

I'm not sure the existence/option of downloading all the datasets via
bash script will be obvious to people approaching the repo via the
written tutorial.
---
 doc/gettingstarted.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt
index 0019c3c6..f290305f 100644
--- a/doc/gettingstarted.txt
+++ b/doc/gettingstarted.txt
@@ -22,6 +22,11 @@ On each learning algorithm page, you will be able to download the corresponding

     git clone https://github.com/lisa-lab/DeepLearningTutorials.git

+On linux systems, after cloning, all datasets can be downloaded at once with:
+
+    cd DeeepLearningTutorials/data
+    ./download.sh
+

 .. _datasets:

From cb4261c830b39936aea224620c678480338ef272 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Bastien?=
Date: Tue, 20 Jun 2017 11:41:46 -0400
Subject: [PATCH 85/90] Tell that it works on Mac.

---
 doc/gettingstarted.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt
index f290305f..7b1974ea 100644
--- a/doc/gettingstarted.txt
+++ b/doc/gettingstarted.txt
@@ -22,7 +22,7 @@ On each learning algorithm page, you will be able to download the corresponding

     git clone https://github.com/lisa-lab/DeepLearningTutorials.git

-On linux systems, after cloning, all datasets can be downloaded at once with:
+On Linux or Mac systems, after cloning, all datasets can be downloaded at once with:

     cd DeeepLearningTutorials/data
     ./download.sh

From 36ec511d60746bde1d0e3905944760c92068675d Mon Sep 17 00:00:00 2001
From: Simon Lefrancois
Date: Tue, 11 Jul 2017 10:30:57 -0400
Subject: [PATCH 86/90] add link to github

---
 doc/index.txt  | 4 +++-
 doc/rnnrbm.txt | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/index.txt b/doc/index.txt
index 68a18ec5..e01e79fc 100644
--- a/doc/index.txt
+++ b/doc/index.txt
@@ -25,7 +25,9 @@ training them on a GPU.
 The algorithm tutorials have some prerequisites. You should know some python,
 and be familiar with numpy. Since this tutorial is about using Theano, you
 should read over the `Theano basic tutorial`_ first. Once you've done that,
-read through our :ref:`gettingstarted` chapter -- it introduces the notation, and [downloadable] datasets used in the algorithm tutorials, and the way we do optimization by stochastic gradient descent.
+read through our :ref:`gettingstarted` chapter -- it introduces the notation, and downloadable datasets used in the algorithm tutorials, and the way we do optimization by stochastic gradient descent. + +The code is available on the `Deep Learning Tutorial repositories `_. The purely supervised learning algorithms are meant to be read in order: diff --git a/doc/rnnrbm.txt b/doc/rnnrbm.txt index d64a0c4a..75e681f8 100644 --- a/doc/rnnrbm.txt +++ b/doc/rnnrbm.txt @@ -17,7 +17,7 @@ Modeling and generating sequences of polyphonic music with the RNN-RBM The script also assumes that the content of the `Nottingham Database of folk tunes `_ has been extracted in the ``../data`` directory. Alternative MIDI datasets are available `here `_. - Note that both dependencies above can be setup automatically by running the ``download.sh`` script in the ``../data`` directory. + Note that both dependencies above can be setup automatically by running the `download.sh `_ script in the ``../data`` directory of the `Deep Learning Tutorials repository `_. .. caution:: Need Theano 0.6 or more recent. From 81f257524079efc2c553beba0829c8a23d1a33d3 Mon Sep 17 00:00:00 2001 From: Simon Lefrancois Date: Tue, 11 Jul 2017 10:31:26 -0400 Subject: [PATCH 87/90] typo --- doc/gettingstarted.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index 7b1974ea..99c7f054 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -24,7 +24,7 @@ On each learning algorithm page, you will be able to download the corresponding On Linux or Mac systems, after cloning, all datasets can be downloaded at once with: - cd DeeepLearningTutorials/data + cd DeepLearningTutorials/data ./download.sh From ebb8c21df3a3d073003e1323fead2150ada56ce1 Mon Sep 17 00:00:00 2001 From: Simon Lefrancois Date: Wed, 26 Jul 2017 08:29:51 -0400 Subject: [PATCH 88/90] update nosetests command --- .jenkins/jenkins_buildbot_dlt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh index c8be22b4..eb43d91c 100755 --- a/.jenkins/jenkins_buildbot_dlt.sh +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -79,4 +79,4 @@ THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} echo "==== Executing nosetests with mode=FAST_RUN,floatX=float32,device=cuda" NAME=dlt_float32_cuda FILE=${BUILDBOT_DIR}/${NAME}_tests.xml -PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/DeepLearningTutorials/code:${PYTHONPATH} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32,device=cuda nosetests-2.7 test.py ${XUNIT}${FILE} ${SUITE}${NAME} +PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/DeepLearningTutorials/code:${PYTHONPATH} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32,device=cuda ${NOSETESTS} test.py ${XUNIT}${FILE} ${SUITE}${NAME} From 8d25f1a91a656c5a0c67fe2434a5d37d89983665 Mon Sep 17 00:00:00 2001 From: Simon Lefrancois Date: Wed, 26 Jul 2017 10:41:07 -0400 Subject: [PATCH 89/90] use nosetests directly for gpu --- .jenkins/jenkins_buildbot_dlt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh index eb43d91c..8b57a1bc 100755 --- a/.jenkins/jenkins_buildbot_dlt.sh +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -79,4 +79,4 @@ THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} echo "==== Executing nosetests with mode=FAST_RUN,floatX=float32,device=cuda" NAME=dlt_float32_cuda 
FILE=${BUILDBOT_DIR}/${NAME}_tests.xml -PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/DeepLearningTutorials/code:${PYTHONPATH} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32,device=cuda ${NOSETESTS} test.py ${XUNIT}${FILE} ${SUITE}${NAME} +PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/DeepLearningTutorials/code:${PYTHONPATH} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32,device=cuda nosetests test.py ${XUNIT}${FILE} ${SUITE}${NAME} From 764cd4cdf5dc157a121a2fbffc2dec91c03f2ed9 Mon Sep 17 00:00:00 2001 From: Simon Lefrancois Date: Thu, 7 Sep 2017 16:46:18 -0400 Subject: [PATCH 90/90] libgpuarray full checkout --- .jenkins/jenkins_buildbot_dlt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh index 8b57a1bc..15da288b 100755 --- a/.jenkins/jenkins_buildbot_dlt.sh +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -24,7 +24,7 @@ cd ${BUILDBOT_DIR} # Make fresh clone (with no history since we don't need it) rm -rf libgpuarray -git clone --depth 1 "https://github.com/Theano/libgpuarray.git" +git clone "https://github.com/Theano/libgpuarray.git" (cd libgpuarray && echo "libgpuarray commit" && git rev-parse HEAD)
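
After a libgpuarray/pygpu install like the one scripted above, a quick way
to confirm that pygpu is importable and can reach a device is a check along
these lines (hypothetical snippet, not part of the repository; the device
string depends on the machine):

    import pygpu

    print(pygpu.__version__)
    # 'cuda0' selects the first CUDA device; an OpenCL build would use a
    # string like 'opencl0:0' instead.
    ctx = pygpu.init('cuda0')
    print(ctx.devname)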