Commit 8cd8bb7

Merge pull request lisa-lab#28 from nouiz/master

Always load the data from the data directory

2 parents: ae246e7 + 6069c87

11 files changed: +27 additions, -45 deletions

.gitignore

Lines changed: 5 additions & 0 deletions
@@ -1,5 +1,10 @@
 code/*.pyc
+code/midi
 data/mnist.pkl.gz
+data/mnist_py3k.pkl.gz
+data/Nottingham.zip
+data/Nottingham
+data/midi.zip
 html
 *.pyc
 *~

.travis.yml

Lines changed: 1 addition & 2 deletions
@@ -26,8 +26,7 @@ env:
 - PART="test.py:test_logistic_sgd test.py:test_logistic_cg test.py:test_mlp test.py:test_convolutional_mlp test.py:test_dA"
 - PART="test.py:test_SdA"
 - PART="test.py:test_dbn"
-- PART="test.py:test_rbm"
-- PART="test.py:test_rnnrbm"
+- PART="test.py:test_rbm test.py:test_rnnrbm"
 - PART="-e test.py"
 
 #i7-2600K CPU @ 3.40GHz

code/DBN.py

Lines changed: 1 addition & 1 deletion
@@ -257,7 +257,7 @@ def test_score():
 
 def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
              pretrain_lr=0.01, k=1, training_epochs=1000,
-             dataset='../data/mnist.pkl.gz', batch_size=10):
+             dataset='mnist.pkl.gz', batch_size=10):
     """
     Demonstrates how to train and test a Deep Belief Network.
 
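
The same one-line change to the default dataset argument repeats in SdA.py, cA.py, convolutional_mlp.py, dA.py, logistic_sgd.py and mlp.py below: with the directory fallback added to load_data (see the code/logistic_sgd.py hunk further down), a bare filename now resolves correctly regardless of the working directory. A minimal usage sketch of the new default, assuming it is run from the code/ directory so the import resolves; the reduced epoch counts are only to keep the run short:

from DBN import test_DBN

# The bare filename is resolved inside load_data(): it finds
# ../data/mnist.pkl.gz relative to logistic_sgd.py, or downloads
# MNIST into data/ if the file is missing.
test_DBN(pretraining_epochs=1, training_epochs=2, dataset='mnist.pkl.gz')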

code/SdA.py

Lines changed: 1 addition & 1 deletion
@@ -295,7 +295,7 @@ def test_score():
 
 def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
              pretrain_lr=0.001, training_epochs=1000,
-             dataset='../data/mnist.pkl.gz', batch_size=1):
+             dataset='mnist.pkl.gz', batch_size=1):
     """
     Demonstrates how to train and test a stochastic denoising autoencoder.
 

code/cA.py

Lines changed: 1 addition & 1 deletion
@@ -221,7 +221,7 @@ def get_cost_updates(self, contraction_level, learning_rate):
 
 
 def test_cA(learning_rate=0.01, training_epochs=20,
-            dataset='../data/mnist.pkl.gz',
+            dataset='mnist.pkl.gz',
             batch_size=10, output_folder='cA_plots', contraction_level=.1):
     """
     This demo is tested on MNIST

code/convolutional_mlp.py

Lines changed: 1 addition & 1 deletion
@@ -104,7 +104,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
 
 
 def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
-                    dataset='../data/mnist.pkl.gz',
+                    dataset='mnist.pkl.gz',
                     nkerns=[20, 50], batch_size=500):
     """ Demonstrates lenet on MNIST dataset
 

code/dA.py

Lines changed: 1 addition & 1 deletion
@@ -237,7 +237,7 @@ def get_cost_updates(self, corruption_level, learning_rate):
 
 
 def test_dA(learning_rate=0.1, training_epochs=15,
-            dataset='../data/mnist.pkl.gz',
+            dataset='mnist.pkl.gz',
             batch_size=20, output_folder='dA_plots'):
 
     """

code/logistic_cg.py

Lines changed: 7 additions & 35 deletions
@@ -48,6 +48,8 @@
 import theano
 import theano.tensor as T
 
+from logistic_sgd import load_data
+
 
 class LogisticRegression(object):
     """Multi-class Logistic Regression Class

@@ -132,7 +134,7 @@ def errors(self, y):
         raise NotImplementedError()
 
 
-def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='../data/mnist.pkl.gz'):
+def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):
     """Demonstrate conjugate gradient optimization of a log-linear model
 
     This is demonstrated on MNIST.

@@ -148,41 +150,11 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='../data/mnist.pkl.gz'):
     #############
     # LOAD DATA #
     #############
-    print '... loading data'
-
-    # Load the dataset
-    f = gzip.open(mnist_pkl_gz, 'rb')
-    train_set, valid_set, test_set = cPickle.load(f)
-    f.close()
+    datasets = load_data(mnist_pkl_gz)
 
-    def shared_dataset(data_xy, borrow=True):
-        """ Function that loads the dataset into shared variables
-
-        The reason we store our dataset in shared variables is to allow
-        Theano to copy it into the GPU memory (when code is run on GPU).
-        Since copying data into the GPU is slow, copying a minibatch everytime
-        is needed (the default behaviour if the data is not in a shared
-        variable) would lead to a large decrease in performance.
-        """
-        data_x, data_y = data_xy
-        shared_x = theano.shared(numpy.asarray(data_x,
-                                               dtype=theano.config.floatX),
-                                 borrow=borrow)
-        shared_y = theano.shared(numpy.asarray(data_y,
-                                               dtype=theano.config.floatX),
-                                 borrow=borrow)
-        # When storing data on the GPU it has to be stored as floats
-        # therefore we will store the labels as ``floatX`` as well
-        # (``shared_y`` does exactly that). But during our computations
-        # we need them as ints (we use labels as index, and if they are
-        # floats it doesn't make sense) therefore instead of returning
-        # ``shared_y`` we will have to cast it to int. This little hack
-        # lets us get around this issue
-        return shared_x, T.cast(shared_y, 'int32')
-
-    test_set_x, test_set_y = shared_dataset(test_set)
-    valid_set_x, valid_set_y = shared_dataset(valid_set)
-    train_set_x, train_set_y = shared_dataset(train_set)
+    train_set_x, train_set_y = datasets[0]
+    valid_set_x, valid_set_y = datasets[1]
+    test_set_x, test_set_y = datasets[2]
 
     batch_size = 600    # size of the minibatch
 
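
The 35 deleted lines duplicated functionality that already lives in code/logistic_sgd.py: load_data returns three (input, target) pairs of Theano shared variables with the labels cast to int32, so the caller only needs to index into the result. A short usage sketch of the shared helper, assuming mnist.pkl.gz is present in data/ or downloadable:

from logistic_sgd import load_data

datasets = load_data('mnist.pkl.gz')

# Each entry is a (shared_x, shared_y) pair of Theano shared variables;
# the labels come back already cast to int32 for use as indices.
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]

print(train_set_x.get_value(borrow=True).shape)   # (50000, 784) for MNIST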

code/logistic_sgd.py

Lines changed: 7 additions & 1 deletion
@@ -157,6 +157,12 @@ def load_data(dataset):
 
     # Download the MNIST dataset if it is not present
     data_dir, data_file = os.path.split(dataset)
+    if data_dir == "" and not os.path.isfile(dataset):
+        # Check if dataset is in the data directory.
+        new_path = os.path.join(os.path.split(__file__)[0], "..", "data", dataset)
+        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
+            dataset = new_path
+
     if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
         import urllib
         origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'

@@ -211,7 +217,7 @@ def shared_dataset(data_xy, borrow=True):
 
 
 def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
-                           dataset='../data/mnist.pkl.gz',
+                           dataset='mnist.pkl.gz',
                            batch_size=600):
     """
     Demonstrate stochastic gradient descent optimization of a log-linear
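
The added block in load_data is the core of the pull request: a bare filename with no directory component is first looked up in the repository's data/ directory (relative to logistic_sgd.py itself) before any download is attempted, and MNIST is redirected there unconditionally so a fresh download also lands in data/. A standalone sketch of the same lookup logic; the helper name resolve_dataset is hypothetical, for illustration only:

import os

def resolve_dataset(dataset, script_path=__file__):
    # Hypothetical helper mirroring the fallback added to load_data().
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Bare filename not found in the cwd: try ../data relative to the
        # script. Use that path if the file exists there, or always for
        # MNIST so the later download is saved into data/ as well.
        new_path = os.path.join(os.path.split(script_path)[0], "..", "data", dataset)
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path
    return dataset

print(resolve_dataset('mnist.pkl.gz'))       # e.g. ../data/mnist.pkl.gz
print(resolve_dataset('/tmp/other.pkl.gz'))  # unchanged: has a directory part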

code/mlp.py

Lines changed: 1 addition & 1 deletion
@@ -174,7 +174,7 @@ def __init__(self, rng, input, n_in, n_hidden, n_out):
 
 
 def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
-             dataset='../data/mnist.pkl.gz', batch_size=20, n_hidden=500):
+             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
     """
     Demonstrate stochastic gradient descent optimization for a multilayer
     perceptron
