
Commit 79c90fa

wrote ConvOp description for conv. neural nets
1 parent 61ceb18 commit 79c90fa

5 files changed: 318 additions & 121 deletions


code/convolutional_mlp.py

Lines changed: 179 additions & 95 deletions
@@ -27,22 +27,13 @@
 
 
 """
-import numpy
-from theano.compile.sandbox import shared, pfunc
-from theano import tensor
-from pylearn.shared.layers import LogisticRegression, SigmoidalLayer
+import numpy, theano, cPickle, gzip, time
+import theano.tensor as T
 import theano.sandbox.softsign
 import pylearn.datasets.MNIST
+from theano.sandbox import conv, downsample
 
-
-try:
-    # this tells theano to use the GPU if possible
-    from theano.sandbox.cuda import use
-    use()
-except Exception, e:
-    print('Warning: Attempt to use GPU resulted in error "%s"' % str(e))
-
-class LeNetConvPool(object):
+class LeNetConvPoolLayer(object):
     """WRITEME
 
     Math of what the layer does, and what symbolic variables are created by the class (w, b,
@@ -55,21 +46,17 @@ class LeNetConvPool(object):
     # - one bias & scale per downsample feature location (a 2d bias)
     # - more?
 
-    def __init__(self, rng, input, n_examples, n_imgs, img_shape, n_filters, filter_shape=(5,5),
+    def __init__(self, rng, input, n_imgs, n_filters, filter_shape=(5,5),
             poolsize=(2,2)):
         """
-        Allocate a LeNetConvPool layer with shared variable internal parameters.
+        Allocate a LeNetConvPoolLayer with shared variable internal parameters.
 
         :param rng: a random number generator used to initialize weights
 
-        :param input: symbolic images. Shape: (n_examples, n_imgs, img_shape[0], img_shape[1])
-
-        :param n_examples: input's shape[0] at runtime
+        :param input: symbolic images. Shape: (<mini-batch size>, n_imgs, <img height>, <img width>)
 
         :param n_imgs: input's shape[1] at runtime
 
-        :param img_shape: input's shape[2:4] at runtime
-
         :param n_filters: the number of filters to apply to the image.
 
         :param filter_shape: the size of the filters to apply
@@ -79,74 +66,67 @@ def __init__(self, rng, input, n_examples, n_imgs, img_shape, n_filters, filter_
         :type poolsize: pair (rows, cols)
         """
 
-        #TODO: make a simpler convolution constructor!!
-        # - make dx and dy optional
-        # - why do we have to pass shapes? (Can we make them optional at least?)
-        conv_op = ConvOp((n_imgs,)+img_shape, filter_shape, n_filters, n_examples,
-                dx=1, dy=1, output_mode='valid')
-
-        # - why is poolsize an op parameter here?
-        # - can we just have a maxpool function that creates this Op internally?
-        ds_op = DownsampleFactorMax(poolsize, ignore_border=True)
-
         # the filter tensor that we will apply is a 4D tensor
         w_shp = (n_filters, n_imgs) + filter_shape
-
-        # the bias we add is a 1D tensor
-        b_shp = (n_filters,)
-
-        self.w = shared(
-                numpy.asarray(
+        w_bound = numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs)
+        self.w = theano.shared( numpy.asarray(
                     rng.uniform(
-                        low=-1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
-                        high=1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
+                        low=-1.0 / w_bound,
+                        high=1.0 / w_bound,
                         size=w_shp),
                     dtype=input.dtype))
-        self.b = shared(
-                numpy.asarray(
+
+        # the bias we add is a 1D tensor
+        b_shp = (n_filters,)
+        self.b = theano.shared( numpy.asarray(
                     rng.uniform(low=-.0, high=0., size=(n_filters,)),
                     dtype=input.dtype))
 
         self.input = input
-        conv_out = conv_op(input, self.w)
-        self.output = tensor.tanh(ds_op(conv_out) + b.dimshuffle('x', 0, 'x', 'x'))
+        conv_out = conv.conv2d(input, self.w)
+
+        # - why is poolsize an op parameter here?
+        # - can we just have a maxpool function that creates this Op internally?
+        ds_op = downsample.DownsampleFactorMax(poolsize, ignore_border=True)
+        self.output = T.tanh(ds_op(conv_out) + self.b.dimshuffle('x', 0, 'x', 'x'))
         self.params = [self.w, self.b]
 
+
 class SigmoidalLayer(object):
-    def __init__(self, input, n_in, n_out):
+    def __init__(self, rng, input, n_in, n_out):
         """
         :param input: a symbolic tensor of shape (n_examples, n_in)
         :param w: a symbolic weight matrix of shape (n_in, n_out)
         :param b: symbolic bias terms of shape (n_out,)
         :param squash: a squashing function
         """
         self.input = input
-        self.w = shared(
+        self.w = theano.shared(
                 numpy.asarray(
                     rng.uniform(low=-2/numpy.sqrt(n_in), high=2/numpy.sqrt(n_in),
                         size=(n_in, n_out)), dtype=input.dtype))
-        self.b = shared(numpy.asarray(numpy.zeros(n_out), dtype=input.dtype))
-        self.output = tensor.tanh(tensor.dot(input, self.w) + self.b)
+        self.b = theano.shared(numpy.asarray(numpy.zeros(n_out), dtype=input.dtype))
+        self.output = T.tanh(T.dot(input, self.w) + self.b)
         self.params = [self.w, self.b]
 
 class LogisticRegression(object):
     """WRITEME"""
 
     def __init__(self, input, n_in, n_out):
-        self.w = shared(numpy.zeros((n_in, n_out), dtype=input.dtype))
-        self.b = shared(numpy.zeros((n_out,), dtype=input.dtype))
-        self.l1=abs(self.w).sum()
+        self.w = theano.shared(numpy.zeros((n_in, n_out), dtype=input.dtype))
+        self.b = theano.shared(numpy.zeros((n_out,), dtype=input.dtype))
+        self.l1 = abs(self.w).sum()
         self.l2_sqr = (self.w**2).sum()
-        self.output=nnet.softmax(theano.dot(input, self.w)+self.b)
-        self.argmax=theano.tensor.argmax(self.output, axis=1)
+        self.output = T.nnet.softmax(theano.dot(input, self.w)+self.b)
+        self.argmax = T.argmax(self.output, axis=1)
         self.params = [self.w, self.b]
 
     def nll(self, target):
         """Return the negative log-likelihood of the prediction of this model under a given
         target distribution. Passing symbolic integers here means 1-hot.
         WRITEME
         """
-        return nnet.categorical_crossentropy(self.output, target)
+        return T.nnet.categorical_crossentropy(self.output, target)
 
     def errors(self, target):
         """Return a vector of 0s and 1s, with 1s on every line that was mis-classified.
@@ -155,75 +135,179 @@ def errors(self, target):
             raise TypeError('target should have the same shape as self.argmax', ('target', target.type,
                 'argmax', self.argmax.type))
         if target.dtype.startswith('int'):
-            return theano.tensor.neq(self.argmax, target)
+            return T.neq(self.argmax, target)
         else:
             raise NotImplementedError()
 
-def evaluate_lenet5(batch_size=30, n_iter=1000):
+def load_dataset():
+
+    # Load the dataset
+    f = gzip.open('mnist.pkl.gz','rb')
+    train_set, valid_set, test_set = cPickle.load(f)
+    f.close()
+
+    # make minibatches of size 20
+    batch_size = 20    # size of the minibatch
+
+    # Dealing with the training set
+    # get the list of training images (x) and their labels (y)
+    (train_set_x, train_set_y) = train_set
+    # initialize the list of training minibatches with empty list
+    train_batches = []
+    for i in xrange(0, len(train_set_x), batch_size):
+        # add to the list of minibatches the minibatch starting at
+        # position i, ending at position i+batch_size
+        # a minibatch is a pair; the first element of the pair is a list
+        # of datapoints, the second element is the list of corresponding
+        # labels
+        train_batches = train_batches + \
+            [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]
+
+    # Dealing with the validation set
+    (valid_set_x, valid_set_y) = valid_set
+    # initialize the list of validation minibatches
+    valid_batches = []
+    for i in xrange(0, len(valid_set_x), batch_size):
+        valid_batches = valid_batches + \
+            [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]
+
+    # Dealing with the testing set
+    (test_set_x, test_set_y) = test_set
+    # initialize the list of testing minibatches
+    test_batches = []
+    for i in xrange(0, len(test_set_x), batch_size):
+        test_batches = test_batches + \
+            [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]
+
+    return train_batches, valid_batches, test_batches
+
+
+def evaluate_lenet5(learning_rate=0.01, n_iter=1000):
+
     rng = numpy.random.RandomState(23455)
 
-    mnist = pylearn.datasets.MNIST.train_valid_test()
+    train_batches, valid_batches, test_batches = load_dataset()
 
-    ishape=(28,28) #this is the size of MNIST images
+    ishape = (28,28)    # this is the size of MNIST images
+    batch_size = 20     # size of the minibatch
 
     # allocate symbolic variables for the data
-    x = tensor.fmatrix()  # the data is presented as rasterized images
-    y = tensor.lvector()  # the labels are presented as 1D vector of [long int] labels
+    x = T.fmatrix()  # the data is presented as rasterized images
+    y = T.lvector()  # the labels are presented as 1D vector of [long int] labels
+
+
+    ######################
+    # BUILD ACTUAL MODEL #
+    ######################
 
     # construct the first convolutional pooling layer
-    layer0 = LeNetConvPool.new(rng, input=x.reshape((batch_size,1,28,28)), n_examples=batch_size,
-            n_imgs=1, img_shape=ishape,
-            n_filters=6, filter_shape=(5,5),
-            poolsize=(2,2))
+    layer0 = LeNetConvPoolLayer(rng, input=x.reshape((batch_size,1,28,28)),
+            n_imgs=1, n_filters=6, filter_shape=(5,5), poolsize=(2,2))
 
     # construct the second convolutional pooling layer
-    layer1 = LeNetConvPool.new(rng, input=layer0.output, n_examples=batch_size,
-            n_imgs=6, img_shape=(12,12),
-            n_filters=16, filter_shape=(5,5),
-            poolsize=(2,2))
+    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
+            n_imgs=6, n_filters=16, filter_shape=(5,5), poolsize=(2,2))
 
     # construct a fully-connected sigmoidal layer
-    layer2 = SigmoidalLayer.new(rng, input=layer1.output.flatten(2), n_in=16*16, n_out=128) # 128 ?
+    layer2 = SigmoidalLayer(rng, input=layer1.output.flatten(2), n_in=16*4*4, n_out=128) # 128 ?
 
     # classify the values of the fully-connected sigmoidal layer
-    layer3 = LogisticRegression.new(input=layer2.output, n_in=128, n_out=10)
+    layer3 = LogisticRegression(input=layer2.output, n_in=128, n_out=10)
 
     # the cost we minimize during training is the NLL of the model
     cost = layer3.nll(y).mean()
 
     # create a function to compute the mistakes that are made by the model
-    test_model = pfunc([x,y], layer3.errors(y))
+    test_model = theano.function([x,y], layer3.errors(y))
 
     # create a list of all model parameters to be fit by gradient descent
     params = layer3.params + layer2.params + layer1.params + layer0.params
-    learning_rate = numpy.asarray(0.01, dtype='float32')
+    learning_rate = numpy.asarray(learning_rate, dtype='float32')
 
     # train_model is a function that updates the model parameters by SGD
-    train_model = pfunc([x, y], cost,
-            updates=[(p, p - learning_rate*gp) for p,gp in zip(params, tensor.grad(cost, params))])
-
-    # IS IT MORE SIMPLE TO USE A MINIMIZER OR THE DIRECT CODE?
-
-    best_valid_score = float('inf')
-    for i in xrange(n_iter):
-        for j in xrange(len(mnist.train.x)/batch_size):
-            cost_ij = train_model(
-                    mnist.train.x[j*batch_size:(j+1)*batch_size],
-                    mnist.train.y[j*batch_size:(j+1)*batch_size])
-            #if 0 == j % 100:
-                #print('epoch %i:%i, training error %f' % (i, j*batch_size, cost_ij))
-        valid_score = numpy.mean([test_model(
-            mnist.valid.x[j*batch_size:(j+1)*batch_size],
-            mnist.valid.y[j*batch_size:(j+1)*batch_size])
-            for j in xrange(len(mnist.valid.x)/batch_size)])
-        print('epoch %i, validation error %f' % (i, valid_score))
-        if valid_score < best_valid_score:
-            best_valid_score = valid_score
-            test_score = numpy.mean([test_model(
-                mnist.test.x[j*batch_size:(j+1)*batch_size],
-                mnist.test.y[j*batch_size:(j+1)*batch_size])
-                for j in xrange(len(mnist.test.x)/batch_size)])
-            print('epoch %i, test error of best model %f' % (i, test_score))
+    train_model = theano.function([x, y], cost,
+            updates=[(p, p - learning_rate*gp) for p,gp in zip(params, T.grad(cost, params))])
+
+
+    ###############
+    # TRAIN MODEL #
+    ###############
+
+    n_minibatches = len(train_batches)
+
+    # early-stopping parameters
+    patience = 10000               # look at this many examples regardless
+    patience_increase = 2          # wait this much longer when a new best is
+                                   # found
+    improvement_threshold = 0.995  # a relative improvement of this much is
+                                   # considered significant
+    validation_frequency = n_minibatches  # go through this many
+                                          # minibatches before checking the network
+                                          # on the validation set; in this case we
+                                          # check every epoch
+
+    best_params = None
+    best_validation_loss = float('inf')
+    test_score = 0.
+    start_time = time.clock()
+
+    # have a maximum of `n_iter` iterations through the entire dataset
+    for iter in xrange(n_iter * n_minibatches):
+
+        # get epoch and minibatch index
+        epoch = iter / n_minibatches
+        minibatch_index = iter % n_minibatches
+
+        # get the minibatches corresponding to `iter` modulo
+        # `len(train_batches)`
+        x, y = train_batches[minibatch_index]
+
+        print 'training @ iter = ', iter
+        cost_ij = train_model(x, y)
+
+        if (iter+1) % validation_frequency == 0:
+            # compute zero-one loss on validation set
+            this_validation_loss = 0.
+            for x, y in valid_batches:
+                # sum up the errors for each minibatch
+                this_validation_loss += test_model(x, y)
+            # get the average by dividing by the number of minibatches
+            this_validation_loss /= len(valid_batches)
+
+            print('epoch %i, minibatch %i/%i, validation error %f %%' % \
+                (epoch, minibatch_index+1, n_minibatches, \
+                this_validation_loss*100.))
+
+            # if we got the best validation score until now
+            if this_validation_loss < best_validation_loss:
+
+                # improve patience if loss improvement is good enough
+                if this_validation_loss < best_validation_loss * \
+                       improvement_threshold:
+                    patience = max(patience, iter * patience_increase)
+
+                best_validation_loss = this_validation_loss
+                # test it on the test set
+
+                test_score = 0.
+                for x, y in test_batches:
+                    test_score += test_model(x, y)
+                test_score /= len(test_batches)
+                print((' epoch %i, minibatch %i/%i, test error of best '
+                      'model %f %%') %
+                      (epoch, minibatch_index+1, n_minibatches,
+                      test_score*100.))
+
+        if patience <= iter:
+            break
+
+    end_time = time.clock()
+    print(('Optimization complete with best validation score of %f %%, '
+           'with test performance %f %%') %
+           (best_validation_loss * 100., test_score*100.))
+    print('The code ran for %f minutes' % ((end_time-start_time)/60.))
+
 
 if __name__ == '__main__':
     evaluate_lenet5()
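
Note on the new layer: LeNetConvPoolLayer builds its output symbolically as conv.conv2d, then downsample.DownsampleFactorMax, then T.tanh with the bias broadcast across the batch and spatial axes via dimshuffle. The NumPy sketch below is not part of the commit; it just mirrors that forward pass so the shapes are easy to check (conv2d performs a true convolution, i.e. with flipped filters, while this sketch uses cross-correlation, which differs only by that flip):

    import numpy

    def convpool_forward(x, w, b, poolsize=(2, 2)):
        # x: (batch, n_imgs, height, width); w: (n_filters, n_imgs, fh, fw); b: (n_filters,)
        batch, n_imgs, h, wd = x.shape
        n_filters, _, fh, fw = w.shape
        oh, ow = h - fh + 1, wd - fw + 1              # 'valid' convolution output shape
        conv_out = numpy.zeros((batch, n_filters, oh, ow))
        for i in range(oh):
            for j in range(ow):
                patch = x[:, :, i:i+fh, j:j+fw]       # (batch, n_imgs, fh, fw)
                conv_out[:, :, i, j] = numpy.einsum('bcij,fcij->bf', patch, w)
        ph, pw = poolsize                             # max-pooling with ignore_border=True
        pooled = conv_out[:, :, :oh//ph*ph, :ow//pw*pw]
        pooled = pooled.reshape(batch, n_filters, oh//ph, ph, ow//pw, pw).max(axis=(3, 5))
        return numpy.tanh(pooled + b[None, :, None, None])  # broadcast like dimshuffle('x', 0, 'x', 'x')

Chasing shapes through this function also explains the n_in=16*4*4 fix above: 28x28 images give 24x24 maps after the first 5x5 convolution and 12x12 after pooling; the second layer then gives 8x8 and, after pooling, 4x4. So layer1 emits 16 feature maps of 4x4, and the flattened input to the sigmoidal layer has 16*4*4 = 256 entries, not 16*16.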

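Note on the training loop: the TRAIN MODEL section implements patience-based early stopping. Below is a condensed, runnable sketch of just that schedule; train_one and validate are stand-ins introduced for this illustration (in the commit, validation averages test_model over valid_batches):

    def early_stopping(n_iter, n_minibatches, train_one, validate,
                       patience=10000, patience_increase=2,
                       improvement_threshold=0.995):
        best_validation_loss = float('inf')
        validation_frequency = n_minibatches          # validate once per epoch
        for iter in range(n_iter * n_minibatches):
            train_one(iter % n_minibatches)           # one SGD step on this minibatch
            if (iter + 1) % validation_frequency == 0:
                this_validation_loss = validate()
                if this_validation_loss < best_validation_loss:
                    # a sufficiently large improvement extends the patience horizon
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
            if patience <= iter:                      # patience exhausted: stop early
                break
        return best_validation_loss
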
doc/gettingstarted.txt

Lines changed: 1 addition & 0 deletions
@@ -174,6 +174,7 @@ Tutorial code often uses the following namespaces:
 
     import theano
     import theano.tensor as T
+    import numpy
 
 
 
doc/images/3wolfmoon.jpg

125 KB

doc/images/3wolfmoon_output.png

93.8 KB

0 commit comments
