
Commit 8226007

initial commit
0 parents  commit 8226007

22 files changed

Lines changed: 2052 additions & 0 deletions

code/convolutional_mlp.py

Lines changed: 230 additions & 0 deletions
@@ -0,0 +1,230 @@
"""
This tutorial introduces the LeNet5 neural network architecture using Theano.  LeNet5 is a
convolutional neural network, good for classifying images.  This tutorial shows how to build the
architecture, and comes with all the hyper-parameters you need to reproduce the paper's MNIST
results.

The best results are obtained after X iterations of the main program loop, which takes ***
minutes on my workstation (an Intel Core i7, circa July 2009), and *** minutes on my GPU (an
NVIDIA GTX 285 graphics processor).

This implementation simplifies the model in the following ways:

 - LeNetConvPool doesn't implement location-specific gain and bias parameters.

 - LeNetConvPool doesn't implement pooling by average; it implements pooling by max.

 - Digit classification is implemented with logistic regression rather than an RBF network.

 - Unlike LeNet5, the second convolutional layer here is fully connected to all of the first
   layer's feature maps (LeNet5 used a sparse connection scheme between the two layers).

References:

 - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner: Gradient-Based Learning Applied to Document
   Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
   http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf

"""
import numpy

from theano.compile.sandbox import shared, pfunc
from theano import tensor
from theano.tensor import nnet
# NOTE: ConvOp and DownsampleFactorMax are used below but were never imported in the original
# file; the import paths here are an assumption based on where these Ops live in Theano.
from theano.tensor.nnet.conv import ConvOp
from theano.tensor.signal.downsample import DownsampleFactorMax
# NOTE: these two names are shadowed by the local class definitions further down in this file.
from pylearn.shared.layers import LogisticRegression, SigmoidalLayer
import theano.sandbox.softsign
import pylearn.datasets.MNIST


try:
    # this tells theano to use the GPU if possible
    from theano.sandbox.cuda import use
    use()
except Exception, e:
    print('Warning: Attempt to use GPU resulted in error "%s"' % str(e))

class LeNetConvPool(object):
    """WRITEME

    Math of what the layer does, and what symbolic variables are created by the class (w, b,
    output).

    """

    #TODO: implement biases & scales properly.  There are supposed to be more parameters.
    #  - one bias & scale per filter
    #  - one bias & scale per downsample feature location (a 2d bias)
    #  - more?

    def __init__(self, rng, input, n_examples, n_imgs, img_shape, n_filters, filter_shape=(5,5),
            poolsize=(2,2)):
        """
        Allocate a LeNetConvPool layer with shared variable internal parameters.

        :param rng: a random number generator used to initialize weights

        :param input: symbolic images.  Shape: (n_examples, n_imgs, img_shape[0], img_shape[1])

        :param n_examples: input's shape[0] at runtime

        :param n_imgs: input's shape[1] at runtime

        :param img_shape: input's shape[2:4] at runtime

        :param n_filters: the number of filters to apply to the image

        :param filter_shape: the size of the filters to apply
        :type filter_shape: pair (rows, cols)

        :param poolsize: the downsampling (pooling) factor
        :type poolsize: pair (rows, cols)
        """

        #TODO: make a simpler convolution constructor!!
        #  - make dx and dy optional
        #  - why do we have to pass shapes? (Can we make them optional at least?)
        conv_op = ConvOp((n_imgs,)+img_shape, filter_shape, n_filters, n_examples,
                dx=1, dy=1, output_mode='valid')

        # - why is poolsize an op parameter here?
        # - can we just have a maxpool function that creates this Op internally?
        ds_op = DownsampleFactorMax(poolsize, ignore_border=True)

        # the filter tensor that we will apply is a 4D tensor
        w_shp = (n_filters, n_imgs) + filter_shape

        # the bias we add is a 1D tensor (one bias per filter)
        b_shp = (n_filters,)

        # initialize the filters with uniform noise in a range that shrinks with the fan-in
        # of a hidden unit: filter_shape[0] * filter_shape[1] * n_imgs inputs per filter
        self.w = shared(
                numpy.asarray(
                    rng.uniform(
                        low=-1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
                        high=1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
                        size=w_shp),
                    dtype=input.dtype))
        # the biases start at zero (the low and high of the uniform range are both 0)
        self.b = shared(
                numpy.asarray(
                    rng.uniform(low=-.0, high=0., size=b_shp),
                    dtype=input.dtype))

        self.input = input
        conv_out = conv_op(input, self.w)
        # add the per-filter bias after max-pooling, then squash with tanh
        self.output = tensor.tanh(ds_op(conv_out) + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.w, self.b]

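
# --- Not part of the original file: a small sanity-check helper ------------------------------
# It just reproduces the shape arithmetic that LeNetConvPool relies on ('valid'-mode
# convolution followed by non-overlapping max-pooling with ignore_border=True), so the layer
# sizes used in evaluate_lenet5 below can be checked by hand.  The helper name is illustrative.
def conv_pool_output_shape(img_shape, filter_shape, poolsize):
    """Return the (rows, cols) of one feature map after a 'valid' convolution + max-pooling."""
    conv_r = img_shape[0] - filter_shape[0] + 1   # 'valid' mode shrinks each side by filter - 1
    conv_c = img_shape[1] - filter_shape[1] + 1
    return (conv_r // poolsize[0], conv_c // poolsize[1])

# e.g. conv_pool_output_shape((28, 28), (5, 5), (2, 2)) == (12, 12)   # layer0 output maps
#      conv_pool_output_shape((12, 12), (5, 5), (2, 2)) == (4, 4)     # layer1 output maps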

class SigmoidalLayer(object):
    def __init__(self, rng, input, n_in, n_out):
        """
        Allocate a fully-connected layer with a tanh squashing function and shared-variable
        parameters.

        :param rng: a random number generator used to initialize weights
        :param input: a symbolic tensor of shape (n_examples, n_in)
        :param n_in: number of input units
        :param n_out: number of output units

        The layer creates self.w, a weight matrix of shape (n_in, n_out), and self.b, bias
        terms of shape (n_out,), and squashes the affine transform of the input with tanh.
        """
        self.input = input
        self.w = shared(
                numpy.asarray(
                    rng.uniform(low=-2/numpy.sqrt(n_in), high=2/numpy.sqrt(n_in),
                        size=(n_in, n_out)), dtype=input.dtype))
        self.b = shared(numpy.asarray(numpy.zeros(n_out), dtype=input.dtype))
        self.output = tensor.tanh(tensor.dot(input, self.w) + self.b)
        self.params = [self.w, self.b]


class LogisticRegression(object):
    """WRITEME"""

    def __init__(self, input, n_in, n_out):
        # weights and biases of the multi-class logistic regression start at zero
        self.w = shared(numpy.zeros((n_in, n_out), dtype=input.dtype))
        self.b = shared(numpy.zeros((n_out,), dtype=input.dtype))
        # L1 and L2 regularization terms (available to callers; unused in evaluate_lenet5)
        self.l1 = abs(self.w).sum()
        self.l2_sqr = (self.w ** 2).sum()
        # class-membership probabilities and the most likely class
        self.output = nnet.softmax(theano.dot(input, self.w) + self.b)
        self.argmax = theano.tensor.argmax(self.output, axis=1)
        self.params = [self.w, self.b]

    def nll(self, target):
        """Return the negative log-likelihood of the prediction of this model under a given
        target distribution.  Passing symbolic integers here means 1-hot.
        WRITEME
        """
        return nnet.categorical_crossentropy(self.output, target)

    def errors(self, target):
        """Return a vector of 0s and 1s, with a 1 on every line that was mis-classified.
        """
        if target.ndim != self.argmax.ndim:
            raise TypeError('target should have the same shape as self.argmax',
                    ('target', target.type, 'argmax', self.argmax.type))
        if target.dtype.startswith('int'):
            return theano.tensor.neq(self.argmax, target)
        else:
            raise NotImplementedError()

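
# --- Not part of the original file: a plain-numpy illustration of what nll() computes --------
# For one example with softmax probabilities p and integer target t, the categorical
# cross-entropy used above reduces to -log(p[t]).  For instance, with p = [0.1, 0.7, 0.2] and
# t = 1, the negative log-likelihood is -numpy.log(0.7), roughly 0.357.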

def evaluate_lenet5(batch_size=30, n_iter=1000):
    rng = numpy.random.RandomState(23455)

    mnist = pylearn.datasets.MNIST.train_valid_test()

    ishape = (28, 28)  # this is the size of MNIST images

    # allocate symbolic variables for the data
    x = tensor.fmatrix()  # the data is presented as rasterized images
    y = tensor.lvector()  # the labels are presented as a 1D vector of [long int] labels

    # construct the first convolutional pooling layer:
    # 28x28 images filtered with 5x5 kernels give 24x24 feature maps, max-pooled down to 12x12
    layer0 = LeNetConvPool(rng, input=x.reshape((batch_size, 1, 28, 28)), n_examples=batch_size,
            n_imgs=1, img_shape=ishape,
            n_filters=6, filter_shape=(5, 5),
            poolsize=(2, 2))

    # construct the second convolutional pooling layer:
    # 12x12 maps filtered with 5x5 kernels give 8x8 feature maps, max-pooled down to 4x4
    layer1 = LeNetConvPool(rng, input=layer0.output, n_examples=batch_size,
            n_imgs=6, img_shape=(12, 12),
            n_filters=16, filter_shape=(5, 5),
            poolsize=(2, 2))

    # construct a fully-connected sigmoidal layer on the flattened feature maps
    # (16 maps of 4x4 = 256 inputs)
    layer2 = SigmoidalLayer(rng, input=layer1.output.flatten(2), n_in=16 * 4 * 4, n_out=128)  # 128 ?

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=128, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.nll(y).mean()

    # create a function to compute the mistakes that are made by the model
    test_model = pfunc([x, y], layer3.errors(y))

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    learning_rate = numpy.asarray(0.01, dtype='float32')

    # train_model is a function that updates the model parameters by SGD
    train_model = pfunc([x, y], cost,
            updates=[(p, p - learning_rate * gp) for p, gp in zip(params, tensor.grad(cost, params))])

    # IS IT MORE SIMPLE TO USE A MINIMIZER OR THE DIRECT CODE?

    best_valid_score = float('inf')
    for i in xrange(n_iter):
        # loop over the training set in minibatches
        for j in xrange(len(mnist.train.x) / batch_size):
            cost_ij = train_model(
                    mnist.train.x[j * batch_size:(j + 1) * batch_size],
                    mnist.train.y[j * batch_size:(j + 1) * batch_size])
            #if 0 == j % 100:
                #print('epoch %i:%i, training error %f' % (i, j*batch_size, cost_ij))
        # compute the error on the validation set after every epoch
        valid_score = numpy.mean([test_model(
            mnist.valid.x[j * batch_size:(j + 1) * batch_size],
            mnist.valid.y[j * batch_size:(j + 1) * batch_size])
            for j in xrange(len(mnist.valid.x) / batch_size)])
        print('epoch %i, validation error %f' % (i, valid_score))
        # if this is the best validation score so far, also measure the test error
        if valid_score < best_valid_score:
            best_valid_score = valid_score
            test_score = numpy.mean([test_model(
                mnist.test.x[j * batch_size:(j + 1) * batch_size],
                mnist.test.y[j * batch_size:(j + 1) * batch_size])
                for j in xrange(len(mnist.test.x) / batch_size)])
            print('epoch %i, test error of best model %f' % (i, test_score))

if __name__ == '__main__':
    evaluate_lenet5()
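
# Example usage sketch (not in the original file): for a quick smoke test one might call, e.g.,
# evaluate_lenet5(batch_size=30, n_iter=1), since the default n_iter=1000 passes over the full
# MNIST training set a thousand times.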
