
Commit 3f705f4

add data change for kaggle
1 parent cfb0a02 commit 3f705f4

File tree

2 files changed: +783 -0 lines changed


code/convolutional_mlp_kaggle.py

Lines changed: 321 additions & 0 deletions
@@ -0,0 +1,321 @@
"""This tutorial introduces the LeNet5 neural network architecture
using Theano. LeNet5 is a convolutional neural network, good for
classifying images. This tutorial shows how to build the architecture,
and comes with all the hyper-parameters you need to reproduce the
paper's MNIST results.


This implementation simplifies the model in the following ways:

 - LeNetConvPool doesn't implement location-specific gain and bias parameters
 - LeNetConvPool doesn't implement pooling by average; it implements pooling
   by max
 - Digit classification is implemented with a logistic regression rather than
   an RBF network
 - LeNet5 did not use fully-connected convolutions at the second layer

References:
 - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner:
   Gradient-Based Learning Applied to Document
   Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
   http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf

"""
import cPickle
import gzip
import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer


class LeNetConvPoolLayer(object):
    """Pool Layer of a convolutional network """

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        # pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX),
            borrow=True)

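        # Added note (not in the original file): the uniform range
        # W_bound = sqrt(6. / (fan_in + fan_out)) is the initialisation
        # heuristic of Glorot & Bengio (2010) for tanh units; it keeps
        # activation and gradient variances roughly constant across layers.
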
        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv.conv2d(input=input, filters=self.W,
                filter_shape=filter_shape, image_shape=image_shape)

        # downsample each feature map individually, using maxpooling
        pooled_out = downsample.max_pool_2d(input=conv_out,
                                            ds=poolsize, ignore_border=True)

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]


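# Added note (not in the original file): for a layer like the one above, a
# valid-mode convolution followed by (p, p) max-pooling maps an h x w feature
# map to ((h - filter_h + 1) // p) x ((w - filter_w + 1) // p). For example,
# a 28x28 MNIST image with 5x5 filters and 2x2 pooling gives
# (28 - 5 + 1) // 2 = 12, which is where the 12x12 and 4x4 shapes used in
# evaluate_lenet5 below come from.

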
def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='../data/mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training / testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    predict_set_x, predict_set_y = datasets[3]

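    # Added note (not in the original file): unlike the stock tutorial
    # load_data, which returns three (x, y) pairs, the load_data used here is
    # assumed to return a fourth pair holding the unlabelled Kaggle test
    # images (predict_set_y presumably being a dummy placeholder); see the
    # note after this listing.
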
    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_predict_batches = predict_set_x.get_value(borrow=True).shape[0]

    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size
    n_predict_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    ishape = (28, 28)  # this is the size of MNIST images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], 12, 12),
            filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # i.e. (500, 800) with the defaults used here
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected tanh hidden layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4,
                         n_out=500, activation=T.tanh)

    # classify the values of the fully-connected hidden layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([index], layer3.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    predict_model = theano.function([index], layer3.predict(),
            givens={
                x: predict_set_x[index * batch_size: (index + 1) * batch_size]})

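    # Added note (not in the original file): predict() is not part of the
    # stock tutorial LogisticRegression class; it is presumably added by the
    # logistic_sgd.py changed in this same commit and is assumed to return
    # the predicted class labels (y_pred) for the current minibatch.
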
    validate_model = theano.function([index], layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function([index], cost, updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

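                    # Added note (not in the original file): each time a new
                    # best validation score is reached, run the network on
                    # every minibatch of the unlabelled Kaggle split and
                    # overwrite "predict_res" with one predicted digit per
                    # line, so the file always holds the predictions of the
                    # best model found so far.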
                    predict_res_array = [predict_model(i)
                                         for i in xrange(n_predict_batches)]
                    print predict_res_array
                    f = open("predict_res", "w+")
                    for y_pred_item_array in predict_res_array:
                        for y_pred_item in y_pred_item_array:
                            f.write(str(y_pred_item) + '\n')
                    f.close()

                    print((' epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))


if __name__ == '__main__':
    evaluate_lenet5()


def experiment(state, channel):
    evaluate_lenet5(state.learning_rate, dataset=state.dataset)
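
Note: the file above relies on two things the stock tutorial logistic_sgd.py does not provide: a load_data that returns a fourth, unlabelled split, and a predict() method on LogisticRegression. Both are presumably supplied by the second file changed in this commit, which is not shown here. The following is only a minimal sketch of what such a predict() method could look like, assuming the tutorial's usual softmax formulation; it is an illustration, not the committed code.

import numpy
import theano
import theano.tensor as T


class LogisticRegression(object):
    """Multi-class logistic regression: softmax over a linear projection."""

    def __init__(self, input, n_in, n_out):
        # zero-initialised weight matrix (n_in x n_out) and bias vector
        self.W = theano.shared(numpy.zeros((n_in, n_out),
                                           dtype=theano.config.floatX),
                               name='W', borrow=True)
        self.b = theano.shared(numpy.zeros((n_out,),
                                           dtype=theano.config.floatX),
                               name='b', borrow=True)
        # class-membership probabilities and hard label predictions
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        # mean negative log-probability of the correct labels
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        # fraction of minibatch examples whose predicted label is wrong
        return T.mean(T.neq(self.y_pred, y))

    def predict(self):
        # symbolic vector of predicted labels; convolutional_mlp_kaggle.py
        # compiles this (via predict_model) and evaluates it batch by batch
        return self.y_pred

load_data would then presumably return four (x, y) pairs instead of three, with the last pair built from the unlabelled Kaggle test images and a dummy label vector.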

0 commit comments
