
Commit f042066

Added custom sentiment trainer
1 parent 785d45e commit f042066

4 files changed: 422 additions & 0 deletions


code/my_conv.py

Lines changed: 313 additions & 0 deletions
@@ -0,0 +1,313 @@
"""This tutorial introduces the LeNet5 neural network architecture
using Theano. LeNet5 is a convolutional neural network, good for
classifying images. This tutorial shows how to build the architecture,
and comes with all the hyper-parameters you need to reproduce the
paper's MNIST results.


This implementation simplifies the model in the following ways:

 - LeNetConvPool doesn't implement location-specific gain and bias parameters
 - LeNetConvPool doesn't implement pooling by average; it implements pooling
   by max.
 - Digit classification is implemented with a logistic regression rather than
   an RBF network
 - LeNet5 did not use fully-connected convolutions at the second layer

References:
 - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner:
   Gradient-Based Learning Applied to Document
   Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
   http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf

"""
import cPickle
import gzip
import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer


class MyConvPoolLayer(object):
    """Convolution and max-pooling layer of a convolutional network """

    def __init__(self, rng, input, filter_shape, poolsize=(2, 2)):
        """
        Allocate a MyConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape (batch size,
                      num input feature maps, image height, image width)

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """
        self.input = input

        # initialize weights to temporary values until we know the
        # shape of the output feature maps
        W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
        self.W = theano.shared(value=W_values)

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values)

        # convolve input feature maps with filters
        conv_out = conv.conv2d(input=input, filters=self.W,
                               filter_shape=filter_shape)

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        # pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        # replace the placeholder weights with values drawn uniformly
        # from [-W_bound, W_bound]
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W.set_value(numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX),
            borrow=True)

        # downsample each feature map individually, using maxpooling
        pooled_out = downsample.max_pool_2d(input=conv_out,
                                            ds=poolsize, ignore_border=True)

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]


class NLPNet(object):

    def __init__(self,
                 batch_size=500,
                 layers=1,
                 ishape=(28, 28),
                 conv_filter_shape=(5, 5),
                 maxpool_filter_shape=(2, 2),
                 nkerns=[1, 50, 50, 10]):
        self.ishape = ishape
        # allocate symbolic variables for the data
        self.x = T.matrix('x')   # the data is presented as batches of
                                 # rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector
                                 # of [int] labels
        self.batch_size = batch_size

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the model'
        rng = numpy.random.RandomState(23455)

        layer0_input = self.x.reshape((batch_size, layers,
                                       self.ishape[0], self.ishape[1]))

        # Construct the first convolutional pooling layer:
        # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
        # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
        # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
        layer0 = MyConvPoolLayer(rng, input=layer0_input,
                                 filter_shape=(nkerns[0], 1,
                                               conv_filter_shape[0],
                                               conv_filter_shape[1]),
                                 poolsize=maxpool_filter_shape)
        layer0_output_x = (ishape[0] - conv_filter_shape[0] + 1) / maxpool_filter_shape[0]
        layer0_output_y = (ishape[1] - conv_filter_shape[1] + 1) / maxpool_filter_shape[1]

        layer1_input = layer0.output.flatten(2)

        # construct a fully-connected sigmoidal layer
        layer1 = HiddenLayer(rng, input=layer1_input,
                             n_in=nkerns[0] * layer0_output_x * layer0_output_y,
                             n_out=nkerns[1], activation=T.tanh)

        layer2 = HiddenLayer(rng, input=layer1.output,
                             n_in=nkerns[1], n_out=nkerns[2],
                             activation=T.tanh)

        # classify the values of the fully-connected sigmoidal layers
        layer3 = LogisticRegression(input=layer2.output,
                                    n_in=nkerns[2], n_out=nkerns[3])

        # the cost we minimize during training is the NLL of the model
        self.cost = layer3.negative_log_likelihood(self.y)
        self.errors = layer3.errors
        # create a list of all model parameters to be fit by gradient descent
        self.params = layer3.params + layer2.params + layer1.params \
            + layer0.params

    def train(self, datasets, learning_rate=0.1, n_epochs=200):
        """Train the network with minibatch SGD and early stopping.

        :type datasets: list of length 3
        :param datasets: (train, validation, test) pairs of shared-variable
                         (examples, labels) data, as returned by load_data

        :type learning_rate: float
        :param learning_rate: learning rate used (factor for the stochastic
                              gradient)

        :type n_epochs: int
        :param n_epochs: maximal number of epochs to run the optimizer
        """
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= self.batch_size
        n_valid_batches /= self.batch_size
        n_test_batches /= self.batch_size

        index = T.lscalar()  # index to a [mini]batch
        # create functions to compute the mistakes that are made by the model
        test_model = theano.function([index], self.errors(self.y),
            givens={
                self.x: test_set_x[index * self.batch_size: (index + 1) * self.batch_size],
                self.y: test_set_y[index * self.batch_size: (index + 1) * self.batch_size]})

        validate_model = theano.function([index], self.errors(self.y),
            givens={
                self.x: valid_set_x[index * self.batch_size: (index + 1) * self.batch_size],
                self.y: valid_set_y[index * self.batch_size: (index + 1) * self.batch_size]})

        # create a list of gradients for all model parameters
        grads = T.grad(self.cost, self.params)

        # train_model is a function that updates the model parameters by
        # SGD. Since this model has many parameters, it would be tedious to
        # manually create an update rule for each model parameter. We thus
        # create the updates dictionary by automatically looping over all
        # (params[i], grads[i]) pairs.
        updates = {}
        for param_i, grad_i in zip(self.params, grads):
            updates[param_i] = param_i - learning_rate * grad_i

        train_model = theano.function([index], self.cost, updates=updates,
            givens={
                self.x: train_set_x[index * self.batch_size: (index + 1) * self.batch_size],
                self.y: train_set_y[index * self.batch_size: (index + 1) * self.batch_size]})

        ###############
        # TRAIN MODEL #
        ###############
        print '... training'
        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is
                               # found
        improvement_threshold = 0.995  # a relative improvement of this much
                                       # is considered significant
        validation_frequency = min(n_train_batches, patience / 2)
                                       # go through this many minibatches
                                       # before checking the network on the
                                       # validation set; in this case we
                                       # check every epoch

        best_params = None
        best_validation_loss = numpy.inf
        best_iter = 0
        test_score = 0.
        start_time = time.clock()

        epoch = 0
        done_looping = False

        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):

                iter = (epoch - 1) * n_train_batches + minibatch_index

                if iter % 100 == 0:
                    print 'training @ iter = ', iter
                cost_ij = train_model(minibatch_index)

                if (iter + 1) % validation_frequency == 0:

                    # compute zero-one loss on validation set
                    validation_losses = [validate_model(i) for i
                                         in xrange(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * \
                           improvement_threshold:
                            patience = max(patience, iter * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                        # test it on the test set
                        test_losses = [test_model(i)
                                       for i in xrange(n_test_batches)]
                        test_score = numpy.mean(test_losses)
                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))

                if patience <= iter:
                    done_looping = True
                    break

        end_time = time.clock()
        print('Optimization complete.')
        print('Best validation score of %f %% obtained at iteration %i, '
              'with test performance %f %%' %
              (best_validation_loss * 100., best_iter, test_score * 100.))
        print >> sys.stderr, ('The code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))
if __name__ == '__main__':
    net = NLPNet()
    datasets = load_data("../data/mnist.pkl.gz")
    net.train(datasets)


def experiment(state, channel):
    # jobman-style hook carried over from the Theano tutorial; build and
    # train an NLPNet with the learning rate and dataset given by `state`
    net = NLPNet()
    net.train(load_data(state.dataset), learning_rate=state.learning_rate)
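As a quick sanity check on the shape arithmetic in the comments above (filtering: 28-5+1 = 24 per side; max-pooling: 24/2 = 12), here is a minimal sketch that pushes a random batch through MyConvPoolLayer. It assumes the Theano 0.x APIs this file imports are installed and that my_conv.py is importable (it pulls in logistic_sgd and mlp at import time); the batch size of 3 and the 2 filters are arbitrary illustration values, not from the commit.

import numpy
import theano
import theano.tensor as T

from my_conv import MyConvPoolLayer

rng = numpy.random.RandomState(23455)
x = T.tensor4('x')  # (batch, input feature maps, height, width)

# 2 filters of size 5x5 over one input channel, with 2x2 max-pooling
layer = MyConvPoolLayer(rng, input=x,
                        filter_shape=(2, 1, 5, 5), poolsize=(2, 2))
f = theano.function([x], layer.output)

batch = numpy.random.randn(3, 1, 28, 28).astype(theano.config.floatX)
print f(batch).shape  # (3, 2, 12, 12): (28-5+1)/2 = 12 per side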

code/sentiment140_resize.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
import csv

N = 1000
fi = csv.reader(open("Sentiment140/training.1600000.processed.noemoticon.csv"),
                delimiter=",", quotechar="\"")
fo = csv.writer(open("Sentiment140/training.%d.processed.noemoticon.csv" % (N * 2), "w"),
                delimiter=",", quotechar="\"")

# keep at most N rows per polarity label (column 0) to get a balanced subset
counts = [0] * 5
for row in fi:
    label = int(row[0])
    if counts[label] < N:
        counts[label] += 1
        fo.writerow(row)
print counts
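The script caps each polarity label at N rows to produce a small, balanced slice of the corpus. In the standard Sentiment140 training file the polarity in column 0 is 0 (negative) or 4 (positive), so only two of the five counters should fill, giving N*2 = 2000 output rows. A small sketch for verifying the result, under that labeling assumption:

import csv
from collections import Counter

counts = Counter()
with open("Sentiment140/training.2000.processed.noemoticon.csv") as f:
    for row in csv.reader(f, delimiter=",", quotechar="\""):
        counts[int(row[0])] += 1
print counts  # expect Counter({0: 1000, 4: 1000})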

code/srl.py

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
from comvolutional_mlp import LeNetConvPoolLayer
