
Commit b18a24b

logistic_sgd and logistic_cg running
1 parent 8226007 commit b18a24b

1 file changed

Lines changed: 246 additions & 0 deletions

File tree

code/logistic_cg.py

@@ -0,0 +1,246 @@
"""
This tutorial introduces logistic regression using Theano and conjugate
gradient descent.

Logistic regression is a probabilistic, linear classifier. It is parametrized
by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
done by projecting data points onto a set of hyperplanes, the distance to
which is used to determine a class membership probability.

Mathematically, this can be written as:

.. math::
  P(Y=i|x, W, b) &= softmax_i(W x + b) \\
                 &= \frac{e^{W_i x + b_i}}{\sum_j e^{W_j x + b_j}}


The model's prediction is the class whose probability is maximal, i.e. the
argmax of the vector whose i-th element is :math:`P(Y=i|x)`:

.. math::

  y_{pred} = argmax_i P(Y=i|x, W, b)


This tutorial presents a stochastic gradient descent optimization method
suitable for large datasets, and a conjugate gradient optimization method
that is suitable for smaller datasets.


References:

    - textbooks: "Pattern Recognition and Machine Learning" -
                 Christopher M. Bishop, section 4.3.2

TODO: recommended preprocessing, lr ranges, regularization ranges (explain
to do lr first, then add regularization)

"""
__docformat__ = 'restructuredtext en'


import numpy, cPickle, gzip


import theano
import theano.tensor as T

from theano.compile.sandbox import shared, pfunc
import theano.tensor.nnet

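
# Illustrative sketch (hypothetical helper, for exposition only -- not part
# of the tutorial's model code): the docstring's softmax/argmax math in plain
# numpy, handy for checking the formulas on a single datapoint.
def _numpy_softmax_prediction(x, W, b):
    """Return (P(Y=.|x,W,b), y_pred) for one datapoint x, weight matrix W of
    shape (n_in, n_out) and bias vector b of shape (n_out,)."""
    scores = numpy.dot(x, W) + b
    e = numpy.exp(scores - scores.max())  # subtract max for numerical stability
    p_y_given_x = e / e.sum()
    return p_y_given_x, p_y_given_x.argmax()
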
class LogisticRegression(object):
    """Multi-class Logistic Regression Class

    The logistic regression is fully described by a weight matrix :math:`W`
    and bias vector :math:`b`. Classification is done by projecting data
    points onto a set of hyperplanes, the distance to which is used to
    determine a class membership probability.
    """

    def __init__(self, input, n_in, n_out):
        """ Initialize the parameters of the logistic regression

        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :param n_out: number of output units, the dimension of the space in
                      which the targets lie

        """

        # initialize theta = (W,b) with 0s; W gets the shape (n_in, n_out),
        # while b is a vector of n_out elements, making theta a vector of
        # n_in*n_out + n_out elements
        self.theta = shared(value=numpy.zeros(n_in * n_out + n_out))
        # W is represented by the first n_in*n_out elements of theta
        self.W = self.theta[0:n_in * n_out].reshape((n_in, n_out))
        # b is the rest (the last n_out elements)
        self.b = self.theta[n_in * n_out:n_in * n_out + n_out]

        # compute the vector of class-membership probabilities in symbolic form
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # compute the prediction as the class whose probability is maximal,
        # in symbolic form
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

    def negative_log_likelihood(self, y):
        """Return the negative log-likelihood of the prediction of this model
        under a given target distribution.

        :param y: a vector that gives, for each example, the correct label
        """
        # categorical_crossentropy returns, for each example, the negative
        # log-probability the model assigns to the correct label, i.e.
        # -log P(Y=y|x, W, b)
        return T.nnet.categorical_crossentropy(self.p_y_given_x, y)

    def errors(self, y):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch
        """

        # check if y has the same dimension as y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()

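
# Illustrative sketch (hypothetical helper, for exposition only): the same
# flat-parameter layout that LogisticRegression uses above, in plain numpy.
# scipy's optimizers operate on a single flat vector, so W and b live in one
# theta and are recovered as views, without copying:
def _numpy_unpack_theta(theta, n_in, n_out):
    """Split a flat vector of n_in*n_out + n_out elements into W of shape
    (n_in, n_out) and b of shape (n_out,)."""
    W = theta[:n_in * n_out].reshape((n_in, n_out))
    b = theta[n_in * n_out:]
    return W, b
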
def cg_optimization_mnist(n_iter=50):
    """Demonstrate conjugate gradient optimization of a log-linear model

    This is demonstrated on MNIST.

    :param n_iter: number of iterations to run the optimizer

    """
    #TODO: Tzanetakis

    # Load the dataset; note that the dataset is already divided into
    # minibatches of size 10
    f = gzip.open('mnist.pkl.gz', 'rb')
    train_batches, valid_batches, test_batches = cPickle.load(f)
    f.close()

    ishape = (28, 28)  # this is the size of MNIST images
    batch_size = 10    # size of the minibatch
    n_in = 28 * 28     # number of input units
    n_out = 10         # number of output units

    # allocate symbolic variables for the data
    x = T.fmatrix()  # the data is presented as rasterized images
    y = T.lvector()  # the labels are presented as a 1D vector of
                     # [long int] labels

    # construct the logistic regression class
    classifier = LogisticRegression(
        input=x.reshape((batch_size, 28 * 28)), n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model, in symbolic format
    cost = classifier.negative_log_likelihood(y).mean()

    # compile a theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = pfunc([x, y], classifier.errors(y))
    # compile a theano function that returns the gradient of the minibatch
    # cost with respect to theta
    batch_grad = pfunc([x, y], T.grad(cost, classifier.theta))
    # compile a theano function that returns the cost of a minibatch
    batch_cost = pfunc([x, y], cost)

    # creates a function that computes the average cost on the training set
    def train_fn(theta_value):
        classifier.theta.value = theta_value
        cost = 0.
        for x, y in train_batches:
            cost += batch_cost(x, y)
        return cost / len(train_batches)

    # creates a function that computes the average gradient of the cost
    # with respect to theta
    def train_fn_grad(theta_value):
        classifier.theta.value = theta_value
        grad = numpy.zeros(n_in * n_out + n_out)
        for x, y in train_batches:
            grad += batch_grad(x, y)
        return grad / len(train_batches)

    # [best validation loss so far, test score of the corresponding model]
    validation_scores = [float('inf'), 0]

    # creates the validation function
    def callback(theta_value):
        classifier.theta.value = theta_value
        # compute the validation loss
        this_validation_loss = 0.
        for x, y in valid_batches:
            this_validation_loss += test_model(x, y)

        this_validation_loss /= len(valid_batches)

        print('validation error %f' % (this_validation_loss,))

        # check if it is better than the best validation score so far
        if this_validation_loss < validation_scores[0]:
            # if so, replace the old one, and compute the score on the
            # testing dataset
            validation_scores[0] = this_validation_loss
            test_score = 0.
            for x, y in test_batches:
                test_score += test_model(x, y)
            validation_scores[1] = test_score / len(test_batches)

    # using scipy conjugate gradient optimizer
    import scipy.optimize
    print("Optimizing using scipy.optimize.fmin_cg...")
    best_w_b = scipy.optimize.fmin_cg(
        f=train_fn,
        x0=numpy.zeros((n_in + 1) * n_out, dtype=x.dtype),
        fprime=train_fn_grad,
        callback=callback,
        disp=0,
        maxiter=n_iter)

    print(('Optimization complete with best validation score of %f, with '
           'test performance %f') %
          (validation_scores[0], validation_scores[1]))

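
# Illustrative sketch (hypothetical, for exposition only): the
# scipy.optimize.fmin_cg calling convention used above, on a toy quadratic.
# f maps a flat parameter vector to a scalar cost, fprime returns its
# gradient, and callback is invoked once per iteration with the current
# parameter vector:
def _fmin_cg_toy_demo():
    import scipy.optimize
    f = lambda w: ((w - 3.) ** 2).sum()  # scalar cost, minimum at w = [3, 3]
    fprime = lambda w: 2. * (w - 3.)     # its gradient
    w_opt = scipy.optimize.fmin_cg(
        f=f, x0=numpy.zeros(2), fprime=fprime,
        callback=lambda w: None, disp=0, maxiter=50)
    return w_opt                          # approximately [3., 3.]
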

if __name__ == '__main__':
    cg_optimization_mnist()
