Commit f3eb854
committed: "white space fix."
1 parent 19040c3 commit f3eb854

1 file changed: code/logistic_cg.py
Lines changed: 56 additions & 57 deletions
@@ -1,11 +1,11 @@
 """
-This tutorial introduces logistic regression using Theano and conjugate
-gradient descent.
+This tutorial introduces logistic regression using Theano and conjugate
+gradient descent.
 
 Logistic regression is a probabilistic, linear classifier. It is parametrized
 by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
 done by projecting data points onto a set of hyperplanes, the distance to
-which is used to determine a class membership probability.
+which is used to determine a class membership probability.
 
 Mathematically, this can be written as:
 
@@ -14,22 +14,22 @@
                 &= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}
 
 
-The output of the model or prediction is then done by taking the argmax of
+The output of the model or prediction is then done by taking the argmax of
 the vector whose i'th element is P(Y=i|x).
 
 .. math::
 
   y_{pred} = argmax_i P(Y=i|x,W,b)
 
 
-This tutorial presents a stochastic gradient descent optimization method
-suitable for large datasets, and a conjugate gradient optimization method
+This tutorial presents a stochastic gradient descent optimization method
+suitable for large datasets, and a conjugate gradient optimization method
 that is suitable for smaller datasets.
 
 
 References:
 
-   - textbooks: "Pattern Recognition and Machine Learning" -
+   - textbooks: "Pattern Recognition and Machine Learning" -
     Christopher M. Bishop, section 4.3.2
 
 
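Read as NumPy, the two formulas above amount to a softmax followed by an argmax. A minimal sketch (W, b, x are illustrative names, not part of the patch):

    import numpy

    def predict(W, b, x):
        # class-membership probabilities: softmax of the linear scores W x + b
        scores = numpy.dot(x, W) + b
        e = numpy.exp(scores - scores.max())  # shift by max for numerical stability
        p_y_given_x = e / e.sum()
        # y_pred = argmax_i P(Y=i|x, W, b)
        return numpy.argmax(p_y_given_x)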
@@ -46,10 +46,10 @@
 class LogisticRegression(object):
     """Multi-class Logistic Regression Class
 
-    The logistic regression is fully described by a weight matrix :math:`W`
-    and bias vector :math:`b`. Classification is done by projecting data
-    points onto a set of hyperplanes, the distance to which is used to
-    determine a class membership probability.
+    The logistic regression is fully described by a weight matrix :math:`W`
+    and bias vector :math:`b`. Classification is done by projecting data
+    points onto a set of hyperplanes, the distance to which is used to
+    determine a class membership probability.
 
     """
 
 
@@ -59,20 +59,20 @@ def __init__(self, input, n_in, n_out):
         """ Initialize the parameters of the logistic regression
 
         :type input: theano.tensor.TensorType
-        :param input: symbolic variable that describes the input of the
+        :param input: symbolic variable that describes the input of the
                       architecture ( one minibatch)
 
         :type n_in: int
-        :param n_in: number of input units, the dimension of the space in
+        :param n_in: number of input units, the dimension of the space in
                      which the datapoint lies
 
         :type n_out: int
-        :param n_out: number of output units, the dimension of the space in
+        :param n_out: number of output units, the dimension of the space in
                       which the target lies
 
-        """
+        """
 
-        # initialize theta = (W,b) with 0s; W gets the shape (n_in, n_out),
+        # initialize theta = (W,b) with 0s; W gets the shape (n_in, n_out),
         # while b is a vector of n_out elements, making theta a vector of
         # n_in*n_out + n_out elements
         self.theta = theano.shared(value=numpy.zeros(n_in*n_out+n_out, dtype=theano.config.floatX),
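For context beyond this hunk, the flat theta vector is conventionally unpacked into W and b by slicing and reshaping; a hedged sketch of that idea in plain NumPy (the exact slicing lines are not part of this diff):

    import numpy

    n_in, n_out = 28 * 28, 10
    # one flat parameter vector of n_in*n_out + n_out zeros, as above
    theta = numpy.zeros(n_in * n_out + n_out)
    # the first n_in*n_out entries form W; the remaining n_out entries form b
    W = theta[:n_in * n_out].reshape((n_in, n_out))
    b = theta[n_in * n_out:]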
@@ -86,7 +86,7 @@ def __init__(self, input, n_in, n_out):
         # compute vector of class-membership probabilities in symbolic form
         self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
 
-        # compute prediction as class whose probability is maximal in
+        # compute prediction as class whose probability is maximal in
         # symbolic form
         self.y_pred=T.argmax(self.p_y_given_x, axis=1)
 
@@ -96,13 +96,13 @@ def __init__(self, input, n_in, n_out):
 
     def negative_log_likelihood(self, y):
         """Return the negative log-likelihood of the prediction of this model
-        under a given target distribution.
+        under a given target distribution.
 
         .. math::
 
-            \frac{1}{|\mathcal{D}|}\mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
+            \frac{1}{|\mathcal{D}|}\mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
             \frac{1}{|\mathcal{D}|}\sum_{i=0}^{|\mathcal{D}|} \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
-            \ell (\theta=\{W,b\}, \mathcal{D})
+            \ell (\theta=\{W,b\}, \mathcal{D})
 
         :type y: theano.tensor.TensorType
         :param y: corresponds to a vector that gives for each example the
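The docstring's mean negative log-likelihood indexes the matrix of class probabilities with the target labels and averages the logs. A plain NumPy sketch of the same quantity (names are illustrative):

    import numpy

    def negative_log_likelihood(p_y_given_x, y):
        # p_y_given_x: (n_examples, n_classes) rows of class probabilities
        # y: one integer label per example
        # pick log P(Y=y_i | x_i) for each example i, then take the mean
        log_probs = numpy.log(p_y_given_x[numpy.arange(y.shape[0]), y])
        return -numpy.mean(log_probs)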
@@ -115,19 +115,19 @@ def negative_log_likelihood(self, y):
 
 
     def errors(self, y):
-        """Return a float representing the number of errors in the minibatch
-        over the total number of examples of the minibatch
+        """Return a float representing the number of errors in the minibatch
+        over the total number of examples of the minibatch
 
         :type y: theano.tensor.TensorType
         :param y: corresponds to a vector that gives for each example
                   the correct label
         """
 
-        # check if y has the same dimension as y_pred
+        # check if y has the same dimension as y_pred
         if y.ndim != self.y_pred.ndim:
-            raise TypeError('y should have the same shape as self.y_pred',
+            raise TypeError('y should have the same shape as self.y_pred',
                 ('y', y.type, 'y_pred', self.y_pred.type))
-        # check if y is of the correct datatype
+        # check if y is of the correct datatype
         if y.dtype.startswith('int'):
             # the T.neq operator returns a vector of 0s and 1s, where 1
             # represents a mistake in prediction
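The int branch above typically finishes by averaging that vector of per-example mistakes; assuming the usual T.neq/T.mean combination, the idea is:

    import theano.tensor as T

    def error_rate(y_pred, y):
        # T.neq yields 1 where prediction and label differ, 0 elsewhere;
        # the mean of that vector is the fraction of misclassified examples
        return T.mean(T.neq(y_pred, y))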
@@ -142,15 +142,15 @@ def errors(self, y):
 
 
 def cg_optimization_mnist( n_epochs=50, mnist_pkl_gz='../data/mnist.pkl.gz' ):
-    """Demonstrate conjugate gradient optimization of a log-linear model
+    """Demonstrate conjugate gradient optimization of a log-linear model
 
     This is demonstrated on MNIST.
-
+
     :type n_epochs: int
-    :param n_epochs: number of epochs to run the optimizer
+    :param n_epochs: number of epochs to run the optimizer
 
     :type mnist_pkl_gz: string
-    :param mnist_pkl_gz: the path of the mnist training file from
+    :param mnist_pkl_gz: the path of the mnist training file from
                  http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
 
     """
@@ -159,18 +159,18 @@ def cg_optimization_mnist( n_epochs=50, mnist_pkl_gz='../data/mnist.pkl.gz' ):
     #############
     print '... loading data'
 
-    # Load the dataset
+    # Load the dataset
     f = gzip.open(mnist_pkl_gz,'rb')
     train_set, valid_set, test_set = cPickle.load(f)
     f.close()
 
     def shared_dataset(data_xy):
         """ Function that loads the dataset into shared variables
-
-        The reason we store our dataset in shared variables is to allow
-        Theano to copy it into the GPU memory (when code is run on GPU).
+
+        The reason we store our dataset in shared variables is to allow
+        Theano to copy it into the GPU memory (when code is run on GPU).
         Since copying data into the GPU is slow, copying a minibatch every time
-        it is needed (the default behaviour if the data is not in a shared
+        it is needed (the default behaviour if the data is not in a shared
         variable) would lead to a large decrease in performance.
         """
         data_x, data_y = data_xy
@@ -179,8 +179,8 @@ def shared_dataset(data_xy):
         # When storing data on the GPU it has to be stored as floats
         # therefore we will store the labels as ``floatX`` as well
         # (``shared_y`` does exactly that). But during our computations
-        # we need them as ints (we use labels as index, and if they are
-        # floats it doesn't make sense) therefore instead of returning
+        # we need them as ints (we use labels as index, and if they are
+        # floats it doesn't make sense) therefore instead of returning
         # ``shared_y`` we will have to cast it to int. This little hack
         # lets us get around this issue
         return shared_x, T.cast(shared_y, 'int32')
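The start of shared_dataset falls outside this hunk; the usual pattern, sketched under that assumption, wraps both arrays in theano.shared as floatX and then casts the labels back to int as on the last line above:

    import numpy
    import theano
    import theano.tensor as T

    def shared_dataset(data_xy):
        data_x, data_y = data_xy
        # store inputs and labels as floatX so both can live in GPU memory
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
        # labels are used as indices, so hand back an int32 view of shared_y
        return shared_x, T.cast(shared_y, 'int32')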
@@ -204,24 +204,24 @@ def shared_dataset(data_xy):
 
     ######################
     # BUILD ACTUAL MODEL #
-    ######################
+    ######################
     print '... building the model'
 
     # allocate symbolic variables for the data
-    minibatch_offset = T.lscalar()  # offset to the start of a [mini]batch
+    minibatch_offset = T.lscalar()  # offset to the start of a [mini]batch
     x = T.matrix()   # the data is presented as rasterized images
-    y = T.ivector()  # the labels are presented as 1D vector of
+    y = T.ivector()  # the labels are presented as 1D vector of
                      # [int] labels
 
-
+
     # construct the logistic regression class
     classifier = LogisticRegression( input=x, n_in=28*28, n_out=10)
 
-    # the cost we minimize during training is the negative log likelihood of
+    # the cost we minimize during training is the negative log likelihood of
     # the model in symbolic format
-    cost = classifier.negative_log_likelihood(y).mean()
+    cost = classifier.negative_log_likelihood(y).mean()
 
-    # compile a theano function that computes the mistakes that are made by
+    # compile a theano function that computes the mistakes that are made by
     # the model on a minibatch
     test_model = theano.function([minibatch_offset], classifier.errors(y),
             givens={
@@ -235,17 +235,17 @@ def shared_dataset(data_xy):
               y:valid_set_y[minibatch_offset:minibatch_offset+batch_size]},
             name="validate")
 
-    # compile a theano function that returns the cost of a minibatch
-    batch_cost = theano.function([minibatch_offset], cost,
+    # compile a theano function that returns the cost of a minibatch
+    batch_cost = theano.function([minibatch_offset], cost,
             givens= {
               x : train_set_x[minibatch_offset:minibatch_offset+batch_size],
              y : train_set_y[minibatch_offset:minibatch_offset+batch_size]},
             name="batch_cost")
 
-
-    # compile a theano function that returns the gradient of the minibatch
+
+    # compile a theano function that returns the gradient of the minibatch
     # with respect to theta
-    batch_grad = theano.function([minibatch_offset], T.grad(cost,classifier.theta),
+    batch_grad = theano.function([minibatch_offset], T.grad(cost,classifier.theta),
             givens= {
              x : train_set_x[minibatch_offset:minibatch_offset+batch_size],
              y : train_set_y[minibatch_offset:minibatch_offset+batch_size]},
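The givens dictionaries in these compiled functions swap a slice of the shared dataset in for the free inputs x and y at call time. A self-contained toy of the same pattern:

    import numpy
    import theano
    import theano.tensor as T

    data = theano.shared(numpy.arange(10, dtype=theano.config.floatX))
    i = T.lscalar()   # offset, playing the role of minibatch_offset
    v = T.vector()    # free input that givens will replace
    doubled = theano.function([i], 2 * v,
            givens={v: data[i:i + 3]})  # substitute a 3-element slice for v
    print(doubled(2))  # roughly [ 4.  6.  8.]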
@@ -258,7 +258,7 @@ def train_fn(theta_value):
         train_losses = [batch_cost(i*batch_size) for i in xrange(n_train_batches)]
         return numpy.mean(train_losses)
 
-    # creates a function that computes the average gradient of cost with
+    # creates a function that computes the average gradient of cost with
     # respect to theta
     def train_fn_grad(theta_value):
         classifier.theta.value = theta_value
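The rest of train_fn_grad is cut off by the hunk boundary; in this tutorial it conventionally averages batch_grad over all minibatches, mirroring how train_fn averages the costs. A hedged sketch, reusing the names defined above:

    def train_fn_grad(theta_value):
        classifier.theta.value = theta_value
        # accumulate the gradient over every minibatch, then average
        grad = batch_grad(0)
        for i in xrange(1, n_train_batches):
            grad += batch_grad(i * batch_size)
        return grad / n_train_batches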
@@ -269,18 +269,18 @@ def train_fn_grad(theta_value):
 
 
     validation_scores = [float('inf'), 0]
-
+
     # creates the validation function
     def callback(theta_value):
         classifier.theta.value = theta_value
         # compute the validation loss
         validation_losses = [validate_model(i*batch_size) for i in xrange(n_valid_batches)]
         this_validation_loss = numpy.mean(validation_losses)
         print('validation error %f %%' % (this_validation_loss*100.,))
-
+
         # check if it is better than the best validation score obtained until now
         if this_validation_loss < validation_scores[0]:
-            # if so, replace the old one, and compute the score on the
+            # if so, replace the old one, and compute the score on the
             # testing dataset
             validation_scores[0] = this_validation_loss
             test_loses = [test_model(i*batch_size) for i in xrange(n_test_batches)]
@@ -289,26 +289,25 @@ def callback(theta_value):
     ###############
     # TRAIN MODEL #
     ###############
-
-    # using scipy conjugate gradient optimizer
+
+    # using scipy conjugate gradient optimizer
     import scipy.optimize
     print ("Optimizing using scipy.optimize.fmin_cg...")
     start_time = time.clock()
     best_w_b = scipy.optimize.fmin_cg(
-           f = train_fn,
+           f = train_fn,
            x0 = numpy.zeros((n_in+1)*n_out, dtype=x.dtype),
            fprime = train_fn_grad,
            callback = callback,
            disp = 0,
            maxiter = n_epochs)
     end_time = time.clock()
     print(('Optimization complete with best validation score of %f %%, with '
-          'test performance %f %%') %
+          'test performance %f %%') %
          (validation_scores[0]*100., validation_scores[1]*100.))
 
     print >> sys.stderr, ('The code for file '+os.path.split(__file__)[1]+' ran for %.1fs' % ((end_time-start_time)))
 
 
 if __name__ == '__main__':
     cg_optimization_mnist()
-
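fmin_cg itself only needs a scalar objective, its gradient, and a starting point, which is exactly what train_fn, train_fn_grad, and x0 supply above. The same calling pattern on a toy quadratic:

    import numpy
    import scipy.optimize

    def f(w):
        # convex stand-in for the minibatch-averaged training cost
        return 0.5 * numpy.sum((w - 3.0) ** 2)

    def fprime(w):
        # analytic gradient, playing the role of train_fn_grad
        return w - 3.0

    def callback(w):
        # invoked once per iteration, like the validation callback above
        print('objective: %f' % f(w))

    w_opt = scipy.optimize.fmin_cg(f=f, x0=numpy.zeros(5), fprime=fprime,
                                   callback=callback, disp=0, maxiter=50)
    print(w_opt)  # close to [3. 3. 3. 3. 3.]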
