
Commit 69d7c41

remove trailing white space.
1 parent 2b53f67 commit 69d7c41

1 file changed

Lines changed: 61 additions & 61 deletions

File tree

code/mlp.py

@@ -1,11 +1,11 @@
 """
-This tutorial introduces the multilayer perceptron using Theano.
+This tutorial introduces the multilayer perceptron using Theano.

 A multilayer perceptron is a logistic regressor where
 instead of feeding the input to the logistic regression you insert a
-intermediate layer, called the hidden layer, that has a nonlinear
-activation function (usually tanh or sigmoid) . One can use many such
-hidden layers making the architecture deep. The tutorial will also tackle
+intermediate layer, called the hidden layer, that has a nonlinear
+activation function (usually tanh or sigmoid) . One can use many such
+hidden layers making the architecture deep. The tutorial will also tackle
 the problem of MNIST digit classification.

 .. math::
@@ -14,7 +14,7 @@

 References:

-- textbooks: "Pattern Recognition and Machine Learning" -
+- textbooks: "Pattern Recognition and Machine Learning" -
 Christopher M. Bishop, section 5

 """
@@ -38,7 +38,7 @@ def __init__(self, rng, input, n_in, n_out, W = None, b = None, activation = T.t
 and the bias vector b is of shape (n_out,).

 NOTE : The nonlinearity used here is tanh
-
+
 Hidden unit activation is given by: tanh(dot(input,W) + b)

 :type rng: numpy.random.RandomState
@@ -54,20 +54,20 @@ def __init__(self, rng, input, n_in, n_out, W = None, b = None, activation = T.t
 :param n_out: number of hidden units

 :type activation: theano.Op or function
-:param activation: Non linearity to be applied in the hidden
+:param activation: Non linearity to be applied in the hidden
 layer
 """
 self.input = input

 # `W` is initialized with `W_values` which is uniformely sampled
 # from sqrt(-6./(n_in+n_hidden)) and sqrt(6./(n_in+n_hidden))
 # for tanh activation function
-# the output of uniform if converted using asarray to dtype
+# the output of uniform if converted using asarray to dtype
 # theano.config.floatX so that the code is runable on GPU
 # Note : optimal initialization of weights is dependent on the
 # activation function used (among other things).
-# For example, results presented in [Xavier10] suggest that you
-# should use 4 times larger initial weights for sigmoid
+# For example, results presented in [Xavier10] suggest that you
+# should use 4 times larger initial weights for sigmoid
 # compared to tanh
 # We have no info for other function, so we use the same as tanh.
 if W is None:
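The comments above describe the rule used when W is None: sample W uniformly from +/- sqrt(6./(n_in+n_hidden)), cast the result to theano.config.floatX with asarray, and use 4x larger weights for a sigmoid activation, following [Xavier10]. A hedged, numpy-only sketch of that rule; the function name and the float32 stand-in for floatX are illustrative:

import numpy

def init_hidden_weights(rng, n_in, n_out, activation='tanh', dtype='float32'):
    """Uniform initialization in +/- sqrt(6 / (n_in + n_out)), as the comments above describe.
    For a sigmoid activation, [Xavier10] suggests weights 4x larger than for tanh."""
    bound = numpy.sqrt(6. / (n_in + n_out))
    W = numpy.asarray(rng.uniform(low=-bound, high=bound, size=(n_in, n_out)), dtype=dtype)
    if activation == 'sigmoid':
        W *= 4
    b = numpy.zeros((n_out,), dtype=dtype)
    return W, b

rng = numpy.random.RandomState(1234)
W, b = init_hidden_weights(rng, n_in=28 * 28, n_out=500)
print("W shape %s, range [%.4f, %.4f]" % (W.shape, W.min(), W.max()))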
@@ -96,12 +96,12 @@ def __init__(self, rng, input, n_in, n_out, W = None, b = None, activation = T.t
 class MLP(object):
 """Multi-Layer Perceptron Class

-A multilayer perceptron is a feedforward artificial neural network model
-that has one layer or more of hidden units and nonlinear activations.
-Intermediate layers usually have as activation function thanh or the
-sigmoid function (defined here by a ``SigmoidalLayer`` class) while the
-top layer is a softamx layer (defined here by a ``LogisticRegression``
-class).
+A multilayer perceptron is a feedforward artificial neural network model
+that has one layer or more of hidden units and nonlinear activations.
+Intermediate layers usually have as activation function thanh or the
+sigmoid function (defined here by a ``SigmoidalLayer`` class) while the
+top layer is a softamx layer (defined here by a ``LogisticRegression``
+class).
 """


@@ -113,49 +113,49 @@ def __init__(self, rng, input, n_in, n_hidden, n_out):
 :param rng: a random number generator used to initialize weights

 :type input: theano.tensor.TensorType
-:param input: symbolic variable that describes the input of the
+:param input: symbolic variable that describes the input of the
 architecture (one minibatch)

 :type n_in: int
-:param n_in: number of input units, the dimension of the space in
+:param n_in: number of input units, the dimension of the space in
 which the datapoints lie

 :type n_hidden: int
-:param n_hidden: number of hidden units
+:param n_hidden: number of hidden units

 :type n_out: int
-:param n_out: number of output units, the dimension of the space in
+:param n_out: number of output units, the dimension of the space in
 which the labels lie

 """

-# Since we are dealing with a one hidden layer MLP, this will
+# Since we are dealing with a one hidden layer MLP, this will
 # translate into a TanhLayer connected to the LogisticRegression
-# layer; this can be replaced by a SigmoidalLayer, or a layer
+# layer; this can be replaced by a SigmoidalLayer, or a layer
 # implementing any other nonlinearity
-self.hiddenLayer = HiddenLayer(rng = rng, input = input,
+self.hiddenLayer = HiddenLayer(rng = rng, input = input,
 n_in = n_in, n_out = n_hidden,
 activation = T.tanh)

-# The logistic regression layer gets as input the hidden units
+# The logistic regression layer gets as input the hidden units
 # of the hidden layer
-self.logRegressionLayer = LogisticRegression(
+self.logRegressionLayer = LogisticRegression(
 input = self.hiddenLayer.output,
 n_in = n_hidden,
 n_out = n_out)

-# L1 norm ; one regularization option is to enforce L1 norm to
-# be small
+# L1 norm ; one regularization option is to enforce L1 norm to
+# be small
 self.L1 = abs(self.hiddenLayer.W).sum() \
 + abs(self.logRegressionLayer.W).sum()

-# square of L2 norm ; one regularization option is to enforce
+# square of L2 norm ; one regularization option is to enforce
 # square of L2 norm to be small
 self.L2_sqr = (self.hiddenLayer.W**2).sum() \
 + (self.logRegressionLayer.W**2).sum()

-# negative log likelihood of the MLP is given by the negative
-# log likelihood of the output of the model, computed in the
+# negative log likelihood of the MLP is given by the negative
+# log likelihood of the output of the model, computed in the
 # logistic regression layer
 self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
 # same holds for the function computing the number of errors
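The hunk above builds the two regularization terms from the hidden-layer and logistic-regression weight matrices: L1 sums absolute values, L2_sqr sums squares. A small numpy sketch of the same computation, with illustrative weight shapes:

import numpy

def l1_l2_penalties(W_hidden, W_logreg):
    """L1 norm and squared L2 norm of the two weight matrices, as built in the hunk above."""
    L1 = numpy.abs(W_hidden).sum() + numpy.abs(W_logreg).sum()
    L2_sqr = (W_hidden ** 2).sum() + (W_logreg ** 2).sum()
    return L1, L2_sqr

rng = numpy.random.RandomState(1234)
L1, L2_sqr = l1_l2_penalties(rng.randn(28 * 28, 500), rng.randn(500, 10))
print("L1 = %.2f, L2_sqr = %.2f" % (L1, L2_sqr))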
@@ -169,28 +169,28 @@ def __init__(self, rng, input, n_in, n_hidden, n_out):
 def test_mlp( learning_rate=0.01, L1_reg = 0.00, L2_reg = 0.0001, n_epochs=1000,
 dataset = '../data/mnist.pkl.gz', batch_size = 20):
 """
-Demonstrate stochastic gradient descent optimization for a multilayer
+Demonstrate stochastic gradient descent optimization for a multilayer
 perceptron

 This is demonstrated on MNIST.

 :type learning_rate: float
-:param learning_rate: learning rate used (factor for the stochastic
+:param learning_rate: learning rate used (factor for the stochastic
 gradient

 :type L1_reg: float
-:param L1_reg: L1-norm's weight when added to the cost (see
+:param L1_reg: L1-norm's weight when added to the cost (see
 regularization)

 :type L2_reg: float
-:param L2_reg: L2-norm's weight when added to the cost (see
+:param L2_reg: L2-norm's weight when added to the cost (see
 regularization)
-
+
 :type n_epochs: int
-:param n_epochs: maximal number of epochs to run the optimizer
+:param n_epochs: maximal number of epochs to run the optimizer

 :type dataset: string
-:param dataset: the path of the MNIST dataset file from
+:param dataset: the path of the MNIST dataset file from
 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz


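A hedged usage sketch of test_mlp that simply mirrors the defaults of the signature above; it assumes mlp.py is importable from the working directory and that the MNIST pickle sits at the tutorial's default relative path:

# Illustrative only: mirrors the defaults of the signature above.
# Assumes mlp.py and '../data/mnist.pkl.gz' are available, as in the tutorial layout.
from mlp import test_mlp

test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
         n_epochs=1000, dataset='../data/mnist.pkl.gz', batch_size=20)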
@@ -210,36 +210,36 @@ def test_mlp( learning_rate=0.01, L1_reg = 0.00, L2_reg = 0.0001, n_epochs=1000,

 ######################
 # BUILD ACTUAL MODEL #
-######################
+######################
 print '... building the model'

 # allocate symbolic variables for the data
-index = T.lscalar() # index to a [mini]batch
+index = T.lscalar() # index to a [mini]batch
 x = T.matrix('x') # the data is presented as rasterized images
-y = T.ivector('y') # the labels are presented as 1D vector of
+y = T.ivector('y') # the labels are presented as 1D vector of
 # [int] labels

 rng = numpy.random.RandomState(1234)

 # construct the MLP class
 classifier = MLP( rng = rng, input=x, n_in=28*28, n_hidden = 500, n_out=10)

-# the cost we minimize during training is the negative log likelihood of
+# the cost we minimize during training is the negative log likelihood of
 # the model plus the regularization terms (L1 and L2); cost is expressed
 # here symbolically
 cost = classifier.negative_log_likelihood(y) \
 + L1_reg * classifier.L1 \
-+ L2_reg * classifier.L2_sqr
++ L2_reg * classifier.L2_sqr

 # compiling a Theano function that computes the mistakes that are made
 # by the model on a minibatch
-test_model = theano.function(inputs = [index],
+test_model = theano.function(inputs = [index],
 outputs = classifier.errors(y),
 givens={
 x:test_set_x[index*batch_size:(index+1)*batch_size],
 y:test_set_y[index*batch_size:(index+1)*batch_size]})

-validate_model = theano.function(inputs = [index],
+validate_model = theano.function(inputs = [index],
 outputs = classifier.errors(y),
 givens={
 x:valid_set_x[index*batch_size:(index+1)*batch_size],
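The givens dictionaries above map the symbolic x and y to the dataset slice selected by index, so each call to a compiled function sees exactly one minibatch. A tiny numpy sketch of that slicing; the array name and shape are illustrative:

import numpy

def minibatch(data, index, batch_size=20):
    """Select minibatch `index`, mirroring the slice used in the `givens` above."""
    return data[index * batch_size: (index + 1) * batch_size]

images = numpy.zeros((50000, 28 * 28))      # stand-in for train_set_x
print(minibatch(images, index=3).shape)     # (20, 784): rows 60..79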
@@ -255,17 +255,17 @@ def test_mlp( learning_rate=0.01, L1_reg = 0.00, L2_reg = 0.0001, n_epochs=1000,

 # specify how to update the parameters of the model as a dictionary
 updates = {}
-# given two list the zip A = [ a1,a2,a3,a4] and B = [b1,b2,b3,b4] of
+# given two list the zip A = [ a1,a2,a3,a4] and B = [b1,b2,b3,b4] of
 # same length, zip generates a list C of same size, where each element
-# is a pair formed from the two lists :
-# C = [ (a1,b1), (a2,b2), (a3,b3) , (a4,b4) ]
+# is a pair formed from the two lists :
+# C = [ (a1,b1), (a2,b2), (a3,b3) , (a4,b4) ]
 for param, gparam in zip(classifier.params, gparams):
 updates[param] = param - learning_rate*gparam

-# compiling a Theano function `train_model` that returns the cost, but
-# in the same time updates the parameter of the model based on the rules
+# compiling a Theano function `train_model` that returns the cost, but
+# in the same time updates the parameter of the model based on the rules
 # defined in `updates`
-train_model =theano.function( inputs = [index], outputs = cost,
+train_model =theano.function( inputs = [index], outputs = cost,
 updates = updates,
 givens={
 x:train_set_x[index*batch_size:(index+1)*batch_size],
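The loop above pairs each parameter with its gradient via zip and applies param - learning_rate * gparam. A plain-numpy sketch of one such SGD step; the arrays below are stand-ins, not Theano shared variables:

import numpy

def sgd_step(params, gparams, learning_rate=0.01):
    """Pair each parameter with its gradient via zip and move it against the gradient,
    as in the update rule shown above."""
    return [param - learning_rate * gparam
            for param, gparam in zip(params, gparams)]

# illustrative stand-ins for classifier.params and their gradients
params = [numpy.ones(3), numpy.full((3,), 2.0)]
gparams = [numpy.full((3,), 0.5), numpy.ones(3)]
print(sgd_step(params, gparams))   # [array of 0.995s, array of 1.99s]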
@@ -278,15 +278,15 @@ def test_mlp( learning_rate=0.01, L1_reg = 0.00, L2_reg = 0.0001, n_epochs=1000,

 # early-stopping parameters
 patience = 10000 # look as this many examples regardless
-patience_increase = 2 # wait this much longer when a new best is
+patience_increase = 2 # wait this much longer when a new best is
 # found
-improvement_threshold = 0.995 # a relative improvement of this much is
+improvement_threshold = 0.995 # a relative improvement of this much is
 # considered significant
-validation_frequency = min(n_train_batches,patience/2)
-# go through this many
-# minibatche before checking the network
-# on the validation set; in this case we
-# check every epoch
+validation_frequency = min(n_train_batches,patience/2)
+# go through this many
+# minibatche before checking the network
+# on the validation set; in this case we
+# check every epoch


 best_params = None
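The knobs above (patience, patience_increase, improvement_threshold, validation_frequency) drive patience-based early stopping: a sufficiently better validation loss extends the patience, and training stops once the iteration count exceeds it. A hedged, self-contained sketch of such a schedule; the helper below is illustrative and not copied from mlp.py:

def should_stop(iter, this_validation_loss, state):
    """Illustrative patience update: a validation loss better than the current best by the
    improvement threshold extends patience; stop once `iter` exceeds the current patience."""
    patience_increase = 2
    improvement_threshold = 0.995
    if this_validation_loss < state['best_validation_loss'] * improvement_threshold:
        state['patience'] = max(state['patience'], iter * patience_increase)
    state['best_validation_loss'] = min(state['best_validation_loss'], this_validation_loss)
    return state['patience'] <= iter

state = {'patience': 10000, 'best_validation_loss': float('inf')}
print(should_stop(5000, 0.08, state))    # False; patience stays at 10000
print(should_stop(12000, 0.05, state))   # False; the improvement extends patience to 24000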
@@ -306,8 +306,8 @@ def test_mlp( learning_rate=0.01, L1_reg = 0.00, L2_reg = 0.0001, n_epochs=1000,
 # iteration number
 iter = epoch * n_train_batches + minibatch_index

-if (iter+1) % validation_frequency == 0:
-# compute zero-one loss on validation set
+if (iter+1) % validation_frequency == 0:
+# compute zero-one loss on validation set
 validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
 this_validation_loss = numpy.mean(validation_losses)

@@ -329,7 +329,7 @@ def test_mlp( learning_rate=0.01, L1_reg = 0.00, L2_reg = 0.0001, n_epochs=1000,
 test_losses = [test_model(i) for i in xrange(n_test_batches)]
 test_score = numpy.mean(test_losses)

-print((' epoch %i, minibatch %i/%i, test error of best '
+print((' epoch %i, minibatch %i/%i, test error of best '
 'model %f %%') % \
 (epoch, minibatch_index+1, n_train_batches,test_score*100.))

@@ -340,7 +340,7 @@ def test_mlp( learning_rate=0.01, L1_reg = 0.00, L2_reg = 0.0001, n_epochs=1000,

 end_time = time.clock()
 print(('Optimization complete. Best validation score of %f %% '
-'obtained at iteration %i, with test performance %f %%') %
+'obtained at iteration %i, with test performance %f %%') %
 (best_validation_loss * 100., best_iter, test_score*100.))
 print >> sys.stderr, ('The code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((end_time-start_time)/60.))
