@@ -40,27 +40,44 @@ The code to do this in theano is the following:
 
 .. code-block:: python
 
-    # allocate shared variables for inputs and model params
-    x = theano.shared(numpy.zeros((5,784)))
-    y = theano.shared(numpy.zeros((5)))
+    # generate symbolic variables for input (x and y represent a
+    # minibatch)
+    x = T.fmatrix()
+    y = T.lvector()
+
+    # allocate shared variables for the model params
     b = theano.shared(numpy.random.rand(10))
     W = theano.shared(numpy.random.rand(784, 10))
 
-    # compute vector of class-membership probabilities
+    # symbolic expression for computing the vector of
+    # class-membership probabilities
     p_y_given_x = T.nnet.softmax(T.dot(x, W) + b)
 
-    print 'Probability that x is of class %i is %f' % i, p_y_given_x[i]
+    # compiled theano function that returns the vector of class-membership
+    # probabilities
+    get_p_y_given_x = theano.function([x], p_y_given_x)
+
+    # print the probability of some example represented by x_value
+    # x_value is not a symbolic variable but a numpy array describing the
+    # datapoint
+    print 'Probability that x is of class %i is %f' % (i, get_p_y_given_x(x_value)[i])
 
-    # compute prediction as class whose probability is maximal
+    # symbolic description of how to compute prediction as class whose
+    # probability is maximal
     y_pred = T.argmax(p_y_given_x, axis=1)
-    classify = pfunc([x,y], y_pred)
+
+    # compiled theano function that returns this value
+    classify = theano.function([x], y_pred)
 
 
-We first start by allocating shared variables for the parameters :math:`W,b` and
-and inputs :math:`x,y`. This step declares them both as symbolic theano
+We start by allocating symbolic variables for the inputs :math:`x,y`, and
+then allocate shared variables for the parameters :math:`W,b`.
+This step declares them both as symbolic theano
 variables, but also initializes their contents. The dot and softmax operators
 are then used to compute the vector :math:`P(Y|x, W,b)`. The resulting
-variable p_y_given_x is a vector and can thus be index to retrieve a
+variable p_y_given_x is a symbolic variable pointing to a vector. The function
+`get_p_y_given_x` computes this vector for a given x. The output of the
+function is a vector and can thus be indexed to retrieve a
 particular entry :math:`P(Y=i|x, W,b)`. The final model prediction is then
 computed using the T.argmax operator.
 
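For reference, the quantity computed by the dot and softmax operators can be
written out explicitly. This is the standard softmax formulation, added here
for clarity rather than quoted from the tutorial:

.. math::

    P(Y=i|x, W,b) = \mathrm{softmax}_i(Wx + b) =
        \frac{e^{W_i x + b_i}}{\sum_j e^{W_j x + b_j}}

The prediction :math:`y_{pred} = \mathrm{argmax}_i P(Y=i|x, W,b)` is then the
class with the largest probability.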
@@ -103,10 +120,10 @@ The following Theano code defines the loss for a given minibatch:
 
 .. code-block:: python
 
-    loss = theano.sum(theano.log(p_y_given_x)[y])
+    loss = -T.sum(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
 
 .. note::
-    In practice, we will use the mean (theano.mean) instead of the sum. This
+    In practice, we will use the mean (T.mean) instead of the sum. This
     allows for the learning rate to be independent of the minibatch size.
 
 
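Concretely, the mean-based form suggested in the note would look like the
following; this is a sketch built from the expression above, not a line from
the tutorial itself:

.. code-block:: python

    # negative log-likelihood averaged over the minibatch; using the mean
    # keeps gradient magnitudes independent of the minibatch size
    loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
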
@@ -120,6 +137,7 @@ encapsulates the basic behaviour for LogisticRegression:
 
     class LogisticRegression(object):
 
+
         def __init__(self, input, n_in, n_out):
             """ Initialize the parameters of the logistic regression
             :param input: symbolic variable that describes the input of the
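The hunk above shows only the head of the class. For orientation, here is a
minimal sketch of how the body might continue, assembled from the expressions
introduced earlier in this section; the exact initialization and method layout
are assumptions, not the tutorial's verbatim code:

.. code-block:: python

    import numpy
    import theano
    import theano.tensor as T

    class LogisticRegression(object):

        def __init__(self, input, n_in, n_out):
            # initialize the weights W as an (n_in, n_out) zero matrix and
            # the biases b as a zero vector of length n_out
            self.W = theano.shared(numpy.zeros((n_in, n_out)))
            self.b = theano.shared(numpy.zeros(n_out))

            # symbolic expression for the vector of class-membership
            # probabilities
            self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

            # symbolic description of the most probable class
            self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        def negative_log_likelihood(self, y):
            # per-example negative log-likelihood; the caller averages it
            # (see `cost = classifier.negative_log_likelihood(y).mean()`)
            return -T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]
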
@@ -160,9 +178,15 @@ We instantiate the class and declare a global cost which we wish to minimize:
 
 .. code-block:: python
 
     # allocate symbolic variables for the data
-    x = tensor.fmatrix()  # the data is presented as rasterized images
-    y = tensor.lvector()  # the labels are presented as 1D vector of [long int] labels
-    classifier = LogisticRegression(input=x.reshape((batch_size,784)), n_in=784, n_out=10)
+    x = T.fmatrix()  # the data is presented as rasterized images
+    y = T.lvector()  # the labels are presented as a 1D vector of [long int] labels
+
+    # construct the logistic regression class
+    classifier = LogisticRegression(
+        input=x.reshape((batch_size, 28 * 28)), n_in=28 * 28, n_out=10)
+
+    # the cost we minimize during training is the negative log likelihood of
+    # the model in symbolic format
     cost = classifier.negative_log_likelihood(y).mean()
 
 
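The training loop in the final listing calls `train_model` and `test_model`,
which are compiled from `cost` outside the hunks shown here. A plausible
sketch of those two functions, assuming plain SGD updates, a zero-one-loss
output, and the `W`, `b` and `y_pred` attributes from the class sketch above
(the learning rate value is also an assumption):

.. code-block:: python

    # symbolic gradients of the cost with respect to the parameters
    g_W = T.grad(cost, classifier.W)
    g_b = T.grad(cost, classifier.b)

    learning_rate = 0.13  # assumed value; not specified in this excerpt

    # one SGD step on a minibatch, returning the minibatch cost
    train_model = theano.function(
        inputs=[x, y],
        outputs=cost,
        updates=[(classifier.W, classifier.W - learning_rate * g_W),
                 (classifier.b, classifier.b - learning_rate * g_b)])

    # mean zero-one loss on a minibatch, used for validation and testing
    test_model = theano.function(
        inputs=[x, y],
        outputs=T.mean(T.neq(classifier.y_pred, y)))
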
@@ -219,46 +243,67 @@ The finished product is as follows:
 
 .. code-block:: python
 
     # early-stopping parameters
-    patience = 2000               # look as this many examples regardless
-    patience_increase = 2         # wait this much longer when a new best is
-                                  # found
-    improvement_threshold = 0.99  # a relative improvement of this much is
-                                  # considered significant
-    validation_frequency = 1000   # make this many SGD updates between
-                                  # validations
+    patience = 5000                # look at this many examples regardless
+    patience_increase = 2          # wait this much longer when a new best is
+                                   # found
+    improvement_threshold = 0.995  # a relative improvement of this much is
+                                   # considered significant
+    validation_frequency = 1000    # make this many SGD updates between
+                                   # validations
 
     best_params = None
     best_validation_loss = float('inf')
+    test_score = 0.
+
+    # have a maximum of `n_iter` iterations through the entire dataset
+    for iter in xrange(n_iter * len(train_batches)):
+
+        # get epoch and minibatch index
+        epoch = iter / len(train_batches)
+        minibatch_index = iter % len(train_batches)
+
+        # get the minibatch corresponding to `iter` modulo
+        # `len(train_batches)`
+        x, y = train_batches[minibatch_index]
+        cost_ij = train_model(x, y)
+
+        if (iter + 1) % validation_frequency == 0:
+            # compute zero-one loss on validation set
+            this_validation_loss = 0.
+            for x, y in valid_batches:
+                # sum up the errors for each minibatch
+                this_validation_loss += test_model(x, y)
+            # get the average by dividing by the number of minibatches
+            this_validation_loss /= len(valid_batches)
+
+            print('epoch %i, validation error %f' %
+                  (epoch, this_validation_loss))
 
+            # improve patience if the improvement is significant enough
+            if this_validation_loss < best_validation_loss * \
+                    improvement_threshold:
+                patience = max(patience, iter * patience_increase)
 
-    for i in xrange(n_iter):
-        # go through the training set and update the model parameters
-        for x,y in train_batches:
-            cost_ij = train_model(x, y)
-
 
-        # test the model on the validation set ( measuring the average number
-        # of errors )
-        valid_score = 0.
-        for x,y in valid_batches:
-            # sum up the errors for each minibatch
-            valid_score += test_model(x,y)
-        # get the average by dividing with the number of minibatches
-        valid_score /= len(valid_batches)
+            # if we got the best validation score until now
+            if this_validation_loss < best_validation_loss:
+                best_validation_loss = this_validation_loss
+                # test it on the test set
+
+                test_score = 0.
+                for x, y in test_batches:
+                    test_score += test_model(x, y)
+                test_score /= len(test_batches)
+                print('    epoch %i, test error of best model %f' %
+                      (epoch, test_score))
 
-        print('epoch %i, validation error %f' % (i, valid_score))
+        if patience <= iter:
+            break
 
 
-    # if we got the best validation score until now
-    if valid_score < best_valid_score:
-        best_valid_score = valid_score
-        # test it on the test set
+    print(('Optimization complete with best validation score of %f, '
+           'with test performance %f') % (best_validation_loss, test_score))
 
-    test_score = 0.
-    for x,y in test_batches:
-        test_score += test_model(x,y)
-    test_score /= len(test_batches)
-    print('epoch %i, test error of best model %f' % (i, test_score))
 
 
 