@@ -128,26 +128,9 @@ Given a sentence i.e. an array of indexes, and a window size i.e. 1,3,5,..., we
need to convert each word in the sentence to a context window surrounding this
particular word. In detail, we have::

-    def contextwin(l, win):
-        '''
-        win :: int corresponding to the size of the window
-        l :: array containing the word indexes
-
-        given a list of indexes composing a sentence, it will
-        return a list of lists of indexes corresponding to the
-        context windows surrounding each word in the sentence
-        '''
-
-        assert (win % 2) == 1
-        assert win >= 1
-        l = list(l)
-
-        lpadded = win // 2 * [-1] + l + win // 2 * [-1]
-        out = [lpadded[i:(i + win)] for i in range(len(l))]
-
-        assert len(out) == len(l)
-        return out
+.. literalinclude:: ../code/rnnslu.py
+  :start-after: start-snippet-1
+  :end-before: end-snippet-1

The index ``-1`` corresponds to the ``PADDING`` index we insert at the
beginning/end of the sentence.
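
As an illustration, here is the behavior one can expect from ``contextwin``
on a toy sentence with a window of size 3 (a minimal sketch, assuming the
function above)::

    >>> contextwin([0, 1, 2, 3, 4], 3)
    [[-1, 0, 1],
     [0, 1, 2],
     [1, 2, 3],
     [2, 3, 4],
     [3, 4, -1]]
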
@@ -254,71 +237,39 @@ The **hyperparameters** define the whole architecture:

This gives the following code::

-    class RNNSLU(object):
-
-        def __init__(self, nh, nc, ne, de, cs):
-            '''
-            nh :: dimension of the hidden layer
-            nc :: number of classes
-            ne :: number of word embeddings in the vocabulary
-            de :: dimension of the word embeddings
-            cs :: word window context size
-            '''
-            # add one for PADDING at the end
-            self.emb = theano.shared(name='embeddings',
-                                     value=0.2 * numpy.random.uniform(-1.0, 1.0, (ne + 1, de))
-                                     .astype(theano.config.floatX))
-            self.Wx = theano.shared(name='Wx',
-                                    value=0.2 * numpy.random.uniform(-1.0, 1.0, (de * cs, nh))
-                                    .astype(theano.config.floatX))
-            self.Wh = theano.shared(name='Wh',
-                                    value=0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nh))
-                                    .astype(theano.config.floatX))
-            self.W = theano.shared(name='W',
-                                   value=0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nc))
-                                   .astype(theano.config.floatX))
-            self.bh = theano.shared(name='bh', value=numpy.zeros(nh, dtype=theano.config.floatX))
-            self.b = theano.shared(name='b', value=numpy.zeros(nc, dtype=theano.config.floatX))
-            self.h0 = theano.shared(name='h0', value=numpy.zeros(nh, dtype=theano.config.floatX))
-
-            # bundle
-            self.params = [self.emb, self.Wx, self.Wh, self.W, self.bh, self.b, self.h0]
+.. literalinclude:: ../code/rnnslu.py
+  :start-after: start-snippet-2
+  :end-before: end-snippet-2

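As an illustration, one could instantiate the model as follows. The values
below are placeholders for this sketch rather than tuned settings, and
``words2idx`` is assumed to be the word-to-index dictionary built when
loading the dataset::

    rnn = RNNSLU(nh=100,             # size of the hidden layer
                 nc=127,             # number of slot labels in ATIS
                 ne=len(words2idx),  # vocabulary size
                 de=50,              # word embedding dimension
                 cs=5)               # context window size
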
Then we build the input of the network from the embedding matrix::

-        idxs = T.imatrix()  # as many rows as words in the sentence,
-                            # as many columns as the context window size
-        x = self.emb[idxs].reshape((idxs.shape[0], de * cs))
-        y = T.ivector('y')  # labels
+.. literalinclude:: ../code/rnnslu.py
+  :start-after: start-snippet-3
+  :end-before: end-snippet-3

We use the scan operator to construct the recurrence, and it works like a charm::

-        def recurrence(x_t, h_tm1):
-            h_t = T.nnet.sigmoid(T.dot(x_t, self.Wx) + T.dot(h_tm1, self.Wh) + self.bh)
-            s_t = T.nnet.softmax(T.dot(h_t, self.W) + self.b)
-            return [h_t, s_t]
-
-        [h, s], _ = theano.scan(fn=recurrence,
-                                sequences=x,
-                                outputs_info=[self.h0, None],
-                                n_steps=x.shape[0])
-
-        p_y_given_x_sentence = s[:, 0, :]
-        y_pred = T.argmax(p_y_given_x_sentence, axis=1)
+.. literalinclude:: ../code/rnnslu.py
+  :start-after: start-snippet-4
+  :end-before: end-snippet-4

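Note that ``T.nnet.softmax`` always returns a 2-D tensor (a vector is treated
as a single row), so ``s`` has shape ``(n_steps, 1, nc)``; the slice
``s[:, 0, :]`` simply drops the singleton axis to recover one probability
distribution per word.
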
Theano will then compute all the gradients automatically to maximize the log-likelihood::

-        lr = T.scalar('lr')
-        nll = -T.mean(T.log(p_y_given_x_sentence)[T.arange(x.shape[0]), y])
-        gradients = T.grad(nll, self.params)
-        updates = OrderedDict((p, p - lr * g)
-                              for p, g in zip(self.params, gradients))
-
-    Next compile those functions::
+.. literalinclude:: ../code/rnnslu.py
+  :start-after: start-snippet-5
+  :end-before: end-snippet-5

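The indexing ``T.log(p_y_given_x_sentence)[T.arange(x.shape[0]), y]`` picks,
for each word of the sentence, the log-probability the model assigns to the
gold label, so ``nll`` is the mean negative log-likelihood of the sentence.
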
-        self.classify = theano.function(inputs=[idxs], outputs=y_pred)
+Next, we compile those functions::

-        self.train = theano.function(inputs=[idxs, y, lr],
-                                     outputs=nll,
-                                     updates=updates)
+.. literalinclude:: ../code/rnnslu.py
+  :start-after: start-snippet-6
+  :end-before: end-snippet-6

We keep the word embeddings on the unit sphere by normalizing them after each update::

-        self.normalize = theano.function(
-            inputs=[],
-            updates={self.emb: self.emb /
-                     T.sqrt((self.emb ** 2).sum(axis=1)).dimshuffle(0, 'x')})
+.. literalinclude:: ../code/rnnslu.py
+  :start-after: start-snippet-7
+  :end-before: end-snippet-7

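This update divides each row of the embedding matrix by its L2 norm. A rough
numpy equivalent, for intuition only (assuming an ``rnn`` instance)::

    E = rnn.emb.get_value()
    E = E / numpy.sqrt((E ** 2).sum(axis=1))[:, None]
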
And that's it!

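Putting the pieces together, one training step per sentence looks roughly
like this (a sketch; ``train_sentences``, ``train_labels`` and
``learning_rate`` are hypothetical names, not part of the snippets above)::

    for sentence, labels in zip(train_sentences, train_labels):
        rnn.train(contextwin(sentence, cs), labels, learning_rate)
        rnn.normalize()
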
@@ -373,7 +324,7 @@ The following intervals can give you some starting point:
Running the Code
++++++++++++++++

-The user can then run the code by calling:
+After downloading the data using ``download.sh``, the user can then run the code by calling:

.. code-block:: bash
