@@ -291,6 +291,8 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost):
291291
292292def build_model (tparams , options ):
293293 trng = RandomStreams (1234 )
294+
295+ # Used for dropout.
294296 use_noise = theano .shared (numpy .float32 (0. ))
295297
296298 x = tensor .matrix ('x' , dtype = 'int64' )
@@ -378,8 +380,8 @@ def test_lstm(
378380 validFreq = 10000 , # after 1000
379381 saveFreq = 100000 , # save the parameters after every saveFreq updates
380382 maxlen = 100 , # longer sequences get ignored
381- batch_size = 64 ,
382- valid_batch_size = 64 ,
383+ batch_size = 64 , # the batch size during training.
384+ valid_batch_size = 64 , # The batch size during validation
383385 dataset = 'imdb' ,
384386
385387 # Parameter for extra option
@@ -448,12 +450,13 @@ def test_lstm(
448450 if saveFreq == - 1 :
449451 saveFreq = len (train [0 ])/ batch_size
450452
451- uidx = 0
452- estop = False
453+ uidx = 0 # the number of updates done
454+ estop = False # early stop
453455 start_time = time .clock ()
454456 for eidx in xrange (max_epochs ):
455457 n_samples = 0
456458
459+ # Get new shuffled index for the training set.
457460 kf = get_minibatches_idx (len (train [0 ]), len (train [0 ])/ batch_size ,
458461 shuffle = True )
459462
@@ -462,10 +465,13 @@ def test_lstm(
462465 uidx += 1
463466 use_noise .set_value (1. )
464467
468+ # Select the random examples for this minibatch
465469 y = [train [1 ][t ] for t in train_index ]
466- x , mask , y = prepare_data ([train [0 ][t ]for t in train_index ],
467- y , maxlen = maxlen )
470+ x = [train [0 ][t ]for t in train_index ]
468471
472+ # Get the data in numpy.ndarray format.
473+ # It returns something of the shape (minibatch maxlen, n samples)
474+ x , mask , y = prepare_data (x , y , maxlen = maxlen )
469475 if x is None :
470476 print 'Minibatch with zero sample under length ' , maxlen
471477 continue
0 commit comments