Skip to content

Commit 6b7d587

Browse files
committed
Fix typo, add docstring, add timing, remove useless printing
1 parent 6fe4fa0 commit 6b7d587

2 files changed

Lines changed: 27 additions & 11 deletions

File tree

code/imdb.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@
1111

1212

1313
def prepare_data(seqs, labels, maxlen=None):
14+
"""Create the matrices from the datasets.
15+
16+
This pads each sequence to the same length: the length of the
17+
longest sequence or maxlen.
18+
19+
If maxlen is set, we will cut all sequences to this maximum
20+
length.
21+
22+
"""
1423
# x: a list of sentences
1524
lengths = [len(s) for s in seqs]
1625

code/lstm.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
'''
22
Build a tweet sentiment analyzer
33
'''
4-
import theano
5-
import theano.tensor as tensor
6-
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
7-
8-
import cPickle as pkl
9-
import numpy
4+
from collections import OrderedDict
105
import copy
6+
import cPickle as pkl
117
import random
8+
import sys
9+
import time
1210

13-
from collections import OrderedDict
11+
import numpy
12+
import theano
13+
import theano.tensor as tensor
14+
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
1415

1516
import imdb
1617

@@ -364,7 +365,7 @@ def train(dim_proj=100,
364365
activ=tensor.tanh,
365366
decay_c=0., # weight decay for the classifier
366367
lrate=0.01, # learning rate for sgd (not used for adadelta and rmsprop)
367-
n_words=100000, # wocabulary size
368+
n_words=100000, # vocabulary size
368369
optimizer=adadelta,
369370
encoder='lstm',# can be removed must be lstm.
370371
saveto='lstm_model.npz',
@@ -432,6 +433,7 @@ def train(dim_proj=100,
432433

433434
uidx = 0
434435
estop = False
436+
start_time = time.clock()
435437
for eidx in xrange(max_epochs):
436438
n_samples = 0
437439

@@ -502,9 +504,11 @@ def train(dim_proj=100,
502504

503505
if estop:
504506
break
505-
507+
end_time = time.clock()
506508
if best_p is not None:
507509
zipp(best_p, tparams)
510+
else:
511+
best_p = unzip(tparams)
508512

509513
use_noise.set_value(0.)
510514
train_err = pred_error(f_pred, prepare_data, train, kf)
@@ -518,12 +522,15 @@ def train(dim_proj=100,
518522
valid_err=valid_err, test_err=test_err,
519523
history_errs=history_errs, **params)
520524

525+
print 'The code run for %d epochs, with %f epochs/sec' % (
526+
uidx, 1. * uidx / (end_time - start_time))
527+
print >> sys.stderr, ('The code for file ' +
528+
os.path.split(__file__)[1] +
529+
' ran for %.1fs' % ((end_time - start_time)))
521530
return train_err, valid_err, test_err
522531

523532

524533
def main(job_id, params):
525-
print ('Anything printed here will end up in the output directory'
526-
'for job #%d' % job_id)
527534
print params
528535
use_dropout = True if params['use-dropout'][0] else False
529536
trainerr, validerr, testerr = train(saveto=params['model'][0],

0 commit comments

Comments
 (0)