Skip to content

Commit e85246f

Browse files
committed
lstm: add comments
1 parent e57dd03 commit e85246f

1 file changed

Lines changed: 27 additions & 15 deletions

File tree

code/lstm.py

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,17 @@ def get_dataset(name):
4444

4545

4646
def zipp(params, tparams):
47+
"""
48+
When we reload the model. Needed for the GPU stuff.
49+
"""
4750
for kk, vv in params.iteritems():
4851
tparams[kk].set_value(vv)
4952

5053

5154
def unzip(zipped):
55+
"""
56+
When we pickle the model. Needed for the GPU stuff.
57+
"""
5258
new_params = OrderedDict()
5359
for kk, vv in zipped.iteritems():
5460
new_params[kk] = vv.get_value()
@@ -79,7 +85,6 @@ def init_params(options):
7985
randn = numpy.random.rand(options['n_words'],
8086
options['dim_proj'])
8187
params['Wemb'] = (0.01 * randn).astype('float32')
82-
# rconv
8388
params = get_layer(options['encoder'])[0](options,
8489
params,
8590
prefix=options['encoder'])
@@ -122,7 +127,7 @@ def param_init_fflayer(options, params, prefix='ff'):
122127
return params
123128

124129

125-
def fflayer(tparams, state_below, options, prefix='rconv', **kwargs):
130+
def fflayer(tparams, state_below, options, prefix='ff', **kwargs):
126131
pre_act = (tensor.dot(state_below,
127132
tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')])
128133
return options['activ'](pre_act)
@@ -396,6 +401,9 @@ def build_model(tparams, options):
396401

397402

398403
def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
404+
""" If you want to use a trained model, this is useful to compute
405+
the probabilities of new examples.
406+
"""
399407
n_samples = len(data[0])
400408
probs = numpy.zeros((n_samples, 2)).astype('float32')
401409

@@ -416,6 +424,11 @@ def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
416424

417425

418426
def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
427+
"""
428+
Just compute the error
429+
f_pred: Theano fct computing the prediction
430+
prepare_data: usual prepare_data for that dataset.
431+
"""
419432
valid_err = 0
420433
for _, valid_index in iterator:
421434
x, mask, y = prepare_data([data[0][t] for t in valid_index],
@@ -430,19 +443,18 @@ def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
430443

431444

432445
def train(dim_proj=100,
433-
patience=10,
446+
patience=10, # number of epoch to wait before early stop if no progress
434447
max_epochs=5000,
435-
dispFreq=100,
448+
dispFreq=100, # display to stdout the training progress every N updates
436449
activ=tensor.tanh,
437-
decay_c=0.,
438-
lrate=0.01,
439-
n_words=100000,
440-
data_sym=False,
441-
optimizer=rmsprop,
442-
encoder='lstm',
450+
decay_c=0., # weight decay for the classifier
451+
lrate=0.01, # learning rate for sgd (not used for adadelta and rmsprop)
452+
n_words=100000, # vocabulary size
453+
optimizer=adadelta,
454+
encoder='lstm',  # can be removed; must be lstm.
443455
saveto='lstm_model.npz',
444456
noise_std=0.,
445-
validFreq=1000,
457+
validFreq=1000, # compute the validation error after every validFreq updates
446458
saveFreq=1000, # save the parameters after every saveFreq updates
447459
maxlen=50,
448460
batch_size=16,
@@ -478,7 +490,7 @@ def train(dim_proj=100,
478490

479491
f_cost = theano.function([x, mask, y], cost)
480492

481-
grads = tensor.grad(cost, wrt=itemlist(tparams))
493+
grads = tensor.grad(cost, wrt=tparams.values())
482494
f_grad = theano.function([x, mask, y], grads)
483495

484496
lr = tensor.scalar(name='lr')
@@ -627,8 +639,8 @@ def main(job_id, params):
627639
'encoder': ['lstm'],
628640
'dim-proj': [128],
629641
'n-words': [10000],
630-
'optimizer': [adadelta], # adadelta and rmsprop avail
642+
'optimizer': [adadelta], # sgd, adadelta and rmsprop available
631643
'activ': [tensor.tanh], # The activation function from Theano.
632-
'decay-c': [0.],
633-
'use-dropout': [1],
644+
'decay-c': [0.], # weight decay for the classifier
645+
'use-dropout': [1], # if disabled, slightly faster, but worse test error.
634646
'learning-rate': [0.0001]})

0 commit comments

Comments
 (0)