@@ -44,11 +44,17 @@ def get_dataset(name):
4444
4545
def zipp(params, tparams):
    """
    Push every value in `params` into the matching entry of `tparams`
    via `set_value`.

    Used when we reload the model: the (Theano shared, presumably — the
    only requirement visible here is a `set_value` method) variables in
    `tparams` are refilled in place. Needed for the GPU stuff.

    params  : mapping name -> raw value
    tparams : mapping name -> object exposing set_value(value)
    """
    # .items() instead of the Python-2-only .iteritems(): identical
    # iteration behaviour on Py2, and also works under Python 3.
    for kk, vv in params.items():
        tparams[kk].set_value(vv)
5053
5154def unzip (zipped ):
55+ """
56+ When we pickle the model. Needed for the GPU stuff.
57+ """
5258 new_params = OrderedDict ()
5359 for kk , vv in zipped .iteritems ():
5460 new_params [kk ] = vv .get_value ()
@@ -79,7 +85,6 @@ def init_params(options):
7985 randn = numpy .random .rand (options ['n_words' ],
8086 options ['dim_proj' ])
8187 params ['Wemb' ] = (0.01 * randn ).astype ('float32' )
82- # rconv
8388 params = get_layer (options ['encoder' ])[0 ](options ,
8489 params ,
8590 prefix = options ['encoder' ])
@@ -122,7 +127,7 @@ def param_init_fflayer(options, params, prefix='ff'):
122127 return params
123128
124129
def fflayer(tparams, state_below, options, prefix='ff', **kwargs):
    """
    Feed-forward layer: affine transform of `state_below` by the
    `prefix`-named W/b parameters, followed by the activation function
    configured in options['activ'].
    """
    W = tparams[_p(prefix, 'W')]
    b = tparams[_p(prefix, 'b')]
    activ = options['activ']
    return activ(tensor.dot(state_below, W) + b)
@@ -396,6 +401,9 @@ def build_model(tparams, options):
396401
397402
398403def pred_probs (f_pred_prob , prepare_data , data , iterator , verbose = False ):
404+ """ If you want to use a trained model, this is useful to compute
405+ the probabilities of new examples.
406+ """
399407 n_samples = len (data [0 ])
400408 probs = numpy .zeros ((n_samples , 2 )).astype ('float32' )
401409
@@ -416,6 +424,11 @@ def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
416424
417425
418426def pred_error (f_pred , prepare_data , data , iterator , verbose = False ):
427+ """
428+ Just compute the error
429+ f_pred: Theano fct computing the prediction
430+ prepare_data: usual prepare_data for that dataset.
431+ """
419432 valid_err = 0
420433 for _ , valid_index in iterator :
421434 x , mask , y = prepare_data ([data [0 ][t ] for t in valid_index ],
@@ -430,19 +443,18 @@ def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
430443
431444
432445def train (dim_proj = 100 ,
433- patience = 10 ,
446+ patience = 10 , # number of epoch to wait before early stop if no progress
434447 max_epochs = 5000 ,
435- dispFreq = 100 ,
448+ dispFreq = 100 , # display to stdout the training progress every N updates
436449 activ = tensor .tanh ,
437- decay_c = 0. ,
438- lrate = 0.01 ,
439- n_words = 100000 ,
440- data_sym = False ,
441- optimizer = rmsprop ,
442- encoder = 'lstm' ,
450+ decay_c = 0. , # weight decay for the classifier
451+ lrate = 0.01 , # learning rate for sgd (not used for adadelta and rmsprop)
452+ n_words = 100000 , # vocabulary size
453+ optimizer = adadelta ,
454+ encoder = 'lstm' ,# could be removed; must be 'lstm'.
443455 saveto = 'lstm_model.npz' ,
444456 noise_std = 0. ,
445- validFreq = 1000 ,
457+ validFreq = 1000 , # compute the validation error after this many updates
446458 saveFreq = 1000 , # save the parameters after every saveFreq updates
447459 maxlen = 50 ,
448460 batch_size = 16 ,
@@ -478,7 +490,7 @@ def train(dim_proj=100,
478490
479491 f_cost = theano .function ([x , mask , y ], cost )
480492
481- grads = tensor .grad (cost , wrt = itemlist ( tparams ))
493+ grads = tensor .grad (cost , wrt = tparams . values ( ))
482494 f_grad = theano .function ([x , mask , y ], grads )
483495
484496 lr = tensor .scalar (name = 'lr' )
@@ -627,8 +639,8 @@ def main(job_id, params):
627639 'encoder' : ['lstm' ],
628640 'dim-proj' : [128 ],
629641 'n-words' : [10000 ],
630- 'optimizer' : [adadelta ], # adadelta and rmsprop avail
642+ 'optimizer' : [adadelta ], # sgd, adadelta and rmsprop available
631643 'activ' : [tensor .tanh ], # The activation function from Theano.
632- 'decay-c' : [0. ],
633- 'use-dropout' : [1 ],
644+ 'decay-c' : [0. ], #
645+ 'use-dropout' : [1 ], # if disabled, slightly faster, but worse test error.
634646 'learning-rate' : [0.0001 ]})
0 commit comments