22Build a tweet sentiment analyzer
33'''
44from collections import OrderedDict
5- import copy
65import cPickle as pkl
76import random
87import sys
@@ -159,8 +158,8 @@ def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None):
159158
160159 def _slice (_x , n , dim ):
161160 if _x .ndim == 3 :
162- return _x [:, :, n * dim :(n + 1 ) * dim ]
163- return _x [:, n * dim :(n + 1 ) * dim ]
161+ return _x [:, :, n * dim :(n + 1 ) * dim ]
162+ return _x [:, n * dim :(n + 1 ) * dim ]
164163
165164 def _step (m_ , x_ , h_ , c_ ):
166165 preact = tensor .dot (h_ , tparams [_p (prefix , 'U' )])
@@ -243,7 +242,7 @@ def adadelta(lr, tparams, grads, x, mask, y, cost):
243242 rg2up = [(rg2 , 0.95 * rg2 + 0.05 * (g ** 2 ))
244243 for rg2 , g in zip (running_grads2 , grads )]
245244
246- f_grad_shared = theano .function ([x , mask , y ], cost , updates = zgup + rg2up ,
245+ f_grad_shared = theano .function ([x , mask , y ], cost , updates = zgup + rg2up ,
247246 name = 'adadelta_f_grad_shared' )
248247
249248 updir = [- tensor .sqrt (ru2 + 1e-6 ) / tensor .sqrt (rg2 + 1e-6 ) * zg
@@ -254,7 +253,7 @@ def adadelta(lr, tparams, grads, x, mask, y, cost):
254253 for ru2 , ud in zip (running_up2 , updir )]
255254 param_up = [(p , p + ud ) for p , ud in zip (tparams .values (), updir )]
256255
257- f_update = theano .function ([lr ], [], updates = ru2up + param_up ,
256+ f_update = theano .function ([lr ], [], updates = ru2up + param_up ,
258257 on_unused_input = 'ignore' ,
259258 name = 'adadelta_f_update' )
260259
@@ -289,7 +288,7 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost):
289288 running_grads2 )]
290289 param_up = [(p , p + udn [1 ])
291290 for p , udn in zip (tparams .values (), updir_new )]
292- f_update = theano .function ([lr ], [], updates = updir_new + param_up ,
291+ f_update = theano .function ([lr ], [], updates = updir_new + param_up ,
293292 on_unused_input = 'ignore' ,
294293 name = 'rmsprop_f_update' )
295294
@@ -321,7 +320,7 @@ def build_model(tparams, options):
321320 if options ['use_dropout' ]:
322321 proj = dropout_layer (proj , use_noise , trng )
323322
324- pred = tensor .nnet .softmax (tensor .dot (proj , tparams ['U' ])+ tparams ['b' ])
323+ pred = tensor .nnet .softmax (tensor .dot (proj , tparams ['U' ]) + tparams ['b' ])
325324
326325 f_pred_prob = theano .function ([x , mask ], pred , name = 'f_pred_prob' )
327326 f_pred = theano .function ([x , mask ], pred .argmax (axis = 1 ), name = 'f_pred' )
@@ -408,7 +407,7 @@ def train_lstm(
408407 train , valid , test = load_data (n_words = n_words , valid_portion = 0.05 ,
409408 maxlen = maxlen )
410409
411- ydim = numpy .max (train [1 ])+ 1
410+ ydim = numpy .max (train [1 ]) + 1
412411
413412 model_options ['ydim' ] = ydim
414413
@@ -432,7 +431,7 @@ def train_lstm(
432431 if decay_c > 0. :
433432 decay_c = theano .shared (numpy .float32 (decay_c ), name = 'decay_c' )
434433 weight_decay = 0.
435- weight_decay += (tparams ['U' ]** 2 ).sum ()
434+ weight_decay += (tparams ['U' ] ** 2 ).sum ()
436435 weight_decay *= decay_c
437436 cost += weight_decay
438437
@@ -460,9 +459,9 @@ def train_lstm(
460459 bad_count = 0
461460
462461 if validFreq == - 1 :
463- validFreq = len (train [0 ])/ batch_size
462+ validFreq = len (train [0 ]) / batch_size
464463 if saveFreq == - 1 :
465- saveFreq = len (train [0 ])/ batch_size
464+ saveFreq = len (train [0 ]) / batch_size
466465
467466 uidx = 0 # the number of update done
468467 estop = False # early stop
@@ -514,7 +513,8 @@ def train_lstm(
514513 if numpy .mod (uidx , validFreq ) == 0 :
515514 use_noise .set_value (0. )
516515 train_err = pred_error (f_pred , prepare_data , train , kf )
517- valid_err = pred_error (f_pred , prepare_data , valid , kf_valid )
516+ valid_err = pred_error (f_pred , prepare_data , valid ,
517+ kf_valid )
518518 test_err = pred_error (f_pred , prepare_data , test , kf_test )
519519
520520 history_errs .append ([valid_err , test_err ])
0 commit comments