99
1010import numpy
1111import theano
12+ from theano import config
1213import theano .tensor as tensor
1314from theano .sandbox .rng_mrg import MRG_RandomStreams as RandomStreams
1415
1718datasets = {'imdb' : (imdb .load_data , imdb .prepare_data )}
1819
1920
21+ def numpy_floatX (data ):
22+ return numpy .asarray (data , dtype = config .floatX )
23+
24+
2025def get_minibatches_idx (n , minibatch_size , shuffle = False ):
2126 """
2227 Used to shuffle the dataset at each iteration.
@@ -85,14 +90,14 @@ def init_params(options):
8590 # embedding
8691 randn = numpy .random .rand (options ['n_words' ],
8792 options ['dim_proj' ])
88- params ['Wemb' ] = (0.01 * randn ).astype ('float32' )
93+ params ['Wemb' ] = (0.01 * randn ).astype (config . floatX )
8994 params = get_layer (options ['encoder' ])[0 ](options ,
9095 params ,
9196 prefix = options ['encoder' ])
9297 # classifier
9398 params ['U' ] = 0.01 * numpy .random .randn (options ['dim_proj' ],
94- options ['ydim' ]).astype ('float32' )
95- params ['b' ] = numpy .zeros ((options ['ydim' ],)).astype ('float32' )
99+ options ['ydim' ]).astype (config . floatX )
100+ params ['b' ] = numpy .zeros ((options ['ydim' ],)).astype (config . floatX )
96101
97102 return params
98103
@@ -122,7 +127,7 @@ def get_layer(name):
122127def ortho_weight (ndim ):
123128 W = numpy .random .randn (ndim , ndim )
124129 u , s , v = numpy .linalg .svd (W )
125- return u .astype ('float32' )
130+ return u .astype (config . floatX )
126131
127132
128133def param_init_lstm (options , params , prefix = 'lstm' ):
@@ -142,7 +147,7 @@ def param_init_lstm(options, params, prefix='lstm'):
142147 ortho_weight (options ['dim_proj' ])], axis = 1 )
143148 params [_p (prefix , 'U' )] = U
144149 b = numpy .zeros ((4 * options ['dim_proj' ],))
145- params [_p (prefix , 'b' )] = b .astype ('float32' )
150+ params [_p (prefix , 'b' )] = b .astype (config . floatX )
146151
147152 return params
148153
@@ -185,9 +190,11 @@ def _step(m_, x_, h_, c_):
185190 dim_proj = options ['dim_proj' ]
186191 rval , updates = theano .scan (_step ,
187192 sequences = [mask , state_below ],
188- outputs_info = [tensor .alloc (0. , n_samples ,
193+ outputs_info = [tensor .alloc (numpy_floatX (0. ),
194+ n_samples ,
189195 dim_proj ),
190- tensor .alloc (0. , n_samples ,
196+ tensor .alloc (numpy_floatX (0. ),
197+ n_samples ,
191198 dim_proj )],
192199 name = _p (prefix , '_layers' ),
193200 n_steps = nsteps )
@@ -228,13 +235,13 @@ def sgd(lr, tparams, grads, x, mask, y, cost):
228235
229236
230237def adadelta (lr , tparams , grads , x , mask , y , cost ):
231- zipped_grads = [theano .shared (p .get_value () * numpy . float32 (0. ),
238+ zipped_grads = [theano .shared (p .get_value () * numpy_floatX (0. ),
232239 name = '%s_grad' % k )
233240 for k , p in tparams .iteritems ()]
234- running_up2 = [theano .shared (p .get_value () * numpy . float32 (0. ),
241+ running_up2 = [theano .shared (p .get_value () * numpy_floatX (0. ),
235242 name = '%s_rup2' % k )
236243 for k , p in tparams .iteritems ()]
237- running_grads2 = [theano .shared (p .get_value () * numpy . float32 (0. ),
244+ running_grads2 = [theano .shared (p .get_value () * numpy_floatX (0. ),
238245 name = '%s_rgrad2' % k )
239246 for k , p in tparams .iteritems ()]
240247
@@ -261,13 +268,13 @@ def adadelta(lr, tparams, grads, x, mask, y, cost):
261268
262269
263270def rmsprop (lr , tparams , grads , x , mask , y , cost ):
264- zipped_grads = [theano .shared (p .get_value () * numpy . float32 (0. ),
271+ zipped_grads = [theano .shared (p .get_value () * numpy_floatX (0. ),
265272 name = '%s_grad' % k )
266273 for k , p in tparams .iteritems ()]
267- running_grads = [theano .shared (p .get_value () * numpy . float32 (0. ),
274+ running_grads = [theano .shared (p .get_value () * numpy_floatX (0. ),
268275 name = '%s_rgrad' % k )
269276 for k , p in tparams .iteritems ()]
270- running_grads2 = [theano .shared (p .get_value () * numpy . float32 (0. ),
277+ running_grads2 = [theano .shared (p .get_value () * numpy_floatX (0. ),
271278 name = '%s_rgrad2' % k )
272279 for k , p in tparams .iteritems ()]
273280
@@ -280,7 +287,7 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost):
280287 updates = zgup + rgup + rg2up ,
281288 name = 'rmsprop_f_grad_shared' )
282289
283- updir = [theano .shared (p .get_value () * numpy . float32 (0. ),
290+ updir = [theano .shared (p .get_value () * numpy_floatX (0. ),
284291 name = '%s_updir' % k )
285292 for k , p in tparams .iteritems ()]
286293 updir_new = [(ud , 0.9 * ud - 1e-4 * zg / tensor .sqrt (rg2 - rg ** 2 + 1e-4 ))
@@ -299,10 +306,10 @@ def build_model(tparams, options):
299306 trng = RandomStreams (1234 )
300307
301308 # Used for dropout.
302- use_noise = theano .shared (numpy . float32 (0. ))
309+ use_noise = theano .shared (numpy_floatX (0. ))
303310
304311 x = tensor .matrix ('x' , dtype = 'int64' )
305- mask = tensor .matrix ('mask' , dtype = 'float32' )
312+ mask = tensor .matrix ('mask' , dtype = config . floatX )
306313 y = tensor .vector ('y' , dtype = 'int64' )
307314
308315 n_timesteps = x .shape [0 ]
@@ -335,7 +342,7 @@ def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
335342 the probabilities of new examples.
336343 """
337344 n_samples = len (data [0 ])
338- probs = numpy .zeros ((n_samples , 2 )).astype ('float32' )
345+ probs = numpy .zeros ((n_samples , 2 )).astype (config . floatX )
339346
340347 n_done = 0
341348
@@ -367,7 +374,7 @@ def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
367374 preds = f_pred (x , mask )
368375 targets = numpy .array (data [1 ])[valid_index ]
369376 valid_err += (preds == targets ).sum ()
370- valid_err = 1. - numpy . float32 (valid_err ) / len (data [0 ])
377+ valid_err = 1. - numpy_floatX (valid_err ) / len (data [0 ])
371378
372379 return valid_err
373380
@@ -395,7 +402,7 @@ def train_lstm(
395402 use_dropout = True , # if False slightly faster, but worse test error
396403 # This frequently needs a bigger model.
397404 reload_model = "" , # Path to a saved model we want to start from.
398- test_size = - 1 , # If >0, we will trunc the test set to this number of example.
405+ test_size = - 1 , # If >0, we keep only this number of test examples.
399406):
400407
401408 # Model options
@@ -432,7 +439,7 @@ def train_lstm(
432439 y , f_pred_prob , f_pred , cost ) = build_model (tparams , model_options )
433440
434441 if decay_c > 0. :
435- decay_c = theano .shared (numpy . float32 (decay_c ), name = 'decay_c' )
442+ decay_c = theano .shared (numpy_floatX (decay_c ), name = 'decay_c' )
436443 weight_decay = 0.
437444 weight_decay += (tparams ['U' ] ** 2 ).sum ()
438445 weight_decay *= decay_c
@@ -571,12 +578,6 @@ def train_lstm(
571578
572579
573580if __name__ == '__main__' :
574-
575- # We must have floatX=float32 for this tutorial to work correctly.
576- theano .config .floatX = "float32"
577- # The next line is the new Theano default. This is a speed up.
578- theano .config .scan .allow_gc = False
579-
580581 # See function train_lstm for all possible parameters and their definitions.
581582 train_lstm (
582583 #reload_model="lstm_model.npz",
0 commit comments