@@ -203,90 +203,9 @@ def _step(m_, x_, h_, c_):
     return rval[0]
 
 
-def param_init_rconv(options, params, prefix='rconv'):
-    params[_p(prefix, 'W')] = ortho_weight(options['dim_proj'])
-    params[_p(prefix, 'U')] = ortho_weight(options['dim_proj'])
-    b = numpy.zeros((options['dim_proj'],)).astype('float32')
-    params[_p(prefix, 'b')] = b
-    gw = 0.01 * numpy.random.randn(options['dim_proj'], 3).astype('float32')
-    params[_p(prefix, 'GW')] = gw
-    gu = 0.01 * numpy.random.randn(options['dim_proj'], 3).astype('float32')
-    params[_p(prefix, 'GU')] = gu
-    params[_p(prefix, 'Gb')] = numpy.zeros((3,)).astype('float32')
-
-    return params
-
-
-def rconv_layer(tparams, state_below, options, prefix='rconv', mask=None):
-    nsteps = state_below.shape[0]
-
-    assert mask is not None
-
-    def _step(m_, p_):
-        l_ = p_
-        # new activation
-        ps_ = tensor.zeros_like(p_)
-        ps_ = tensor.set_subtensor(ps_[1:], p_[:-1])
-        ls_ = ps_
-        ps_ = tensor.dot(ps_, tparams[_p(prefix, 'U')])
-        pl_ = tensor.dot(p_, tparams[_p(prefix, 'W')])
-        newact = options['activ'](ps_ + pl_ + tparams[_p(prefix, 'b')])
-
-        # gater
-        gt_ = (tensor.dot(ls_, tparams[_p(prefix, 'GU')]) +
-               tensor.dot(l_, tparams[_p(prefix, 'GW')]) +
-               tparams[_p(prefix, 'Gb')])
-        if l_.ndim == 3:
-            gt_shp = gt_.shape
-            gt_ = gt_.reshape((gt_shp[0] * gt_shp[1], gt_shp[2]))
-        gt_ = tensor.nnet.softmax(gt_)
-        if l_.ndim == 3:
-            gt_ = gt_.reshape((gt_shp[0], gt_shp[1], gt_shp[2]))
-
-        if p_.ndim == 3:
-            gn = gt_[:, :, 0].dimshuffle(0, 1, 'x')
-            gl = gt_[:, :, 1].dimshuffle(0, 1, 'x')
-            gr = gt_[:, :, 2].dimshuffle(0, 1, 'x')
-        else:
-            gn = gt_[:, 0].dimshuffle(0, 'x')
-            gl = gt_[:, 1].dimshuffle(0, 'x')
-            gr = gt_[:, 2].dimshuffle(0, 'x')
-
-        act = newact * gn + ls_ * gl + l_ * gr
-
-        if p_.ndim == 3:
-            m_ = m_.dimshuffle('x', 0, 'x')
-        else:
-            m_ = m_.dimshuffle('x', 0)
-        return tensor.switch(m_, act, l_)
-
-    rval, updates = theano.scan(_step,
-                                sequences=[mask[1:]],
-                                outputs_info=[state_below],
-                                name='layer_%s' % prefix,
-                                n_steps=nsteps - 1)
-
-    seqlens = tensor.cast(mask.sum(axis=0), 'int64') - 1
-    roots = rval[-1]
-
-    if state_below.ndim == 3:
-        def _grab_root(seqlen, one_sample, prev_sample):
-            return one_sample[seqlen]
-
-        dim_proj = options['dim_proj']
-        roots, updates = theano.scan(_grab_root,
-                                     sequences=[seqlens,
-                                                roots.dimshuffle(1, 0, 2)],
-                                     outputs_info=[tensor.alloc(0., dim_proj)],
-                                     name='grab_root_%s' % prefix)
-    else:
-        roots = roots[seqlens]  # there should be only one, so it's fine.
-
-    return roots
-
-
+# ff: Feed Forward (normal neural net), only useful to put after lstm
+# before the classifier.
 layers = {'ff': (param_init_fflayer, fflayer),
-          'rconv': (param_init_rconv, rconv_layer),
           'lstm': (param_init_lstm, lstm_layer)}
 
 
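Aside on what this commit removes: `rconv_layer` performs a soft, tree-like reduction of the input sequence. At each of its `nsteps - 1` scan iterations, every position computes a softmax over three choices (use a freshly computed activation, copy its left neighbour, or keep its current value) and blends them; `tensor.switch(m_, act, l_)` leaves masked padding positions untouched, and after the last iteration the entry at each sample's final valid position (`seqlens`) is grabbed as the root. Below is a minimal NumPy sketch of the gating arithmetic for one 2D step; `rconv_gate_step` is a hypothetical helper whose explicit `W, U, b, GW, GU, Gb` arguments stand in for the `tparams` lookups in the deleted code.

```python
import numpy

def rconv_gate_step(p, W, U, b, GW, GU, Gb, activ=numpy.tanh):
    """Hypothetical single gated-merge step; p: (n_positions, dim_proj)."""
    # Left-shifted activations: position i sees neighbour i-1 (zeros at i=0),
    # mirroring tensor.set_subtensor(ps_[1:], p_[:-1]) in the removed layer.
    ls = numpy.zeros_like(p)
    ls[1:] = p[:-1]

    # Candidate "new" activation built from the (left neighbour, current) pair.
    newact = activ(ls.dot(U) + p.dot(W) + b)

    # Gater: per-position softmax over three choices (new, left, current).
    gt = ls.dot(GU) + p.dot(GW) + Gb              # shape (n_positions, 3)
    gt = numpy.exp(gt - gt.max(axis=1, keepdims=True))
    gt /= gt.sum(axis=1, keepdims=True)
    gn, gl, gr = gt[:, 0:1], gt[:, 1:2], gt[:, 2:3]

    # Convex combination of the three candidates.
    return newact * gn + ls * gl + p * gr
```

The three-way softmax makes the merge decision differentiable, so where adjacent positions get combined is learned end to end rather than fixed by a predefined parse tree.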