1515import theano
1616from theano import tensor as T
1717
18- PREFIX = os .getenv ('ATISDATA' , 'data' )
18+ PREFIX = os .getenv (
19+ 'ATISDATA' ,
20+ os .path .join (os .path .split (os .path .abspath (os .path .dirname (__file__ )))[0 ],
21+ 'data' ))
1922
2023
2124# utils functions
@@ -30,6 +33,7 @@ def shuffle(lol, seed):
3033 random .seed (seed )
3134 random .shuffle (l )
3235
36+
3337# start-snippet-1
3438def contextwin (l , win ):
3539 '''
@@ -45,13 +49,14 @@ def contextwin(l, win):
4549 assert win >= 1
4650 l = list (l )
4751
48- lpadded = win // 2 * [- 1 ] + l + win // 2 * [- 1 ]
49- out = [lpadded [i :i + win ] for i in range (len (l ))]
52+ lpadded = win // 2 * [- 1 ] + l + win // 2 * [- 1 ]
53+ out = [lpadded [i :( i + win ) ] for i in range (len (l ))]
5054
5155 assert len (out ) == len (l )
5256 return out
5357# end-snippet-1
5458
59+
5560# data loading functions
5661def atisfold (fold ):
5762 assert fold in range (5 )
@@ -62,7 +67,7 @@ def atisfold(fold):
6267
6368
6469# metrics function using conlleval.pl
65- def conlleval (p , g , w , filename ):
70+ def conlleval (p , g , w , filename , script_path ):
6671 '''
6772 INPUT:
6873 p :: predictions
@@ -74,6 +79,10 @@ def conlleval(p, g, w, filename):
7479 are written. it will be the input of conlleval.pl script
7580 for computing the performance in terms of precision
7681 recall and f1 score
82+
83+ OTHER:
84+ script_path :: path to the directory containing the
85+ conlleval.pl script
7786 '''
7887 out = ''
7988 for sl , sp , sw in zip (g , p , w ):
@@ -86,27 +95,26 @@ def conlleval(p, g, w, filename):
8695 f .writelines (out )
8796 f .close ()
8897
89- return get_perf (filename )
98+ return get_perf (filename , script_path )
9099
91100
92- def download (origin ):
101+ def download (origin , destination ):
93102 '''
94103 download the corresponding atis file
95104 from http://www-etud.iro.umontreal.ca/~mesnilgr/atis/
96105 '''
97106 print 'Downloading data from %s' % origin
98- name = origin .split ('/' )[- 1 ]
99- urllib .urlretrieve (origin , name )
107+ urllib .urlretrieve (origin , destination )
100108
101109
102- def get_perf (filename ):
110+ def get_perf (filename , folder ):
103111 ''' run conlleval.pl perl script to obtain
104112 precision/recall and F1 score '''
105- _conlleval = 'conlleval.pl'
113+ _conlleval = os . path . join ( folder , 'conlleval.pl' )
106114 if not os .path .isfile (_conlleval ):
107115 url = 'http://www-etud.iro.umontreal.ca/~mesnilgr/atis/conlleval.pl'
108- download (url )
109- os .chmod ('conlleval.pl' , stat .S_IRWXU ) # give the execute permissions
116+ download (url , _conlleval )
117+ os .chmod (_conlleval , stat .S_IRWXU ) # give the execute permissions
110118
111119 proc = subprocess .Popen (["perl" ,
112120 _conlleval ],
@@ -125,6 +133,7 @@ def get_perf(filename):
125133
126134 return {'p' : precision , 'r' : recall , 'f1' : f1score }
127135
136+
128137# start-snippet-2
129138class RNNSLU (object ):
130139 ''' elman neural net model '''
@@ -199,9 +208,9 @@ def recurrence(x_t, h_tm1):
199208 [T .arange (x .shape [0 ]), y_sentence ])
200209 sentence_gradients = T .grad (sentence_nll , self .params )
201210 sentence_updates = OrderedDict ((p , p - lr * g )
202- # end-snippet-5
203211 for p , g in
204212 zip (self .params , sentence_gradients ))
213+ # end-snippet-5
205214
206215 # theano functions to compile
207216 # start-snippet-6
@@ -238,29 +247,30 @@ def load(self, folder):
238247 param .name + '.npy' )))
239248
240249
241-
242250def main (param = None ):
243251 if not param :
244- param = {'fold' : 3 ,
245- # 5 folds 0,1,2,3,4
246- 'data' : 'atis' ,
247- 'lr' : 0.0970806646812754 ,
248- 'verbose' : 1 ,
249- 'decay' : True ,
250- # decay on the learning rate if improvement stops
251- 'win' : 7 ,
252- # number of words in the context window
253- 'nhidden' : 200 ,
254- # number of hidden units
255- 'seed' : 345 ,
256- 'emb_dimension' : 50 ,
257- # dimension of word embedding
258- 'nepochs' : 60 ,
259- # 60 is recommended
260- 'savemodel' : False }
252+ param = {
253+ 'fold' : 3 ,
254+ # 5 folds 0,1,2,3,4
255+ 'data' : 'atis' ,
256+ 'lr' : 0.0970806646812754 ,
257+ 'verbose' : 1 ,
258+ 'decay' : True ,
259+ # decay on the learning rate if improvement stops
260+ 'win' : 7 ,
261+ # number of words in the context window
262+ 'nhidden' : 200 ,
263+ # number of hidden units
264+ 'seed' : 345 ,
265+ 'emb_dimension' : 50 ,
266+ # dimension of word embedding
267+ 'nepochs' : 60 ,
268+ # 60 is recommended
269+ 'savemodel' : False }
261270 print param
262-
263- folder = os .path .basename (__file__ ).split ('.' )[0 ]
271+
272+ folder_name = os .path .basename (__file__ ).split ('.' )[0 ]
273+ folder = os .path .join (os .path .dirname (__file__ ), folder_name )
264274 if not os .path .exists (folder ):
265275 os .mkdir (folder )
266276
@@ -308,9 +318,11 @@ def main(param=None):
308318
309319 for i , (x , y ) in enumerate (zip (train_lex , train_y )):
310320 rnn .train (x , y , param ['win' ], param ['clr' ])
311- print '[learning] epoch %i >> %2.2f%%' % (e ,(i + 1 )* 100. / nsentences ),'completed in %.2f (sec) <<\r ' % (time .time ()- tic ),
321+ print '[learning] epoch %i >> %2.2f%%' % (
322+ e , (i + 1 ) * 100. / nsentences ),
323+ print 'completed in %.2f (sec) <<\r ' % (time .time () - tic ),
312324 sys .stdout .flush ()
313-
325+
314326 # evaluation // back into the real world : idx -> words
315327 predictions_test = [map (lambda x : idx2label [x ],
316328 rnn .classify (numpy .asarray (
@@ -325,12 +337,14 @@ def main(param=None):
325337 res_test = conlleval (predictions_test ,
326338 groundtruth_test ,
327339 words_test ,
328- folder + '/current.test.txt' )
340+ folder + '/current.test.txt' ,
341+ folder )
329342 res_valid = conlleval (predictions_valid ,
330343 groundtruth_valid ,
331344 words_valid ,
332- folder + '/current.valid.txt' )
333-
345+ folder + '/current.valid.txt' ,
346+ folder )
347+
334348 if res_valid ['f1' ] > best_f1 :
335349
336350 if param ['savemodel' ]:
0 commit comments