Skip to content

Commit d4a96b5

Browse files
committed
Merge pull request lisa-lab#53 from lisa-lab/cwd_independent
Cwd independent
2 parents 0256450 + 52bcea6 commit d4a96b5

File tree

2 files changed

+55
-38
lines changed

2 files changed

+55
-38
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
code/*.pyc
2+
code/*_plots
23
code/tmp*
34
code/midi
5+
code/rnnslu
6+
data/atis.*
47
data/mnist.pkl.gz
58
data/mnist_py3k.pkl.gz
69
data/Nottingham.zip

code/rnnslu.py

Lines changed: 52 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
import theano
1616
from theano import tensor as T
1717

18-
PREFIX = os.getenv('ATISDATA', 'data')
18+
PREFIX = os.getenv(
19+
'ATISDATA',
20+
os.path.join(os.path.split(os.path.abspath(os.path.dirname(__file__)))[0],
21+
'data'))
1922

2023

2124
# utils functions
@@ -30,6 +33,7 @@ def shuffle(lol, seed):
3033
random.seed(seed)
3134
random.shuffle(l)
3235

36+
3337
# start-snippet-1
3438
def contextwin(l, win):
3539
'''
@@ -45,13 +49,14 @@ def contextwin(l, win):
4549
assert win >= 1
4650
l = list(l)
4751

48-
lpadded = win//2 * [-1] + l + win//2 * [-1]
49-
out = [lpadded[i:i+win] for i in range(len(l))]
52+
lpadded = win // 2 * [-1] + l + win // 2 * [-1]
53+
out = [lpadded[i:(i + win)] for i in range(len(l))]
5054

5155
assert len(out) == len(l)
5256
return out
5357
# end-snippet-1
5458

59+
5560
# data loading functions
5661
def atisfold(fold):
5762
assert fold in range(5)
@@ -62,7 +67,7 @@ def atisfold(fold):
6267

6368

6469
# metrics function using conlleval.pl
65-
def conlleval(p, g, w, filename):
70+
def conlleval(p, g, w, filename, script_path):
6671
'''
6772
INPUT:
6873
p :: predictions
@@ -74,6 +79,10 @@ def conlleval(p, g, w, filename):
7479
are written. It will be the input of the conlleval.pl script
7580
for computing the performance in terms of precision,
7681
recall and F1 score
82+
83+
OTHER:
84+
script_path :: path to the directory containing the
85+
conlleval.pl script
7786
'''
7887
out = ''
7988
for sl, sp, sw in zip(g, p, w):
@@ -86,27 +95,26 @@ def conlleval(p, g, w, filename):
8695
f.writelines(out)
8796
f.close()
8897

89-
return get_perf(filename)
98+
return get_perf(filename, script_path)
9099

91100

92-
def download(origin):
101+
def download(origin, destination):
93102
'''
94103
download the corresponding atis file
95104
from http://www-etud.iro.umontreal.ca/~mesnilgr/atis/
96105
'''
97106
print 'Downloading data from %s' % origin
98-
name = origin.split('/')[-1]
99-
urllib.urlretrieve(origin, name)
107+
urllib.urlretrieve(origin, destination)
100108

101109

102-
def get_perf(filename):
110+
def get_perf(filename, folder):
103111
''' run the conlleval.pl Perl script to obtain
104112
precision/recall and F1 score '''
105-
_conlleval = 'conlleval.pl'
113+
_conlleval = os.path.join(folder, 'conlleval.pl')
106114
if not os.path.isfile(_conlleval):
107115
url = 'http://www-etud.iro.umontreal.ca/~mesnilgr/atis/conlleval.pl'
108-
download(url)
109-
os.chmod('conlleval.pl', stat.S_IRWXU) # give the execute permissions
116+
download(url, _conlleval)
117+
os.chmod(_conlleval, stat.S_IRWXU) # give the execute permissions
110118

111119
proc = subprocess.Popen(["perl",
112120
_conlleval],
@@ -125,6 +133,7 @@ def get_perf(filename):
125133

126134
return {'p': precision, 'r': recall, 'f1': f1score}
127135

136+
128137
# start-snippet-2
129138
class RNNSLU(object):
130139
''' elman neural net model '''
@@ -199,9 +208,9 @@ def recurrence(x_t, h_tm1):
199208
[T.arange(x.shape[0]), y_sentence])
200209
sentence_gradients = T.grad(sentence_nll, self.params)
201210
sentence_updates = OrderedDict((p, p - lr*g)
202-
# end-snippet-5
203211
for p, g in
204212
zip(self.params, sentence_gradients))
213+
# end-snippet-5
205214

206215
# theano functions to compile
207216
# start-snippet-6
@@ -238,29 +247,30 @@ def load(self, folder):
238247
param.name + '.npy')))
239248

240249

241-
242250
def main(param=None):
243251
if not param:
244-
param = {'fold': 3,
245-
# 5 folds 0,1,2,3,4
246-
'data': 'atis',
247-
'lr': 0.0970806646812754,
248-
'verbose': 1,
249-
'decay': True,
250-
# decay on the learning rate if improvement stops
251-
'win': 7,
252-
# number of words in the context window
253-
'nhidden': 200,
254-
# number of hidden units
255-
'seed': 345,
256-
'emb_dimension': 50,
257-
# dimension of word embedding
258-
'nepochs': 60,
259-
# 60 is recommended
260-
'savemodel': False}
252+
param = {
253+
'fold': 3,
254+
# 5 folds 0,1,2,3,4
255+
'data': 'atis',
256+
'lr': 0.0970806646812754,
257+
'verbose': 1,
258+
'decay': True,
259+
# decay on the learning rate if improvement stops
260+
'win': 7,
261+
# number of words in the context window
262+
'nhidden': 200,
263+
# number of hidden units
264+
'seed': 345,
265+
'emb_dimension': 50,
266+
# dimension of word embedding
267+
'nepochs': 60,
268+
# 60 is recommended
269+
'savemodel': False}
261270
print param
262-
263-
folder = os.path.basename(__file__).split('.')[0]
271+
272+
folder_name = os.path.basename(__file__).split('.')[0]
273+
folder = os.path.join(os.path.dirname(__file__), folder_name)
264274
if not os.path.exists(folder):
265275
os.mkdir(folder)
266276

@@ -308,9 +318,11 @@ def main(param=None):
308318

309319
for i, (x, y) in enumerate(zip(train_lex, train_y)):
310320
rnn.train(x, y, param['win'], param['clr'])
311-
print '[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./nsentences),'completed in %.2f (sec) <<\r'%(time.time()-tic),
321+
print '[learning] epoch %i >> %2.2f%%' % (
322+
e, (i + 1) * 100. / nsentences),
323+
print 'completed in %.2f (sec) <<\r' % (time.time() - tic),
312324
sys.stdout.flush()
313-
325+
314326
# evaluation // back into the real world : idx -> words
315327
predictions_test = [map(lambda x: idx2label[x],
316328
rnn.classify(numpy.asarray(
@@ -325,12 +337,14 @@ def main(param=None):
325337
res_test = conlleval(predictions_test,
326338
groundtruth_test,
327339
words_test,
328-
folder + '/current.test.txt')
340+
folder + '/current.test.txt',
341+
folder)
329342
res_valid = conlleval(predictions_valid,
330343
groundtruth_valid,
331344
words_valid,
332-
folder + '/current.valid.txt')
333-
345+
folder + '/current.valid.txt',
346+
folder)
347+
334348
if res_valid['f1'] > best_f1:
335349

336350
if param['savemodel']:

0 commit comments

Comments
 (0)