Skip to content

Commit 7f06bc2

Browse files
authored
Merge pull request lisa-lab#202 from slefrancois/conlleval
add conlleval.pl
2 parents 238a0bc + b13a1b7 commit 7f06bc2

File tree

2 files changed

+322
-16
lines changed

2 files changed

+322
-16
lines changed

code/conlleval.pl

Lines changed: 319 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,319 @@
1+
#!/usr/bin/perl -w
2+
# conlleval: evaluate result of processing CoNLL-2000 shared task
3+
# usage: conlleval [-l] [-r] [-d delimiterTag] [-o oTag] < file
4+
# README: http://www.clips.uantwerpen.be/conll2000/chunking/output.html
5+
# options: l: generate LaTeX output for tables like in
6+
# https://www.clips.uantwerpen.be/conll2003/ner/example.tex
7+
# r: accept raw result tags (without B- and I- prefix;
8+
# assumes one word per chunk)
9+
# d: alternative delimiter tag (default is single space)
10+
# o: alternative outside tag (default is O)
11+
# note: the file should contain lines with items separated
12+
# by $delimiter characters (default space). The final
13+
# two items should contain the correct tag and the
14+
# guessed tag in that order. Sentences should be
15+
# separated from each other by empty lines or lines
16+
# with $boundary fields (default -X-).
17+
# url: http://www.clips.uantwerpen.be/conll2000/chunking/
18+
# started: 1998-09-25
19+
# version: 2018-03-09
20+
# original author: Erik Tjong Kim Sang <[email protected]>
21+
# modifications: Grégoire Mesnil for Deep Learning Tutorials
22+
# https://github.com/lisa-lab/DeepLearningTutorials
23+
24+
use strict;
25+
26+
my $false = 0; # boolean false, returned by endOfChunk/startOfChunk
27+
my $true = 42; # boolean true; only its truth value and ==/!= comparisons are used
28+
29+
my $boundary = "-X-"; # sentence boundary marker (first field of a boundary line)
30+
my $correct; # current corpus chunk tag (I,O,B)
31+
my $correctChunk = 0; # number of correctly identified chunks
32+
my $correctTags = 0; # number of tokens whose guessed tag and type match the corpus
33+
my $correctType; # type of current corpus chunk tag (NP,VP,etc.)
34+
my $delimiter = " "; # field delimiter (interpolated into the split pattern)
35+
my $FB1 = 0.0; # FB1 score (Van Rijsbergen 1979)
36+
my $firstItem; # first feature (for sentence boundary checks)
37+
my $foundCorrect = 0; # number of chunks in corpus
38+
my $foundGuessed = 0; # number of identified chunks
39+
my $guessed; # current guessed chunk tag
40+
my $guessedType; # type of current guessed chunk tag
41+
my $i; # loop variable (iterates over chunk type names)
42+
my $inCorrect = $false; # currently processed chunk is correct until now
43+
my $lastCorrect = "O"; # previous chunk tag in corpus
44+
my $latex = 0; # generate LaTeX formatted output
45+
my $lastCorrectType = ""; # type of previous chunk tag in corpus
46+
my $lastGuessed = "O"; # previously identified chunk tag
47+
my $lastGuessedType = ""; # type of previously identified chunk tag
48+
my $lastType; # temporary storage for detecting duplicates
49+
my $line; # current input line (without trailing newline)
50+
my $nbrOfFeatures = -1; # last field index of the first line with fields (-1: not seen yet)
51+
my $precision = 0.0; # precision score
52+
my $oTag = "O"; # outside tag, default O
53+
my $raw = 0; # raw input: prefix every non-outside tag with B-
54+
my $recall = 0.0; # recall score
55+
my $tokenCounter = 0; # token counter (ignores sentence breaks)
56+
57+
my %correctChunk = (); # number of correctly identified chunks per type
58+
my %foundCorrect = (); # number of chunks in corpus per type
59+
my %foundGuessed = (); # number of identified chunks per type
60+
61+
my @features; # features on line
62+
my @sortedTypes; # sorted list of chunk type names
63+
64+
# parse and validate command line options
65+
# Consume leading options from @ARGV:
#   -l        LaTeX output
#   -r        raw tags (no B-/I- prefix)
#   -d DELIM  field delimiter; it is later interpolated into the split
#             pattern, so it is treated as a regular expression
#   -o TAG    tag that marks tokens outside of any chunk
# Any unknown option, or any argument left over after the options, is fatal.
while (@ARGV and $ARGV[0] =~ /^-/) {
    if ($ARGV[0] eq "-l") {
        $latex = 1;
        shift(@ARGV);
    }
    elsif ($ARGV[0] eq "-r") {
        $raw = 1;
        shift(@ARGV);
    }
    elsif ($ARGV[0] eq "-d") {
        shift(@ARGV);
        if (not defined $ARGV[0]) {
            die "conlleval: -d requires delimiter character";
        }
        $delimiter = shift(@ARGV);
    }
    elsif ($ARGV[0] eq "-o") {
        shift(@ARGV);
        if (not defined $ARGV[0]) {
            # this message used to be a copy of the -d one
            die "conlleval: -o requires outside tag";
        }
        $oTag = shift(@ARGV);
    }
    else {
        die "conlleval: unknown argument $ARGV[0]\n";
    }
}
# input is read from STDIN only, so positional arguments are an error
if (@ARGV) { die "conlleval: unexpected command line argument\n"; }
83+
# process input
84+
# Main pass: read one token per line from STDIN, split it into fields and
# compare the corpus tag (second-to-last field) with the guessed tag (last
# field), updating the chunk and tag counters as chunks start and end.
while (<STDIN>) {
    chomp($line = $_);
    # $delimiter is interpolated as a regular expression, not a literal string
    @features = split(/$delimiter/,$line);

    # The first line that yields fields fixes the expected column count;
    # a later non-empty line with a different count aborts the run.
    if ($nbrOfFeatures < 0) {
        $nbrOfFeatures = $#features;
    }
    elsif ($nbrOfFeatures != $#features and @features != 0) {
        printf STDERR "unexpected number of features: %d (%d)\n",
            $#features+1,$nbrOfFeatures+1;
        exit(1);
    }

    # Empty lines and explicit boundary lines become a synthetic
    # out-of-chunk token so that any open chunk is closed.
    if (@features == 0 or $features[0] eq $boundary) {
        @features = ($boundary,"O","O");
    }
    if (@features < 2) {
        die "conlleval: unexpected number of features in line $line\n";
    }

    # Raw mode: map the user's outside tag to "O" and turn every other tag
    # into a one-token chunk by prefixing it with "B-". The loop variable
    # aliases the last two elements of @features, so they change in place.
    if ($raw) {
        for my $rawTag (@features[-2,-1]) {
            $rawTag = "O" if $rawTag eq $oTag;
            $rawTag = "B-$rawTag" if $rawTag ne "O";
        }
    }

    # Split "TAG-TYPE" at the first hyphen only, so types may contain
    # hyphens; a tag without a hyphen has the empty type. Both branches
    # always produce defined strings, so a type such as "0" is kept as is
    # (it used to be rewritten to "" by a leftover truthiness test).
    my $guessedField = pop(@features);
    if ($guessedField =~ /^([^-]*)-(.*)$/) {
        ($guessed,$guessedType) = ($1,$2);
    }
    else {
        ($guessed,$guessedType) = ($guessedField,"");
    }
    my $correctField = pop(@features);
    if ($correctField =~ /^([^-]*)-(.*)$/) {
        ($correct,$correctType) = ($1,$2);
    }
    else {
        ($correct,$correctType) = ($correctField,"");
    }

    # Two-column input (corpus tag and guessed tag only) has no leading
    # feature left; use "" rather than undef to avoid warnings under -w.
    $firstItem = @features ? shift(@features) : "";

    # 1999-06-26 sentence breaks should always be counted as out of chunk
    if ($firstItem eq $boundary) { $guessed = "O"; }

    # The chunk predicates depend only on their arguments, so evaluate each
    # of them once per token.
    my $endCorrect = endOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType);
    my $endGuessed = endOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType);
    my $startCorrect = startOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType);
    my $startGuessed = startOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType);

    if ($inCorrect) {
        if ($endCorrect and $endGuessed and
            $lastGuessedType eq $lastCorrectType) {
            # both chunks end here with the same type: a correct chunk
            $inCorrect = $false;
            $correctChunk++;
            $correctChunk{$lastCorrectType}++;
        }
        elsif ($endCorrect != $endGuessed or $guessedType ne $correctType) {
            # only one side ended, or the types diverged: chunk is wrong
            $inCorrect = $false;
        }
    }

    # a chunk can only be correct if both sides start it at the same token
    if ($startCorrect and $startGuessed and $guessedType eq $correctType) {
        $inCorrect = $true;
    }

    if ($startCorrect) {
        $foundCorrect++;
        $foundCorrect{$correctType}++;
    }
    if ($startGuessed) {
        $foundGuessed++;
        $foundGuessed{$guessedType}++;
    }

    # token-level accuracy ignores the synthetic boundary tokens
    if ($firstItem ne $boundary) {
        if ($correct eq $guessed and $guessedType eq $correctType) {
            $correctTags++;
        }
        $tokenCounter++;
    }

    $lastGuessed = $guessed;
    $lastCorrect = $correct;
    $lastGuessedType = $guessedType;
    $lastCorrectType = $correctType;
}

# a chunk still open and correct at end of input counts as correct
if ($inCorrect) {
    $correctChunk++;
    $correctChunk{$lastCorrectType}++;
}
181+
182+
# Plain-text summary: overall counts, then (if any token was read) accuracy,
# the three raw counts, precision, recall and FB1 on a single line.
unless ($latex) {
    # the scores keep their initial value of 0.0 when a denominator is zero
    if ($foundGuessed > 0) {
        $precision = 100*$correctChunk/$foundGuessed;
    }
    if ($foundCorrect > 0) {
        $recall = 100*$correctChunk/$foundCorrect;
    }
    if ($precision+$recall > 0) {
        $FB1 = 2*$precision*$recall/($precision+$recall);
    }

    printf "processed $tokenCounter tokens with $foundCorrect phrases; ";
    printf "found: $foundGuessed phrases; correct: $correctChunk.\n";

    if ($tokenCounter > 0) {
        my $tagAccuracy = 100*$correctTags/$tokenCounter;
        printf "accuracy: %6.2f%%; ", $tagAccuracy;
        print "$correctChunk $foundCorrect $foundGuessed ";
        printf "precision: %6.2f%%; ", $precision;
        printf "recall: %6.2f%%; ", $recall;
        printf "FB1: %6.2f\n", $FB1;
    }
}
200+
201+
# sort chunk type names
202+
# Collect every chunk type seen in the corpus or in the guesses, sorted and
# without duplicates. Uniqueness is tracked with a hash so that type names
# that are false in boolean context ("" for untyped tags, or "0") are also
# listed only once.
{
    my %seenType;
    @sortedTypes = grep { !$seenType{$_}++ }
        sort (keys %foundCorrect,keys %foundGuessed);
}
210+
# print performance per chunk type
211+
# Per-type results: one text row per chunk type, or with -l a LaTeX table
# holding one row per type followed by an overall row.
if (not $latex) {
    for $i (@sortedTypes) {
        # A type may occur only in the corpus or only in the guesses.
        # Default every missing counter to 0 so that the %d conversions
        # below never receive undef (which warns under -w).
        $correctChunk{$i} = 0 if not $correctChunk{$i};
        $foundCorrect{$i} = 0 if not $foundCorrect{$i};
        $foundGuessed{$i} = 0 if not $foundGuessed{$i};
        ($precision,$recall,$FB1) =
            computeScores($correctChunk{$i},$foundGuessed{$i},$foundCorrect{$i});
        printf "%17s: ",$i;
        printf "% 4d % 4d % 4d ", $correctChunk{$i}, $foundCorrect{$i}, $foundGuessed{$i};
        printf "precision: %6.2f%%; ",$precision;
        printf "recall: %6.2f%%; ",$recall;
        printf "FB1: %6.2f %d\n",$FB1,$foundGuessed{$i};
    }
} else {
    print " & Precision & Recall & F\$_{\\beta=1} \\\\\\hline";
    for $i (@sortedTypes) {
        ($precision,$recall,$FB1) =
            computeScores($correctChunk{$i},$foundGuessed{$i},$foundCorrect{$i});
        printf "\n%-7s & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\",
            $i,$precision,$recall,$FB1;
    }
    print "\\hline\n";
    ($precision,$recall,$FB1) =
        computeScores($correctChunk,$foundGuessed,$foundCorrect);
    printf "Overall & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\\\hline\n",
        $precision,$recall,$FB1;
}

# computeScores: precision, recall and FB1 (all as percentages) from counts
# arguments: number of correct chunks, number of guessed chunks,
#            number of chunks in the corpus (undef counts as 0)
# returns:   list (precision, recall, FB1); a score whose denominator is
#            zero is reported as 0.0
sub computeScores {
    my ($nCorrect,$nGuessed,$nInCorpus) = @_;
    $nCorrect = 0 if not $nCorrect;
    my $p = $nGuessed ? 100*$nCorrect/$nGuessed : 0.0;
    my $r = $nInCorpus ? 100*$nCorrect/$nInCorpus : 0.0;
    my $f = ($p+$r > 0) ? 2*$p*$r/($p+$r) : 0.0;
    return ($p,$r,$f);
}
250+
251+
exit 0;
252+
253+
# endOfChunk: checks if a chunk ended between the previous and current word
254+
# arguments: previous and current chunk tags, previous and current types
255+
# note: this code is capable of handling other chunk representations
256+
# than the default CoNLL-2000 ones, see EACL'99 paper of Tjong
257+
# Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006
258+
259+
sub endOfChunk {
    # Returns $true when a chunk ends between the previous and the current
    # token, $false otherwise.
    # arguments: previous tag, current tag, previous type, current type
    my ($prevTag,$tag,$prevType,$type) = @_;

    # bracket tags mark chunks of length 1, so the previous chunk has ended
    return $true if $prevTag eq "[" or $prevTag eq "]";

    # a change of type after an in-chunk tag always closes the chunk
    return $true
        if $prevTag ne "O" and $prevTag ne "." and $prevType ne $type;

    # tag transitions that close a chunk: B or I followed by B or O,
    # and E followed by E, I or O
    if ($prevTag eq "B" or $prevTag eq "I") {
        return $true if $tag eq "B" or $tag eq "O";
    }
    elsif ($prevTag eq "E") {
        return $true if $tag eq "E" or $tag eq "I" or $tag eq "O";
    }

    return $false;
}
286+
287+
# startOfChunk: checks if a chunk started between the previous and current word
288+
# arguments: previous and current chunk tags, previous and current types
289+
# note: this code is capable of handling other chunk representations
290+
# than the default CoNLL-2000 ones, see EACL'99 paper of Tjong
291+
# Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006
292+
293+
sub startOfChunk {
    # Returns $true when a chunk starts at the current token, $false
    # otherwise.
    # arguments: previous tag, current tag, previous type, current type
    my ($prevTag,$tag,$prevType,$type) = @_;

    # bracket tags mark chunks of length 1, so each one opens a chunk
    return $true if $tag eq "[" or $tag eq "]";

    # a change of type on an in-chunk tag always opens a new chunk
    return $true
        if $tag ne "O" and $tag ne "." and $prevType ne $type;

    # tag transitions that open a chunk: B after B, I or O;
    # I after O or E; E after E or O
    if ($tag eq "B") {
        return $true
            if $prevTag eq "B" or $prevTag eq "I" or $prevTag eq "O";
    }
    elsif ($tag eq "I") {
        return $true if $prevTag eq "O" or $prevTag eq "E";
    }
    elsif ($tag eq "E") {
        return $true if $prevTag eq "E" or $prevTag eq "O";
    }

    return $false;
}

code/rnnslu.py

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -107,24 +107,10 @@ def conlleval(p, g, w, filename, script_path):
107107

108108
return get_perf(filename, script_path)
109109

110-
111-
def download(origin, destination):
112-
'''
113-
download the corresponding atis file
114-
from http://www-etud.iro.umontreal.ca/~mesnilgr/atis/
115-
'''
116-
print('Downloading data from %s' % origin)
117-
urllib.urlretrieve(origin, destination)
118-
119-
120110
def get_perf(filename, folder):
121111
''' run conlleval.pl perl script to obtain
122112
precision/recall and F1 score '''
123113
_conlleval = os.path.join(folder, 'conlleval.pl')
124-
if not os.path.isfile(_conlleval):
125-
url = 'http://www-etud.iro.umontreal.ca/~mesnilgr/atis/conlleval.pl'
126-
download(url, _conlleval)
127-
os.chmod(_conlleval, stat.S_IRWXU) # give the execute permissions
128114

129115
proc = subprocess.Popen(["perl",
130116
_conlleval],
@@ -288,6 +274,7 @@ def main(param=None):
288274
folder = os.path.join(os.path.dirname(__file__), folder_name)
289275
if not os.path.exists(folder):
290276
os.mkdir(folder)
277+
script_path = os.path.dirname(__file__)
291278

292279
# load the dataset
293280
train_set, valid_set, test_set, dic = atisfold(param['fold'])
@@ -351,12 +338,12 @@ def main(param=None):
351338
groundtruth_test,
352339
words_test,
353340
folder + '/current.test.txt',
354-
folder)
341+
script_path)
355342
res_valid = conlleval(predictions_valid,
356343
groundtruth_valid,
357344
words_valid,
358345
folder + '/current.valid.txt',
359-
folder)
346+
script_path)
360347

361348
if res_valid['f1'] > best_f1:
362349

0 commit comments

Comments
 (0)