|
1 | 1 | from __future__ import absolute_import |
2 | 2 | from __future__ import print_function |
| 3 | +from functools import reduce |
3 | 4 | import re |
4 | 5 | import tarfile |
5 | 6 |
|
|
21 | 22 | "Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks" |
22 | 23 | http://arxiv.org/abs/1502.05698 |
23 | 24 |
|
| 25 | +Task Number | FB LSTM Baseline | Keras QA |
| 26 | +--- | --- | --- |
| 27 | +QA1 - Single Supporting Fact | 50 | 52.1 |
| 28 | +QA2 - Two Supporting Facts | 20 | 37.0 |
| 29 | +QA3 - Three Supporting Facts | 20 | 20.5 |
| 30 | +QA4 - Two Arg. Relations | 61 | 62.9 |
| 31 | +QA5 - Three Arg. Relations | 70 | 61.9 |
| 32 | +QA6 - Yes/No Questions | 48 | 50.7 |
| 33 | +QA7 - Counting | 49 | 78.9 |
| 34 | +QA8 - Lists/Sets | 45 | 77.2 |
| 35 | +QA9 - Simple Negation | 64 | 64.0 |
| 36 | +QA10 - Indefinite Knowledge | 44 | 47.7 |
| 37 | +QA11 - Basic Coreference | 72 | 74.9 |
| 38 | +QA12 - Conjunction | 74 | 76.4 |
| 39 | +QA13 - Compound Coreference | 94 | 94.4 |
| 40 | +QA14 - Time Reasoning | 27 | 34.8 |
| 41 | +QA15 - Basic Deduction | 21 | 32.4 |
| 42 | +QA16 - Basic Induction | 23 | 50.6 |
| 43 | +QA17 - Positional Reasoning | 51 | 49.1 |
| 44 | +QA18 - Size Reasoning | 52 | 90.8 |
| 45 | +QA19 - Path Finding | 8 | 9.0 |
| 46 | +QA20 - Agent's Motivations | 91 | 90.7 |
| 47 | +
|
24 | 48 | For the resources related to the bAbI project, refer to: |
25 | 49 | https://research.facebook.com/researchers/1543934539189348 |
26 | 50 |
|
@@ -67,7 +91,7 @@ def parse_stories(lines, only_supporting=False): |
67 | 91 | data = [] |
68 | 92 | story = [] |
69 | 93 | for line in lines: |
70 | | - line = line.strip() |
| 94 | + line = line.decode('utf-8').strip() |
71 | 95 | nid, line = line.split(' ', 1) |
72 | 96 | nid = int(nid) |
73 | 97 | if nid == 1: |
@@ -137,7 +161,7 @@ def vectorize_stories(data): |
137 | 161 | train = get_stories(tar.extractfile(challenge.format('train'))) |
138 | 162 | test = get_stories(tar.extractfile(challenge.format('test'))) |
139 | 163 |
|
140 | | -vocab = sorted(reduce(lambda x, y: x | y, (set(story + q) for story, q, answer in train + test))) |
| 164 | +vocab = sorted(reduce(lambda x, y: x | y, (set(story + q + [answer]) for story, q, answer in train + test))) |
141 | 165 | # Reserve 0 for masking via pad_sequences |
142 | 166 | vocab_size = len(vocab) + 1 |
143 | 167 | word_idx = dict((c, i + 1) for i, c in enumerate(vocab)) |
|
0 commit comments