forked from ysc/QuestionAnsweringSystem
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathQuestion.java
More file actions
279 lines (244 loc) · 9.2 KB
/
Question.java
File metadata and controls
279 lines (244 loc) · 9.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
/**
*
* APDPlat - Application Product Development Platform
* Copyright (c) 2013, 杨尚川, yang-shangchuan@qq.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package org.apdplat.qa.model;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apdplat.qa.filter.CandidateAnswerCanNotInQustionFilter;
import org.apdplat.qa.filter.CandidateAnswerFilter;
import org.apdplat.qa.parser.WordParser;
import org.apdplat.qa.util.Tools;
import org.apdplat.word.segmentation.Word;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A question together with the evidence collected for it.
 * <p>
 * A question has multiple pieces of evidence; the evidence is used to extract
 * and score candidate answers.
 * <p>
 * Instances are mutable and perform no synchronization.
 *
 * @author 杨尚川
 */
public class Question {

    private static final Logger LOG = LoggerFactory.getLogger(Question.class);

    /** Full-width question mark (U+FF1F), written as an escape so it survives re-encoding of this file. */
    private static final String FULL_WIDTH_QUESTION_MARK = "\uFF1F";

    private String question;
    private final List<Evidence> evidences = new ArrayList<>();
    /** Resolved question type; defaults to {@link QuestionType#PERSON_NAME}. */
    private QuestionType questionType = QuestionType.PERSON_NAME;
    private String expectAnswer;
    private CandidateAnswerFilter candidateAnswerFilter = new CandidateAnswerCanNotInQustionFilter();
    // Candidate question types: classifying a question may yield more than one type.
    private final Set<QuestionType> candidateQuestionTypes = new HashSet<>();

    /** Removes every candidate question type. */
    public void clearCandidateQuestionType() {
        candidateQuestionTypes.clear();
    }

    /**
     * Adds a candidate question type.
     *
     * @param questionType the type to add
     */
    public void addCandidateQuestionType(QuestionType questionType) {
        candidateQuestionTypes.add(questionType);
    }

    /**
     * Removes a candidate question type.
     *
     * @param questionType the type to remove
     */
    public void removeCandidateQuestionType(QuestionType questionType) {
        candidateQuestionTypes.remove(questionType);
    }

    /**
     * Returns the candidate question types.
     *
     * @return the internal (live, mutable) set of candidate types
     */
    public Set<QuestionType> getCandidateQuestionTypes() {
        return candidateQuestionTypes;
    }

    /**
     * Finds the "hot" word of the question: among the question's words that are
     * longer than one character and also occur in the evidence text, the one
     * placed first after sorting by occurrence count with
     * {@link Tools#sortByIntegerValue} and reversing that order.
     *
     * @return the selected word with its count in the evidence text, or
     *         {@code null} when no question word qualifies
     */
    public Map.Entry<String, Integer> getHot() {
        List<String> questionWords = getWords();

        // Count how often each word occurs in the combined evidence text.
        Map<String, Integer> evidenceWordCounts = new HashMap<>();
        for (Word word : WordParser.parse(getText())) {
            String text = word.getText();
            Integer seen = evidenceWordCounts.get(text);
            evidenceWordCounts.put(text, seen == null ? 1 : seen + 1);
        }

        // Keep only the question words (longer than one character) that appear in the evidence.
        Map<String, Integer> questionWordCounts = new HashMap<>();
        for (String questionWord : questionWords) {
            Integer count = evidenceWordCounts.get(questionWord);
            if (questionWord.length() > 1 && count != null) {
                questionWordCounts.put(questionWord, count);
                LOG.debug("问题热词统计: {} {}", questionWord, count);
            }
        }

        List<Map.Entry<String, Integer>> sorted = Tools.sortByIntegerValue(questionWordCounts);
        Collections.reverse(sorted);
        return sorted.isEmpty() ? null : sorted.get(0);
    }

    /**
     * Looks up the position of the expected answer among all candidate answers.
     * Answers are compared after trimming surrounding whitespace.
     *
     * @return the 1-based rank of the expected answer, {@code -1} when it is not
     *         among the candidates, or {@code -2} when no expected answer is set
     */
    public int getExpectAnswerRank() {
        if (expectAnswer == null) {
            LOG.info("未指定期望的答案");
            return -2;
        }
        String expected = expectAnswer.trim();
        List<CandidateAnswer> candidateAnswers = this.getAllCandidateAnswer();
        for (int i = 0; i < candidateAnswers.size(); i++) {
            if (expected.equals(candidateAnswers.get(i).getAnswer().trim())) {
                return i + 1;
            }
        }
        return -1;
    }

    /**
     * Segments the question into words. ASCII and full-width question marks are
     * stripped before segmentation.
     *
     * @return the text of each word produced by {@link WordParser#parse}; empty
     *         when no question has been set
     */
    public List<String> getWords() {
        List<String> result = new ArrayList<>();
        if (question == null) {
            return result;
        }
        String stripped = question.replace("?", "").replace(FULL_WIDTH_QUESTION_MARK, "");
        for (Word word : WordParser.parse(stripped)) {
            result.add(word.getText());
        }
        return result;
    }

    /**
     * Collects the candidate answers of every evidence, merges candidates with
     * the same answer text, filters them and normalizes their scores.
     *
     * @return all candidate answers, highest score first, with scores divided by
     *         the score of the first remaining candidate
     */
    public List<CandidateAnswer> getAllCandidateAnswer() {
        // Sum up, per answer text, the candidate score plus the score of its evidence.
        Map<String, Double> scoreByAnswer = new HashMap<>();
        for (Evidence evidence : evidences) {
            for (CandidateAnswer candidateAnswer : evidence.getCandidateAnswerCollection().getAllCandidateAnswer()) {
                String answer = candidateAnswer.getAnswer();
                // Both the candidate's score and the evidence's score feed the final score.
                double contribution = candidateAnswer.getScore() + evidence.getScore();
                Double accumulated = scoreByAnswer.get(answer);
                scoreByAnswer.put(answer, accumulated == null ? contribution : accumulated + contribution);
            }
        }

        // Assemble the merged candidates, dropping null answers and non-positive,
        // NaN, infinite or Double.MAX_VALUE scores.
        List<CandidateAnswer> candidateAnswers = new ArrayList<>();
        for (Map.Entry<String, Double> entry : scoreByAnswer.entrySet()) {
            String answer = entry.getKey();
            Double score = entry.getValue();
            if (answer != null && score != null && score > 0 && score < Double.MAX_VALUE) {
                CandidateAnswer candidateAnswer = new CandidateAnswer();
                candidateAnswer.setAnswer(answer);
                candidateAnswer.setScore(score);
                candidateAnswers.add(candidateAnswer);
            }
        }

        // Natural order followed by a reversal puts the candidates in descending order.
        Collections.sort(candidateAnswers);
        Collections.reverse(candidateAnswers);

        // Filter the candidates.
        if (candidateAnswerFilter != null) {
            candidateAnswerFilter.filter(this, candidateAnswers);
        }

        // Normalize: divide every score by the score of the first candidate.
        if (!candidateAnswers.isEmpty()) {
            double baseScore = candidateAnswers.get(0).getScore();
            for (CandidateAnswer candidateAnswer : candidateAnswers) {
                candidateAnswer.setScore(candidateAnswer.getScore() / baseScore);
            }
        }
        return candidateAnswers;
    }

    /**
     * Returns the first {@code topN} candidate answers.
     *
     * @param topN maximum number of answers wanted; values larger than the number
     *             of candidates return all of them, values below one return none
     * @return a new list holding at most {@code topN} candidates, in the order
     *         produced by {@link #getAllCandidateAnswer()}
     */
    public List<CandidateAnswer> getTopNCandidateAnswer(int topN) {
        List<CandidateAnswer> allCandidateAnswers = getAllCandidateAnswer();
        int limit = Math.max(0, Math.min(topN, allCandidateAnswers.size()));
        return new ArrayList<>(allCandidateAnswers.subList(0, limit));
    }

    /**
     * Concatenates the title and snippet of every evidence, in order and without
     * separators. Missing (null) titles or snippets are skipped so that the
     * literal text "null" never ends up in the result.
     *
     * @return the combined evidence text; empty when there is no evidence
     */
    public String getText() {
        StringBuilder text = new StringBuilder();
        for (Evidence evidence : evidences) {
            appendIfPresent(text, evidence.getTitle());
            appendIfPresent(text, evidence.getSnippet());
        }
        return text.toString();
    }

    /** Appends {@code part} to {@code target} unless it is null. */
    private static void appendIfPresent(StringBuilder target, Object part) {
        if (part != null) {
            target.append(part);
        }
    }

    public String getQuestion() {
        return question;
    }

    public void setQuestion(String question) {
        this.question = question;
    }

    /**
     * Returns the evidence attached to this question.
     *
     * @return the internal (live, mutable) evidence list
     */
    public List<Evidence> getEvidences() {
        return this.evidences;
    }

    public void addEvidences(List<Evidence> evidences) {
        this.evidences.addAll(evidences);
    }

    public void addEvidence(Evidence evidence) {
        this.evidences.add(evidence);
    }

    public void removeEvidence(Evidence evidence) {
        this.evidences.remove(evidence);
    }

    /**
     * Renders the question followed by the title and snippet of each evidence.
     */
    @Override
    public String toString() {
        return render("?. ");
    }

    /**
     * Renders the question like {@link #toString()}, with {@code index} placed
     * in the leading label.
     *
     * @param index the number shown in front of the question
     * @return the rendered question and evidence
     */
    public String toString(int index) {
        return render("?" + index + ". ");
    }

    /** Builds the text shared by both {@code toString} variants, starting with {@code label}. */
    private String render(String label) {
        StringBuilder result = new StringBuilder();
        result.append(label).append(question).append("\n\n");
        for (Evidence evidence : this.evidences) {
            result.append("Title: ").append(evidence.getTitle()).append("\n");
            result.append("Snippet: ").append(evidence.getSnippet()).append("\n\n");
        }
        return result.toString();
    }

    public String getExpectAnswer() {
        return expectAnswer;
    }

    public void setExpectAnswer(String expectAnswer) {
        this.expectAnswer = expectAnswer;
    }

    public void setQuestionType(QuestionType questionType) {
        this.questionType = questionType;
    }

    public QuestionType getQuestionType() {
        return questionType;
    }

    public CandidateAnswerFilter getCandidateAnswerFilter() {
        return candidateAnswerFilter;
    }

    /**
     * Sets the filter applied by {@link #getAllCandidateAnswer()}.
     *
     * @param candidateAnswerFilter the filter to use; {@code null} disables filtering
     */
    public void setCandidateAnswerFilter(CandidateAnswerFilter candidateAnswerFilter) {
        this.candidateAnswerFilter = candidateAnswerFilter;
    }
}