diff --git a/src/main/java/com/hankcs/hanlp/collection/trie/bintrie/BinTrie.java b/src/main/java/com/hankcs/hanlp/collection/trie/bintrie/BinTrie.java
index b4b92e0ee..7a152c8ba 100644
--- a/src/main/java/com/hankcs/hanlp/collection/trie/bintrie/BinTrie.java
+++ b/src/main/java/com/hankcs/hanlp/collection/trie/bintrie/BinTrie.java
@@ -608,6 +608,14 @@ public void parseText(String text, AhoCorasickDoubleArrayTrie.IHit processor)
                 {
                     processor.hit(begin, i + 1, value);
                 }
+
+                /* Last character reached while still inside a match: do not let the loop end here; restart matching from the root at begin + 1. */
+                if (i == length - 1)
+                {
+                    i = begin;
+                    ++begin;
+                    state = this;
+                }
             }
             else
             {
@@ -640,6 +648,14 @@ public void parseText(char[] text, AhoCorasickDoubleArrayTrie.IHit processor)
                 {
                     processor.hit(begin, i + 1, value);
                 }
+
+                /* Last character reached while still inside a match: do not let the loop end here; restart matching from the root at begin + 1. */
+                if (i == length - 1)
+                {
+                    i = begin;
+                    ++begin;
+                    state = this;
+                }
             }
             else
             {
diff --git a/src/test/java/com/hankcs/hanlp/collection/trie/bintrie/BinTrieParseTextTest.java b/src/test/java/com/hankcs/hanlp/collection/trie/bintrie/BinTrieParseTextTest.java
new file mode 100644
index 000000000..7f48d8a7a
--- /dev/null
+++ b/src/test/java/com/hankcs/hanlp/collection/trie/bintrie/BinTrieParseTextTest.java
@@ -0,0 +1,55 @@
+package com.hankcs.hanlp.collection.trie.bintrie;
+
+import com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Checks that {@code BinTrie.parseText} reports every dictionary word occurring in the text,
+ * including words that start after the beginning of a longer match reaching the end of the text.
+ */
+public class BinTrieParseTextTest {
+
+    private final String[] words = new String[]{"溜", "儿", "溜儿", "一溜儿", "一溜"};
+    private BinTrie<Integer> trie;
+
+    /** Builds a small dictionary (an excerpt of the core dict) mapping each word to its index in {@code words}. */
+    @Before
+    public void setup() {
+        this.trie = new BinTrie<Integer>();
+        for (int i = 0; i < words.length; i++) {
+            this.trie.put(words[i], i);
+        }
+    }
+
+    @Test
+    public void testFullParse() {
+        assertFullParse("一溜儿");
+        assertFullParse("一溜儿 ");
+        assertFullParse("一溜儿 ");
+    }
+
+    /** Asserts that every dictionary word is reported as a hit somewhere in {@code text}. */
+    private void assertFullParse(String text) {
+        Set<String> result = parseText(text);
+        for (String word : words) {
+            Assert.assertTrue("missing \"" + word + "\" in \"" + text + "\"", result.contains(word));
+        }
+    }
+
+    /** Runs the trie over {@code text} and collects the distinct substrings reported as hits. */
+    private Set<String> parseText(final String text) {
+        final Set<String> result = new HashSet<String>(words.length);
+        trie.parseText(text, new AhoCorasickDoubleArrayTrie.IHit<Integer>() {
+            @Override
+            public void hit(int begin, int end, Integer value) {
+                result.add(text.substring(begin, end));
+            }
+        });
+        return result;
+    }
+}