Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,14 @@ public void parseText(String text, AhoCorasickDoubleArrayTrie.IHit<V> processor)
{
processor.hit(begin, i + 1, value);
}

/*如果是最后一位,这里不能直接跳出循环, 要继续从下一个字符开始判断*/
if (i == length - 1)
{
i = begin;
++begin;
state = this;
}
}
else
{
Expand Down Expand Up @@ -640,6 +648,14 @@ public void parseText(char[] text, AhoCorasickDoubleArrayTrie.IHit<V> processor)
{
processor.hit(begin, i + 1, value);
}

/*如果是最后一位,这里不能直接跳出循环, 要继续从下一个字符开始判断*/
if (i == length - 1)
{
i = begin;
++begin;
state = this;
}
}
else
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package com.hankcs.hanlp.collection.trie.bintrie;

import com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.HashSet;
import java.util.Set;

/**
 * Tests for the {@code parseText} overloads of {@link BinTrie}.
 *
 * <p>The dictionary consists of overlapping words that all occur inside the
 * text "一溜儿". The longest word spans the whole text, and several shorter
 * words are suffixes of it, so a scan that stops after reaching the last
 * character would fail to report them. Each test asserts that every dictionary
 * word is reported at least once.
 */
public class BinTrieParseTextTest {

    /** Small dictionary of overlapping words, taken from part of the core dict file. */
    private final String[] words = new String[]{"溜", "儿", "溜儿", "一溜儿", "一溜"};

    /** Trie under test; rebuilt before every test method. */
    private BinTrie<Integer> trie;

    /** Builds the trie, mapping each word to its index in {@link #words}. */
    @Before
    public void setup() {
        this.trie = new BinTrie<Integer>();
        for (int i = 0; i < words.length; i++) {
            this.trie.put(words[i], i);
        }
    }

    /** Every dictionary word must be reported by {@code parseText(String, IHit)}. */
    @Test
    public void testFullParse() {
        assertFullParse("一溜儿");
        // NOTE(review): the next two inputs are identical as written; confirm whether
        // they were meant to differ in their trailing whitespace.
        assertFullParse("一溜儿 ");
        assertFullParse("一溜儿 ");
    }

    /** Every dictionary word must be reported by {@code parseText(char[], IHit)} as well. */
    @Test
    public void testFullParseCharArray() {
        assertFullParseCharArray("一溜儿");
        assertFullParseCharArray("一溜儿 ");
    }

    /**
     * Parses {@code text} through the {@code String} overload and checks the hits.
     *
     * @param text the text to scan
     */
    private void assertFullParse(String text) {
        assertAllWordsFound(text, parseText(text));
    }

    /**
     * Parses {@code text} through the {@code char[]} overload and checks the hits.
     *
     * @param text the text to scan (converted to a {@code char[]} before parsing)
     */
    private void assertFullParseCharArray(String text) {
        assertAllWordsFound(text, parseChars(text.toCharArray()));
    }

    /**
     * Asserts that every dictionary word is among the reported hits.
     *
     * @param text   the scanned text, used only in the failure message
     * @param result the distinct substrings reported as hits
     */
    private void assertAllWordsFound(String text, Set<String> result) {
        // Make sure every word has been segmented out of the text.
        for (String word : words) {
            Assert.assertTrue(
                    "word '" + word + "' was not found in text '" + text + "'; hits: " + result,
                    result.contains(word));
        }
    }

    /**
     * Collects the distinct substrings that the {@code String} overload reports as hits.
     *
     * @param text the text to scan
     * @return the set of matched substrings
     */
    private Set<String> parseText(final String text) {
        final Set<String> result = new HashSet<String>(words.length);
        trie.parseText(text, new AhoCorasickDoubleArrayTrie.IHit<Integer>() {
            @Override
            public void hit(int begin, int end, Integer value) {
                // The substring is cut as [begin, end), matching the offsets passed to hit().
                result.add(text.substring(begin, end));
            }
        });

        return result;
    }

    /**
     * Collects the distinct substrings that the {@code char[]} overload reports as hits.
     *
     * @param text the characters to scan
     * @return the set of matched substrings
     */
    private Set<String> parseChars(final char[] text) {
        final Set<String> result = new HashSet<String>(words.length);
        trie.parseText(text, new AhoCorasickDoubleArrayTrie.IHit<Integer>() {
            @Override
            public void hit(int begin, int end, Integer value) {
                // Same [begin, end) convention as the String variant.
                result.add(new String(text, begin, end - begin));
            }
        });

        return result;
    }

}