// tokenizers_test.ts (forked from NaturalNode/natural)
import {
WordTokenizer,
TreebankWordTokenizer,
RegexpTokenizer,
WordPunctTokenizer,
SentenceTokenizer
} from '../lib/natural/tokenizers'
// Tokenizer usage examples.
// Each section builds one tokenizer, runs it on a sample string and logs the
// resulting tokens. A trailing comment, where present, records the expected output.

// WordTokenizer
// NOTE(review): expected output is not recorded here; the upstream README
// shows [ 'your', 'dog', 'has', 'fleas' ] (confirm before relying on it).
const wordTokenizer = new WordTokenizer()
console.log(wordTokenizer.tokenize('your dog has fleas.'))

// TreebankWordTokenizer
const treebankTokenizer = new TreebankWordTokenizer()
console.log(treebankTokenizer.tokenize("my dog hasn't any fleas."))
// [ 'my', 'dog', 'has', 'n\'t', 'any', 'fleas', '.' ]

// RegexpTokenizer, configured with a hyphen pattern
const hyphenTokenizer = new RegexpTokenizer({ pattern: /-/ })
console.log(hyphenTokenizer.tokenize('flea-dog'))
// [ 'flea', 'dog' ]

// WordPunctTokenizer
const wordPunctTokenizer = new WordPunctTokenizer()
console.log(wordPunctTokenizer.tokenize("my dog hasn't any fleas."))
// [ 'my', 'dog', 'hasn', '\'', 't', 'any', 'fleas', '.' ]

// SentenceTokenizer
const sentenceTokenizer = new SentenceTokenizer()
console.log(sentenceTokenizer.tokenize('One sentence. Another sentence.'))
// [ 'One sentence.', 'Another sentence.' ]