Skip to content

Commit ffa9c6d

Browse files
nikulshr authored and sloria committed
Added subroutine to run an NLTK Stemmer (sloria#149)
* Added subroutine to run an NLTK Stemmer
* Added NLTK Stemmer + test functions
* Added NLTK Stemmer + tests
* Fixed NLTK Stemmer
1 parent 90cc87a commit ffa9c6d

File tree

2 files changed

+25
-0
lines changed

2 files changed

+25
-0
lines changed

tests/test_blob.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,10 @@ def test_lemmatize(self):
101101
wl = tb.WordList(["cat", "dogs", "oxen"])
102102
assert_equal(wl.lemmatize(), tb.WordList(['cat', 'dog', 'ox']))
103103

104+
def test_stem(self):  # Only PorterStemmer is tested.
105+
wl = tb.WordList(["cat", "dogs", "oxen"])
106+
# "oxen" is expected to stay unchanged here, whereas test_lemmatize
# above expects "ox" for the same input.
assert_equal(wl.stem(), tb.WordList(['cat', 'dog', 'oxen']))
107+
104108
def test_upper(self):
105109
wl = tb.WordList(self.words)
106110
assert_equal(wl.upper(), tb.WordList([w.upper() for w in self.words]))
@@ -914,6 +918,14 @@ def test_lemma(self):
914918
w = tb.Word("went", "VBD");
915919
assert_equal(w.lemma, "go")
916920

921+
def test_stem(self):  # Only PorterStemmer is tested.
922+
w = tb.Word("cars")
923+
assert_equal(w.stem(), "car")
924+
w = tb.Word("wolves")
925+
# The expected stem is a truncated form, not a dictionary word.
assert_equal(w.stem(), "wolv")
926+
w = tb.Word("went")
927+
# Irregular form: the stem is unchanged, whereas test_lemma expects "go".
assert_equal(w.stem(), "went")
928+
917929
def test_synsets(self):
918930
w = tb.Word("car")
919931
assert_true(isinstance(w.synsets, (list, tuple)))

textblob/blob.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,19 @@ def lemmatize(self, pos=None):
148148
lemmatizer = nltk.stem.WordNetLemmatizer()
149149
return lemmatizer.lemmatize(self.string, pos)
150150

151+
# Pre-built NLTK stemmer instances (note the trailing call: these names are
# bound to objects, not classes). PorterStemmer is the default argument of
# stem() below; SnowballStemmer is constructed for "english".
PorterStemmer = nltk.stem.porter.PorterStemmer()
152+
LancasterStemmer = nltk.stem.lancaster.LancasterStemmer()
153+
SnowballStemmer = nltk.stem.snowball.SnowballStemmer("english")
154+
155+
# Stemming counterpart to lemmatize() above.
156+
# Delegates to an NLTK stemmer object.
157+
def stem(self, stemmer=PorterStemmer):
158+
"""Stem the word with an NLTK stemmer (default: Porter stemmer).
159+

:param stemmer: Object whose ``stem(string)`` method is called with this
word's ``self.string``. Defaults to the ``PorterStemmer`` instance
created above.
:return: Whatever ``stemmer.stem`` returns for ``self.string``.

160+
.. versionadded:: 0.12.0
161+
"""
162+
return stemmer.stem(self.string)
163+
151164
@cached_property
152165
def synsets(self):
153166
"""The list of Synset objects for this Word.

0 commit comments

Comments
 (0)