diff --git a/tests/test_blob.py b/tests/test_blob.py
index f939fdc5..d63e5080 100644
--- a/tests/test_blob.py
+++ b/tests/test_blob.py
@@ -101,6 +101,10 @@ def test_lemmatize(self):
         wl = tb.WordList(["cat", "dogs", "oxen"])
         assert_equal(wl.lemmatize(), tb.WordList(['cat', 'dog', 'ox']))
 
+    def test_stem(self):  # only PorterStemmer (the default) is tested
+        wl = tb.WordList(["cat", "dogs", "oxen"])
+        assert_equal(wl.stem(), tb.WordList(['cat', 'dog', 'oxen']))
+
     def test_upper(self):
         wl = tb.WordList(self.words)
         assert_equal(wl.upper(), tb.WordList([w.upper() for w in self.words]))
@@ -914,6 +918,14 @@ def test_lemma(self):
         w = tb.Word("went", "VBD");
         assert_equal(w.lemma, "go")
 
+    def test_stem(self):  # only PorterStemmer (the default) is tested
+        w = tb.Word("cars")
+        assert_equal(w.stem(), "car")
+        w = tb.Word("wolves")
+        assert_equal(w.stem(), "wolv")
+        w = tb.Word("went")
+        assert_equal(w.stem(), "went")
+
     def test_synsets(self):
         w = tb.Word("car")
         assert_true(isinstance(w.synsets, (list, tuple)))
diff --git a/textblob/blob.py b/textblob/blob.py
index a7a48c25..35c83e39 100644
--- a/textblob/blob.py
+++ b/textblob/blob.py
@@ -148,6 +148,27 @@ def lemmatize(self, pos=None):
         lemmatizer = nltk.stem.WordNetLemmatizer()
         return lemmatizer.lemmatize(self.string, pos)
 
+    # Stemmer instances are created once, in the class body, and shared by all
+    # calls. PorterStemmer must be bound before ``stem`` is defined because it
+    # is used as that method's default argument.
+    PorterStemmer = nltk.stem.porter.PorterStemmer()
+    LancasterStemmer = nltk.stem.lancaster.LancasterStemmer()
+    SnowballStemmer = nltk.stem.snowball.SnowballStemmer("english")
+
+    # Stemming counterpart of ``lemmatize``, backed by NLTK stemmers.
+    def stem(self, stemmer=PorterStemmer):
+        """Stem a word using various NLTK stemmers. (Default: Porter Stemmer)
+
+        :param stemmer: Object whose ``stem`` method is called with the word's
+            string; defaults to the shared Porter stemmer instance. The
+            ``LancasterStemmer`` and ``SnowballStemmer`` class attributes can
+            be passed instead.
+        :returns: Whatever ``stemmer.stem`` returns for this word.
+
+        .. versionadded:: 0.12.0
+        """
+        return stemmer.stem(self.string)
+
     @cached_property
     def synsets(self):
         """The list of Synset objects for this Word.