Skip to content

Commit 4923458

Browse files
committed
Merge pull request sloria#97 from jonmcoe/s-singular-fix
s-singular fix
2 parents e479342 + d18a806 commit 4923458

File tree

2 files changed

+55
-8
lines changed

2 files changed

+55
-8
lines changed

tests/test_inflect.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from nose.tools import assert_equals, assert_true
2+
from unittest import TestCase
3+
4+
5+
from textblob.en.inflect import (
6+
plural_categories,
7+
singular_ie,
8+
singular_irregular,
9+
singular_uncountable,
10+
singular_uninflected,
11+
singularize,
12+
pluralize
13+
)
14+
15+
16+
class InflectTestCase(TestCase):
    """Checks for the English ``pluralize`` / ``singularize`` helpers.

    Every test method uses the ``test_`` prefix so that the standard
    ``unittest`` loader and pytest discover it, not only nose (whose name
    pattern also accepts a ``_test`` suffix). Assertions go through the
    ``TestCase`` methods rather than nose's deprecated ``assert_equals`` alias.
    """

    def test_s_singular_pluralize(self):
        # A singular noun that already ends in -s takes -es in the plural.
        self.assertEqual(pluralize('lens'), 'lenses')

    def test_s_singular_singularize(self):
        self.assertEqual(singularize('lenses'), 'lens')

    def test_diagnoses_singularize(self):
        # An -is/-es noun must come back as -is, not lose only its "es".
        self.assertEqual(singularize('diagnoses'), 'diagnosis')

    def test_bus_pluralize(self):
        self.assertEqual(pluralize('bus'), 'buses')

    def test_all_singular_s(self):
        # Round trip every word of the s-singular category.
        for w in plural_categories['s-singular']:
            self.assertEqual(singularize(pluralize(w)), w)

    def test_all_singular_ie(self):
        # Each -ie word pluralizes to -ies and singularizes back to itself.
        for w in singular_ie:
            plural = pluralize(w)
            self.assertTrue(plural.endswith('ies'))
            self.assertEqual(singularize(plural), w)

    def test_all_singular_irregular(self):
        # singular_irregular is keyed by plural form; pluralizing one of its
        # singular values must land on a key that maps back to that value.
        for singular_w in singular_irregular.values():
            plural = pluralize(singular_w)
            # Report an unknown plural as a test failure, not a KeyError.
            self.assertIn(plural, singular_irregular)
            self.assertEqual(singular_irregular[plural], singular_w)

textblob/en/inflect.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@
6262
# 4) Words that do not inflect.
6363
[["$", "", "uninflected", False],
6464
["$", "", "uncountable", False],
65-
["s$", "s", "s-singular", False],
6665
["fish$", "fish", None, False],
6766
["([- ])bass$", "\\1bass", None, False],
6867
["ois$", "ois", None, False],
@@ -137,10 +136,11 @@
137136
["$", "i", "-i-classical", True],
138137
["$", "im", "-im-classical", True]
139138
],
140-
# 9) -ch, -sh and -ss take -es in the plural (churches, classes).
139+
# 9) -ch, -sh, -ss, -x and the s-singular group take -es in the plural (churches, classes, boxes, lenses).
141140
[["([cs])h$", "\\1hes", None, False],
142141
["ss$", "sses", None, False],
143-
["x$", "xes", None, False]
142+
["x$", "xes", None, False],
143+
["s$", "ses", "s-singular", False]
144144
],
145145
# 10) Certain words ending in -f or -fe take -ves in the plural (lives, wolves).
146146
[["([aeo]l)f$", "\\1ves", None, False],
@@ -183,13 +183,13 @@
183183
"pliers", "proceedings", "rabies", "salmon", "scissors", "series", "shears", "species", "swine",
184184
"trout", "tuna", "whiting", "wildebeest"],
185185
"uncountable": [
186-
"advice", "bread", "butter", "cheese", "electricity", "equipment", "fruit", "furniture",
186+
"advice", "bread", "butter", "cannabis", "cheese", "electricity", "equipment", "fruit", "furniture",
187187
"garbage", "gravel", "happiness", "information", "ketchup", "knowledge", "love", "luggage",
188188
"mathematics", "mayonnaise", "meat", "mustard", "news", "progress", "research", "rice",
189189
"sand", "software", "understanding", "water"],
190190
"s-singular": [
191-
"acropolis", "aegis", "alias", "asbestos", "bathos", "bias", "caddis", "cannabis", "canvas",
192-
"chaos", "cosmos", "dais", "digitalis", "epidermis", "ethos", "gas", "glottis", "glottis",
191+
"acropolis", "aegis", "alias", "asbestos", "bathos", "bias", "bus", "caddis", "canvas",
192+
"chaos", "christmas", "cosmos", "dais", "digitalis", "epidermis", "ethos", "gas", "glottis",
193193
"ibis", "lens", "mantis", "marquis", "metropolis", "pathos", "pelvis", "polis", "rhinoceros",
194194
"sassafras", "trellis"],
195195
"ex-ices": ["codex", "murex", "silex"],
@@ -361,14 +361,14 @@ def pluralize(word, pos=NOUN, custom={}, classical=True):
361361

362362
singular_uninflected = [
363363
"aircraft", "antelope", "bison", "bream", "breeches", "britches", "carp", "cattle", "chassis",
364-
"christmas", "clippers", "cod", "contretemps", "corps", "debris", "diabetes", "djinn", "eland",
364+
"clippers", "cod", "contretemps", "corps", "debris", "diabetes", "djinn", "eland",
365365
"elk", "flounder", "gallows", "georgia", "graffiti", "headquarters", "herpes", "high-jinks",
366366
"homework", "innings", "jackanapes", "mackerel", "measles", "mews", "moose", "mumps", "news",
367367
"offspring", "pincers", "pliers", "proceedings", "rabies", "salmon", "scissors", "series",
368368
"shears", "species", "swine", "swiss", "trout", "tuna", "whiting", "wildebeest"
369369
]
370370
singular_uncountable = [
371-
"advice", "bread", "butter", "cheese", "electricity", "equipment", "fruit", "furniture",
371+
"advice", "bread", "butter", "cannabis", "cheese", "electricity", "equipment", "fruit", "furniture",
372372
"garbage", "gravel", "happiness", "information", "ketchup", "knowledge", "love", "luggage",
373373
"mathematics", "mayonnaise", "meat", "mustard", "news", "progress", "research", "rice", "sand",
374374
"software", "understanding", "water"
@@ -380,6 +380,9 @@ def pluralize(word, pos=NOUN, custom={}, classical=True):
380380
"pixie", "quickie", "reverie", "rookie", "softie", "sortie", "stoolie", "sweetie", "techie",
381381
"^tie", "toughie", "valkyrie", "veggie", "weenie", "yuppie", "zombie"
382382
]
383+
singular_s = plural_categories['s-singular']
384+
385+
# Irregular plurals: each key is a plural form, each value is its singular.
383386
singular_irregular = {
384387
"men": "man",
385388
"people": "person",
@@ -449,6 +452,9 @@ def singularize(word, pos=NOUN, custom={}):
449452
for w in singular_ie:
450453
if lower.endswith(w+"s"):
451454
return w
455+
for w in singular_s:
456+
if lower.endswith(w + 'es'):
457+
return w
452458
for w in list(singular_irregular.keys()):
453459
if lower.endswith(w):
454460
return re.sub('(?i)'+w+'$', singular_irregular[w], word)

0 commit comments

Comments
 (0)