Skip to content

Commit 86c1fda

Browse files
authored
Merge pull request #11 from piazzamp/master
handle unicode in sanitization functions
2 parents e2563be + 9ec88c7 commit 86c1fda

File tree

2 files changed

+107
-44
lines changed

2 files changed

+107
-44
lines changed

base/sanitize.go

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
package base
22

3-
// OnlyWordsAndNumbers is a transform
4-
// function that will only let 0-1a-zA-Z,
5-
// and spaces though
6-
func OnlyWordsAndNumbers(r rune) bool {
3+
import (
4+
"unicode"
5+
)
6+
7+
// OnlyAsciiWordsAndNumbers is a transform
8+
// function that will only let 0-9a-zA-Z,
9+
// and spaces through
10+
func OnlyAsciiWordsAndNumbers(r rune) bool {
711
switch {
812
case r >= 'A' && r <= 'Z':
913
return false
1014
case r >= 'a' && r <= 'z':
1115
return false
12-
case r >= '0' && r <= '1':
16+
case r >= '0' && r <= '9':
1317
return false
1418
case r == ' ':
1519
return false
@@ -18,10 +22,17 @@ func OnlyWordsAndNumbers(r rune) bool {
1822
}
1923
}
2024

21-
// OnlyWords is a transform function
25+
// OnlyWordsAndNumbers is a removal predicate: it reports true for
// every rune that should be stripped and false for every rune that
// should be kept. The space character and any rune that the unicode
// package classifies as a letter or a decimal digit are kept;
// everything else is flagged for removal.
func OnlyWordsAndNumbers(r rune) bool {
	if r == ' ' {
		return false
	}
	keep := unicode.IsLetter(r) || unicode.IsDigit(r)
	return !keep
}
31+
32+
// OnlyAsciiWords is a transform function
2233
// that will only let a-zA-Z, and
23-
// spaces though
24-
func OnlyWords(r rune) bool {
34+
// spaces through
35+
func OnlyAsciiWords(r rune) bool {
2536
switch {
2637
case r >= 'A' && r <= 'Z':
2738
return false
@@ -34,9 +45,16 @@ func OnlyWords(r rune) bool {
3445
}
3546
}
3647

37-
// OnlyLetters is a transform function
48+
// OnlyWords is a removal predicate: it reports true for every rune
// that should be stripped. A rune is kept (false) only when it is
// the space character or is classified as a letter by the unicode
// package; digits, punctuation and all other runes are removed.
func OnlyWords(r rune) bool {
	return r != ' ' && !unicode.IsLetter(r)
}
54+
55+
// OnlyAsciiLetters is a transform function
3856
// that will only let a-zA-Z through
39-
func OnlyLetters(r rune) bool {
57+
func OnlyAsciiLetters(r rune) bool {
4058
switch {
4159
case r >= 'A' && r <= 'Z':
4260
return false
@@ -46,3 +64,9 @@ func OnlyLetters(r rune) bool {
4664
return true
4765
}
4866
}
67+
68+
// OnlyLetters is a removal predicate: it reports true for every
// rune that should be stripped. Only runes classified as letters by
// the unicode package are kept (false); spaces, digits and all
// other runes are removed.
func OnlyLetters(r rune) bool {
	isLetter := unicode.IsLetter(r)
	return !isLetter
}

base/sanitize_test.go

Lines changed: 73 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -4,54 +4,93 @@ import (
44
"testing"
55

66
"golang.org/x/text/transform"
7-
8-
"github.com/stretchr/testify/assert"
97
)
108

11-
func TestWordsAndNumbersShouldPass1(t *testing.T) {
12-
s, _, _ := transform.String(transform.RemoveFunc(OnlyWordsAndNumbers), "THIS iz A L337 aNd Un'Sani~~~~tized sentence")
13-
sanitized := []rune(s)
9+
// testCase pairs a raw input string with the string the sanitizer
// under test is expected to produce from it.
type testCase struct {
	input, expectedOutput string
}
1413

15-
for i := range sanitized {
16-
assert.False(t, OnlyWordsAndNumbers(sanitized[i]), "Letter %v should be sanitized", sanitized[i])
14+
func TestWordsAndNumbers(t *testing.T) {
15+
tests := []testCase{
16+
{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L337 aNd UnSanitized sentence"},
17+
{"here're some unicode letters: --Æ.ÒÑ", "herere some unicode letters ÆÒÑ"},
18+
{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
19+
}
20+
for _, test := range tests {
21+
s, _, _ := transform.String(transform.RemoveFunc(OnlyWordsAndNumbers), test.input)
22+
if s != test.expectedOutput {
23+
t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
24+
}
1725
}
1826
}
1927

20-
func TestWordsAndNumbersShouldPass2(t *testing.T) {
21-
s, _, _ := transform.String(transform.RemoveFunc(OnlyWordsAndNumbers), ")(*&^%$@!@#$%^&*(*&^%$#@#$%")
22-
sanitized := []rune(s)
23-
24-
assert.Equal(t, 0, len(sanitized), "Length of string should be 0")
28+
func TestAsciiWordsAndNumbers(t *testing.T) {
29+
tests := []testCase{
30+
{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L337 aNd UnSanitized sentence"},
31+
{"here're some unicode letters: --Æ.ÒÑ", "herere some unicode letters "},
32+
{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
33+
}
34+
for _, test := range tests {
35+
s, _, _ := transform.String(transform.RemoveFunc(OnlyAsciiWordsAndNumbers), test.input)
36+
if s != test.expectedOutput {
37+
t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
38+
}
39+
}
2540
}
2641

27-
func TestWordsShouldPass1(t *testing.T) {
28-
s, _, _ := transform.String(transform.RemoveFunc(OnlyWords), "THIS iz A L337 aNd Un'Sani~~~~tized sentence")
29-
sanitized := []rune(s)
30-
31-
for i := range sanitized {
32-
assert.False(t, OnlyWords(sanitized[i]), "Letter %v should be sanitized", sanitized[i])
42+
func TestWords(t *testing.T) {
43+
tests := []testCase{
44+
{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L aNd UnSanitized sentence"},
45+
{"here're some unicode letters: --Æ.ÒÑ", "herere some unicode letters ÆÒÑ"},
46+
{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
47+
}
48+
for _, test := range tests {
49+
s, _, _ := transform.String(transform.RemoveFunc(OnlyWords), test.input)
50+
if s != test.expectedOutput {
51+
t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
52+
}
3353
}
3454
}
3555

36-
func TestWordsShouldPass2(t *testing.T) {
37-
s, _, _ := transform.String(transform.RemoveFunc(OnlyWords), "08765432123456789)(*&^%$@!@#$%^&*(*&^%$#@#$%")
38-
sanitized := []rune(s)
39-
40-
assert.Equal(t, 0, len(sanitized), "Length of string should be 0")
56+
func TestAsciiWords(t *testing.T) {
57+
tests := []testCase{
58+
{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L aNd UnSanitized sentence"},
59+
{"here're some unicode letters: ÆÒÑ", "herere some unicode letters "},
60+
{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
61+
}
62+
for _, test := range tests {
63+
s, _, _ := transform.String(transform.RemoveFunc(OnlyAsciiWords), test.input)
64+
if s != test.expectedOutput {
65+
t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
66+
}
67+
}
4168
}
4269

43-
func TestLettersShouldPass1(t *testing.T) {
44-
s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), "THIS iz A L337 aNd Un'Sani~~~~tized sentence")
45-
sanitized := []rune(s)
46-
47-
for i := range sanitized {
48-
assert.False(t, OnlyLetters(sanitized[i]), "Letter %v should be sanitized", sanitized[i])
70+
func TestLetters(t *testing.T) {
71+
tests := []testCase{
72+
{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THISizALaNdUnSanitizedsentence"},
73+
{"here're some unicode letters: --Æ.ÒÑ", "hereresomeunicodelettersÆÒÑ"},
74+
{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
75+
}
76+
for _, test := range tests {
77+
s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), test.input)
78+
if s != test.expectedOutput {
79+
t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
80+
}
4981
}
5082
}
5183

52-
func TestLettersShouldPass2(t *testing.T) {
53-
s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), "0876543212 3456789)(*&^ %$@!@#$%^& *(*&^%$#@#$%")
54-
sanitized := []rune(s)
55-
56-
assert.Equal(t, 0, len(sanitized), "Length of string should be 0")
84+
func TestAsciiLetters(t *testing.T) {
85+
tests := []testCase{
86+
{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THISizALaNdUnSanitizedsentence"},
87+
{"here're some unicode letters: --Æ.ÒÑ", "hereresomeunicodeletters"},
88+
{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
89+
}
90+
for _, test := range tests {
91+
s, _, _ := transform.String(transform.RemoveFunc(OnlyAsciiLetters), test.input)
92+
if s != test.expectedOutput {
93+
t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
94+
}
95+
}
5796
}

0 commit comments

Comments
 (0)