diff --git a/base/sanitize.go b/base/sanitize.go index 27874ab..af14f3d 100644 --- a/base/sanitize.go +++ b/base/sanitize.go @@ -1,15 +1,19 @@ package base -// OnlyWordsAndNumbers is a transform -// function that will only let 0-1a-zA-Z, -// and spaces though -func OnlyWordsAndNumbers(r rune) bool { +import ( + "unicode" +) + +// OnlyAsciiWordsAndNumbers is a transform +// function that will only let 0-9a-zA-Z, +// and spaces through +func OnlyAsciiWordsAndNumbers(r rune) bool { switch { case r >= 'A' && r <= 'Z': return false case r >= 'a' && r <= 'z': return false - case r >= '0' && r <= '1': + case r >= '0' && r <= '9': return false case r == ' ': return false @@ -18,10 +22,17 @@ func OnlyWordsAndNumbers(r rune) bool { } } -// OnlyWords is a transform function +// OnlyWordsAndNumbers is a transform +// function that lets any unicode letter +// or digit through as well as spaces +func OnlyWordsAndNumbers(r rune) bool { + return !(r == ' ' || unicode.IsLetter(r) || unicode.IsDigit(r)) +} + +// OnlyAsciiWords is a transform function // that will only let a-zA-Z, and -// spaces though -func OnlyWords(r rune) bool { +// spaces through +func OnlyAsciiWords(r rune) bool { switch { case r >= 'A' && r <= 'Z': return false @@ -34,9 +45,16 @@ func OnlyWords(r rune) bool { } } -// OnlyLetters is a transform function +// OnlyWords is a transform function +// that lets any unicode letter through +// as well as spaces +func OnlyWords(r rune) bool { + return !(r == ' ' || unicode.IsLetter(r)) +} + +// OnlyAsciiLetters is a transform function // that will only let a-zA-Z through -func OnlyLetters(r rune) bool { +func OnlyAsciiLetters(r rune) bool { switch { case r >= 'A' && r <= 'Z': return false @@ -46,3 +64,9 @@ func OnlyLetters(r rune) bool { return true } } + +// OnlyLetters is a transform function +// that lets any unicode letter through +func OnlyLetters(r rune) bool { + return !unicode.IsLetter(r) +} diff --git a/base/sanitize_test.go b/base/sanitize_test.go index 699a80f..628e024 100644 --- a/base/sanitize_test.go +++ b/base/sanitize_test.go @@ -4,54 +4,93 @@ import ( "testing" "golang.org/x/text/transform" - - "github.com/stretchr/testify/assert" ) -func TestWordsAndNumbersShouldPass1(t *testing.T) { - s, _, _ := transform.String(transform.RemoveFunc(OnlyWordsAndNumbers), "THIS iz A L337 aNd Un'Sani~~~~tized sentence") - sanitized := []rune(s) +type testCase struct { + input string + expectedOutput string +} - for i := range sanitized { - assert.False(t, OnlyWordsAndNumbers(sanitized[i]), "Letter %v should be sanitized", sanitized[i]) +func TestWordsAndNumbers(t *testing.T) { + tests := []testCase{ + {"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L337 aNd UnSanitized sentence"}, + {"here're some unicode letters: --Æ.ÒÑ", "herere some unicode letters ÆÒÑ"}, + {")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""}, + } + for _, test := range tests { + s, _, _ := transform.String(transform.RemoveFunc(OnlyWordsAndNumbers), test.input) + if s != test.expectedOutput { + t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput) + } } } -func TestWordsAndNumbersShouldPass2(t *testing.T) { - s, _, _ := transform.String(transform.RemoveFunc(OnlyWordsAndNumbers), ")(*&^%$@!@#$%^&*(*&^%$#@#$%") - sanitized := []rune(s) - - assert.Equal(t, 0, len(sanitized), "Length of string should be 0") +func TestAsciiWordsAndNumbers(t *testing.T) { + tests := []testCase{ + {"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L337 aNd UnSanitized sentence"}, + {"here're some unicode letters: --Æ.ÒÑ", "herere some unicode letters "}, + {")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""}, + } + for _, test := range tests { + s, _, _ := transform.String(transform.RemoveFunc(OnlyAsciiWordsAndNumbers), test.input) + if s != test.expectedOutput { + t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput) + } + } } -func TestWordsShouldPass1(t *testing.T) { - s, _, _ := transform.String(transform.RemoveFunc(OnlyWords), "THIS iz A L337 aNd Un'Sani~~~~tized sentence") - sanitized := []rune(s) - - for i := range sanitized { - assert.False(t, OnlyWords(sanitized[i]), "Letter %v should be sanitized", sanitized[i]) +func TestWords(t *testing.T) { + tests := []testCase{ + {"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L aNd UnSanitized sentence"}, + {"here're some unicode letters: --Æ.ÒÑ", "herere some unicode letters ÆÒÑ"}, + {")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""}, + } + for _, test := range tests { + s, _, _ := transform.String(transform.RemoveFunc(OnlyWords), test.input) + if s != test.expectedOutput { + t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput) + } } } -func TestWordsShouldPass2(t *testing.T) { - s, _, _ := transform.String(transform.RemoveFunc(OnlyWords), "08765432123456789)(*&^%$@!@#$%^&*(*&^%$#@#$%") - sanitized := []rune(s) - - assert.Equal(t, 0, len(sanitized), "Length of string should be 0") +func TestAsciiWords(t *testing.T) { + tests := []testCase{ + {"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L aNd UnSanitized sentence"}, + {"here're some unicode letters: ÆÒÑ", "herere some unicode letters "}, + {")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""}, + } + for _, test := range tests { + s, _, _ := transform.String(transform.RemoveFunc(OnlyAsciiWords), test.input) + if s != test.expectedOutput { + t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput) + } + } } -func TestLettersShouldPass1(t *testing.T) { - s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), "THIS iz A L337 aNd Un'Sani~~~~tized sentence") - sanitized := []rune(s) - - for i := range sanitized { - assert.False(t, OnlyLetters(sanitized[i]), "Letter %v should be sanitized", sanitized[i]) +func TestLetters(t *testing.T) { + tests := []testCase{ + {"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THISizALaNdUnSanitizedsentence"}, + {"here're some unicode letters: --Æ.ÒÑ", "hereresomeunicodelettersÆÒÑ"}, + {")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""}, + } + for _, test := range tests { + s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), test.input) + if s != test.expectedOutput { + t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput) + } } } -func TestLettersShouldPass2(t *testing.T) { - s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), "0876543212 3456789)(*&^ %$@!@#$%^& *(*&^%$#@#$%") - sanitized := []rune(s) - - assert.Equal(t, 0, len(sanitized), "Length of string should be 0") +func TestAsciiLetters(t *testing.T) { + tests := []testCase{ + {"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THISizALaNdUnSanitizedsentence"}, + {"here're some unicode letters: --Æ.ÒÑ", "hereresomeunicodeletters"}, + {")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""}, + } + for _, test := range tests { + s, _, _ := transform.String(transform.RemoveFunc(OnlyAsciiLetters), test.input) + if s != test.expectedOutput { + t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput) + } + } }