Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 34 additions & 10 deletions base/sanitize.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
package base

// OnlyWordsAndNumbers is a transform
// function that will only let 0-1a-zA-Z,
// and spaces though
func OnlyWordsAndNumbers(r rune) bool {
import (
"unicode"
)

// OnlyAsciiWordsAndNumbers is a transform
// function that will only let 0-9a-zA-Z,
// and spaces through
func OnlyAsciiWordsAndNumbers(r rune) bool {
switch {
case r >= 'A' && r <= 'Z':
return false
case r >= 'a' && r <= 'z':
return false
case r >= '0' && r <= '1':
case r >= '0' && r <= '9':
return false
case r == ' ':
return false
Expand All @@ -18,10 +22,17 @@ func OnlyWordsAndNumbers(r rune) bool {
}
}

// OnlyWords is a transform function
// OnlyWordsAndNumbers is a removal predicate:
// it reports false for a space or any unicode
// letter or digit (so those runes are kept) and
// true for every other rune.
func OnlyWordsAndNumbers(r rune) bool {
	switch {
	case r == ' ', unicode.IsLetter(r), unicode.IsDigit(r):
		return false
	default:
		return true
	}
}

// OnlyAsciiWords is a transform function
// that will only let a-zA-Z, and
// spaces though
func OnlyWords(r rune) bool {
// spaces through
func OnlyAsciiWords(r rune) bool {
switch {
case r >= 'A' && r <= 'Z':
return false
Expand All @@ -34,9 +45,16 @@ func OnlyWords(r rune) bool {
}
}

// OnlyLetters is a transform function
// OnlyWords is a removal predicate: it reports
// false for a space or any unicode letter (so
// those runes are kept) and true for every
// other rune, digits included.
func OnlyWords(r rune) bool {
	if r == ' ' {
		return false
	}
	if unicode.IsLetter(r) {
		return false
	}
	return true
}

// OnlyAsciiLetters is a transform function
// that will only let a-zA-Z through
func OnlyLetters(r rune) bool {
func OnlyAsciiLetters(r rune) bool {
switch {
case r >= 'A' && r <= 'Z':
return false
Expand All @@ -46,3 +64,9 @@ func OnlyLetters(r rune) bool {
return true
}
}

// OnlyLetters is a removal predicate: it reports
// false for any unicode letter (so letters are
// kept) and true for every other rune, spaces
// and digits included.
func OnlyLetters(r rune) bool {
	if unicode.IsLetter(r) {
		return false
	}
	return true
}
107 changes: 73 additions & 34 deletions base/sanitize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,54 +4,93 @@ import (
"testing"

"golang.org/x/text/transform"

"github.com/stretchr/testify/assert"
)

func TestWordsAndNumbersShouldPass1(t *testing.T) {
s, _, _ := transform.String(transform.RemoveFunc(OnlyWordsAndNumbers), "THIS iz A L337 aNd Un'Sani~~~~tized sentence")
sanitized := []rune(s)
// testCase is one row of a table-driven sanitizer test: a raw
// input string paired with the string the test expects back.
type testCase struct {
// input is the text passed through the transform under test.
input string
// expectedOutput is the text the transformed input is compared against.
expectedOutput string
}

for i := range sanitized {
assert.False(t, OnlyWordsAndNumbers(sanitized[i]), "Letter %v should be sanitized", sanitized[i])
// TestWordsAndNumbers removes every rune flagged by OnlyWordsAndNumbers
// from each input and compares the result with the expected output:
// spaces, unicode letters and digits are expected to survive.
func TestWordsAndNumbers(t *testing.T) {
	tests := []testCase{
		{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L337 aNd UnSanitized sentence"},
		{"here're some unicode letters: --Æ.ÒÑ", "herere some unicode letters ÆÒÑ"},
		{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
	}
	for _, test := range tests {
		s, _, err := transform.String(transform.RemoveFunc(OnlyWordsAndNumbers), test.input)
		if err != nil {
			// Report the transform failure itself rather than letting it
			// surface as a misleading output mismatch.
			t.Errorf("transform.String(%q) returned unexpected error: %v", test.input, err)
			continue
		}
		if s != test.expectedOutput {
			t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
		}
	}
}

func TestWordsAndNumbersShouldPass2(t *testing.T) {
s, _, _ := transform.String(transform.RemoveFunc(OnlyWordsAndNumbers), ")(*&^%$@!@#$%^&*(*&^%$#@#$%")
sanitized := []rune(s)

assert.Equal(t, 0, len(sanitized), "Length of string should be 0")
// TestAsciiWordsAndNumbers removes every rune flagged by
// OnlyAsciiWordsAndNumbers from each input and compares the result with
// the expected output: the non-ASCII letters are expected to be stripped.
func TestAsciiWordsAndNumbers(t *testing.T) {
	tests := []testCase{
		{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L337 aNd UnSanitized sentence"},
		{"here're some unicode letters: --Æ.ÒÑ", "herere some unicode letters "},
		{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
	}
	for _, test := range tests {
		s, _, err := transform.String(transform.RemoveFunc(OnlyAsciiWordsAndNumbers), test.input)
		if err != nil {
			// Report the transform failure itself rather than letting it
			// surface as a misleading output mismatch.
			t.Errorf("transform.String(%q) returned unexpected error: %v", test.input, err)
			continue
		}
		if s != test.expectedOutput {
			t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
		}
	}
}

func TestWordsShouldPass1(t *testing.T) {
s, _, _ := transform.String(transform.RemoveFunc(OnlyWords), "THIS iz A L337 aNd Un'Sani~~~~tized sentence")
sanitized := []rune(s)

for i := range sanitized {
assert.False(t, OnlyWords(sanitized[i]), "Letter %v should be sanitized", sanitized[i])
// TestWords removes every rune flagged by OnlyWords from each input and
// compares the result with the expected output: spaces and unicode
// letters are expected to survive, digits are not.
func TestWords(t *testing.T) {
	tests := []testCase{
		{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L aNd UnSanitized sentence"},
		{"here're some unicode letters: --Æ.ÒÑ", "herere some unicode letters ÆÒÑ"},
		{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
	}
	for _, test := range tests {
		s, _, err := transform.String(transform.RemoveFunc(OnlyWords), test.input)
		if err != nil {
			// Report the transform failure itself rather than letting it
			// surface as a misleading output mismatch.
			t.Errorf("transform.String(%q) returned unexpected error: %v", test.input, err)
			continue
		}
		if s != test.expectedOutput {
			t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
		}
	}
}

func TestWordsShouldPass2(t *testing.T) {
s, _, _ := transform.String(transform.RemoveFunc(OnlyWords), "08765432123456789)(*&^%$@!@#$%^&*(*&^%$#@#$%")
sanitized := []rune(s)

assert.Equal(t, 0, len(sanitized), "Length of string should be 0")
// TestAsciiWords removes every rune flagged by OnlyAsciiWords from each
// input and compares the result with the expected output: digits and
// the non-ASCII letters are expected to be stripped.
func TestAsciiWords(t *testing.T) {
	tests := []testCase{
		{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THIS iz A L aNd UnSanitized sentence"},
		{"here're some unicode letters: ÆÒÑ", "herere some unicode letters "},
		{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
	}
	for _, test := range tests {
		s, _, err := transform.String(transform.RemoveFunc(OnlyAsciiWords), test.input)
		if err != nil {
			// Report the transform failure itself rather than letting it
			// surface as a misleading output mismatch.
			t.Errorf("transform.String(%q) returned unexpected error: %v", test.input, err)
			continue
		}
		if s != test.expectedOutput {
			t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
		}
	}
}

func TestLettersShouldPass1(t *testing.T) {
s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), "THIS iz A L337 aNd Un'Sani~~~~tized sentence")
sanitized := []rune(s)

for i := range sanitized {
assert.False(t, OnlyLetters(sanitized[i]), "Letter %v should be sanitized", sanitized[i])
// TestLetters removes every rune flagged by OnlyLetters from each input
// and compares the result with the expected output: only unicode
// letters are expected to survive, so spaces and digits disappear.
func TestLetters(t *testing.T) {
	tests := []testCase{
		{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THISizALaNdUnSanitizedsentence"},
		{"here're some unicode letters: --Æ.ÒÑ", "hereresomeunicodelettersÆÒÑ"},
		{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
	}
	for _, test := range tests {
		s, _, err := transform.String(transform.RemoveFunc(OnlyLetters), test.input)
		if err != nil {
			// Report the transform failure itself rather than letting it
			// surface as a misleading output mismatch.
			t.Errorf("transform.String(%q) returned unexpected error: %v", test.input, err)
			continue
		}
		if s != test.expectedOutput {
			t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
		}
	}
}

func TestLettersShouldPass2(t *testing.T) {
s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), "0876543212 3456789)(*&^ %$@!@#$%^& *(*&^%$#@#$%")
sanitized := []rune(s)

assert.Equal(t, 0, len(sanitized), "Length of string should be 0")
// TestAsciiLetters removes every rune flagged by OnlyAsciiLetters from
// each input and compares the result with the expected output: spaces,
// digits and the non-ASCII letters are expected to be stripped.
func TestAsciiLetters(t *testing.T) {
	tests := []testCase{
		{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THISizALaNdUnSanitizedsentence"},
		{"here're some unicode letters: --Æ.ÒÑ", "hereresomeunicodeletters"},
		{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
	}
	for _, test := range tests {
		s, _, err := transform.String(transform.RemoveFunc(OnlyAsciiLetters), test.input)
		if err != nil {
			// Report the transform failure itself rather than letting it
			// surface as a misleading output mismatch.
			t.Errorf("transform.String(%q) returned unexpected error: %v", test.input, err)
			continue
		}
		if s != test.expectedOutput {
			t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
		}
	}
}