Skip to content

Commit ac88a7c

Browse files
committed
Make diffCleanupSemanticScore a regular function
1 parent 5aee5ba commit ac88a7c

File tree

1 file changed

+45
-55
lines changed

1 file changed

+45
-55
lines changed

diffmatchpatch/diff.go

Lines changed: 45 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -785,65 +785,55 @@ var (
785785
blanklineStartRegex = regexp.MustCompile(`^\r?\n\r?\n`)
786786
)
787787

788+
// diffCleanupSemanticScore computes a score representing whether the internal boundary falls on logical boundaries. Scores range from 6 (best) to 0 (worst). Closure, but does not reference any external variables.
789+
func diffCleanupSemanticScore(one, two string) int {
790+
if len(one) == 0 || len(two) == 0 {
791+
// Edges are the best.
792+
return 6
793+
}
794+
795+
// Each port of this function behaves slightly differently due to
796+
// subtle differences in each language's definition of things like
797+
// 'whitespace'. Since this function's purpose is largely cosmetic,
798+
// the choice has been made to use each language's native features
799+
// rather than force total conformity.
800+
rune1, _ := utf8.DecodeLastRuneInString(one)
801+
rune2, _ := utf8.DecodeRuneInString(two)
802+
char1 := string(rune1)
803+
char2 := string(rune2)
804+
805+
nonAlphaNumeric1 := nonAlphaNumericRegex.MatchString(char1)
806+
nonAlphaNumeric2 := nonAlphaNumericRegex.MatchString(char2)
807+
whitespace1 := nonAlphaNumeric1 && whitespaceRegex.MatchString(char1)
808+
whitespace2 := nonAlphaNumeric2 && whitespaceRegex.MatchString(char2)
809+
lineBreak1 := whitespace1 && linebreakRegex.MatchString(char1)
810+
lineBreak2 := whitespace2 && linebreakRegex.MatchString(char2)
811+
blankLine1 := lineBreak1 && blanklineEndRegex.MatchString(one)
812+
blankLine2 := lineBreak2 && blanklineEndRegex.MatchString(two)
813+
814+
if blankLine1 || blankLine2 {
815+
// Five points for blank lines.
816+
return 5
817+
} else if lineBreak1 || lineBreak2 {
818+
// Four points for line breaks.
819+
return 4
820+
} else if nonAlphaNumeric1 && !whitespace1 && whitespace2 {
821+
// Three points for end of sentences.
822+
return 3
823+
} else if whitespace1 || whitespace2 {
824+
// Two points for whitespace.
825+
return 2
826+
} else if nonAlphaNumeric1 || nonAlphaNumeric2 {
827+
// One point for non-alphanumeric.
828+
return 1
829+
}
830+
return 0
831+
}
832+
788833
// DiffCleanupSemanticLossless looks for single edits surrounded on both sides by equalities
789834
// which can be shifted sideways to align the edit to a word boundary.
790835
// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
791836
func (dmp *DiffMatchPatch) DiffCleanupSemanticLossless(diffs []Diff) []Diff {
792-
793-
/**
794-
* Given two strings, compute a score representing whether the internal
795-
* boundary falls on logical boundaries.
796-
* Scores range from 6 (best) to 0 (worst).
797-
* Closure, but does not reference any external variables.
798-
* @param {string} one First string.
799-
* @param {string} two Second string.
800-
* @return {number} The score.
801-
* @private
802-
*/
803-
diffCleanupSemanticScore := func(one, two string) int {
804-
if len(one) == 0 || len(two) == 0 {
805-
// Edges are the best.
806-
return 6
807-
}
808-
809-
// Each port of this function behaves slightly differently due to
810-
// subtle differences in each language's definition of things like
811-
// 'whitespace'. Since this function's purpose is largely cosmetic,
812-
// the choice has been made to use each language's native features
813-
// rather than force total conformity.
814-
rune1, _ := utf8.DecodeLastRuneInString(one)
815-
rune2, _ := utf8.DecodeRuneInString(two)
816-
char1 := string(rune1)
817-
char2 := string(rune2)
818-
819-
nonAlphaNumeric1 := nonAlphaNumericRegex.MatchString(char1)
820-
nonAlphaNumeric2 := nonAlphaNumericRegex.MatchString(char2)
821-
whitespace1 := nonAlphaNumeric1 && whitespaceRegex.MatchString(char1)
822-
whitespace2 := nonAlphaNumeric2 && whitespaceRegex.MatchString(char2)
823-
lineBreak1 := whitespace1 && linebreakRegex.MatchString(char1)
824-
lineBreak2 := whitespace2 && linebreakRegex.MatchString(char2)
825-
blankLine1 := lineBreak1 && blanklineEndRegex.MatchString(one)
826-
blankLine2 := lineBreak2 && blanklineEndRegex.MatchString(two)
827-
828-
if blankLine1 || blankLine2 {
829-
// Five points for blank lines.
830-
return 5
831-
} else if lineBreak1 || lineBreak2 {
832-
// Four points for line breaks.
833-
return 4
834-
} else if nonAlphaNumeric1 && !whitespace1 && whitespace2 {
835-
// Three points for end of sentences.
836-
return 3
837-
} else if whitespace1 || whitespace2 {
838-
// Two points for whitespace.
839-
return 2
840-
} else if nonAlphaNumeric1 || nonAlphaNumeric2 {
841-
// One point for non-alphanumeric.
842-
return 1
843-
}
844-
return 0
845-
}
846-
847837
pointer := 1
848838

849839
// Intentionally ignore the first and last element (don't need checking).

0 commit comments

Comments
 (0)