@@ -785,65 +785,55 @@ var (
785
785
blanklineStartRegex = regexp .MustCompile (`^\r?\n\r?\n` )
786
786
)
787
787
788
+ // diffCleanupSemanticScore computes a score representing whether the internal boundary falls on logical boundaries. Scores range from 6 (best) to 0 (worst). Closure, but does not reference any external variables.
789
+ func diffCleanupSemanticScore (one , two string ) int {
790
+ if len (one ) == 0 || len (two ) == 0 {
791
+ // Edges are the best.
792
+ return 6
793
+ }
794
+
795
+ // Each port of this function behaves slightly differently due to
796
+ // subtle differences in each language's definition of things like
797
+ // 'whitespace'. Since this function's purpose is largely cosmetic,
798
+ // the choice has been made to use each language's native features
799
+ // rather than force total conformity.
800
+ rune1 , _ := utf8 .DecodeLastRuneInString (one )
801
+ rune2 , _ := utf8 .DecodeRuneInString (two )
802
+ char1 := string (rune1 )
803
+ char2 := string (rune2 )
804
+
805
+ nonAlphaNumeric1 := nonAlphaNumericRegex .MatchString (char1 )
806
+ nonAlphaNumeric2 := nonAlphaNumericRegex .MatchString (char2 )
807
+ whitespace1 := nonAlphaNumeric1 && whitespaceRegex .MatchString (char1 )
808
+ whitespace2 := nonAlphaNumeric2 && whitespaceRegex .MatchString (char2 )
809
+ lineBreak1 := whitespace1 && linebreakRegex .MatchString (char1 )
810
+ lineBreak2 := whitespace2 && linebreakRegex .MatchString (char2 )
811
+ blankLine1 := lineBreak1 && blanklineEndRegex .MatchString (one )
812
+ blankLine2 := lineBreak2 && blanklineEndRegex .MatchString (two )
813
+
814
+ if blankLine1 || blankLine2 {
815
+ // Five points for blank lines.
816
+ return 5
817
+ } else if lineBreak1 || lineBreak2 {
818
+ // Four points for line breaks.
819
+ return 4
820
+ } else if nonAlphaNumeric1 && ! whitespace1 && whitespace2 {
821
+ // Three points for end of sentences.
822
+ return 3
823
+ } else if whitespace1 || whitespace2 {
824
+ // Two points for whitespace.
825
+ return 2
826
+ } else if nonAlphaNumeric1 || nonAlphaNumeric2 {
827
+ // One point for non-alphanumeric.
828
+ return 1
829
+ }
830
+ return 0
831
+ }
832
+
788
833
// DiffCleanupSemanticLossless looks for single edits surrounded on both sides by equalities
789
834
// which can be shifted sideways to align the edit to a word boundary.
790
835
// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
791
836
func (dmp * DiffMatchPatch ) DiffCleanupSemanticLossless (diffs []Diff ) []Diff {
792
-
793
- /**
794
- * Given two strings, compute a score representing whether the internal
795
- * boundary falls on logical boundaries.
796
- * Scores range from 6 (best) to 0 (worst).
797
- * Closure, but does not reference any external variables.
798
- * @param {string} one First string.
799
- * @param {string} two Second string.
800
- * @return {number} The score.
801
- * @private
802
- */
803
- diffCleanupSemanticScore := func (one , two string ) int {
804
- if len (one ) == 0 || len (two ) == 0 {
805
- // Edges are the best.
806
- return 6
807
- }
808
-
809
- // Each port of this function behaves slightly differently due to
810
- // subtle differences in each language's definition of things like
811
- // 'whitespace'. Since this function's purpose is largely cosmetic,
812
- // the choice has been made to use each language's native features
813
- // rather than force total conformity.
814
- rune1 , _ := utf8 .DecodeLastRuneInString (one )
815
- rune2 , _ := utf8 .DecodeRuneInString (two )
816
- char1 := string (rune1 )
817
- char2 := string (rune2 )
818
-
819
- nonAlphaNumeric1 := nonAlphaNumericRegex .MatchString (char1 )
820
- nonAlphaNumeric2 := nonAlphaNumericRegex .MatchString (char2 )
821
- whitespace1 := nonAlphaNumeric1 && whitespaceRegex .MatchString (char1 )
822
- whitespace2 := nonAlphaNumeric2 && whitespaceRegex .MatchString (char2 )
823
- lineBreak1 := whitespace1 && linebreakRegex .MatchString (char1 )
824
- lineBreak2 := whitespace2 && linebreakRegex .MatchString (char2 )
825
- blankLine1 := lineBreak1 && blanklineEndRegex .MatchString (one )
826
- blankLine2 := lineBreak2 && blanklineEndRegex .MatchString (two )
827
-
828
- if blankLine1 || blankLine2 {
829
- // Five points for blank lines.
830
- return 5
831
- } else if lineBreak1 || lineBreak2 {
832
- // Four points for line breaks.
833
- return 4
834
- } else if nonAlphaNumeric1 && ! whitespace1 && whitespace2 {
835
- // Three points for end of sentences.
836
- return 3
837
- } else if whitespace1 || whitespace2 {
838
- // Two points for whitespace.
839
- return 2
840
- } else if nonAlphaNumeric1 || nonAlphaNumeric2 {
841
- // One point for non-alphanumeric.
842
- return 1
843
- }
844
- return 0
845
- }
846
-
847
837
pointer := 1
848
838
849
839
// Intentionally ignore the first and last element (don't need checking).
0 commit comments