Skip to content

Commit 71d7f40

Browse files
author
Örjan Persson
committed
Fixed index out of bounds for multi-byte Unicode code points
1 parent cfaed94 commit 71d7f40

File tree

2 files changed

+17
-5
lines changed

2 files changed

+17
-5
lines changed

diffmatchpatch/dmp.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -901,16 +901,15 @@ func (dmp *DiffMatchPatch) DiffCleanupSemanticLossless(diffs []Diff) []Diff {
901901
return 6
902902
}
903903

904-
_one := []rune(one)
905-
_two := []rune(two)
906-
907904
// Each port of this function behaves slightly differently due to
908905
// subtle differences in each language's definition of things like
909906
// 'whitespace'. Since this function's purpose is largely cosmetic,
910907
// the choice has been made to use each language's native features
911908
// rather than force total conformity.
912-
char1 := string(_one[len(one)-1])
913-
char2 := string(_two[0])
909+
rune1, _ := utf8.DecodeLastRuneInString(one)
910+
rune2, _ := utf8.DecodeRuneInString(two)
911+
char1 := string(rune1)
912+
char2 := string(rune2)
914913

915914
nonAlphaNumeric1 := nonAlphaNumericRegex_.MatchString(char1)
916915
nonAlphaNumeric2 := nonAlphaNumericRegex_.MatchString(char2)

diffmatchpatch/dmp_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,19 @@ func Test_diffCleanupSemanticLossless(t *testing.T) {
437437
Diff{DiffEqual, "The xxx."},
438438
Diff{DiffInsert, " The zzz."},
439439
Diff{DiffEqual, " The yyy."}}, diffs)
440+
441+
// UTF-8 strings.
442+
diffs = []Diff{
443+
Diff{DiffEqual, "The ♕. The "},
444+
Diff{DiffInsert, "♔. The "},
445+
Diff{DiffEqual, "♖."}}
446+
447+
dmp.DiffCleanupSemanticLossless(diffs)
448+
449+
assertDiffEqual(t, []Diff{
450+
Diff{DiffEqual, "The ♕."},
451+
Diff{DiffInsert, " The ♔."},
452+
Diff{DiffEqual, " The ♖."}}, diffs)
440453
}
441454

442455
func Test_diffCleanupSemantic(t *testing.T) {

0 commit comments

Comments
 (0)