Skip to content

Commit 1d4c130

Browse files
author
Örjan Persson
committed
Step one rune at a time
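If the edit and the equality that follows it shared only the leading bytes of a multi-byte rune, shifting one byte at a time split that rune apart and left invalid UTF-8 in the resulting diffs.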
1 parent c872b84 commit 1d4c130

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

diffmatchpatch/dmp.go

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -981,10 +981,14 @@ func (dmp *DiffMatchPatch) DiffCleanupSemanticLossless(diffs []Diff) []Diff {
981981
bestScore := diffCleanupSemanticScore_(equality1, edit) +
982982
diffCleanupSemanticScore_(edit, equality2)
983983

984-
for len(edit) != 0 && len(equality2) != 0 && edit[0] == equality2[0] {
984+
for len(edit) != 0 && len(equality2) != 0 {
985+
_, sz := utf8.DecodeRuneInString(edit)
986+
if edit[:sz] != equality2[:sz] {
987+
break
988+
}
985989
equality1 += string(edit[0])
986-
edit = edit[1:] + string(equality2[0])
987-
equality2 = equality2[1:]
990+
edit = edit[sz:] + string(equality2[0])
991+
equality2 = equality2[sz:]
988992
score := diffCleanupSemanticScore_(equality1, edit) +
989993
diffCleanupSemanticScore_(edit, equality2)
990994
// The >= encourages trailing rather than leading whitespace on

diffmatchpatch/dmp_test.go

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -450,6 +450,19 @@ func Test_diffCleanupSemanticLossless(t *testing.T) {
450450
Diff{DiffEqual, "The ♕."},
451451
Diff{DiffInsert, " The ♔."},
452452
Diff{DiffEqual, " The ♖."}}, diffs)
453+
454+
// Rune boundaries.
455+
diffs = []Diff{
456+
Diff{DiffEqual, "♕♕"},
457+
Diff{DiffInsert, "♔♔"},
458+
Diff{DiffEqual, "♖♖"}}
459+
460+
dmp.DiffCleanupSemanticLossless(diffs)
461+
462+
assertDiffEqual(t, []Diff{
463+
Diff{DiffEqual, "♕♕"},
464+
Diff{DiffInsert, "♔♔"},
465+
Diff{DiffEqual, "♖♖"}}, diffs)
453466
}
454467

455468
func Test_diffCleanupSemantic(t *testing.T) {

0 commit comments

Comments
 (0)