Skip to content

Commit af0ba86

Browse files
committed
Switched DiffBisect and diffBisectSplit_ to use []rune instead of string.
When strings were used, invalid UTF-8 sequences were created.
1 parent e7fd693 commit af0ba86

File tree

2 files changed

+31
-32
lines changed

2 files changed

+31
-32
lines changed

diffmatchpatch/dmp.go

Lines changed: 29 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -365,10 +365,13 @@ func (dmp *DiffMatchPatch) diffLineMode(text1, text2 string, deadline time.Time)
365365
// and return the recursively constructed diff.
366366
// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
367367
func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) []Diff {
368+
// Convert to runes to avoid utf8 slicing bugs.
369+
runes1 := []rune(text1)
370+
runes2 := []rune(text2)
368371
// Cache the text lengths to prevent multiple calls.
369-
text1_len, text2_len := len(text1), len(text2)
372+
runes1_len, runes2_len := len(runes1), len(runes2)
370373

371-
max_d := (text1_len + text2_len + 1) / 2
374+
max_d := (runes1_len + runes2_len + 1) / 2
372375
v_offset := max_d
373376
v_length := 2 * max_d
374377

@@ -381,7 +384,7 @@ func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) [
381384
v1[v_offset+1] = 0
382385
v2[v_offset+1] = 0
383386

384-
delta := text1_len - text2_len
387+
delta := runes1_len - runes2_len
385388
// If the total number of characters is odd, then the front path will collide
386389
// with the reverse path.
387390
front := (delta%2 != 0)
@@ -409,30 +412,28 @@ func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) [
409412
}
410413

411414
y1 := x1 - k1
412-
for x1 < text1_len && y1 < text2_len {
413-
r1, size := utf8.DecodeRuneInString(text1[x1:])
414-
r2, _ := utf8.DecodeRuneInString(text2[y1:])
415-
if r1 != r2 {
415+
for x1 < runes1_len && y1 < runes2_len {
416+
if runes1[x1] != runes2[y1] {
416417
break
417418
}
418-
x1 += size
419-
y1 += size
419+
x1++
420+
y1++
420421
}
421422
v1[k1_offset] = x1
422-
if x1 > text1_len {
423+
if x1 > runes1_len {
423424
// Ran off the right of the graph.
424425
k1end += 2
425-
} else if y1 > text2_len {
426+
} else if y1 > runes2_len {
426427
// Ran off the bottom of the graph.
427428
k1start += 2
428429
} else if front {
429430
k2_offset := v_offset + delta - k1
430431
if k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1 {
431432
// Mirror x2 onto top-left coordinate system.
432-
x2 := text1_len - v2[k2_offset]
433+
x2 := runes1_len - v2[k2_offset]
433434
if x1 >= x2 {
434435
// Overlap detected.
435-
return dmp.diffBisectSplit_(text1, text2, x1, y1, deadline)
436+
return dmp.diffBisectSplit_(runes1, runes2, x1, y1, deadline)
436437
}
437438
}
438439
}
@@ -447,20 +448,18 @@ func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) [
447448
x2 = v2[k2_offset-1] + 1
448449
}
449450
var y2 = x2 - k2
450-
for x2 < text1_len && y2 < text2_len {
451-
r1, size := utf8.DecodeLastRuneInString(text1[:text1_len-x2])
452-
r2, _ := utf8.DecodeLastRuneInString(text2[:text2_len-y2])
453-
if r1 != r2 {
451+
for x2 < runes1_len && y2 < runes2_len {
452+
if runes1[runes1_len-x2-1] != runes2[runes2_len-y2-1] {
454453
break
455454
}
456-
x2 += size
457-
y2 += size
455+
x2++
456+
y2++
458457
}
459458
v2[k2_offset] = x2
460-
if x2 > text1_len {
459+
if x2 > runes1_len {
461460
// Ran off the left of the graph.
462461
k2end += 2
463-
} else if y2 > text2_len {
462+
} else if y2 > runes2_len {
464463
// Ran off the top of the graph.
465464
k2start += 2
466465
} else if !front {
@@ -469,10 +468,10 @@ func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) [
469468
x1 := v1[k1_offset]
470469
y1 := v_offset + x1 - k1_offset
471470
// Mirror x2 onto top-left coordinate system.
472-
x2 = text1_len - x2
471+
x2 = runes1_len - x2
473472
if x1 >= x2 {
474473
// Overlap detected.
475-
return dmp.diffBisectSplit_(text1, text2, x1, y1, deadline)
474+
return dmp.diffBisectSplit_(runes1, runes2, x1, y1, deadline)
476475
}
477476
}
478477
}
@@ -486,16 +485,16 @@ func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) [
486485
}
487486
}
488487

489-
func (dmp *DiffMatchPatch) diffBisectSplit_(text1, text2 string, x, y int,
488+
func (dmp *DiffMatchPatch) diffBisectSplit_(runes1, runes2 []rune, x, y int,
490489
deadline time.Time) []Diff {
491-
text1a := text1[:x]
492-
text2a := text2[:y]
493-
text1b := text1[x:]
494-
text2b := text2[y:]
490+
runes1a := runes1[:x]
491+
runes2a := runes2[:y]
492+
runes1b := runes1[x:]
493+
runes2b := runes2[y:]
495494

496495
// Compute both diffs serially.
497-
diffs := dmp.diffMain(text1a, text2a, false, deadline)
498-
diffsb := dmp.diffMain(text1b, text2b, false, deadline)
496+
diffs := dmp.diffMain(string(runes1a), string(runes2a), false, deadline)
497+
diffsb := dmp.diffMain(string(runes1b), string(runes2b), false, deadline)
499498

500499
return append(diffs, diffsb...)
501500
}

diffmatchpatch/dmp_test.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -196,8 +196,8 @@ func Test_diffHalfmatchTest(t *testing.T) {
196196
func Test_diffBisectSplit(t *testing.T) {
197197
// As originally written, this can produce invalid utf8 strings.
198198
dmp := New()
199-
diffs := dmp.diffBisectSplit_("STUV\x05WX\x05YZ\x05[",
200-
"WĺĻļ\x05YZ\x05ĽľĿŀZ", 7, 6, time.Now().Add(time.Hour))
199+
diffs := dmp.diffBisectSplit_([]rune("STUV\x05WX\x05YZ\x05["),
200+
[]rune("WĺĻļ\x05YZ\x05ĽľĿŀZ"), 7, 6, time.Now().Add(time.Hour))
201201
for _, d := range diffs {
202202
assert.True(t, utf8.ValidString(d.Text))
203203
}

0 commit comments

Comments
 (0)