Skip to content

Commit 1f56a9f

Browse files
committed
Fixed a bug in DiffBisect: the texts are now compared and split by rune rather than by byte
1 parent 1307524 commit 1f56a9f

File tree

2 files changed

+48
-50
lines changed

2 files changed

+48
-50
lines changed

diff/dmp.go

Lines changed: 27 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -225,17 +225,18 @@ func (dmp *DiffMatchPatch) DiffMain(text1 string, text2 string, opt ...interface
225225
return diffs
226226
}
227227

228+
// Trim off common prefix (speedup).
228229
commonlength := dmp.DiffCommonPrefix(text1, text2)
229-
commonprefix := text1[0:commonlength]
230+
commonprefix := text1[:commonlength]
230231
text1 = text1[commonlength:]
231232
text2 = text2[commonlength:]
232233

233234
// Trim off common suffix (speedup).
234235
commonlength = dmp.DiffCommonSuffix(text1, text2)
235236
commonsuffix := text1[len(text1)-commonlength:]
236237

237-
text1 = text1[0 : len(text1)-commonlength]
238-
text2 = text2[0 : len(text2)-commonlength]
238+
text1 = text1[:len(text1)-commonlength]
239+
text2 = text2[:len(text2)-commonlength]
239240

240241
// Compute the diff on the middle block.
241242
diffs = dmp.diffCompute(text1, text2, checklines, deadline)
@@ -382,10 +383,10 @@ func (dmp *DiffMatchPatch) diffLineMode(text1 string, text2 string, deadline int
382383
// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
383384
func (dmp *DiffMatchPatch) DiffBisect(text1 string, text2 string, deadline int64) []Diff {
384385
// Cache the text lengths to prevent multiple calls.
385-
text1_length := len(text1)
386-
text2_length := len(text2)
386+
s1, s2 := []rune(text1), []rune(text2)
387+
s1_length, s2_length := len(s1), len(s2)
387388

388-
max_d := int(math.Ceil(float64(((text1_length + text2_length) / 2))))
389+
max_d := int(math.Ceil(float64(((s1_length + s2_length) / 2))))
389390
v_offset := max_d
390391
v_length := 2 * max_d
391392
v1 := make([]int, v_length)
@@ -394,7 +395,7 @@ func (dmp *DiffMatchPatch) DiffBisect(text1 string, text2 string, deadline int64
394395
v1[v_offset+1] = 0
395396
v2[v_offset+1] = 0
396397

397-
delta := text1_length - text2_length
398+
delta := s1_length - s2_length
398399
// If the total number of characters is odd, then the front path will collide
399400
// with the reverse path.
400401
front := (delta%2 != 0)
@@ -422,26 +423,26 @@ func (dmp *DiffMatchPatch) DiffBisect(text1 string, text2 string, deadline int64
422423
}
423424

424425
y1 := x1 - k1
425-
for x1 < text1_length && y1 < text2_length &&
426-
text1[x1] == text2[y1] {
426+
for x1 < s1_length && y1 < s2_length &&
427+
s1[x1] == s2[y1] {
427428
x1++
428429
y1++
429430
}
430431
v1[k1_offset] = x1
431-
if x1 > text1_length {
432+
if x1 > s1_length {
432433
// Ran off the right of the graph.
433434
k1end += 2
434-
} else if y1 > text2_length {
435+
} else if y1 > s2_length {
435436
// Ran off the bottom of the graph.
436437
k1start += 2
437438
} else if front {
438439
k2_offset := v_offset + delta - k1
439440
if k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1 {
440441
// Mirror x2 onto top-left coordinate system.
441-
x2 := text1_length - v2[k2_offset]
442+
x2 := s1_length - v2[k2_offset]
442443
if x1 >= x2 {
443444
// Overlap detected.
444-
return dmp.diffBisectSplit_(text1, text2, x1, y1, deadline)
445+
return dmp.diffBisectSplit_(s1, s2, x1, y1, deadline)
445446
}
446447
}
447448
}
@@ -456,17 +457,17 @@ func (dmp *DiffMatchPatch) DiffBisect(text1 string, text2 string, deadline int64
456457
x2 = v2[k2_offset-1] + 1
457458
}
458459
var y2 = x2 - k2
459-
for x2 < text1_length &&
460-
y2 < text2_length &&
461-
(text1[text1_length-x2-1] == text2[text2_length-y2-1]) {
460+
for x2 < s1_length &&
461+
y2 < s2_length &&
462+
(s1[s1_length-x2-1] == s2[s2_length-y2-1]) {
462463
x2++
463464
y2++
464465
}
465466
v2[k2_offset] = x2
466-
if x2 > text1_length {
467+
if x2 > s1_length {
467468
// Ran off the left of the graph.
468469
k2end += 2
469-
} else if y2 > text2_length {
470+
} else if y2 > s2_length {
470471
// Ran off the top of the graph.
471472
k2start += 2
472473
} else if !front {
@@ -475,10 +476,10 @@ func (dmp *DiffMatchPatch) DiffBisect(text1 string, text2 string, deadline int64
475476
x1 := v1[k1_offset]
476477
y1 := v_offset + x1 - k1_offset
477478
// Mirror x2 onto top-left coordinate system.
478-
x2 = text1_length - x2
479+
x2 = s1_length - x2
479480
if x1 >= x2 {
480481
// Overlap detected.
481-
return dmp.diffBisectSplit_(text1, text2, x1, y1, deadline)
482+
return dmp.diffBisectSplit_(s1, s2, x1, y1, deadline)
482483
}
483484
}
484485
}
@@ -492,11 +493,11 @@ func (dmp *DiffMatchPatch) DiffBisect(text1 string, text2 string, deadline int64
492493
}
493494
}
494495

495-
func (dmp *DiffMatchPatch) diffBisectSplit_(text1 string, text2 string, x int, y int, deadline int64) []Diff {
496-
text1a := text1[0:x]
497-
text2a := text2[0:y]
498-
text1b := text1[x:]
499-
text2b := text2[y:]
496+
func (dmp *DiffMatchPatch) diffBisectSplit_(text1 , text2 []rune, x, y int, deadline int64) []Diff {
497+
text1a := string(text1[:x])
498+
text2a := string(text2[:y])
499+
text1b := string(text1[x:])
500+
text2b := string(text2[y:])
500501

501502
// Compute both diffs serially.
502503
diffs := dmp.DiffMain(text1a, text2a, false, deadline)
@@ -587,26 +588,6 @@ func (dmp *DiffMatchPatch) DiffCommonPrefix(text1 string, text2 string) int {
587588
}
588589
}
589590
return n
590-
591-
// Binary search.
592-
// Performance analysis: http://neil.fraser.name/news/2007/10/09/
593-
/*
594-
pointermin := 0
595-
pointermax := math.Min(len(text1), len(text2))
596-
pointermid := pointermax
597-
pointerstart := 0
598-
for pointermin < pointermid {
599-
if text1[pointerstart:pointermid] ==
600-
text2[pointerstart:pointermid] {
601-
pointermin = pointermid
602-
pointerstart = pointermin
603-
} else {
604-
pointermax = pointermid
605-
}
606-
pointermid = math.Floor((pointermax-pointermin)/2 + pointermin)
607-
}
608-
return pointermid
609-
*/
610591
}
611592

612593
// DiffCommonSuffix determines the common suffix length of two strings.
@@ -2147,3 +2128,4 @@ func (dmp *DiffMatchPatch) PatchFromText(textline string) ([]Patch, error) {
21472128
}
21482129
return patches, nil
21492130
}
2131+

diff/dmp_test.go

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -844,7 +844,6 @@ func Test_diffMain(t *testing.T) {
844844
Diff{DiffEqual, "s are a"},
845845
Diff{DiffInsert, "lso"},
846846
Diff{DiffEqual, " fruit."}}
847-
848847
assertDiffEqual(t, diffs, dmp.DiffMain("Apples are a fruit.", "Bananas are also fruit.", false))
849848

850849
diffs = []Diff{
@@ -853,15 +852,32 @@ func Test_diffMain(t *testing.T) {
853852
Diff{DiffEqual, "x"},
854853
Diff{DiffDelete, "\t"},
855854
Diff{DiffInsert, "\u0000"}}
856-
857855
assertDiffEqual(t, diffs, dmp.DiffMain("ax\t", "\u0680x\u0000", false))
858-
diffs = []Diff{Diff{DiffDelete, "1"}, Diff{DiffEqual, "a"}, Diff{DiffDelete, "y"}, Diff{DiffEqual, "b"}, Diff{DiffDelete, "2"}, Diff{DiffInsert, "xab"}}
856+
857+
diffs = []Diff{
858+
Diff{DiffDelete, "1"},
859+
Diff{DiffEqual, "a"},
860+
Diff{DiffDelete, "y"},
861+
Diff{DiffEqual, "b"},
862+
Diff{DiffDelete, "2"},
863+
Diff{DiffInsert, "xab"}}
859864
assertDiffEqual(t, diffs, dmp.DiffMain("1ayb2", "abxab", false))
860865

861-
diffs = []Diff{Diff{DiffInsert, "xaxcx"}, Diff{DiffEqual, "abc"}, Diff{DiffDelete, "y"}}
866+
diffs = []Diff{
867+
Diff{DiffInsert, "xaxcx"},
868+
Diff{DiffEqual, "abc"}, Diff{DiffDelete, "y"}}
862869
assertDiffEqual(t, diffs, dmp.DiffMain("abcy", "xaxcxabc", false))
863870

864-
diffs = []Diff{Diff{DiffDelete, "ABCD"}, Diff{DiffEqual, "a"}, Diff{DiffDelete, "="}, Diff{DiffInsert, "-"}, Diff{DiffEqual, "bcd"}, Diff{DiffDelete, "="}, Diff{DiffInsert, "-"}, Diff{DiffEqual, "efghijklmnopqrs"}, Diff{DiffDelete, "EFGHIJKLMNOefg"}}
871+
diffs = []Diff{
872+
Diff{DiffDelete, "ABCD"},
873+
Diff{DiffEqual, "a"},
874+
Diff{DiffDelete, "="},
875+
Diff{DiffInsert, "-"},
876+
Diff{DiffEqual, "bcd"},
877+
Diff{DiffDelete, "="},
878+
Diff{DiffInsert, "-"},
879+
Diff{DiffEqual, "efghijklmnopqrs"},
880+
Diff{DiffDelete, "EFGHIJKLMNOefg"}}
865881
assertDiffEqual(t, diffs, dmp.DiffMain("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false))
866882

867883
diffs = []Diff{Diff{DiffInsert, " "}, Diff{DiffEqual, "a"}, Diff{DiffInsert, "nd"}, Diff{DiffEqual, " [[Pennsylvania]]"}, Diff{DiffDelete, " and [[New"}}

0 commit comments

Comments
 (0)