Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 67 additions & 68 deletions diffmatchpatch/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,41 @@ type Diff struct {
Text string
}

// splice removes amount elements from slice at index index, replacing them with elements.
func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff {
return append(slice[:index], append(elements, slice[index+amount:]...)...)
if len(elements) == amount {
// Easy case: overwrite the relevant items.
copy(slice[index:], elements)
return slice
}
if len(elements) < amount {
// Fewer new items than old.
// Copy in the new items.
copy(slice[index:], elements)
// Shift the remaining items left.
copy(slice[index+len(elements):], slice[index+amount:])
// Calculate the new end of the slice.
end := len(slice) - amount + len(elements)
// Zero stranded elements at end so that they can be garbage collected.
tail := slice[end:]
for i := range tail {
tail[i] = Diff{}
}
return slice[:end]
}
// More new items than old.
// Make room in slice for new elements.
// There's probably an even more efficient way to do this,
// but this is simple and clear.
need := len(slice) - amount + len(elements)
for len(slice) < need {
slice = append(slice, Diff{})
}
// Shift slice elements right to make room for new elements.
copy(slice[index+len(elements):], slice[index+amount:])
// Copy in new elements.
copy(slice[index:], elements)
return slice
}

// DiffMain finds the differences between two texts.
Expand Down Expand Up @@ -145,7 +178,10 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea
diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline)
diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline)
// Merge the results.
return append(diffsA, append([]Diff{Diff{DiffEqual, string(midCommon)}}, diffsB...)...)
diffs := diffsA
diffs = append(diffs, Diff{DiffEqual, string(midCommon)})
diffs = append(diffs, diffsB...)
return diffs
} else if checklines && len(text1) > 100 && len(text2) > 100 {
return dmp.diffLineMode(text1, text2, deadline)
}
Expand Down Expand Up @@ -247,7 +283,7 @@ func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time)
k2end := 0
for d := 0; d < maxD; d++ {
// Bail out if deadline is reached.
if !deadline.IsZero() && time.Now().After(deadline) {
if !deadline.IsZero() && d%16 == 0 && time.Now().After(deadline) {
break
}

Expand Down Expand Up @@ -434,48 +470,29 @@ func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int {

// commonPrefixLength returns the length of the common prefix of two rune slices.
func commonPrefixLength(text1, text2 []rune) int {
short, long := text1, text2
if len(short) > len(long) {
short, long = long, short
}
for i, r := range short {
if r != long[i] {
return i
// Linear search. See comment in commonSuffixLength.
n := 0
for ; n < len(text1) && n < len(text2); n++ {
if text1[n] != text2[n] {
return n
}
}
return len(short)
return n
}

// commonSuffixLength returns the length of the common suffix of two rune slices.
func commonSuffixLength(text1, text2 []rune) int {
n := min(len(text1), len(text2))
for i := 0; i < n; i++ {
if text1[len(text1)-i-1] != text2[len(text2)-i-1] {
return i
// Use linear search rather than the binary search discussed at https://neil.fraser.name/news/2007/10/09/.
// See discussion at https://github.com/sergi/go-diff/issues/54.
i1 := len(text1)
i2 := len(text2)
for n := 0; ; n++ {
i1--
i2--
if i1 < 0 || i2 < 0 || text1[i1] != text2[i2] {
return n
}
}
return n

// TODO research and benchmark this, why is it not activated? https://github.com/sergi/go-diff/issues/54
// Binary search.
// Performance analysis: http://neil.fraser.name/news/2007/10/09/
/*
pointermin := 0
pointermax := math.Min(len(text1), len(text2))
pointermid := pointermax
pointerend := 0
for pointermin < pointermid {
if text1[len(text1)-pointermid:len(text1)-pointerend] ==
text2[len(text2)-pointermid:len(text2)-pointerend] {
pointermin = pointermid
pointerend = pointermin
} else {
pointermax = pointermid
}
pointermid = math.Floor((pointermax-pointermin)/2 + pointermin)
}
return pointermid
*/
}

// DiffCommonOverlap determines if the suffix of one string is the prefix of another.
Expand Down Expand Up @@ -628,11 +645,7 @@ func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune {
func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
changes := false
// Stack of indices where equalities are found.
type equality struct {
data int
next *equality
}
var equalities *equality
equalities := make([]int, 0, len(diffs))

var lastequality string
// Always equal to diffs[equalities[equalitiesLength - 1]][1]
Expand All @@ -645,11 +658,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
for pointer < len(diffs) {
if diffs[pointer].Type == DiffEqual {
// Equality found.

equalities = &equality{
data: pointer,
next: equalities,
}
equalities = append(equalities, pointer)
lengthInsertions1 = lengthInsertions2
lengthDeletions1 = lengthDeletions2
lengthInsertions2 = 0
Expand All @@ -670,23 +679,20 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
(len(lastequality) <= difference1) &&
(len(lastequality) <= difference2) {
// Duplicate record.
insPoint := equalities.data
diffs = append(
diffs[:insPoint],
append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...)
insPoint := equalities[len(equalities)-1]
diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})

// Change second copy to insert.
diffs[insPoint+1].Type = DiffInsert
// Throw away the equality we just deleted.
equalities = equalities.next
equalities = equalities[:len(equalities)-1]

if equalities != nil {
equalities = equalities.next
if len(equalities) > 0 {
equalities = equalities[:len(equalities)-1]
}
if equalities != nil {
pointer = equalities.data
} else {
pointer = -1
pointer = -1
if len(equalities) > 0 {
pointer = equalities[len(equalities)-1]
}

lengthInsertions1 = 0 // Reset the counters.
Expand Down Expand Up @@ -724,10 +730,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
float64(overlapLength1) >= float64(len(insertion))/2 {

// Overlap found. Insert an equality and trim the surrounding edits.
diffs = append(
diffs[:pointer],
append([]Diff{Diff{DiffEqual, insertion[:overlapLength1]}}, diffs[pointer:]...)...)

diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]})
diffs[pointer-1].Text =
deletion[0 : len(deletion)-overlapLength1]
diffs[pointer+1].Text = insertion[overlapLength1:]
Expand All @@ -738,10 +741,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
float64(overlapLength2) >= float64(len(insertion))/2 {
// Reverse overlap found. Insert an equality and swap and trim the surrounding edits.
overlap := Diff{DiffEqual, deletion[:overlapLength2]}
diffs = append(
diffs[:pointer],
append([]Diff{overlap}, diffs[pointer:]...)...)

diffs = splice(diffs, pointer, 0, overlap)
diffs[pointer-1].Type = DiffInsert
diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2]
diffs[pointer+1].Type = DiffDelete
Expand Down Expand Up @@ -954,8 +954,7 @@ func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff {
insPoint := equalities.data

// Duplicate record.
diffs = append(diffs[:insPoint],
append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...)
diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})

// Change second copy to insert.
diffs[insPoint+1].Type = DiffInsert
Expand Down
36 changes: 35 additions & 1 deletion diffmatchpatch/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ func TestDiffCommonSuffix(t *testing.T) {
}
}

var SinkInt int // exported sink var to avoid compiler optimizations in benchmarks

func BenchmarkDiffCommonSuffix(b *testing.B) {
s := "ABCDEFGHIJKLMNOPQRSTUVWXYZÅÄÖ"

Expand All @@ -138,10 +140,42 @@ func BenchmarkDiffCommonSuffix(b *testing.B) {
b.ResetTimer()

for i := 0; i < b.N; i++ {
dmp.DiffCommonSuffix(s, s)
SinkInt = dmp.DiffCommonSuffix(s, s)
}
}

func BenchmarkCommonLength(b *testing.B) {
data := []struct {
name string
x, y []rune
}{
{name: "empty", x: nil, y: []rune{}},
{name: "short", x: []rune("AABCC"), y: []rune("AA-CC")},
{name: "long",
x: []rune(strings.Repeat("A", 1000) + "B" + strings.Repeat("C", 1000)),
y: []rune(strings.Repeat("A", 1000) + "-" + strings.Repeat("C", 1000)),
},
}
b.Run("prefix", func(b *testing.B) {
for _, d := range data {
b.Run(d.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
SinkInt = commonPrefixLength(d.x, d.y)
}
})
}
})
b.Run("suffix", func(b *testing.B) {
for _, d := range data {
b.Run(d.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
SinkInt = commonSuffixLength(d.x, d.y)
}
})
}
})
}

func TestCommonSuffixLength(t *testing.T) {
type TestCase struct {
Text1 string
Expand Down