diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index 08c36e7..67e6750 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -79,6 +79,9 @@ func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff { return slice } +// DiffFunction represents a function that can compute differences between two rune slices +type DiffFunction func(text1, text2 []rune) []Diff + // DiffMain finds the differences between two texts. // If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character. func (dmp *DiffMatchPatch) DiffMain(text1, text2 string, checklines bool) []Diff { @@ -88,14 +91,20 @@ func (dmp *DiffMatchPatch) DiffMain(text1, text2 string, checklines bool) []Diff // DiffMainRunes finds the differences between two rune sequences. // If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character. func (dmp *DiffMatchPatch) DiffMainRunes(text1, text2 []rune, checklines bool) []Diff { - var deadline time.Time - if dmp.DiffTimeout > 0 { - deadline = time.Now().Add(dmp.DiffTimeout) + deadline := dmp.getDeadline() + + // Encapsulate the deadline and line mode logic in the closure + diffFn := func(text1, text2 []rune) []Diff { + if checklines && len(text1) > 100 && len(text2) > 100 { + return dmp.diffBigLine(text1, text2, deadline) + } + return dmp.diffBisect(text1, text2, deadline) } - return dmp.diffMainRunes(text1, text2, checklines, deadline) + + return dmp.diffMainRunes(text1, text2, diffFn) } -func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, deadline time.Time) []Diff { +func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, diffFn DiffFunction) []Diff { if runesEqual(text1, text2) { var diffs []Diff if len(text1) > 0 { @@ -116,7 +125,7 @@ func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, d text2 = text2[:len(text2)-commonlength] // Compute the diff on the middle block. 
- diffs := dmp.diffCompute(text1, text2, checklines, deadline) + diffs := dmp.diffCompute(text1, text2, diffFn) // Restore the prefix and suffix. if len(commonprefix) != 0 { @@ -129,8 +138,16 @@ func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, d return dmp.DiffCleanupMerge(diffs) } +// getDeadline returns the deadline for the diff operation +func (dmp *DiffMatchPatch) getDeadline() time.Time { + if dmp.DiffTimeout > 0 { + return time.Now().Add(dmp.DiffTimeout) + } + return time.Time{} +} + // diffCompute finds the differences between two rune slices. Assumes that the texts do not have any common prefix or suffix. -func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, deadline time.Time) []Diff { +func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, diffFn DiffFunction) []Diff { diffs := []Diff{} if len(text1) == 0 { // Just add some text (speedup). @@ -177,25 +194,30 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea text2B := hm[3] midCommon := hm[4] // Send both pairs off for separate processing. - diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline) - diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline) + diffsA := dmp.diffMainRunes(text1A, text2A, diffFn) + diffsB := dmp.diffMainRunes(text1B, text2B, diffFn) // Merge the results. diffs := diffsA diffs = append(diffs, Diff{DiffEqual, string(midCommon)}) diffs = append(diffs, diffsB...) return diffs - } else if checklines && len(text1) > 100 && len(text2) > 100 { - return dmp.diffLineMode(text1, text2, deadline) } - return dmp.diffBisect(text1, text2, deadline) + + return diffFn(text1, text2) } -// diffLineMode does a quick line-level diff on both []runes, then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs. 
-func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff { +// diffBigLine does a quick line-level diff on both []runes, then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs. +func (dmp *DiffMatchPatch) diffBigLine(text1, text2 []rune, deadline time.Time) []Diff { // Scan the text on a line-by-line basis first. text1, text2, linearray := dmp.DiffLinesToRunes(string(text1), string(text2)) - diffs := dmp.diffMainRunes(text1, text2, false, deadline) + // For line-level diffing, we want to do a simple comparison of the line-based runes + // rather than character-by-character diffing + diffFn := func(text1, text2 []rune) []Diff { + return dmp.diffBisect(text1, text2, deadline) + } + + diffs := dmp.diffMainRunes(text1, text2, diffFn) // Convert the diff back to original text. diffs = dmp.DiffCharsToLines(diffs, linearray) @@ -230,7 +252,7 @@ func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) countDelete+countInsert) pointer = pointer - countDelete - countInsert - a := dmp.diffMainRunes([]rune(textDelete), []rune(textInsert), false, deadline) + a := dmp.diffMainRunes([]rune(textDelete), []rune(textInsert), diffFn) for j := len(a) - 1; j >= 0; j-- { diffs = splice(diffs, pointer, 0, a[j]) } @@ -248,6 +270,37 @@ func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) return diffs[:len(diffs)-1] // Remove the dummy entry at the end. } +// DiffLineMode finds the differences between two texts, always using line mode. +// Unlike DiffMain with checklines=true, this method will always use line mode regardless of text length. +// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character. +func (dmp *DiffMatchPatch) DiffLineMode(text1, text2 string) []Diff { + return dmp.diffOnlyByLines([]rune(text1), []rune(text2)) +} + +// diffOnlyByLines finds the differences between two texts, only by lines. 
+func (dmp *DiffMatchPatch) diffOnlyByLines(text1, text2 []rune) []Diff { + // For line-level diffing, we want to do a simple comparison of the line-based runes + // rather than character-by-character diffing + diffFn := func(text1, text2 []rune) []Diff { + if !runesEqual(text1, text2) { + return []Diff{ + {DiffDelete, string(text1)}, + {DiffInsert, string(text2)}, + } + } + return []Diff{{DiffEqual, string(text1)}} + } + + // For line-based diffing, we want to avoid the character-based optimizations in diffCompute + // and just use our simple diff function directly + diffs := diffFn(text1, text2) + + // Optimize line-based diffs using line-specific cleanup + diffs = dmp.DiffCleanupLineBased(diffs) + + return diffs +} + // DiffBisect finds the 'middle snake' of a diff, split the problem in two and return the recursively constructed diff. // If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character. // See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. @@ -380,9 +433,14 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int, runes1b := runes1[x:] runes2b := runes2[y:] + // wrap dmp.diffBisect with deadline + diffFn := func(text1, text2 []rune) []Diff { + return dmp.diffBisect(text1, text2, deadline) + } + // Compute both diffs serially. - diffs := dmp.diffMainRunes(runes1a, runes2a, false, deadline) - diffsb := dmp.diffMainRunes(runes1b, runes2b, false, deadline) + diffs := dmp.diffMainRunes(runes1a, runes2a, diffFn) + diffsb := dmp.diffMainRunes(runes1b, runes2b, diffFn) return append(diffs, diffsb...) } @@ -953,6 +1011,77 @@ func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff { return diffs } +// DiffCleanupLineBased optimizes line-based diffs by merging consecutive operations, +// removing empty line diffs, and grouping related line changes together. 
+// This function is specifically designed for line-level diffing where each diff +// represents entire lines rather than character-level changes. +func (dmp *DiffMatchPatch) DiffCleanupLineBased(diffs []Diff) []Diff { + if len(diffs) == 0 { + return diffs + } + + // First pass: merge consecutive operations of the same type + cleaned := make([]Diff, 0, len(diffs)) + pointer := 0 + + for pointer < len(diffs) { + current := diffs[pointer] + + // If this is an equality, just add it + if current.Type == DiffEqual { + cleaned = append(cleaned, current) + pointer++ + continue + } + + // Collect consecutive operations of the same type + mergedText := current.Text + pointer++ + + // Merge consecutive deletions or insertions + for pointer < len(diffs) && diffs[pointer].Type == current.Type { + mergedText += diffs[pointer].Text + pointer++ + } + + // Only add non-empty merged operations + if len(strings.TrimSpace(mergedText)) > 0 { + cleaned = append(cleaned, Diff{current.Type, mergedText}) + } + } + + // Second pass: remove trivial equalities (empty lines or whitespace-only lines) + // and merge adjacent equalities + if len(cleaned) > 1 { + final := make([]Diff, 0, len(cleaned)) + + for i := 0; i < len(cleaned); i++ { + current := cleaned[i] + + // Skip empty or whitespace-only equalities + if current.Type == DiffEqual && len(strings.TrimSpace(current.Text)) == 0 { + continue + } + + // Merge consecutive equalities + if current.Type == DiffEqual && len(final) > 0 && final[len(final)-1].Type == DiffEqual { + final[len(final)-1].Text += current.Text + } else { + final = append(final, current) + } + } + + cleaned = final + } + + // Third pass: optimize deletion-insertion pairs + // If we have a deletion followed by an insertion, and they're similar, + // we might want to keep them as separate operations for clarity in line-based diffs + // This preserves the line-by-line nature of the diff + + return cleaned +} + // DiffCleanupMerge reorders and merges like edit sections. 
Merge equalities. // Any edit section can move as long as it doesn't cross an equality. func (dmp *DiffMatchPatch) DiffCleanupMerge(diffs []Diff) []Diff { diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go index 2c43864..160231f 100644 --- a/diffmatchpatch/diff_test.go +++ b/diffmatchpatch/diff_test.go @@ -1574,3 +1574,236 @@ func BenchmarkDiffMainRunesLargeDiffLines(b *testing.B) { diffs = dmp.DiffCharsToLines(diffs, linearray) } } + +func TestDiffLineMode(t *testing.T) { + dmp := New() + + // Helper function to test line mode diffing + testLineMode := func(t *testing.T, text1, text2 string, expected []Diff) { + t.Helper() + actual := dmp.DiffLineMode(text1, text2) + assert.Equal(t, expected, actual, + fmt.Sprintf("DiffLineMode(%q, %q) = %v, want %v", text1, text2, actual, expected)) + } + + t.Run("empty strings", func(t *testing.T) { + testLineMode(t, "", "", []Diff{{DiffEqual, ""}}) + }) + + t.Run("identical strings", func(t *testing.T) { + testLineMode(t, "abc", "abc", []Diff{{DiffEqual, "abc"}}) + }) + + t.Run("simple insertions", func(t *testing.T) { + testLineMode(t, "abc", "ab123c", []Diff{{DiffDelete, "abc"}, {DiffInsert, "ab123c"}}) + testLineMode(t, "abc", "a123b456c", []Diff{{DiffDelete, "abc"}, {DiffInsert, "a123b456c"}}) + }) + + t.Run("simple deletions", func(t *testing.T) { + testLineMode(t, "a123bc", "abc", []Diff{{DiffDelete, "a123bc"}, {DiffInsert, "abc"}}) + testLineMode(t, "a123b456c", "abc", []Diff{{DiffDelete, "a123b456c"}, {DiffInsert, "abc"}}) + }) + + t.Run("single character replacements", func(t *testing.T) { + testLineMode(t, "a", "b", []Diff{{DiffDelete, "a"}, {DiffInsert, "b"}}) + }) + + t.Run("sentence replacements", func(t *testing.T) { + testLineMode(t, "Apples are a fruit.", "Bananas are also fruit.", []Diff{ + {DiffDelete, "Apples are a fruit."}, + {DiffInsert, "Bananas are also fruit."}, + }) + }) + + t.Run("multi-line text changes", func(t *testing.T) { + testLineMode(t, "Apples are a fruit.\nline 2, line 3", 
"Bananas are also fruit.\nline 2, line 3\nline 4, line 5", []Diff{ + {DiffDelete, "Apples are a fruit.\nline 2, line 3"}, + {DiffInsert, "Bananas are also fruit.\nline 2, line 3\nline 4, line 5"}, + }) + }) + + t.Run("unicode and special characters", func(t *testing.T) { + testLineMode(t, "ax\t", "\u0680x\u0000", []Diff{ + {DiffDelete, "ax\t"}, + {DiffInsert, "\u0680x\u0000"}, + }) + }) + + t.Run("complex text transformations", func(t *testing.T) { + testLineMode(t, "1ayb2", "abxab", []Diff{ + {DiffDelete, "1ayb2"}, + {DiffInsert, "abxab"}, + }) + + testLineMode(t, "abcy", "xaxcxabc", []Diff{ + {DiffDelete, "abcy"}, + {DiffInsert, "xaxcxabc"}, + }) + }) + + t.Run("long text with mixed operations", func(t *testing.T) { + testLineMode(t, "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", []Diff{ + {DiffDelete, "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg"}, + {DiffInsert, "a-bcd-efghijklmnopqrs"}, + }) + }) + + t.Run("wiki-style text", func(t *testing.T) { + // t.Skip("FIXME: check why this fails") + testLineMode(t, + "a [[Pennsylvania]] and [[New", + " and [[Pennsylvania]]", + []Diff{ + {DiffDelete, "a [[Pennsylvania]] and [[New"}, + {DiffInsert, " and [[Pennsylvania]]"}, + }, + ) + }) + + t.Run("invalid UTF-8 sequences", func(t *testing.T) { + testLineMode(t, "\xe0\xe5", "", []Diff{{DiffDelete, "��"}}) + }) + + t.Run("real diff without timeout", func(t *testing.T) { + // Perform a real diff and switch off the timeout. 
+ dmp.DiffTimeout = 0 + + // Re-run a few key tests to ensure they work with timeout disabled + testLineMode(t, "a", "b", []Diff{{DiffDelete, "a"}, {DiffInsert, "b"}}) + testLineMode(t, "Apples are a fruit.", "Bananas are also fruit.", []Diff{ + {DiffDelete, "Apples are a fruit."}, + {DiffInsert, "Bananas are also fruit."}, + }) + }) +} + +func TestDiffCleanupLineBased(t *testing.T) { + dmp := New() + + tests := []struct { + name string + input []Diff + expected []Diff + }{ + { + name: "empty diffs", + input: []Diff{}, + expected: []Diff{}, + }, + { + name: "merge consecutive deletions", + input: []Diff{ + {DiffDelete, "line1\n"}, + {DiffDelete, "line2\n"}, + {DiffEqual, "unchanged\n"}, + }, + expected: []Diff{ + {DiffDelete, "line1\nline2\n"}, + {DiffEqual, "unchanged\n"}, + }, + }, + { + name: "merge consecutive insertions", + input: []Diff{ + {DiffEqual, "unchanged\n"}, + {DiffInsert, "newline1\n"}, + {DiffInsert, "newline2\n"}, + }, + expected: []Diff{ + {DiffEqual, "unchanged\n"}, + {DiffInsert, "newline1\nnewline2\n"}, + }, + }, + { + name: "merge consecutive equalities", + input: []Diff{ + {DiffEqual, "line1\n"}, + {DiffEqual, "line2\n"}, + {DiffDelete, "deleted\n"}, + }, + expected: []Diff{ + {DiffEqual, "line1\nline2\n"}, + {DiffDelete, "deleted\n"}, + }, + }, + { + name: "remove empty whitespace-only equalities", + input: []Diff{ + {DiffEqual, "line1\n"}, + {DiffEqual, " \n"}, + {DiffEqual, "line2\n"}, + }, + expected: []Diff{ + {DiffEqual, "line1\nline2\n"}, + }, + }, + { + name: "complex line-based diff", + input: []Diff{ + {DiffDelete, "old1\n"}, + {DiffDelete, "old2\n"}, + {DiffEqual, "common1\n"}, + {DiffEqual, "common2\n"}, + {DiffInsert, "new1\n"}, + {DiffInsert, "new2\n"}, + }, + expected: []Diff{ + {DiffDelete, "old1\nold2\n"}, + {DiffEqual, "common1\ncommon2\n"}, + {DiffInsert, "new1\nnew2\n"}, + }, + }, + { + name: "preserve non-empty whitespace lines", + input: []Diff{ + {DiffEqual, "line1\n"}, + {DiffEqual, " indented\n"}, + {DiffEqual, 
"line2\n"}, + }, + expected: []Diff{ + {DiffEqual, "line1\n indented\nline2\n"}, + }, + }, + { + name: "remove empty insert with a equal line between", + input: []Diff{ + {DiffEqual, ""}, + {DiffEqual, "line2\n"}, + {DiffInsert, ""}, + }, + expected: []Diff{ + {DiffEqual, "line2\n"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := dmp.DiffCleanupLineBased(tt.input) + assert.Equal(t, tt.expected, result, + fmt.Sprintf("DiffCleanupLineBased failed for test: %s", tt.name)) + }) + } +} + +func BenchmarkDiffCleanupLineBased(b *testing.B) { + dmp := New() + + // Create a realistic line-based diff with many operations + diffs := []Diff{ + {DiffDelete, "old line 1\n"}, + {DiffDelete, "old line 2\n"}, + {DiffEqual, "common line 1\n"}, + {DiffEqual, "common line 2\n"}, + {DiffInsert, "new line 1\n"}, + {DiffInsert, "new line 2\n"}, + {DiffEqual, "more common\n"}, + {DiffDelete, "another old\n"}, + {DiffInsert, "another new\n"}, + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dmp.DiffCleanupLineBased(diffs) + } +} diff --git a/diffmatchpatch/patch.go b/diffmatchpatch/patch.go index 0dbe3bd..275d918 100644 --- a/diffmatchpatch/patch.go +++ b/diffmatchpatch/patch.go @@ -11,6 +11,7 @@ package diffmatchpatch import ( "bytes" "errors" + "fmt" "math" "net/url" "regexp" @@ -31,26 +32,8 @@ type Patch struct { // Header: @@ -382,8 +481,9 @@ // Indices are printed as 1-based, not 0-based. 
func (p *Patch) String() string { - var coords1, coords2 string - - if p.Length1 == 0 { - coords1 = strconv.Itoa(p.Start1) + ",0" - } else if p.Length1 == 1 { - coords1 = strconv.Itoa(p.Start1 + 1) - } else { - coords1 = strconv.Itoa(p.Start1+1) + "," + strconv.Itoa(p.Length1) - } - - if p.Length2 == 0 { - coords2 = strconv.Itoa(p.Start2) + ",0" - } else if p.Length2 == 1 { - coords2 = strconv.Itoa(p.Start2 + 1) - } else { - coords2 = strconv.Itoa(p.Start2+1) + "," + strconv.Itoa(p.Length2) - } - var text bytes.Buffer - _, _ = text.WriteString("@@ -" + coords1 + " +" + coords2 + " @@\n") + _, _ = text.WriteString(p.header()) // Escape the body of the patch with %xx notation. for _, aDiff := range p.diffs { @@ -70,6 +53,23 @@ func (p *Patch) String() string { return unescaper.Replace(text.String()) } +func (p Patch) header() string { + return fmt.Sprintf("@@ -%s +%s @@\n", + p.coords(p.Start1, p.Length1), + p.coords(p.Start2, p.Length2)) +} + +func (Patch) coords(start, length int) string { + switch { + case length == 0: + return fmt.Sprintf("%d,0", start) + case length == 1: + return fmt.Sprintf("%d", start+1) + default: + return fmt.Sprintf("%d,%d", start+1, length) + } +} + // PatchAddContext increases the context until it is unique, but doesn't let the pattern expand beyond MatchMaxBits. func (dmp *DiffMatchPatch) PatchAddContext(patch Patch, text string) Patch { if len(text) == 0 { diff --git a/diffmatchpatch/unified.go b/diffmatchpatch/unified.go new file mode 100644 index 0000000..ad064a7 --- /dev/null +++ b/diffmatchpatch/unified.go @@ -0,0 +1,435 @@ +package diffmatchpatch + +import ( + "fmt" + "strings" +) + +// Unified computes the differences between text1 and text2 and formats the differences in the "unified diff" format. +// Optionally pass UnifiedOption to set the new/old labels and context lines. 
+func (dmp *DiffMatchPatch) Unified(text1, text2 string, opts ...UnifiedOption) string { + options := newUnifiedOptions(opts) + + text1Enc, text2Enc, lines := dmp.DiffLinesToChars(text1, text2) + + diffs := dmp.DiffMain(text1Enc, text2Enc, false) + diffs = dmp.DiffCharsToLines(diffs, lines) + + unified := newUnified(diffs, options) + + return unified.String() +} + +// DiffUnified formats the diffs slice in the "unified diff" format. +// Optionally pass UnifiedOption to set the new/old labels and context lines. +func (dmp *DiffMatchPatch) DiffUnified(diffs []Diff, opts ...UnifiedOption) string { + options := newUnifiedOptions(opts) + + u := newUnified(diffs, options) + + return u.String() +} + +// newUnified takes a []Diff slice and converts it into a unified struct, which +// can then be used to produce the unified diff output using its String() +// method. +func newUnified(diffs []Diff, opts unifiedOptions) unified { + return unified{ + label1: opts.text1Label, + label2: opts.text2Label, + + patches: patchMakeUnified(diffs, opts.contextLines), + } +} + +func patchMakeUnified(diffs []Diff, contextLines int) []Patch { + maxCtx := contextLines * 2 + + var patches []Patch + + if diffIsEqual(diffs) { + return nil + } + + diffs = diffLinewise(diffs) + + var ( + patch Patch + + lineNo1 int + lineNo2 int + context []Diff + ) + for _, diff := range diffs { + switch diff.Type { + case DiffDelete: + lineNo1++ + case DiffInsert: + lineNo2++ + case DiffEqual: + lineNo1++ + lineNo2++ + } + + if diff.Type == DiffEqual { + context = append(context, diff) + continue + } + + // close previous patch + if len(patch.diffs) != 0 && len(context) > maxCtx { + cl := min(len(context), contextLines) + + patch.diffs = append(patch.diffs, context[:cl]...)
+ + patchUpdateLength(&patch) + + patches = append(patches, patch) + patch = Patch{} + } + + // start new patch + if len(patch.diffs) == 0 { + cl := min(len(context), contextLines) + + l1 := lineNo1 - cl + l2 := lineNo2 - cl + + // When starting a new patch, the line number for lineNo1 XOR lineNo2 + // has already been advanced, but not the other. Account for that in + // l1 or l2. + switch diff.Type { + case DiffDelete: + l1-- + case DiffInsert: + l2-- + } + + patch = Patch{ + Start1: l1, + Start2: l2, + diffs: context[len(context)-cl:], + } + + context = nil + } + + patch.diffs = append(patch.diffs, context...) + context = nil + + patch.diffs = append(patch.diffs, diff) + } + + // close last hunk + if len(patch.diffs) != 0 { + cl := min(len(context), contextLines) + + patch.diffs = append(patch.diffs, context[:cl]...) + + patchUpdateLength(&patch) + + patches = append(patches, patch) + patch = Patch{} + } + + return patches +} + +func patchUpdateLength(p *Patch) { + p.Length1 = 0 + p.Length2 = 0 + + for _, diff := range p.diffs { + switch diff.Type { + case DiffDelete: + p.Length1++ + case DiffInsert: + p.Length2++ + case DiffEqual: + p.Length1++ + p.Length2++ + } + } +} + +func diffIsEqual(diffs []Diff) bool { + for _, diff := range diffs { + if diff.Type != DiffEqual { + return false + } + } + + return true +} + +// diffLinewise splits and merges diffs so that each individual diff represents one line, including the final newline character.
+func diffLinewise(diffs []Diff) []Diff { + var ( + ret []Diff + line1, line2 string + ) + + diffs = diffCleanupNewline(diffs) + + add := func(d Diff) { + switch d.Type { + case DiffDelete: + line1 = line1 + d.Text + case DiffInsert: + line2 = line2 + d.Text + default: // equal + line1 = line1 + d.Text + line2 = line2 + d.Text + } + + if strings.HasSuffix(line1, "\n") && line1 == line2 { + ret = append(ret, Diff{ + Type: DiffEqual, + Text: line1, + }) + + line1, line2 = "", "" + } + + if strings.HasSuffix(line1, "\n") { + ret = append(ret, Diff{ + Type: DiffDelete, + Text: line1, + }) + + line1 = "" + } + + if strings.HasSuffix(line2, "\n") { + ret = append(ret, Diff{ + Type: DiffInsert, + Text: line2, + }) + + line2 = "" + } + } + + for _, diff := range diffs { + for _, segment := range strings.SplitAfter(diff.Text, "\n") { + add(Diff{ + Type: diff.Type, + Text: segment, + }) + } + } + + // line1 and/or line2 may be non-empty if there is no newline at the end of file. + if line1 != "" && line1 == line2 { + ret = append(ret, Diff{ + Type: DiffEqual, + Text: line1, + }) + + line1, line2 = "", "" + } + + if line1 != "" { + ret = append(ret, Diff{ + Type: DiffDelete, + Text: line1, + }) + + line1 = "" + } + + if line2 != "" { + ret = append(ret, Diff{ + Type: DiffInsert, + Text: line2, + }) + + line2 = "" + } + + return reorderDeletionsFirst(ret) +} + +// diffCleanupNewline looks for single edits surrounded on both sides by equalities which can be shifted sideways to align on newlines. 
+func diffCleanupNewline(diffs []Diff) []Diff {
+	var ret []Diff
+
+	for i := 0; i < len(diffs); i++ {
+		if i < len(diffs)-2 && diffs[i].Type == DiffEqual && diffs[i+1].Type != DiffEqual && diffs[i+2].Type == DiffEqual {
+			common := prefixWithNewline(diffs[i+1].Text, diffs[i+2].Text)
+
+			// Convert ["=" a, "±" common+x, "=" common+b]
+			// to ["=" a+common, "±" x+common, "=" b]
+			if common != "" {
+				ret = append(ret,
+					Diff{
+						Type: DiffEqual,
+						Text: diffs[i].Text + common,
+					},
+					Diff{
+						Type: diffs[i+1].Type,
+						Text: strings.TrimPrefix(diffs[i+1].Text, common) + common,
+					},
+					Diff{
+						Type: DiffEqual,
+						Text: strings.TrimPrefix(diffs[i+2].Text, common),
+					},
+				)
+
+				i += 2
+				continue
+			}
+		}
+
+		ret = append(ret, diffs[i])
+	}
+
+	return ret
+}
+
+// prefixWithNewline returns the longest common prefix between text1 and text2, up to and including a newline character.
+// If text1 and text2 do not have a common prefix, or the common prefix does not include a newline character, the empty string is returned.
+func prefixWithNewline(text1, text2 string) string {
+	// Measure the shared prefix in bytes, so that n is always a valid offset for slicing text1.
+	n := 0
+	for n < len(text1) && n < len(text2) && text1[n] == text2[n] {
+		n++
+	}
+
+	// Cut after the last shared "\n" (never part of a multi-byte rune); LastIndex gives -1 without one, so the result is "".
+	return text1[:strings.LastIndex(text1[:n], "\n")+1]
+}
+
+// reorderDeletionsFirst reorders changes so that deletions come before insertions, without crossing an equality boundary.
+func reorderDeletionsFirst(diffs []Diff) []Diff {
+	var (
+		ret        []Diff
+		deletions  []Diff
+		insertions []Diff
+	)
+
+	for _, diff := range diffs {
+		switch diff.Type {
+		case DiffDelete:
+			deletions = append(deletions, diff)
+		case DiffInsert:
+			insertions = append(insertions, diff)
+		case DiffEqual:
+			ret = append(ret, deletions...)
+			deletions = nil
+
+			ret = append(ret, insertions...)
+			insertions = nil
+
+			ret = append(ret, diff)
+		}
+	}
+
+	ret = append(ret, deletions...)
+	ret = append(ret, insertions...)
+
+	return ret
+}
+
+// unified represents modifications in a form conducive to printing a unified diff.
+type unified struct { + label1, label2 string + + patches []Patch +} + +// String converts a unified diff to the standard textual form for that diff. +// The output of this function can be passed to tools like patch. +func (u unified) String() string { + if len(u.patches) == 0 { + return "" + } + + var b strings.Builder + fmt.Fprintf(&b, "--- %s\n", u.label1) + fmt.Fprintf(&b, "+++ %s\n", u.label2) + + for _, patch := range u.patches { + fmt.Fprint(&b, patchFormatUnified(patch)) + } + + return b.String() +} + +// patchFormatUnified implements GNU's unified diff format. +// This differs from Patch.String() in that this function assumes that each Diff +// (except possibly the last ones) ends in a newline. If either input does not +// end with a newline character, an appropriate message will be printed. +// The output is not URL encoded. +func patchFormatUnified(p Patch) string { + var b strings.Builder + + fmt.Fprint(&b, p.header()) + + for _, diff := range p.diffs { + var prefix string + switch diff.Type { + case DiffDelete: + prefix = "-" + case DiffInsert: + prefix = "+" + case DiffEqual: + prefix = " " + } + + fmt.Fprint(&b, prefix, diff.Text) + + if !strings.HasSuffix(diff.Text, "\n") { + fmt.Fprint(&b, "\n\\ No newline at end of file\n") + } + } + + return b.String() +} + +// DefaultContextLines is the number of unchanged lines of surrounding +// context displayed by Unified. +const DefaultContextLines = 3 + +// UnifiedOption is an option for DiffUnified(). +type UnifiedOption func(*unifiedOptions) + +type unifiedOptions struct { + contextLines int + text1Label string + text2Label string +} + +func newUnifiedOptions(opts []UnifiedOption) unifiedOptions { + ret := unifiedOptions{ + contextLines: DefaultContextLines, + text1Label: "text1", + text2Label: "text2", + } + + for _, o := range opts { + o(&ret) + } + + return ret +} + +// UnifiedContextLines sets the number of unchanged lines of surrounding context +// printed. Defaults to DefaultContextLines. 
+func UnifiedContextLines(lines int) UnifiedOption { + if lines <= 0 { + lines = DefaultContextLines + } + + return func(o *unifiedOptions) { + o.contextLines = lines + } +} + +// UnifiedLabels sets the labels for the old and new files. Defaults to "text1" and "text2". +func UnifiedLabels(oldLabel, newLabel string) UnifiedOption { + return func(o *unifiedOptions) { + o.text1Label = oldLabel + o.text2Label = newLabel + } +} diff --git a/diffmatchpatch/unified_test.go b/diffmatchpatch/unified_test.go new file mode 100644 index 0000000..47270a9 --- /dev/null +++ b/diffmatchpatch/unified_test.go @@ -0,0 +1,274 @@ +package diffmatchpatch_test + +import ( + "fmt" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/sergi/go-diff/diffmatchpatch" +) + +func TestDiffUnified(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + text1 string + text2 string + want string + }{ + { + name: "No changes", + text1: "Hello, world!\n", + text2: "Hello, world!\n", + want: "", + }, + { + name: "Insertion at beginning", + text1: "Hello, world!\n", + text2: "New line\nHello, world!\n", + want: "--- text1\n+++ text2\n@@ -1 +1,2 @@\n+New line\n Hello, world!\n", + }, + { + name: "Insertion at end", + text1: "Hello, world!\n", + text2: "Hello, world!\nNew line\n", + want: "--- text1\n+++ text2\n@@ -1 +1,2 @@\n Hello, world!\n+New line\n", + }, + { + name: "Insertion middle", + text1: "Hello, world!\nHello, world!\n", + text2: "Hello, world!\nNew line\nHello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,2 +1,3 @@\n Hello, world!\n+New line\n Hello, world!\n", + }, + { + name: "Removal at beginning", + text1: "Old line\nHello, world!\n", + text2: "Hello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,2 +1 @@\n-Old line\n Hello, world!\n", + }, + { + name: "Removal at end", + text1: "Hello, world!\nOld line\n", + text2: "Hello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,2 +1 @@\n Hello, world!\n-Old line\n", + }, + { + name: "Removal middle", + 
text1: "Hello, world!\nOld line\nHello, world!\n", + text2: "Hello, world!\nHello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,3 +1,2 @@\n Hello, world!\n-Old line\n Hello, world!\n", + }, + { + name: "Replacement", + text1: "Prefix\nHello, world!\nSuffix\n", + text2: "Prefix\nHello, Golang!\nSuffix\n", + want: "--- text1\n+++ text2\n@@ -1,3 +1,3 @@\n Prefix\n-Hello, world!\n+Hello, Golang!\n Suffix\n", + }, + { + name: "Insertion", + text1: makeContext(10, 0), + text2: makeContext(5, 0) + "INSERTION\n" + makeContext(5, 5), + want: "--- text1\n+++ text2\n@@ -3,6 +3,7 @@\n context2\n context3\n context4\n+INSERTION\n context5\n context6\n context7\n", + }, + { + name: "Multiple hunks", + text1: makeContext(20, 0), + text2: makeContext(5, 0) + "INSERTION1\n" + makeContext(10, 5) + "INSERTION2\n" + makeContext(5, 15), + want: `--- text1 ++++ text2 +@@ -3,6 +3,7 @@ + context2 + context3 + context4 ++INSERTION1 + context5 + context6 + context7 +@@ -13,6 +14,7 @@ + context12 + context13 + context14 ++INSERTION2 + context15 + context16 + context17 +`, + }, + { + name: "Merge hunk with <= 5 lines of context", + text1: makeContext(15, 0), + text2: makeContext(5, 0) + "INSERTION1\n" + makeContext(5, 5) + "INSERTION2\n" + makeContext(5, 10), + want: `--- text1 ++++ text2 +@@ -3,11 +3,13 @@ + context2 + context3 + context4 ++INSERTION1 + context5 + context6 + context7 + context8 + context9 ++INSERTION2 + context10 + context11 + context12 +`, + }, + { + name: "Insert without newline", + text1: "context1", + text2: "context1\nnew line", + want: `--- text1 ++++ text2 +@@ -1 +1,2 @@ +-context1 +\ No newline at end of file ++context1 ++new line +\ No newline at end of file +`, + }, + { + name: "Removal without newline", + text1: "context1\nold line", + text2: "context1", + want: `--- text1 ++++ text2 +@@ -1,2 +1 @@ +-context1 +-old line +\ No newline at end of file ++context1 +\ No newline at end of file +`, + }, + { + name: "context without newline", + text1: 
"context0\nold1\ncontext1", + text2: "context0\nnew1\ncontext1", + want: `--- text1 ++++ text2 +@@ -1,3 +1,3 @@ + context0 +-old1 ++new1 + context1 +\ No newline at end of file +`, + }, + { + name: "Replace multiple subsequent lines", + text1: makeContext(5, 0) + "old1\nold2\nold3\n" + makeContext(5, 5), + text2: makeContext(5, 0) + "new1\nnew2\nnew3\n" + makeContext(5, 5), + want: `--- text1 ++++ text2 +@@ -3,9 +3,9 @@ + context2 + context3 + context4 +-old1 +-old2 +-old3 ++new1 ++new2 ++new3 + context5 + context6 + context7 +`, + }, + { + name: "empty text1", + text1: "", + text2: "new1\n", + want: `--- text1 ++++ text2 +@@ -0,0 +1 @@ ++new1 +`, + }, + { + name: "empty text2", + text1: "old1\n", + text2: "", + want: `--- text1 ++++ text2 +@@ -1 +0,0 @@ +-old1 +`, + }, + } + + for _, tc := range cases { + // Un-alias tc for compatibility with Go <1.22. + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + dmp := diffmatchpatch.New() + + got := dmp.Unified(tc.text1, tc.text2, diffmatchpatch.UnifiedLabels("text1", "text2")) + + t.Logf("dmp.Unified() =\n%s", got) + + if got != tc.want { + t.Errorf("Unified() output differs (-want/+got):\n%s", cmp.Diff(tc.want, got)) + } + + // DiffLinesToChars / DiffCharsToLines is not required for correct results. 
+ diffs := dmp.DiffMain(tc.text1, tc.text2, false) + + got = dmp.DiffUnified(diffs, diffmatchpatch.UnifiedLabels("text1", "text2"), diffmatchpatch.UnifiedContextLines(3)) + if got != tc.want { + t.Errorf("DiffUnified() output differs (-want/+got):\n%s", cmp.Diff(tc.want, got)) + } + + }) + } +} + +func makeContext(n, start int) string { + var b strings.Builder + + for i := start; i < start+n; i++ { + fmt.Fprintf(&b, "context%d\n", i) + } + + return b.String() +} + +func ExampleDiffMatchPatch_DiffUnified() { + text1 := "Prefix\nHello, world!\nSuffix\n" + text2 := "Prefix\nHello, Golang!\nSuffix\n" + + dmp := diffmatchpatch.New() + + // Pre-process the inputs so that each codepoint in text[12]Enc represents one line. + text1Enc, text2Enc, lines := dmp.DiffLinesToChars(text1, text2) + + // Run the diff algorithm on the preprocessed inputs. + diffs := dmp.DiffMain(text1Enc, text2Enc, false) + + // Expand the diffs back into the full lines they represent. + diffs = dmp.DiffCharsToLines(diffs, lines) + + // Format as unified diff. + unifiedDiff := dmp.DiffUnified(diffs, + diffmatchpatch.UnifiedLabels("old.txt", "new.txt"), + diffmatchpatch.UnifiedContextLines(3)) + + fmt.Print(unifiedDiff) + // Output: + // --- old.txt + // +++ new.txt + // @@ -1,3 +1,3 @@ + // Prefix + // -Hello, world! + // +Hello, Golang! 
+ // Suffix +} diff --git a/go.mod b/go.mod index c7886ce..be471e6 100644 --- a/go.mod +++ b/go.mod @@ -1,11 +1,15 @@ module github.com/sergi/go-diff +require github.com/stretchr/testify v1.10.0 + +require gopkg.in/yaml.v3 v3.0.1 // indirect + require ( github.com/davecgh/go-spew v1.1.1 // indirect + github.com/google/go-cmp v0.6.0 github.com/kr/pretty v0.1.0 // indirect - github.com/stretchr/testify v1.4.0 + github.com/pmezard/go-difflib v1.0.0 // indirect gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect ) -go 1.13 +go 1.24 diff --git a/go.sum b/go.sum index 8dd9f36..f561504 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,7 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= @@ -9,15 +9,10 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= -github.com/stretchr/testify 
v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/mise.toml b/mise.toml new file mode 100644 index 0000000..9822d22 --- /dev/null +++ b/mise.toml @@ -0,0 +1,15 @@ +[tools] +go = "1.24" + +[tasks.install-go-tools] +description = "Install Go tools for development" +run = """ +#!/usr/bin/env bash +set -e + +go install golang.org/x/tools/gopls@latest +go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest +go install github.com/go-delve/delve/cmd/dlv@latest + +echo "✅ Go tools installed" +"""