Skip to content

Commit 6dbe13c

Browse files
committed
fix: use common lineHash to share indices between text1 and text2
Use a common cache of line contents shared between the two texts in `DiffLinesToChars` so that line diffs are computed correctly. In some cases, line diffs could not be retrieved correctly using the standard line-mode approach (https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs#line-mode). In the case below, we failed to get correct line diffs before this fix. ```go:main.go package main import ( "fmt" "github.com/sergi/go-diff/diffmatchpatch" ) const ( text1 = `hoge: step11: - arrayitem1 - arrayitem2 step12: step21: hoge step22: -93 fuga: flatitem ` text2 = `hoge: step11: - arrayitem4 - arrayitem2 - arrayitem3 step12: step21: hoge step22: -92 fuga: flatitem ` ) func main() { dmp := diffmatchpatch.New() a, b, c := dmp.DiffLinesToChars(text1, text2) diffs := dmp.DiffMain(a, b, false) diffs = dmp.DiffCharsToLines(diffs, c) // diffs = dmp.DiffCleanupSemantic(diffs) fmt.Println(diffs) } ``` ```text:output [{Insert hoge: step11: hoge: } {Equal hoge: } {Insert hoge: } {Equal step11: } {Insert hoge: } {Equal - arrayitem1 } {Insert hoge: } {Equal - arrayitem2 } {Insert hoge: } {Equal step12: } {Insert hoge: } {Equal step21: hoge } {Insert hoge: } {Equal step22: -93 } {Delete fuga: flatitem }] ``` Note: This fix corresponds to the JavaScript implementation (ref: https://github.com/google/diff-match-patch/blob/62f2e689f498f9c92dbc588c58750addec9b1654/javascript/diff_match_patch_uncompressed.js#L466).
1 parent 849d7eb commit 6dbe13c

File tree

2 files changed

+6
-4
lines changed

2 files changed

+6
-4
lines changed

diffmatchpatch/diff.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,17 +1313,17 @@ func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, stri
13131313
// '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
13141314
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
13151315

1316+
lineHash := make(map[string]int)
13161317
//Each string has the index of lineArray which it points to
1317-
strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
1318-
strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
1318+
strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray, lineHash)
1319+
strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray, lineHash)
13191320

13201321
return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
13211322
}
13221323

13231324
// diffLinesToStringsMunge splits a text into lines and reduces the text to a []uint32 of line indices.
1324-
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
1325+
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string, lineHash map[string]int) []uint32 {
13251326
// Walk the text, pulling out a substring for each line. text.split('\n') would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
1326-
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
13271327
lineStart := 0
13281328
lineEnd := -1
13291329
strs := []uint32{}

diffmatchpatch/diff_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,8 @@ func TestDiffLinesToChars(t *testing.T) {
318318
{"a", "b", "1", "2", []string{"", "a", "b"}},
319319
// Omit final newline.
320320
{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
321+
// Same lines in Text1 and Text2
322+
{"abc\ndefg\n12345\n", "abc\ndef\n12345\n678", "1,2,3", "1,4,3,5", []string{"", "abc\n", "defg\n", "12345\n", "def\n", "678"}},
321323
} {
322324
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
323325
assert.Equal(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))

0 commit comments

Comments
 (0)