diff --git a/direct.go b/direct.go new file mode 100644 index 0000000000..8b18bcfb8c --- /dev/null +++ b/direct.go @@ -0,0 +1,269 @@ +package excelize + +import ( + "bytes" + "errors" + "fmt" + "io" + "strconv" + "sync" +) + +// DirectWriter is a simpler and optimized version of the StreamWriter. Its primary use is sending large amount of sheet data row by row directly +// to a io.Writer. The typical use case is an API writing directly to a TCP connection, with minimal server side buffering. +type DirectWriter struct { + sync.RWMutex + File *File + Sheet string + SheetID int + cols string + worksheet *xlsxWorksheet + sheetPath string + maxBufferSize int + bytesWritten int64 + buf []byte + out io.Writer + done chan bool + rowCount int + maxColLengths []int + waitMode bool +} + +// NewDirectWriter return a new DirectWriter for the given sheet name. If the sheet doesn't yet exists it is created. +// Similar limitations apply as when using the StreamWriter. To enable writing an xlsx file concurrently to +// a io.Writer you must: +// +// - create a File. +// +// - create at least one DirectWriter. +// +// - launch writing using file.WriteTo() in a separate goroutine, this call will block until all direct writers are closed. +// +// - add data using AddRow, then call Close. +// +// - wait for the goroutine to return +func (f *File) NewDirectWriter(sheet string, maxBufferSize int) (*DirectWriter, error) { + _ = f.NewSheet(sheet) + sheetID := f.getSheetID(sheet) + if sheetID == -1 { + return nil, errors.New("bug: sheetID not found after call to NewSheet") + } + dw := &DirectWriter{ + File: f, + Sheet: sheet, + SheetID: sheetID, + maxBufferSize: maxBufferSize, + done: make(chan bool), + } + var err error + dw.worksheet, err = f.workSheetReader(sheet) + if err != nil { + return nil, err + } + + dw.sheetPath = f.sheetMap[trimSheetName(sheet)] + f.directWriters = append(f.directWriters, dw) + + return dw, err +} + +// SetWait enables or disables the wait mode. 
In wait mode nothing is flushed to writer (if any), even if the buffer grows beyond maxBufferSize. +func (dw *DirectWriter) SetWait(b bool) error { + if b { + if dw.bytesWritten > 0 { + return errors.New("Can't enable wait mode since first data already written.") + } + dw.waitMode = true + return nil + } + dw.waitMode = false + return nil +} + +// AddRow is used for streaming a large data file row by row, without any gaps. +// It omits cell reference values and only accept []Cell to reduce interface{} related allocations. +// It returns the number of bytes currently in the write buffer. +func (dw *DirectWriter) AddRow(values []Cell, opts ...RowOpts) (buffered int, err error) { + dw.rowCount++ + dw.buf = append(dw.buf, ` 0 { + attrs, err := marshalRowAttrs(opts...) + if err != nil { + return len(dw.buf), err + } + dw.buf = append(dw.buf, attrs...) + } + dw.buf = append(dw.buf, '>') + if len(values) > len(dw.maxColLengths) { + l := make([]int, len(values)) + copy(l, dw.maxColLengths) + dw.maxColLengths = l + } + for i, val := range values { + c := xlsxC{ + S: val.StyleID, + } + if val.Formula != "" { + c.F = &xlsxF{Content: val.Formula} + } + if err := setCellValFunc(&c, val.Value); err != nil { + dw.buf = append(dw.buf, ""...) + return len(dw.buf), err + } + if l := len(c.V); l > dw.maxColLengths[i] { + dw.maxColLengths[i] = l + } + dw.buf = appendCellNoRef(dw.buf, c) + } + dw.buf = append(dw.buf, ""...) + if len(dw.buf) > dw.maxBufferSize && !dw.waitMode { + err := dw.tryFlush() + return len(dw.buf), err + } + return len(dw.buf), nil +} + +// MaxColumnLengths returns the max lengths (in bytes as written to XML) for each column written so far. +func (dw *DirectWriter) MaxColumnLengths() []int { + return dw.maxColLengths +} + +// SetColWidth provides a function to set the width of a single column or +// multiple columns for the DirectWriter. 
Since column definitions need to be written before sheet data, either use this +// function before the first call to AddRow, or set the writer in wait mode using SetWait. +func (dw *DirectWriter) SetColWidth(min, max int, width float64) error { + if dw.bytesWritten > 0 { + return errors.New("Can't set col width since first data already written.") + } + if min > TotalColumns || max > TotalColumns { + return ErrColumnNumber + } + if min < 1 || max < 1 { + return ErrColumnNumber + } + if width > MaxColumnWidth { + return ErrColumnWidth + } + if min > max { + min, max = max, min + } + dw.cols += fmt.Sprintf(``, min, max, width) + return nil +} + +// Close ends the streaming writing process. +func (dw *DirectWriter) Close() error { + dw.buf = append(dw.buf, ``...) + bulkAppendFields(dw, dw.worksheet, 8, 15) + bulkAppendFields(dw, dw.worksheet, 17, 38) + bulkAppendFields(dw, dw.worksheet, 40, 40) + dw.buf = append(dw.buf, ``...) + + if err := dw.tryFlush(); err != nil { + return err + } + + dw.File.Sheet.Delete(dw.sheetPath) + delete(dw.File.checked, dw.sheetPath) + dw.File.Pkg.Delete(dw.sheetPath) + + close(dw.done) + return nil +} + +// WriteTo writes the output of the DirectWriter to w. The call will block until the DirectWriter is closed by a call to Close. +func (dw *DirectWriter) WriteTo(w io.Writer) (int64, error) { + select { + case <-dw.done: + if dw.bytesWritten > 0 { + return 0, errors.New("Cant't write to new writer w since part of the data already been written and flushed.") + } + n, err := w.Write(dw.buildHeader()) + if err != nil { + return int64(n), err + } + n2, err := w.Write(dw.buf) + return int64(n + n2), err + default: + dw.Lock() + dw.out = w + dw.Unlock() + <-dw.done + return dw.bytesWritten, nil + } +} + +func (dw *DirectWriter) Write(p []byte) (n int, err error) { + dw.buf = append(dw.buf, p...) 
+ return len(p), nil +} + +func (dw *DirectWriter) buildHeader() []byte { + var header bytes.Buffer + header.WriteString(XMLHeader + ` 0 { + header.WriteString("" + dw.cols + "") + } + header.WriteString(``) + return header.Bytes() +} + +func (dw *DirectWriter) tryFlush() error { + dw.Lock() + if dw.out == nil { + dw.Unlock() + return nil + } + if dw.bytesWritten == 0 { + n, err := dw.out.Write(dw.buildHeader()) + if err != nil { + return err + } + dw.bytesWritten += int64(n) + } + n, err := dw.out.Write(dw.buf) + dw.Unlock() + if err != nil { + return err + } + dw.bytesWritten += int64(n) + dw.buf = dw.buf[:0] + return nil +} + +func appendCellNoRef(dst []byte, c xlsxC) []byte { + dst = append(dst, `') + if c.F != nil { + dst = append(dst, ``...) + dst = appendEscapedString(dst, c.F.Content, true) + dst = append(dst, ``...) + } + if c.V != "" { + dst = append(dst, ``...) + dst = appendEscapedString(dst, c.V, true) + dst = append(dst, ``...) + } + dst = append(dst, ``...) + return dst +} diff --git a/direct_test.go b/direct_test.go new file mode 100644 index 0000000000..f1015312ad --- /dev/null +++ b/direct_test.go @@ -0,0 +1,178 @@ +package excelize + +import ( + "archive/zip" + "bytes" + "fmt" + "io" + "strconv" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func BenchmarkAddRow(b *testing.B) { + file := NewFile() + row := make([]Cell, 10) + for colID := 0; colID < 10; colID++ { + row[colID] = Cell{ + StyleID: 1, + Value: "foo", + } + } + dw, err := file.NewDirectWriter("Sheet1", 8192) + require.NoError(b, err) + go dw.WriteTo(io.Discard) //nolint + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, _ = dw.AddRow(row) + } + err = dw.Close() + assert.NoError(b, err) + b.SetBytes(dw.bytesWritten) + b.ReportAllocs() +} + +func TestDirectWriter(t *testing.T) { + t.Run("non-concurrent-writer", func(t *testing.T) { + file, row, expectedRow := setupTestFileRow() + + dw, err := file.NewDirectWriter("Sheet1", 
8192) + require.NoError(t, err) + + require.NoError(t, dw.SetColWidth(1, 2, 20)) + expectedCols := `` + + _, err = dw.AddRow(row) + assert.NoError(t, err) + err = dw.Close() + assert.NoError(t, err) + + var out bytes.Buffer + _, err = dw.WriteTo(&out) + require.NoError(t, err) + assert.True(t, bytes.HasPrefix(out.Bytes(), dw.buildHeader())) + assert.Contains(t, out.String(), expectedCols) + assert.Contains(t, out.String(), expectedRow) + assert.True(t, bytes.HasSuffix(out.Bytes(), []byte(""))) + }) + t.Run("concurrent-writer", func(t *testing.T) { + file, row, expectedRow := setupTestFileRow() + + dw, err := file.NewDirectWriter("Sheet1", 8192) + require.NoError(t, err) + + var out bytes.Buffer + ch := make(chan error) + go func() { + _, err := dw.WriteTo(&out) + ch <- err + }() + + _, err = dw.AddRow(row) + assert.NoError(t, err) + err = dw.Close() + assert.NoError(t, err) + + err = <-ch + require.NoError(t, err) + assert.True(t, bytes.HasPrefix(out.Bytes(), dw.buildHeader())) + assert.Contains(t, + out.String(), + expectedRow, + ) + assert.True(t, bytes.HasSuffix(out.Bytes(), []byte(""))) + }) + t.Run("multiple-concurrent-writers", func(t *testing.T) { + file, row, _ := setupTestFileRow() + var ( + sheets = 100 + rows = 100 + dws = make([]*DirectWriter, sheets) + err error + ) + + // setup some sheets with direct writers + for i := range dws { + dws[i], err = file.NewDirectWriter("Sheet"+strconv.Itoa(i+1), 512) + require.NoError(t, err) + } + + // launch writer on the final zip file to a buffer + var out bytes.Buffer + ch := make(chan error) + go func() { + _, err := file.WriteTo(&out) + ch <- err + }() + + // for each sheet write some rows, and then close it + for _, dw := range dws { + for i := 0; i < rows; i++ { + _, err = dw.AddRow(row) + assert.NoError(t, err) + } + err = dw.Close() + require.NoError(t, err) + } + + err = <-ch + require.NoError(t, err) + + // verify all sheets made it into the zip archive + z, err := 
zip.NewReader(bytes.NewReader(out.Bytes()), int64(out.Len())) + assert.NoError(t, err) + for i := range dws { + f, err := z.Open("xl/worksheets/sheet" + strconv.Itoa(dws[i].SheetID) + ".xml") + assert.NoError(t, err) + if f != nil { + f.Close() + } + } + // os.WriteFile("test.xlsx", out.Bytes(), os.ModePerm) + }) + t.Run("wait-mode", func(t *testing.T) { + file, row, _ := setupTestFileRow() + const maxBufferSize = 8 + dw, err := file.NewDirectWriter("Sheet1", maxBufferSize) + require.NoError(t, err) + require.NoError(t, dw.SetWait(true)) + + go dw.WriteTo(io.Discard) //nolint + // loop waiting for the goroutine to launch and register the writer + for { + dw.Lock() + w := dw.out + dw.Unlock() + if w != nil { + break + } + } + + buffered, err := dw.AddRow(row) + assert.NoError(t, err) + assert.True(t, buffered > maxBufferSize, "buffer should not have been flushed in wait mode") + + require.NoError(t, dw.SetWait(false)) + + buffered, err = dw.AddRow(row) + assert.NoError(t, err) + assert.Equal(t, 0, buffered, "buffer should have been flushed since wait mode is now disabled") + }) +} + +func setupTestFileRow() (*File, []Cell, string) { + file := NewFile() + ts, _ := file.NewStyle(&Style{NumFmt: 22}) + row := []Cell{ + {Value: "foo"}, + // add trailing ws to trigger xml:space + {Value: "bar "}, + {Value: time.Date(2021, 11, 29, 0, 0, 0, 0, time.UTC), StyleID: ts}, + {Value: 123}, + } + expected := fmt.Sprintf("foobar 44529123", ts) + return file, row, expected +} diff --git a/excelize.go b/excelize.go index c5778c8542..8b4b512b63 100644 --- a/excelize.go +++ b/excelize.go @@ -37,6 +37,7 @@ type File struct { checked map[string]bool sheetMap map[string]string streams map[string]*StreamWriter + directWriters []*DirectWriter tempFiles sync.Map CalcChain *xlsxCalcChain Comments map[string]*xlsxComments diff --git a/file.go b/file.go index c0092a2eb4..62568ae066 100644 --- a/file.go +++ b/file.go @@ -163,6 +163,17 @@ func (f *File) writeToZip(zw *zip.Writer) error { 
f.sharedStringsWriter() f.styleSheetWriter() + var pathDone = make(map[string]bool) + for _, d := range f.directWriters { + fi, err := zw.Create(d.sheetPath) + if err != nil { + return err + } + if _, err := d.WriteTo(fi); err != nil { + return err + } + pathDone[d.sheetPath] = true + } for path, stream := range f.streams { fi, err := zw.Create(path) if err != nil { @@ -179,13 +190,14 @@ func (f *File) writeToZip(zw *zip.Writer) error { return err } _ = stream.rawData.Close() + pathDone[path] = true } var err error f.Pkg.Range(func(path, content interface{}) bool { if err != nil { return false } - if _, ok := f.streams[path.(string)]; ok { + if _, ok := pathDone[path.(string)]; ok { return true } var fi io.Writer @@ -196,6 +208,5 @@ func (f *File) writeToZip(zw *zip.Writer) error { _, err = fi.Write(content.([]byte)) return true }) - return err } diff --git a/go.mod b/go.mod index 41f53a205d..b08283123a 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/xuri/excelize/v2 +module github.com/cls-nordic/excelize go 1.15 @@ -7,8 +7,8 @@ require ( github.com/richardlehane/mscfb v1.0.3 github.com/stretchr/testify v1.6.1 github.com/xuri/efp v0.0.0-20210322160811-ab561f5b45e3 - golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97 + golang.org/x/crypto v0.14.0 golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb - golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985 - golang.org/x/text v0.3.6 + golang.org/x/net v0.17.0 + golang.org/x/text v0.13.0 ) diff --git a/go.sum b/go.sum index 53c304719c..a925dd480e 100644 --- a/go.sum +++ b/go.sum @@ -13,22 +13,50 @@ github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/xuri/efp v0.0.0-20210322160811-ab561f5b45e3 h1:EpI0bqf/eX9SdZDwlMmahKM+CDBgNbsXMhsN28XrM8o= github.com/xuri/efp v0.0.0-20210322160811-ab561f5b45e3/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI= -golang.org/x/crypto 
v0.0.0-20210711020723-a769d52b0f97 h1:/UOmuWzQfxxo9UtlXMwuQU8CMgg1eZXqTRwkSQJWKOI= -golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= +golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb h1:fqpd0EBDzlHRCjiphRR5Zo/RSWWQlWv34418dnEixWk= golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985 h1:4CSI6oo7cOjJKajidEljs9h+uP0rRZBPPPhcCbj5mw8= -golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= +golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 
// Origin: xml.go (new file), package excelize. The file imports unicode/utf8.

// Escape sequences used by appendEscapedString, copied from the standard
// library's encoding/xml package.
var (
	escQuot = []byte("&#34;") // shorter than "&quot;"
	escApos = []byte("&#39;") // shorter than "&apos;"
	escAmp  = []byte("&amp;")
	escLT   = []byte("&lt;")
	escGT   = []byte("&gt;")
	escTab  = []byte("&#x9;")
	escNL   = []byte("&#xA;")
	escCR   = []byte("&#xD;")
	escFFFD = []byte("\uFFFD") // Unicode replacement character
)

// isInCharacterRange reports whether r is one of the code points accepted
// here as an XML character: tab, line feed, carriage return, or a rune in
// 0x20-0xD7FF, 0xE000-0xFFFD or 0x10000-0x10FFFF.
// Copied from the standard library's encoding/xml package.
func isInCharacterRange(r rune) (inrange bool) {
	return r == 0x09 ||
		r == 0x0A ||
		r == 0x0D ||
		r >= 0x20 && r <= 0xD7FF ||
		r >= 0xE000 && r <= 0xFFFD ||
		r >= 0x10000 && r <= 0x10FFFF
}

// appendEscapedString appends s to dst with XML special characters escaped and
// returns the extended slice. Quotes, ampersand, angle brackets, tab and
// carriage return are always replaced by their escape sequence; a line feed is
// escaped only when escapeNewline is true and copied verbatim otherwise.
// Invalid UTF-8 bytes and runes outside the XML character range are replaced
// by U+FFFD. Copied and modified from the standard library's xml.EscapeText.
func appendEscapedString(dst []byte, s string, escapeNewline bool) []byte {
	var esc []byte
	// last is the start of the pending run of bytes that need no escaping.
	last := 0
	for i := 0; i < len(s); {
		r, width := utf8.DecodeRuneInString(s[i:])
		i += width
		switch r {
		case '"':
			esc = escQuot
		case '\'':
			esc = escApos
		case '&':
			esc = escAmp
		case '<':
			esc = escLT
		case '>':
			esc = escGT
		case '\t':
			esc = escTab
		case '\n':
			if !escapeNewline {
				continue
			}
			esc = escNL
		case '\r':
			esc = escCR
		default:
			// width == 1 together with U+FFFD means the decoder hit an
			// invalid byte, as opposed to a genuine U+FFFD in the input.
			if isInCharacterRange(r) && !(r == 0xFFFD && width == 1) {
				continue
			}
			esc = escFFFD
		}
		// Copy the clean run before this rune, then its replacement.
		dst = append(dst, s[last:i-width]...)
		dst = append(dst, esc...)
		last = i
	}
	return append(dst, s[last:]...)
}