Skip to content

Commit ce9061f

Browse files
committed
This fixes the corrupted workbook that was generated when the size of any inner ZIP64 file exceeds 4GB
- Update unit tests - Support setting the version required to 4.5 in the local file header - Note that this fix does not work on Office 2010
1 parent f85dae6 commit ce9061f

File tree

3 files changed

+131
-29
lines changed

3 files changed

+131
-29
lines changed

Diff for: excelize.go

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ type File struct {
3131
mu sync.Mutex
3232
checked sync.Map
3333
formulaChecked bool
34+
zip64Entries []string
3435
options *Options
3536
sharedStringItem [][]uint
3637
sharedStringsMap map[string]int

Diff for: file.go

+61-29
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@ package excelize
1414
import (
1515
"archive/zip"
1616
"bytes"
17+
"encoding/binary"
1718
"encoding/xml"
1819
"io"
20+
"math"
1921
"os"
2022
"path/filepath"
2123
"sort"
@@ -123,17 +125,11 @@ func (f *File) WriteTo(w io.Writer, opts ...Options) (int64, error) {
123125
return 0, err
124126
}
125127
}
126-
if f.options != nil && f.options.Password != "" {
127-
buf, err := f.WriteToBuffer()
128-
if err != nil {
129-
return 0, err
130-
}
131-
return buf.WriteTo(w)
132-
}
133-
if err := f.writeDirectToWriter(w); err != nil {
128+
buf, err := f.WriteToBuffer()
129+
if err != nil {
134130
return 0, err
135131
}
136-
return 0, nil
132+
return buf.WriteTo(w)
137133
}
138134

139135
// WriteToBuffer provides a function to get bytes.Buffer from the saved file,
@@ -143,32 +139,22 @@ func (f *File) WriteToBuffer() (*bytes.Buffer, error) {
143139
zw := zip.NewWriter(buf)
144140

145141
if err := f.writeToZip(zw); err != nil {
146-
return buf, zw.Close()
142+
_ = zw.Close()
143+
return buf, err
147144
}
148-
145+
if err := zw.Close(); err != nil {
146+
return buf, err
147+
}
148+
f.writeZip64LFH(buf)
149149
if f.options != nil && f.options.Password != "" {
150-
if err := zw.Close(); err != nil {
151-
return buf, err
152-
}
153150
b, err := Encrypt(buf.Bytes(), f.options)
154151
if err != nil {
155152
return buf, err
156153
}
157154
buf.Reset()
158155
buf.Write(b)
159-
return buf, nil
160-
}
161-
return buf, zw.Close()
162-
}
163-
164-
// writeDirectToWriter provides a function to write to io.Writer.
165-
func (f *File) writeDirectToWriter(w io.Writer) error {
166-
zw := zip.NewWriter(w)
167-
if err := f.writeToZip(zw); err != nil {
168-
_ = zw.Close()
169-
return err
170156
}
171-
return zw.Close()
157+
return buf, nil
172158
}
173159

174160
// writeToZip provides a function to write to zip.Writer
@@ -197,11 +183,16 @@ func (f *File) writeToZip(zw *zip.Writer) error {
197183
_ = stream.rawData.Close()
198184
return err
199185
}
200-
if _, err = io.Copy(fi, from); err != nil {
186+
written, err := io.Copy(fi, from)
187+
if err != nil {
201188
return err
202189
}
190+
if written > math.MaxUint32 {
191+
f.zip64Entries = append(f.zip64Entries, path)
192+
}
203193
}
204194
var (
195+
n int
205196
err error
206197
files, tempFiles []string
207198
)
@@ -219,7 +210,9 @@ func (f *File) writeToZip(zw *zip.Writer) error {
219210
break
220211
}
221212
content, _ := f.Pkg.Load(path)
222-
_, err = fi.Write(content.([]byte))
213+
if n, err = fi.Write(content.([]byte)); n > math.MaxUint32 {
214+
f.zip64Entries = append(f.zip64Entries, path)
215+
}
223216
}
224217
f.tempFiles.Range(func(path, content interface{}) bool {
225218
if _, ok := f.Pkg.Load(path); ok {
@@ -234,7 +227,46 @@ func (f *File) writeToZip(zw *zip.Writer) error {
234227
if fi, err = zw.Create(path); err != nil {
235228
break
236229
}
237-
_, err = fi.Write(f.readBytes(path))
230+
if n, err = fi.Write(f.readBytes(path)); n > math.MaxUint32 {
231+
f.zip64Entries = append(f.zip64Entries, path)
232+
}
238233
}
239234
return err
240235
}
236+
237+
// writeZip64LFH function sets the ZIP version to 0x2D (45) in the Local File
238+
// Header (LFH). Excel strictly enforces ZIP64 format validation rules. When any
239+
// file within the workbook (OCP) exceeds 4GB in size, the ZIP64 format must be
240+
// used according to the PKZIP specification. However, ZIP files generated using
241+
// Go's standard archive/zip library always set the version in the local file
242+
// header to 20 (ZIP version 2.0) by default, as defined in the internal
243+
// 'writeHeader' function during ZIP creation. The archive/zip package only sets
244+
// the 'ReaderVersion' to 45 (ZIP64 version 4.5) in the central directory for
245+
// entries larger than 4GB. This results in a version mismatch between the
246+
// central directory and the local file header. As a result, opening the
247+
// generated workbook with spreadsheet application will prompt file corruption.
248+
func (f *File) writeZip64LFH(buf *bytes.Buffer) error {
249+
if len(f.zip64Entries) == 0 {
250+
return nil
251+
}
252+
data, offset := buf.Bytes(), 0
253+
for offset < len(data) {
254+
idx := bytes.Index(data[offset:], []byte{0x50, 0x4b, 0x03, 0x04})
255+
if idx == -1 {
256+
break
257+
}
258+
idx += offset
259+
if idx+30 > len(data) {
260+
break
261+
}
262+
filenameLen := int(binary.LittleEndian.Uint16(data[idx+26 : idx+28]))
263+
if idx+30+filenameLen > len(data) {
264+
break
265+
}
266+
if inStrSlice(f.zip64Entries, string(data[idx+30:idx+30+filenameLen]), true) != -1 {
267+
binary.LittleEndian.PutUint16(data[idx+4:idx+6], 45)
268+
}
269+
offset = idx + 1
270+
}
271+
return nil
272+
}

Diff for: file_test.go

+69
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package excelize
33
import (
44
"bufio"
55
"bytes"
6+
"encoding/binary"
7+
"math"
68
"os"
79
"path/filepath"
810
"strings"
@@ -95,3 +97,70 @@ func TestClose(t *testing.T) {
9597
f.tempFiles.Store("/d/", "/d/")
9698
require.Error(t, f.Close())
9799
}
100+
101+
// TestZip64 exercises saving workbooks whose inner files require ZIP64, and
// checks that writeZip64LFH returns no error for malformed local file header
// data.
func TestZip64(t *testing.T) {
102+
f := NewFile()
103+
_, err := f.NewSheet("Sheet2")
104+
assert.NoError(t, err)
105+
sw, err := f.NewStreamWriter("Sheet1")
106+
assert.NoError(t, err)
// Stream 131 rows of 1000 cells, each holding TotalCellChars characters;
// presumably enough to push the worksheet part past 4GB (confirm against the
// value of TotalCellChars).
107+
for r := range 131 {
108+
rowData := make([]interface{}, 1000)
109+
for c := range 1000 {
110+
rowData[c] = strings.Repeat("c", TotalCellChars)
111+
}
112+
cell, err := CoordinatesToCellName(1, r+1)
113+
assert.NoError(t, err)
114+
assert.NoError(t, sw.SetRow(cell, rowData))
115+
}
116+
assert.NoError(t, sw.Flush())
117+
assert.NoError(t, f.SaveAs(filepath.Join("test", "TestZip64.xlsx")))
118+
assert.NoError(t, f.Close())
119+
120+
// Test with a local file header shorter than the required 30 bytes of the
// fixed header part (signature + 20 bytes = 24 bytes)
121+
f = NewFile()
122+
f.zip64Entries = append(f.zip64Entries, defaultXMLPathSharedStrings)
123+
buf := new(bytes.Buffer)
124+
buf.Write([]byte{0x50, 0x4b, 0x03, 0x04})
125+
buf.Write(make([]byte, 20))
126+
assert.NoError(t, f.writeZip64LFH(buf))
127+
128+
// Test with file name length overflow: the header declares a 10-byte file
// name but only 4 bytes ("test") follow the 30-byte fixed part
129+
f = NewFile()
130+
f.zip64Entries = append(f.zip64Entries, defaultXMLPathSharedStrings)
131+
buf.Reset()
132+
buf.Write([]byte{0x50, 0x4b, 0x03, 0x04})
133+
buf.Write(make([]byte, 22))
// Bytes 26-27 of the header: the file name length field.
// NOTE(review): the error returned by binary.Write is not checked here.
134+
binary.Write(buf, binary.LittleEndian, uint16(10))
135+
buf.Write(make([]byte, 2))
136+
buf.WriteString("test")
137+
assert.NoError(t, f.writeZip64LFH(buf))
138+
139+
t.Run("for_save_zip64_with_in_memory_file_over_4GB", func(t *testing.T) {
140+
// Test saving a workbook in ZIP64 format with an in-memory file larger than 4GB.
141+
f := NewFile()
142+
f.Sheet.Delete("xl/worksheets/sheet1.xml")
143+
f.Pkg.Store("xl/worksheets/sheet1.xml", make([]byte, math.MaxUint32+1))
144+
_, err := f.WriteToBuffer()
145+
assert.NoError(t, err)
146+
assert.NoError(t, f.Close())
147+
})
148+
149+
t.Run("for_save_zip64_with_in_temporary_file_over_4GB", func(t *testing.T) {
150+
// Test saving a workbook in ZIP64 format with a temporary file larger than 4GB.
// Skipped when running on GitHub Actions.
151+
if os.Getenv("GITHUB_ACTIONS") == "true" {
152+
t.Skip()
153+
}
154+
f := NewFile()
155+
f.Pkg.Delete("xl/worksheets/sheet1.xml")
156+
f.Sheet.Delete("xl/worksheets/sheet1.xml")
157+
tmp, err := os.CreateTemp(os.TempDir(), "excelize-")
158+
assert.NoError(t, err)
// Truncate extends the empty temporary file to just over 4GB.
159+
assert.NoError(t, tmp.Truncate(math.MaxUint32+1))
160+
f.tempFiles.Store("xl/worksheets/sheet1.xml", tmp.Name())
161+
assert.NoError(t, tmp.Close())
162+
_, err = f.WriteToBuffer()
163+
assert.NoError(t, err)
164+
assert.NoError(t, f.Close())
165+
})
166+
}

0 commit comments

Comments
 (0)