Skip to content

Commit 720c192

Browse files
authored
plumbing: format/packfile, prevent large objects from being read into memory completely (go-git#303)
This PR adds code to prevent large objects from being read into memory from packfiles or the filesystem. Objects larger than 1 MiB are no longer stored directly in the cache or read completely into memory. Signed-off-by: Andrew Thornton <[email protected]>
1 parent e6e2339 commit 720c192

File tree

7 files changed

+422
-1
lines changed

7 files changed

+422
-1
lines changed

plumbing/format/packfile/fsobject.go

+15
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"github.com/go-git/go-git/v5/plumbing"
88
"github.com/go-git/go-git/v5/plumbing/cache"
99
"github.com/go-git/go-git/v5/plumbing/format/idxfile"
10+
"github.com/go-git/go-git/v5/utils/ioutil"
1011
)
1112

1213
// FSObject is an object from the packfile on the filesystem.
@@ -63,6 +64,20 @@ func (o *FSObject) Reader() (io.ReadCloser, error) {
6364
}
6465

6566
p := NewPackfileWithCache(o.index, nil, f, o.cache)
67+
if o.size > LargeObjectThreshold {
68+
// We have a big object
69+
h, err := p.objectHeaderAtOffset(o.offset)
70+
if err != nil {
71+
return nil, err
72+
}
73+
74+
r, err := p.getReaderDirect(h)
75+
if err != nil {
76+
_ = f.Close()
77+
return nil, err
78+
}
79+
return ioutil.NewReadCloserWithCloser(r, f.Close), nil
80+
}
6681
r, err := p.getObjectContent(o.offset)
6782
if err != nil {
6883
_ = f.Close()

plumbing/format/packfile/packfile.go

+73
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ var (
3232
// wrapped in FSObject.
3333
const smallObjectThreshold = 16 * 1024
3434

35+
// Conversely there are large objects that should not be cached and kept
36+
// in memory as they're too large to be reasonably cached. Objects larger
37+
// than this threshold are never read completely into memory or stored
38+
// in the cache
39+
const LargeObjectThreshold = 1024 * 1024
40+
3541
// Packfile allows retrieving information from inside a packfile.
3642
type Packfile struct {
3743
idxfile.Index
@@ -282,6 +288,37 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
282288
return obj.Reader()
283289
}
284290

291+
func (p *Packfile) getReaderDirect(h *ObjectHeader) (io.ReadCloser, error) {
292+
switch h.Type {
293+
case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
294+
return p.s.ReadObject()
295+
case plumbing.REFDeltaObject:
296+
deltaRC, err := p.s.ReadObject()
297+
if err != nil {
298+
return nil, err
299+
}
300+
r, err := p.readREFDeltaObjectContent(h, deltaRC)
301+
if err != nil {
302+
_ = deltaRC.Close()
303+
return nil, err
304+
}
305+
return r, nil
306+
case plumbing.OFSDeltaObject:
307+
deltaRC, err := p.s.ReadObject()
308+
if err != nil {
309+
return nil, err
310+
}
311+
r, err := p.readOFSDeltaObjectContent(h, deltaRC)
312+
if err != nil {
313+
_ = deltaRC.Close()
314+
return nil, err
315+
}
316+
return r, nil
317+
default:
318+
return nil, ErrInvalidObject.AddDetails("type %q", h.Type)
319+
}
320+
}
321+
285322
func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
286323
var obj = new(plumbing.MemoryObject)
287324
obj.SetSize(h.Length)
@@ -334,6 +371,20 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu
334371
return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf)
335372
}
336373

374+
func (p *Packfile) readREFDeltaObjectContent(h *ObjectHeader, deltaRC io.ReadCloser) (io.ReadCloser, error) {
375+
var err error
376+
377+
base, ok := p.cacheGet(h.Reference)
378+
if !ok {
379+
base, err = p.Get(h.Reference)
380+
if err != nil {
381+
return nil, err
382+
}
383+
}
384+
385+
return ReaderFromDelta(h, base, deltaRC)
386+
}
387+
337388
func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error {
338389
var err error
339390

@@ -364,6 +415,28 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset
364415
return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf)
365416
}
366417

418+
func (p *Packfile) readOFSDeltaObjectContent(h *ObjectHeader, deltaRC io.ReadCloser) (io.ReadCloser, error) {
419+
hash, err := p.FindHash(h.OffsetReference)
420+
if err != nil {
421+
return nil, err
422+
}
423+
424+
base, err := p.objectAtOffset(h.OffsetReference, hash)
425+
if err != nil {
426+
return nil, err
427+
}
428+
429+
base, ok := p.cacheGet(h.Reference)
430+
if !ok {
431+
base, err = p.Get(h.Reference)
432+
if err != nil {
433+
return nil, err
434+
}
435+
}
436+
437+
return ReaderFromDelta(h, base, deltaRC)
438+
}
439+
367440
func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error {
368441
hash, err := p.FindHash(offset)
369442
if err != nil {

plumbing/format/packfile/patch_delta.go

+210
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package packfile
22

33
import (
4+
"bufio"
45
"bytes"
56
"errors"
67
"io"
8+
"math"
79

810
"github.com/go-git/go-git/v5/plumbing"
911
"github.com/go-git/go-git/v5/utils/ioutil"
@@ -73,6 +75,131 @@ func PatchDelta(src, delta []byte) ([]byte, error) {
7375
return b.Bytes(), nil
7476
}
7577

78+
func ReaderFromDelta(h *ObjectHeader, base plumbing.EncodedObject, deltaRC io.ReadCloser) (io.ReadCloser, error) {
79+
deltaBuf := bufio.NewReaderSize(deltaRC, 1024)
80+
srcSz, err := decodeLEB128ByteReader(deltaBuf)
81+
if err != nil {
82+
if err == io.EOF {
83+
return nil, ErrInvalidDelta
84+
}
85+
return nil, err
86+
}
87+
if srcSz != uint(base.Size()) {
88+
return nil, ErrInvalidDelta
89+
}
90+
91+
targetSz, err := decodeLEB128ByteReader(deltaBuf)
92+
if err != nil {
93+
if err == io.EOF {
94+
return nil, ErrInvalidDelta
95+
}
96+
return nil, err
97+
}
98+
remainingTargetSz := targetSz
99+
100+
dstRd, dstWr := io.Pipe()
101+
102+
go func() {
103+
baseRd, err := base.Reader()
104+
if err != nil {
105+
_ = dstWr.CloseWithError(ErrInvalidDelta)
106+
return
107+
}
108+
defer baseRd.Close()
109+
110+
baseBuf := bufio.NewReader(baseRd)
111+
basePos := uint(0)
112+
113+
for {
114+
cmd, err := deltaBuf.ReadByte()
115+
if err == io.EOF {
116+
_ = dstWr.CloseWithError(ErrInvalidDelta)
117+
return
118+
}
119+
if err != nil {
120+
_ = dstWr.CloseWithError(err)
121+
return
122+
}
123+
124+
if isCopyFromSrc(cmd) {
125+
offset, err := decodeOffsetByteReader(cmd, deltaBuf)
126+
if err != nil {
127+
_ = dstWr.CloseWithError(err)
128+
return
129+
}
130+
sz, err := decodeSizeByteReader(cmd, deltaBuf)
131+
if err != nil {
132+
_ = dstWr.CloseWithError(err)
133+
return
134+
}
135+
136+
if invalidSize(sz, targetSz) ||
137+
invalidOffsetSize(offset, sz, srcSz) {
138+
_ = dstWr.Close()
139+
return
140+
}
141+
142+
discard := offset - basePos
143+
if discard < 0 {
144+
_ = baseRd.Close()
145+
baseRd, err = base.Reader()
146+
if err != nil {
147+
_ = dstWr.CloseWithError(ErrInvalidDelta)
148+
return
149+
}
150+
baseBuf.Reset(baseRd)
151+
discard = offset
152+
}
153+
for discard > math.MaxInt32 {
154+
n, err := baseBuf.Discard(math.MaxInt32)
155+
if err != nil {
156+
_ = dstWr.CloseWithError(err)
157+
return
158+
}
159+
basePos += uint(n)
160+
discard -= uint(n)
161+
}
162+
for discard > 0 {
163+
n, err := baseBuf.Discard(int(discard))
164+
if err != nil {
165+
_ = dstWr.CloseWithError(err)
166+
return
167+
}
168+
basePos += uint(n)
169+
discard -= uint(n)
170+
}
171+
if _, err := io.Copy(dstWr, io.LimitReader(baseBuf, int64(sz))); err != nil {
172+
_ = dstWr.CloseWithError(err)
173+
return
174+
}
175+
remainingTargetSz -= sz
176+
basePos += sz
177+
} else if isCopyFromDelta(cmd) {
178+
sz := uint(cmd) // cmd is the size itself
179+
if invalidSize(sz, targetSz) {
180+
_ = dstWr.CloseWithError(ErrInvalidDelta)
181+
return
182+
}
183+
if _, err := io.Copy(dstWr, io.LimitReader(deltaBuf, int64(sz))); err != nil {
184+
_ = dstWr.CloseWithError(err)
185+
return
186+
}
187+
188+
remainingTargetSz -= sz
189+
} else {
190+
_ = dstWr.CloseWithError(ErrDeltaCmd)
191+
return
192+
}
193+
if remainingTargetSz <= 0 {
194+
_ = dstWr.Close()
195+
return
196+
}
197+
}
198+
}()
199+
200+
return dstRd, nil
201+
}
202+
76203
func patchDelta(dst *bytes.Buffer, src, delta []byte) error {
77204
if len(delta) < deltaSizeMin {
78205
return ErrInvalidDelta
@@ -161,6 +288,25 @@ func decodeLEB128(input []byte) (uint, []byte) {
161288
return num, input[sz:]
162289
}
163290

291+
func decodeLEB128ByteReader(input io.ByteReader) (uint, error) {
292+
var num, sz uint
293+
for {
294+
b, err := input.ReadByte()
295+
if err != nil {
296+
return 0, err
297+
}
298+
299+
num |= (uint(b) & payload) << (sz * 7) // concats 7 bits chunks
300+
sz++
301+
302+
if uint(b)&continuation == 0 {
303+
break
304+
}
305+
}
306+
307+
return num, nil
308+
}
309+
164310
const (
165311
payload = 0x7f // 0111 1111
166312
continuation = 0x80 // 1000 0000
@@ -174,6 +320,40 @@ func isCopyFromDelta(cmd byte) bool {
174320
return (cmd&0x80) == 0 && cmd != 0
175321
}
176322

323+
// decodeOffsetByteReader decodes the offset operand of a delta instruction.
//
// Bits 0x01, 0x02, 0x04 and 0x08 of cmd each announce one operand byte,
// read from delta in that order; the byte for bit i supplies bits 8*i to
// 8*i+7 of the offset. Bytes whose bit is clear are not read and count as
// zero.
//
// If delta.ReadByte fails, that error is returned with a zero offset.
func decodeOffsetByteReader(cmd byte, delta io.ByteReader) (uint, error) {
	var offset uint

	for i := uint(0); i < 4; i++ {
		if cmd&(byte(0x01)<<i) == 0 {
			continue
		}

		next, err := delta.ReadByte()
		if err != nil {
			return 0, err
		}
		offset |= uint(next) << (8 * i)
	}

	return offset, nil
}
356+
177357
func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) {
178358
var offset uint
179359
if (cmd & 0x01) != 0 {
@@ -208,6 +388,36 @@ func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) {
208388
return offset, delta, nil
209389
}
210390

391+
// decodeSizeByteReader decodes the size operand of a delta instruction.
//
// Bits 0x10, 0x20 and 0x40 of cmd each announce one operand byte, read from
// delta in that order; the byte for the i-th of those bits supplies bits
// 8*i to 8*i+7 of the size. Bytes whose bit is clear are not read and count
// as zero. A decoded size of zero is replaced by 0x10000.
//
// If delta.ReadByte fails, that error is returned with a zero size.
func decodeSizeByteReader(cmd byte, delta io.ByteReader) (uint, error) {
	var sz uint

	for i := uint(0); i < 3; i++ {
		if cmd&(byte(0x10)<<i) == 0 {
			continue
		}

		next, err := delta.ReadByte()
		if err != nil {
			return 0, err
		}
		sz |= uint(next) << (8 * i)
	}

	if sz == 0 {
		sz = 0x10000
	}

	return sz, nil
}
420+
211421
func decodeSize(cmd byte, delta []byte) (uint, []byte, error) {
212422
var sz uint
213423
if (cmd & 0x10) != 0 {

plumbing/format/packfile/scanner.go

+15
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,21 @@ func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err erro
320320
return
321321
}
322322

323+
// ReadObject returns a reader for the object content and an error
324+
func (s *Scanner) ReadObject() (io.ReadCloser, error) {
325+
s.pendingObject = nil
326+
zr := zlibReaderPool.Get().(io.ReadCloser)
327+
328+
if err := zr.(zlib.Resetter).Reset(s.r, nil); err != nil {
329+
return nil, fmt.Errorf("zlib reset error: %s", err)
330+
}
331+
332+
return ioutil.NewReadCloserWithCloser(zr, func() error {
333+
zlibReaderPool.Put(zr)
334+
return nil
335+
}), nil
336+
}
337+
323338
// copyObject reads and writes a non-deltified object
324339
// from its zlib stream in an object entry in the packfile.
325340
func (s *Scanner) copyObject(w io.Writer) (n int64, err error) {

0 commit comments

Comments
 (0)