Skip to content

Commit b4368b2

Browse files
authored
plumbing: format/packfile, prevent large objects from being read into memory completely (go-git#330)
This PR adds code to prevent large objects from being read into memory from packfiles or the filesystem. Objects greater than 1Mb are now no longer directly stored in the cache or read completely into memory. This PR differs and improves the previous broken go-git#323 by fixing several bugs in the reader and transparently wrapping ReaderAt as a Reader. Signed-off-by: Andrew Thornton <[email protected]>
1 parent da81027 commit b4368b2

File tree

13 files changed

+601
-43
lines changed

13 files changed

+601
-43
lines changed

Diff for: plumbing/format/packfile/delta_test.go

+51
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
package packfile
22

33
import (
4+
"bytes"
5+
"io/ioutil"
46
"math/rand"
57

8+
"github.com/go-git/go-git/v5/plumbing"
69
. "gopkg.in/check.v1"
710
)
811

@@ -97,6 +100,32 @@ func (s *DeltaSuite) TestAddDelta(c *C) {
97100
}
98101
}
99102

103+
func (s *DeltaSuite) TestAddDeltaReader(c *C) {
104+
for _, t := range s.testCases {
105+
baseBuf := genBytes(t.base)
106+
baseObj := &plumbing.MemoryObject{}
107+
baseObj.Write(baseBuf)
108+
109+
targetBuf := genBytes(t.target)
110+
111+
delta := DiffDelta(baseBuf, targetBuf)
112+
deltaRC := ioutil.NopCloser(bytes.NewReader(delta))
113+
114+
c.Log("Executing test case:", t.description)
115+
116+
resultRC, err := ReaderFromDelta(baseObj, deltaRC)
117+
c.Assert(err, IsNil)
118+
119+
result, err := ioutil.ReadAll(resultRC)
120+
c.Assert(err, IsNil)
121+
122+
err = resultRC.Close()
123+
c.Assert(err, IsNil)
124+
125+
c.Assert(result, DeepEquals, targetBuf)
126+
}
127+
}
128+
100129
func (s *DeltaSuite) TestIncompleteDelta(c *C) {
101130
for _, t := range s.testCases {
102131
c.Log("Incomplete delta on:", t.description)
@@ -125,3 +154,25 @@ func (s *DeltaSuite) TestMaxCopySizeDelta(c *C) {
125154
c.Assert(err, IsNil)
126155
c.Assert(result, DeepEquals, targetBuf)
127156
}
157+
158+
func (s *DeltaSuite) TestMaxCopySizeDeltaReader(c *C) {
159+
baseBuf := randBytes(maxCopySize)
160+
baseObj := &plumbing.MemoryObject{}
161+
baseObj.Write(baseBuf)
162+
163+
targetBuf := baseBuf[0:]
164+
targetBuf = append(targetBuf, byte(1))
165+
166+
delta := DiffDelta(baseBuf, targetBuf)
167+
deltaRC := ioutil.NopCloser(bytes.NewReader(delta))
168+
169+
resultRC, err := ReaderFromDelta(baseObj, deltaRC)
170+
c.Assert(err, IsNil)
171+
172+
result, err := ioutil.ReadAll(resultRC)
173+
c.Assert(err, IsNil)
174+
175+
err = resultRC.Close()
176+
c.Assert(err, IsNil)
177+
c.Assert(result, DeepEquals, targetBuf)
178+
}

Diff for: plumbing/format/packfile/encoder_advanced_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(
105105
_, err = f.Seek(0, io.SeekStart)
106106
c.Assert(err, IsNil)
107107

108-
p := NewPackfile(index, fs, f)
108+
p := NewPackfile(index, fs, f, 0)
109109

110110
decodeHash, err := p.ID()
111111
c.Assert(err, IsNil)

Diff for: plumbing/format/packfile/encoder_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ func packfileFromReader(c *C, buf *bytes.Buffer) (*Packfile, func()) {
318318
index, err := w.Index()
319319
c.Assert(err, IsNil)
320320

321-
return NewPackfile(index, fs, file), func() {
321+
return NewPackfile(index, fs, file, 0), func() {
322322
c.Assert(file.Close(), IsNil)
323323
}
324324
}

Diff for: plumbing/format/packfile/fsobject.go

+36-18
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,21 @@ import (
77
"github.com/go-git/go-git/v5/plumbing"
88
"github.com/go-git/go-git/v5/plumbing/cache"
99
"github.com/go-git/go-git/v5/plumbing/format/idxfile"
10+
"github.com/go-git/go-git/v5/utils/ioutil"
1011
)
1112

1213
// FSObject is an object from the packfile on the filesystem.
1314
type FSObject struct {
14-
hash plumbing.Hash
15-
h *ObjectHeader
16-
offset int64
17-
size int64
18-
typ plumbing.ObjectType
19-
index idxfile.Index
20-
fs billy.Filesystem
21-
path string
22-
cache cache.Object
15+
hash plumbing.Hash
16+
h *ObjectHeader
17+
offset int64
18+
size int64
19+
typ plumbing.ObjectType
20+
index idxfile.Index
21+
fs billy.Filesystem
22+
path string
23+
cache cache.Object
24+
largeObjectThreshold int64
2325
}
2426

2527
// NewFSObject creates a new filesystem object.
@@ -32,16 +34,18 @@ func NewFSObject(
3234
fs billy.Filesystem,
3335
path string,
3436
cache cache.Object,
37+
largeObjectThreshold int64,
3538
) *FSObject {
3639
return &FSObject{
37-
hash: hash,
38-
offset: offset,
39-
size: contentSize,
40-
typ: finalType,
41-
index: index,
42-
fs: fs,
43-
path: path,
44-
cache: cache,
40+
hash: hash,
41+
offset: offset,
42+
size: contentSize,
43+
typ: finalType,
44+
index: index,
45+
fs: fs,
46+
path: path,
47+
cache: cache,
48+
largeObjectThreshold: largeObjectThreshold,
4549
}
4650
}
4751

@@ -62,7 +66,21 @@ func (o *FSObject) Reader() (io.ReadCloser, error) {
6266
return nil, err
6367
}
6468

65-
p := NewPackfileWithCache(o.index, nil, f, o.cache)
69+
p := NewPackfileWithCache(o.index, nil, f, o.cache, o.largeObjectThreshold)
70+
if o.largeObjectThreshold > 0 && o.size > o.largeObjectThreshold {
71+
// We have a big object
72+
h, err := p.objectHeaderAtOffset(o.offset)
73+
if err != nil {
74+
return nil, err
75+
}
76+
77+
r, err := p.getReaderDirect(h)
78+
if err != nil {
79+
_ = f.Close()
80+
return nil, err
81+
}
82+
return ioutil.NewReadCloserWithCloser(r, f.Close), nil
83+
}
6684
r, err := p.getObjectContent(o.offset)
6785
if err != nil {
6886
_ = f.Close()

Diff for: plumbing/format/packfile/packfile.go

+85-7
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package packfile
22

33
import (
44
"bytes"
5+
"compress/zlib"
6+
"fmt"
57
"io"
68
"os"
79

@@ -35,11 +37,12 @@ const smallObjectThreshold = 16 * 1024
3537
// Packfile allows retrieving information from inside a packfile.
3638
type Packfile struct {
3739
idxfile.Index
38-
fs billy.Filesystem
39-
file billy.File
40-
s *Scanner
41-
deltaBaseCache cache.Object
42-
offsetToType map[int64]plumbing.ObjectType
40+
fs billy.Filesystem
41+
file billy.File
42+
s *Scanner
43+
deltaBaseCache cache.Object
44+
offsetToType map[int64]plumbing.ObjectType
45+
largeObjectThreshold int64
4346
}
4447

4548
// NewPackfileWithCache creates a new Packfile with the given object cache.
@@ -50,6 +53,7 @@ func NewPackfileWithCache(
5053
fs billy.Filesystem,
5154
file billy.File,
5255
cache cache.Object,
56+
largeObjectThreshold int64,
5357
) *Packfile {
5458
s := NewScanner(file)
5559
return &Packfile{
@@ -59,15 +63,16 @@ func NewPackfileWithCache(
5963
s,
6064
cache,
6165
make(map[int64]plumbing.ObjectType),
66+
largeObjectThreshold,
6267
}
6368
}
6469

6570
// NewPackfile returns a packfile representation for the given packfile file
6671
// and packfile idx.
6772
// If the filesystem is provided, the packfile will return FSObjects, otherwise
6873
// it will return MemoryObjects.
69-
func NewPackfile(index idxfile.Index, fs billy.Filesystem, file billy.File) *Packfile {
70-
return NewPackfileWithCache(index, fs, file, cache.NewObjectLRUDefault())
74+
func NewPackfile(index idxfile.Index, fs billy.Filesystem, file billy.File, largeObjectThreshold int64) *Packfile {
75+
return NewPackfileWithCache(index, fs, file, cache.NewObjectLRUDefault(), largeObjectThreshold)
7176
}
7277

7378
// Get retrieves the encoded object in the packfile with the given hash.
@@ -263,6 +268,7 @@ func (p *Packfile) getNextObject(h *ObjectHeader, hash plumbing.Hash) (plumbing.
263268
p.fs,
264269
p.file.Name(),
265270
p.deltaBaseCache,
271+
p.largeObjectThreshold,
266272
), nil
267273
}
268274

@@ -282,6 +288,50 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
282288
return obj.Reader()
283289
}
284290

291+
func asyncReader(p *Packfile) (io.ReadCloser, error) {
292+
reader := ioutil.NewReaderUsingReaderAt(p.file, p.s.r.offset)
293+
zr := zlibReaderPool.Get().(io.ReadCloser)
294+
295+
if err := zr.(zlib.Resetter).Reset(reader, nil); err != nil {
296+
return nil, fmt.Errorf("zlib reset error: %s", err)
297+
}
298+
299+
return ioutil.NewReadCloserWithCloser(zr, func() error {
300+
zlibReaderPool.Put(zr)
301+
return nil
302+
}), nil
303+
304+
}
305+
306+
func (p *Packfile) getReaderDirect(h *ObjectHeader) (io.ReadCloser, error) {
307+
switch h.Type {
308+
case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
309+
return asyncReader(p)
310+
case plumbing.REFDeltaObject:
311+
deltaRc, err := asyncReader(p)
312+
if err != nil {
313+
return nil, err
314+
}
315+
r, err := p.readREFDeltaObjectContent(h, deltaRc)
316+
if err != nil {
317+
return nil, err
318+
}
319+
return r, nil
320+
case plumbing.OFSDeltaObject:
321+
deltaRc, err := asyncReader(p)
322+
if err != nil {
323+
return nil, err
324+
}
325+
r, err := p.readOFSDeltaObjectContent(h, deltaRc)
326+
if err != nil {
327+
return nil, err
328+
}
329+
return r, nil
330+
default:
331+
return nil, ErrInvalidObject.AddDetails("type %q", h.Type)
332+
}
333+
}
334+
285335
func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
286336
var obj = new(plumbing.MemoryObject)
287337
obj.SetSize(h.Length)
@@ -334,6 +384,20 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu
334384
return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf)
335385
}
336386

387+
func (p *Packfile) readREFDeltaObjectContent(h *ObjectHeader, deltaRC io.Reader) (io.ReadCloser, error) {
388+
var err error
389+
390+
base, ok := p.cacheGet(h.Reference)
391+
if !ok {
392+
base, err = p.Get(h.Reference)
393+
if err != nil {
394+
return nil, err
395+
}
396+
}
397+
398+
return ReaderFromDelta(base, deltaRC)
399+
}
400+
337401
func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error {
338402
var err error
339403

@@ -364,6 +428,20 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset
364428
return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf)
365429
}
366430

431+
func (p *Packfile) readOFSDeltaObjectContent(h *ObjectHeader, deltaRC io.Reader) (io.ReadCloser, error) {
432+
hash, err := p.FindHash(h.OffsetReference)
433+
if err != nil {
434+
return nil, err
435+
}
436+
437+
base, err := p.objectAtOffset(h.OffsetReference, hash)
438+
if err != nil {
439+
return nil, err
440+
}
441+
442+
return ReaderFromDelta(base, deltaRC)
443+
}
444+
367445
func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error {
368446
hash, err := p.FindHash(offset)
369447
if err != nil {

Diff for: plumbing/format/packfile/packfile_test.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ func (s *PackfileSuite) SetUpTest(c *C) {
111111
s.idx = idxfile.NewMemoryIndex()
112112
c.Assert(idxfile.NewDecoder(s.f.Idx()).Decode(s.idx), IsNil)
113113

114-
s.p = packfile.NewPackfile(s.idx, fixtures.Filesystem, s.f.Packfile())
114+
s.p = packfile.NewPackfile(s.idx, fixtures.Filesystem, s.f.Packfile(), 0)
115115
}
116116

117117
func (s *PackfileSuite) TearDownTest(c *C) {
@@ -122,7 +122,7 @@ func (s *PackfileSuite) TestDecode(c *C) {
122122
fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
123123
index := getIndexFromIdxFile(f.Idx())
124124

125-
p := packfile.NewPackfile(index, fixtures.Filesystem, f.Packfile())
125+
p := packfile.NewPackfile(index, fixtures.Filesystem, f.Packfile(), 0)
126126
defer p.Close()
127127

128128
for _, h := range expectedHashes {
@@ -138,7 +138,7 @@ func (s *PackfileSuite) TestDecodeByTypeRefDelta(c *C) {
138138

139139
index := getIndexFromIdxFile(f.Idx())
140140

141-
packfile := packfile.NewPackfile(index, fixtures.Filesystem, f.Packfile())
141+
packfile := packfile.NewPackfile(index, fixtures.Filesystem, f.Packfile(), 0)
142142
defer packfile.Close()
143143

144144
iter, err := packfile.GetByType(plumbing.CommitObject)
@@ -171,7 +171,7 @@ func (s *PackfileSuite) TestDecodeByType(c *C) {
171171
for _, t := range ts {
172172
index := getIndexFromIdxFile(f.Idx())
173173

174-
packfile := packfile.NewPackfile(index, fixtures.Filesystem, f.Packfile())
174+
packfile := packfile.NewPackfile(index, fixtures.Filesystem, f.Packfile(), 0)
175175
defer packfile.Close()
176176

177177
iter, err := packfile.GetByType(t)
@@ -189,7 +189,7 @@ func (s *PackfileSuite) TestDecodeByTypeConstructor(c *C) {
189189
f := fixtures.Basic().ByTag("packfile").One()
190190
index := getIndexFromIdxFile(f.Idx())
191191

192-
packfile := packfile.NewPackfile(index, fixtures.Filesystem, f.Packfile())
192+
packfile := packfile.NewPackfile(index, fixtures.Filesystem, f.Packfile(), 0)
193193
defer packfile.Close()
194194

195195
_, err := packfile.GetByType(plumbing.OFSDeltaObject)
@@ -266,7 +266,7 @@ func (s *PackfileSuite) TestSize(c *C) {
266266

267267
index := getIndexFromIdxFile(f.Idx())
268268

269-
packfile := packfile.NewPackfile(index, fixtures.Filesystem, f.Packfile())
269+
packfile := packfile.NewPackfile(index, fixtures.Filesystem, f.Packfile(), 0)
270270
defer packfile.Close()
271271

272272
// Get the size of binary.jpg, which is not delta-encoded.

0 commit comments

Comments
 (0)