Skip to content

Commit e28d9c9

Browse files
committed
Support partial hashes in Repository.ResolveRevision.
Like `git rev-parse <prefix>`, this enumerates the hashes of objects with the given prefix and adds them to the list of candidates for resolution. This has an exhaustive slow path, which requires enumerating all objects and filtering each one, but also a couple of fast paths for common cases. There's room for future work to make this faster; TODOs have been left for that. Fixes go-git#135.
1 parent 41758ec commit e28d9c9

File tree

8 files changed

+234
-10
lines changed

8 files changed

+234
-10
lines changed

Diff for: internal/revision/parser.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ func (e *ErrInvalidRevision) Error() string {
2828
type Revisioner interface {
2929
}
3030

31-
// Ref represents a reference name : HEAD, master
31+
// Ref represents a reference name : HEAD, master, <hash>
3232
type Ref string
3333

3434
// TildePath represents ~, ~{n}

Diff for: internal/revision/parser_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ func (s *ParserSuite) TestParseRefWithValidName(c *C) {
354354
"refs/remotes/test",
355355
"refs/remotes/origin/HEAD",
356356
"refs/remotes/origin/master",
357+
"0123abcd", // short hash
357358
}
358359

359360
for _, d := range datas {

Diff for: repository.go

+74-6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package git
33
import (
44
"bytes"
55
"context"
6+
"encoding/hex"
67
"errors"
78
"fmt"
89
"io"
@@ -1426,7 +1427,7 @@ func (r *Repository) Worktree() (*Worktree, error) {
14261427
// resolve to a commit hash, not a tree or annotated tag.
14271428
//
14281429
// Implemented resolvers : HEAD, branch, tag, heads/branch, refs/heads/branch,
1429-
// refs/tags/tag, refs/remotes/origin/branch, refs/remotes/origin/HEAD, tilde and caret (HEAD~1, master~^, tag~2, ref/heads/master~1, ...), selection by text (HEAD^{/fix nasty bug})
1430+
// refs/tags/tag, refs/remotes/origin/branch, refs/remotes/origin/HEAD, tilde and caret (HEAD~1, master~^, tag~2, refs/heads/master~1, ...), selection by text (HEAD^{/fix nasty bug}), hash (prefix and full)
14301431
func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, error) {
14311432
p := revision.NewParserFromString(string(rev))
14321433

@@ -1445,11 +1446,7 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err
14451446

14461447
var tryHashes []plumbing.Hash
14471448

1448-
maybeHash := plumbing.NewHash(string(revisionRef))
1449-
1450-
if !maybeHash.IsZero() {
1451-
tryHashes = append(tryHashes, maybeHash)
1452-
}
1449+
tryHashes = append(tryHashes, r.resolveHashPrefix(string(revisionRef))...)
14531450

14541451
for _, rule := range append([]string{"%s"}, plumbing.RefRevParseRules...) {
14551452
ref, err := storer.ResolveReference(r.Storer, plumbing.ReferenceName(fmt.Sprintf(rule, revisionRef)))
@@ -1567,6 +1564,49 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err
15671564
return &commit.Hash, nil
15681565
}
15691566

1567+
// resolveHashPrefix returns a list of potential hashes that the given string
1568+
// is a prefix of. It quietly swallows errors, returning nil.
1569+
func (r *Repository) resolveHashPrefix(hashStr string) []plumbing.Hash {
1570+
// Handle complete and partial hashes.
1571+
// plumbing.NewHash forces args into a full 20 byte hash, which isn't suitable
1572+
// for partial hashes since they will become zero-filled.
1573+
1574+
if hashStr == "" {
1575+
return nil
1576+
}
1577+
if len(hashStr) == len(plumbing.ZeroHash)*2 {
1578+
// Only a full hash is possible.
1579+
hexb, err := hex.DecodeString(hashStr)
1580+
if err != nil {
1581+
return nil
1582+
}
1583+
var h plumbing.Hash
1584+
copy(h[:], hexb)
1585+
return []plumbing.Hash{h}
1586+
}
1587+
1588+
// Partial hash.
1589+
// hex.DecodeString only decodes to complete bytes, so only works with pairs of hex digits.
1590+
evenHex := hashStr[:len(hashStr)&^1]
1591+
hexb, err := hex.DecodeString(evenHex)
1592+
if err != nil {
1593+
return nil
1594+
}
1595+
candidates := expandPartialHash(r.Storer, hexb)
1596+
if len(evenHex) == len(hashStr) {
1597+
// The prefix was an exact number of bytes.
1598+
return candidates
1599+
}
1600+
// Do another prefix check to ensure the dangling nybble is correct.
1601+
var hashes []plumbing.Hash
1602+
for _, h := range candidates {
1603+
if strings.HasPrefix(h.String(), hashStr) {
1604+
hashes = append(hashes, h)
1605+
}
1606+
}
1607+
return hashes
1608+
}
1609+
15701610
type RepackConfig struct {
15711611
// UseRefDeltas configures whether packfile encoder will use reference deltas.
15721612
// By default OFSDeltaObject is used.
@@ -1659,3 +1699,31 @@ func (r *Repository) createNewObjectPack(cfg *RepackConfig) (h plumbing.Hash, er
16591699

16601700
return h, err
16611701
}
1702+
1703+
func expandPartialHash(st storer.EncodedObjectStorer, prefix []byte) (hashes []plumbing.Hash) {
1704+
// The fast version is implemented by storage/filesystem.ObjectStorage.
1705+
type fastIter interface {
1706+
HashesWithPrefix(prefix []byte) ([]plumbing.Hash, error)
1707+
}
1708+
if fi, ok := st.(fastIter); ok {
1709+
h, err := fi.HashesWithPrefix(prefix)
1710+
if err != nil {
1711+
return nil
1712+
}
1713+
return h
1714+
}
1715+
1716+
// Slow path.
1717+
iter, err := st.IterEncodedObjects(plumbing.AnyObject)
1718+
if err != nil {
1719+
return nil
1720+
}
1721+
iter.ForEach(func(obj plumbing.EncodedObject) error {
1722+
h := obj.Hash()
1723+
if bytes.HasPrefix(h[:], prefix) {
1724+
hashes = append(hashes, h)
1725+
}
1726+
return nil
1727+
})
1728+
return
1729+
}

Diff for: repository_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -2642,6 +2642,7 @@ func (s *RepositorySuite) TestResolveRevision(c *C) {
26422642
"v1.0.0~1": "918c48b83bd081e863dbe1b80f8998f058cd8294",
26432643
"master~1": "918c48b83bd081e863dbe1b80f8998f058cd8294",
26442644
"918c48b83bd081e863dbe1b80f8998f058cd8294": "918c48b83bd081e863dbe1b80f8998f058cd8294",
2645+
"918c48b": "918c48b83bd081e863dbe1b80f8998f058cd8294", // odd number of hex digits
26452646
}
26462647

26472648
for rev, hash := range datas {

Diff for: storage/filesystem/dotgit/dotgit.go

+74-3
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ package dotgit
33

44
import (
55
"bufio"
6+
"bytes"
67
"errors"
78
"fmt"
89
"io"
910
stdioutil "io/ioutil"
1011
"os"
1112
"path/filepath"
13+
"sort"
1214
"strings"
1315
"time"
1416

@@ -88,7 +90,7 @@ type DotGit struct {
8890
incomingChecked bool
8991
incomingDirName string
9092

91-
objectList []plumbing.Hash
93+
objectList []plumbing.Hash // sorted
9294
objectMap map[plumbing.Hash]struct{}
9395
packList []plumbing.Hash
9496
packMap map[plumbing.Hash]struct{}
@@ -336,6 +338,53 @@ func (d *DotGit) NewObject() (*ObjectWriter, error) {
336338
return newObjectWriter(d.fs)
337339
}
338340

341+
// ObjectsWithPrefix returns the hashes of objects that have the given prefix.
342+
func (d *DotGit) ObjectsWithPrefix(prefix []byte) ([]plumbing.Hash, error) {
343+
// Handle edge cases.
344+
if len(prefix) < 1 {
345+
return d.Objects()
346+
} else if len(prefix) > len(plumbing.ZeroHash) {
347+
return nil, nil
348+
}
349+
350+
if d.options.ExclusiveAccess {
351+
err := d.genObjectList()
352+
if err != nil {
353+
return nil, err
354+
}
355+
356+
// Rely on d.objectList being sorted.
357+
// Figure out the half-open interval defined by the prefix.
358+
first := sort.Search(len(d.objectList), func(i int) bool {
359+
// Same as plumbing.HashSlice.Less.
360+
return bytes.Compare(d.objectList[i][:], prefix) >= 0
361+
})
362+
lim := len(d.objectList)
363+
if limPrefix, overflow := incBytes(prefix); !overflow {
364+
lim = sort.Search(len(d.objectList), func(i int) bool {
365+
// Same as plumbing.HashSlice.Less.
366+
return bytes.Compare(d.objectList[i][:], limPrefix) >= 0
367+
})
368+
}
369+
return d.objectList[first:lim], nil
370+
}
371+
372+
// This is the slow path.
373+
var objects []plumbing.Hash
374+
var n int
375+
err := d.ForEachObjectHash(func(hash plumbing.Hash) error {
376+
n++
377+
if bytes.HasPrefix(hash[:], prefix) {
378+
objects = append(objects, hash)
379+
}
380+
return nil
381+
})
382+
if err != nil {
383+
return nil, err
384+
}
385+
return objects, nil
386+
}
387+
339388
// Objects returns a slice with the hashes of objects found under the
340389
// .git/objects/ directory.
341390
func (d *DotGit) Objects() ([]plumbing.Hash, error) {
@@ -427,12 +476,17 @@ func (d *DotGit) genObjectList() error {
427476
}
428477

429478
d.objectMap = make(map[plumbing.Hash]struct{})
430-
return d.forEachObjectHash(func(h plumbing.Hash) error {
479+
populate := func(h plumbing.Hash) error {
431480
d.objectList = append(d.objectList, h)
432481
d.objectMap[h] = struct{}{}
433482

434483
return nil
435-
})
484+
}
485+
if err := d.forEachObjectHash(populate); err != nil {
486+
return err
487+
}
488+
plumbing.HashesSort(d.objectList)
489+
return nil
436490
}
437491

438492
func (d *DotGit) hasObject(h plumbing.Hash) error {
@@ -1115,3 +1169,20 @@ func isNum(b byte) bool {
11151169
func isHexAlpha(b byte) bool {
11161170
return b >= 'a' && b <= 'f' || b >= 'A' && b <= 'F'
11171171
}
1172+
1173+
// incBytes treats in as a big-endian unsigned number and returns that number
// plus one: the last byte is bumped, and every byte that wraps to zero
// carries into the byte on its left.
// The input is never modified; the result is a fresh slice of the same length.
// overflow is true when the carry runs off the left edge (every byte was
// 0xff, or in was empty), in which case out is all zeros.
func incBytes(in []byte) (out []byte, overflow bool) {
	result := make([]byte, len(in))
	copy(result, in)
	for pos := len(result); pos > 0; pos-- {
		result[pos-1]++
		if result[pos-1] != 0 {
			// No wrap-around at this byte, so the carry stops here.
			return result, false
		}
	}
	return result, true
}

Diff for: storage/filesystem/dotgit/dotgit_test.go

+35
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package dotgit
22

33
import (
44
"bufio"
5+
"encoding/hex"
56
"io/ioutil"
67
"os"
78
"path/filepath"
@@ -591,13 +592,15 @@ func (s *SuiteDotGit) TestObjects(c *C) {
591592
dir := New(fs)
592593

593594
testObjects(c, fs, dir)
595+
testObjectsWithPrefix(c, fs, dir)
594596
}
595597

596598
func (s *SuiteDotGit) TestObjectsExclusive(c *C) {
597599
fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit()
598600
dir := NewWithOptions(fs, Options{ExclusiveAccess: true})
599601

600602
testObjects(c, fs, dir)
603+
testObjectsWithPrefix(c, fs, dir)
601604
}
602605

603606
func testObjects(c *C, fs billy.Filesystem, dir *DotGit) {
@@ -609,6 +612,20 @@ func testObjects(c *C, fs billy.Filesystem, dir *DotGit) {
609612
c.Assert(hashes[2].String(), Equals, "03db8e1fbe133a480f2867aac478fd866686d69e")
610613
}
611614

615+
func testObjectsWithPrefix(c *C, fs billy.Filesystem, dir *DotGit) {
616+
prefix, _ := hex.DecodeString("01d5")
617+
hashes, err := dir.ObjectsWithPrefix(prefix)
618+
c.Assert(err, IsNil)
619+
c.Assert(hashes, HasLen, 1)
620+
c.Assert(hashes[0].String(), Equals, "01d5fa556c33743006de7e76e67a2dfcd994ca04")
621+
622+
// Empty prefix should yield all objects.
623+
// (subset of testObjects)
624+
hashes, err = dir.ObjectsWithPrefix(nil)
625+
c.Assert(err, IsNil)
626+
c.Assert(hashes, HasLen, 187)
627+
}
628+
612629
func (s *SuiteDotGit) TestObjectsNoFolder(c *C) {
613630
tmp, err := ioutil.TempDir("", "dot-git")
614631
c.Assert(err, IsNil)
@@ -835,3 +852,21 @@ type norwfs struct {
835852
func (f *norwfs) Capabilities() billy.Capability {
836853
return billy.Capabilities(f.Filesystem) &^ billy.ReadAndWriteCapability
837854
}
855+
856+
func (s *SuiteDotGit) TestIncBytes(c *C) {
857+
tests := []struct {
858+
in []byte
859+
out []byte
860+
overflow bool
861+
}{
862+
{[]byte{0}, []byte{1}, false},
863+
{[]byte{0xff}, []byte{0}, true},
864+
{[]byte{7, 0xff}, []byte{8, 0}, false},
865+
{[]byte{0xff, 0xff}, []byte{0, 0}, true},
866+
}
867+
for _, test := range tests {
868+
out, overflow := incBytes(test.in)
869+
c.Assert(out, DeepEquals, test.out)
870+
c.Assert(overflow, Equals, test.overflow)
871+
}
872+
}

Diff for: storage/filesystem/object.go

+31
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package filesystem
22

33
import (
4+
"bytes"
45
"io"
56
"os"
67
"time"
@@ -518,6 +519,36 @@ func (s *ObjectStorage) findObjectInPackfile(h plumbing.Hash) (plumbing.Hash, pl
518519
return plumbing.ZeroHash, plumbing.ZeroHash, -1
519520
}
520521

522+
func (s *ObjectStorage) HashesWithPrefix(prefix []byte) ([]plumbing.Hash, error) {
523+
hashes, err := s.dir.ObjectsWithPrefix(prefix)
524+
if err != nil {
525+
return nil, err
526+
}
527+
528+
// TODO: This could be faster with some idxfile changes,
529+
// or diving into the packfile.
530+
for _, index := range s.index {
531+
ei, err := index.Entries()
532+
if err != nil {
533+
return nil, err
534+
}
535+
for {
536+
e, err := ei.Next()
537+
if err == io.EOF {
538+
break
539+
} else if err != nil {
540+
return nil, err
541+
}
542+
if bytes.HasPrefix(e.Hash[:], prefix) {
543+
hashes = append(hashes, e.Hash)
544+
}
545+
}
546+
ei.Close()
547+
}
548+
549+
return hashes, nil
550+
}
551+
521552
// IterEncodedObjects returns an iterator for all the objects in the packfile
522553
// with the given type.
523554
func (s *ObjectStorage) IterEncodedObjects(t plumbing.ObjectType) (storer.EncodedObjectIter, error) {

Diff for: storage/filesystem/object_test.go

+17
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package filesystem
22

33
import (
4+
"encoding/hex"
45
"fmt"
56
"io"
67
"io/ioutil"
@@ -332,6 +333,22 @@ func (s *FsSuite) TestGetFromObjectFileSharedCache(c *C) {
332333
c.Assert(err, Equals, plumbing.ErrObjectNotFound)
333334
}
334335

336+
func (s *FsSuite) TestHashesWithPrefix(c *C) {
337+
// Same setup as TestGetFromObjectFile.
338+
fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit()
339+
o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault())
340+
expected := plumbing.NewHash("f3dfe29d268303fc6e1bbce268605fc99573406e")
341+
obj, err := o.EncodedObject(plumbing.AnyObject, expected)
342+
c.Assert(err, IsNil)
343+
c.Assert(obj.Hash(), Equals, expected)
344+
345+
prefix, _ := hex.DecodeString("f3dfe2")
346+
hashes, err := o.HashesWithPrefix(prefix)
347+
c.Assert(err, IsNil)
348+
c.Assert(hashes, HasLen, 1)
349+
c.Assert(hashes[0].String(), Equals, "f3dfe29d268303fc6e1bbce268605fc99573406e")
350+
}
351+
335352
func BenchmarkPackfileIter(b *testing.B) {
336353
defer fixtures.Clean()
337354

0 commit comments

Comments
 (0)