Skip to content

Commit

Permalink
recsplit: compact Enum=true representation (#12970)
Browse files Browse the repository at this point in the history
Previously, the wrong `bytesPerRec` was used for Enum=true indexes.
This change reduces the hot random-read parts of `.efi` and some `.idx` files by ~25%.

For: #12852

This PR is both backward- and forward-compatible.
  • Loading branch information
AskAlexSharov authored Dec 4, 2024
1 parent 2c8003c commit 6cee197
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 5 deletions.
1 change: 1 addition & 0 deletions erigon-lib/recsplit/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ func (idx *Index) Lookup(bucketHash, fingerprint uint64) (uint64, bool) {
}
b := gr.ReadNext(idx.golombParam(m))
rec := int(cumKeys) + int(remap16(remix(fingerprint+idx.startSeed[level]+b), m))

pos := 1 + 8 + idx.bytesPerRec*(rec+1)

found := binary.BigEndian.Uint64(idx.data[pos:]) & idx.recMask
Expand Down
6 changes: 5 additions & 1 deletion erigon-lib/recsplit/recsplit.go
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,11 @@ func (rs *RecSplit) Build(ctx context.Context) error {
return fmt.Errorf("write number of keys: %w", err)
}
// Write number of bytes per index record
rs.bytesPerRec = common.BitLenToByteLen(bits.Len64(rs.maxOffset))
if rs.enums {
rs.bytesPerRec = common.BitLenToByteLen(bits.Len64(rs.keysAdded + 1))
} else {
rs.bytesPerRec = common.BitLenToByteLen(bits.Len64(rs.maxOffset))
}
if err = rs.indexW.WriteByte(byte(rs.bytesPerRec)); err != nil {
return fmt.Errorf("write bytes per record: %w", err)
}
Expand Down
7 changes: 4 additions & 3 deletions erigon-lib/recsplit/recsplit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,9 @@ func TestTwoLayerIndex(t *testing.T) {
tmpDir := t.TempDir()
indexFile := filepath.Join(tmpDir, "index")
salt := uint32(1)
N := 2571
rs, err := NewRecSplit(RecSplitArgs{
KeyCount: 100,
KeyCount: N,
BucketSize: 10,
Salt: &salt,
TmpDir: tmpDir,
Expand All @@ -162,7 +163,7 @@ func TestTwoLayerIndex(t *testing.T) {
if err != nil {
t.Fatal(err)
}
for i := 0; i < 100; i++ {
for i := 0; i < N; i++ {
if err = rs.AddKey([]byte(fmt.Sprintf("key %d", i)), uint64(i*17)); err != nil {
t.Fatal(err)
}
Expand All @@ -173,7 +174,7 @@ func TestTwoLayerIndex(t *testing.T) {

idx := MustOpen(indexFile)
defer idx.Close()
for i := 0; i < 100; i++ {
for i := 0; i < N; i++ {
reader := NewIndexReader(idx)
e, _ := reader.Lookup([]byte(fmt.Sprintf("key %d", i)))
if e != uint64(i) {
Expand Down
2 changes: 1 addition & 1 deletion turbo/app/snapshots_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func joinFlags(lists ...[]cli.Flag) (res []cli.Flag) {

var snapshotCommand = cli.Command{
Name: "seg",
Aliases: []string{"snapshots"},
Aliases: []string{"snapshots", "segments"},
Usage: `Managing historical data segments (partitions)`,
Before: func(cliCtx *cli.Context) error {
go mem.LogMemStats(cliCtx.Context, log.New())
Expand Down

0 comments on commit 6cee197

Please sign in to comment.