From 6cee197767a5337cf00ead4756764f2d0a18c3fd Mon Sep 17 00:00:00 2001 From: Alex Sharov Date: Wed, 4 Dec 2024 09:13:52 +0700 Subject: [PATCH] recsplit: compact Enum=true representation (#12970) Previously used the wrong `bytesPerRec`. This reduces the hot random-read parts of `.efi` and some `.idx` files by ~25%. For: https://github.com/erigontech/erigon/issues/12852 PR is backward/forward compatible --- erigon-lib/recsplit/index.go | 1 + erigon-lib/recsplit/recsplit.go | 6 +++++- erigon-lib/recsplit/recsplit_test.go | 7 ++++--- turbo/app/snapshots_cmd.go | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/erigon-lib/recsplit/index.go b/erigon-lib/recsplit/index.go index 5dc9534de77..ae52667a84d 100644 --- a/erigon-lib/recsplit/index.go +++ b/erigon-lib/recsplit/index.go @@ -353,6 +353,7 @@ func (idx *Index) Lookup(bucketHash, fingerprint uint64) (uint64, bool) { } b := gr.ReadNext(idx.golombParam(m)) rec := int(cumKeys) + int(remap16(remix(fingerprint+idx.startSeed[level]+b), m)) + pos := 1 + 8 + idx.bytesPerRec*(rec+1) found := binary.BigEndian.Uint64(idx.data[pos:]) & idx.recMask diff --git a/erigon-lib/recsplit/recsplit.go b/erigon-lib/recsplit/recsplit.go index 161cd2343f1..76e06d30cd0 100644 --- a/erigon-lib/recsplit/recsplit.go +++ b/erigon-lib/recsplit/recsplit.go @@ -604,7 +604,11 @@ func (rs *RecSplit) Build(ctx context.Context) error { return fmt.Errorf("write number of keys: %w", err) } // Write number of bytes per index record - rs.bytesPerRec = common.BitLenToByteLen(bits.Len64(rs.maxOffset)) + if rs.enums { + rs.bytesPerRec = common.BitLenToByteLen(bits.Len64(rs.keysAdded + 1)) + } else { + rs.bytesPerRec = common.BitLenToByteLen(bits.Len64(rs.maxOffset)) + } if err = rs.indexW.WriteByte(byte(rs.bytesPerRec)); err != nil { return fmt.Errorf("write bytes per record: %w", err) } diff --git a/erigon-lib/recsplit/recsplit_test.go b/erigon-lib/recsplit/recsplit_test.go index 5d7062d18ae..6c770a876fe 100644 --- a/erigon-lib/recsplit/recsplit_test.go +++ 
b/erigon-lib/recsplit/recsplit_test.go @@ -149,8 +149,9 @@ func TestTwoLayerIndex(t *testing.T) { tmpDir := t.TempDir() indexFile := filepath.Join(tmpDir, "index") salt := uint32(1) + N := 2571 rs, err := NewRecSplit(RecSplitArgs{ - KeyCount: 100, + KeyCount: N, BucketSize: 10, Salt: &salt, TmpDir: tmpDir, @@ -162,7 +163,7 @@ func TestTwoLayerIndex(t *testing.T) { if err != nil { t.Fatal(err) } - for i := 0; i < 100; i++ { + for i := 0; i < N; i++ { if err = rs.AddKey([]byte(fmt.Sprintf("key %d", i)), uint64(i*17)); err != nil { t.Fatal(err) } @@ -173,7 +174,7 @@ func TestTwoLayerIndex(t *testing.T) { idx := MustOpen(indexFile) defer idx.Close() - for i := 0; i < 100; i++ { + for i := 0; i < N; i++ { reader := NewIndexReader(idx) e, _ := reader.Lookup([]byte(fmt.Sprintf("key %d", i))) if e != uint64(i) { diff --git a/turbo/app/snapshots_cmd.go b/turbo/app/snapshots_cmd.go index 7208f9fd4a1..67e005b4659 100644 --- a/turbo/app/snapshots_cmd.go +++ b/turbo/app/snapshots_cmd.go @@ -88,7 +88,7 @@ func joinFlags(lists ...[]cli.Flag) (res []cli.Flag) { var snapshotCommand = cli.Command{ Name: "seg", - Aliases: []string{"snapshots"}, + Aliases: []string{"snapshots", "segments"}, Usage: `Managing historical data segments (partitions)`, Before: func(cliCtx *cli.Context) error { go mem.LogMemStats(cliCtx.Context, log.New())