Skip to content

Commit b466858

Browse files
Merge branch 'master' into add-stopopcode-flatcalltracer
2 parents abd6c47 + 8516902 commit b466858

File tree

2 files changed

+51
-20
lines changed

2 files changed

+51
-20
lines changed

core/state/pruner/pruner.go

+50-19
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ import (
2424
"math"
2525
"os"
2626
"path/filepath"
27-
"runtime"
2827
"sync"
28+
"sync/atomic"
2929
"time"
3030

3131
"github.com/ethereum/go-ethereum/common"
@@ -38,6 +38,7 @@ import (
3838
"github.com/ethereum/go-ethereum/params"
3939
"github.com/ethereum/go-ethereum/rlp"
4040
"github.com/ethereum/go-ethereum/trie"
41+
"github.com/ethereum/go-ethereum/trie/triedb/hashdb"
4142
)
4243

4344
const (
@@ -56,8 +57,10 @@ const (
5657

5758
// Config includes all the configurations for pruning.
5859
type Config struct {
59-
Datadir string // The directory of the state database
60-
BloomSize uint64 // The Megabytes of memory allocated to bloom-filter
60+
Datadir string // The directory of the state database
61+
BloomSize uint64 // The Megabytes of memory allocated to bloom-filter
62+
Threads int // The maximum number of threads spawned in dumpRawTrieDescendants and removeOtherRoots
63+
CleanCacheSize int // The Megabytes of clean cache size used in dumpRawTrieDescendants
6164
}
6265

6366
// Pruner is an offline tool to prune the stale state with the
@@ -107,6 +110,10 @@ func NewPruner(db ethdb.Database, config Config) (*Pruner, error) {
107110
if err != nil {
108111
return nil, err
109112
}
113+
// Sanitize the thread count: fall back to a single thread if it is zero or negative.
114+
if config.Threads <= 0 {
115+
config.Threads = 1
116+
}
110117
return &Pruner{
111118
config: config,
112119
chainHeader: headBlock.Header(),
@@ -124,7 +131,7 @@ func readStoredChainConfig(db ethdb.Database) *params.ChainConfig {
124131
return rawdb.ReadChainConfig(db, block0Hash)
125132
}
126133

127-
func removeOtherRoots(db ethdb.Database, rootsList []common.Hash, stateBloom *stateBloom) error {
134+
func removeOtherRoots(db ethdb.Database, rootsList []common.Hash, stateBloom *stateBloom, threads int) error {
128135
chainConfig := readStoredChainConfig(db)
129136
var genesisBlockNum uint64
130137
if chainConfig != nil {
@@ -139,7 +146,6 @@ func removeOtherRoots(db ethdb.Database, rootsList []common.Hash, stateBloom *st
139146
return errors.New("failed to load head block")
140147
}
141148
blockRange := headBlock.NumberU64() - genesisBlockNum
142-
threads := runtime.NumCPU()
143149
var wg sync.WaitGroup
144150
errors := make(chan error, threads)
145151
for thread := 0; thread < threads; thread++ {
@@ -207,7 +213,7 @@ func removeOtherRoots(db ethdb.Database, rootsList []common.Hash, stateBloom *st
207213
}
208214

209215
// Arbitrum: snaptree and root are for the final snapshot kept
210-
func prune(snaptree *snapshot.Tree, allRoots []common.Hash, maindb ethdb.Database, stateBloom *stateBloom, bloomPath string, start time.Time) error {
216+
func prune(snaptree *snapshot.Tree, allRoots []common.Hash, maindb ethdb.Database, stateBloom *stateBloom, bloomPath string, start time.Time, threads int) error {
211217
// Delete all stale trie nodes in the disk. With the help of state bloom
212218
// the trie nodes(and codes) belong to the active state will be filtered
213219
// out. A very small part of stale tries will also be filtered because of
@@ -297,7 +303,7 @@ func prune(snaptree *snapshot.Tree, allRoots []common.Hash, maindb ethdb.Databas
297303
}
298304

299305
// Clean up any false positives that are top-level state roots.
300-
err := removeOtherRoots(maindb, allRoots, stateBloom)
306+
err := removeOtherRoots(maindb, allRoots, stateBloom, threads)
301307
if err != nil {
302308
return err
303309
}
@@ -333,8 +339,16 @@ func prune(snaptree *snapshot.Tree, allRoots []common.Hash, maindb ethdb.Databas
333339
}
334340

335341
// We assume state blooms do not need the value, only the key
336-
func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBloom) error {
337-
sdb := state.NewDatabase(db)
342+
func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBloom, config *Config) error {
343+
// Offline pruning is only supported in the legacy hash-based scheme.
344+
hashConfig := *hashdb.Defaults
345+
hashConfig.CleanCacheSize = config.CleanCacheSize * 1024 * 1024
346+
trieConfig := &trie.Config{
347+
Preimages: false,
348+
HashDB: &hashConfig,
349+
}
350+
sdb := state.NewDatabaseWithConfig(db, trieConfig)
351+
defer sdb.TrieDB().Close()
338352
tr, err := sdb.OpenTrie(root)
339353
if err != nil {
340354
return err
@@ -350,11 +364,12 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
350364
// To do so, we create a semaphore out of a channel's buffer.
351365
// Before launching a new goroutine, we acquire the semaphore by taking an entry from this channel.
352366
// This channel doubles as a mechanism for the background goroutine to report an error on release.
353-
threads := runtime.NumCPU()
367+
threads := config.Threads
354368
results := make(chan error, threads)
355369
for i := 0; i < threads; i++ {
356370
results <- nil
357371
}
372+
var threadsRunning atomic.Int32
358373

359374
for accountIt.Next(true) {
360375
accountTrieHash := accountIt.Hash()
@@ -385,7 +400,10 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
385400
output.Put(data.CodeHash, nil)
386401
}
387402
if data.Root != (common.Hash{}) {
388-
storageTr, err := trie.NewStateTrie(trie.StorageTrieID(root, key, data.Root), sdb.TrieDB())
403+
// Note: data.Root is deliberately passed as the stateRoot here to skip the stateRoot existence check in trie.newTrieReader.
404+
// That check has already been done when opening the state trie and reading the account node.
405+
trieID := trie.StorageTrieID(data.Root, key, data.Root)
406+
storageTr, err := trie.NewStateTrie(trieID, sdb.TrieDB())
389407
if err != nil {
390408
return err
391409
}
@@ -394,14 +412,20 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
394412
return err
395413
}
396414
go func() {
415+
threadsRunning.Add(1)
416+
defer threadsRunning.Add(-1)
397417
var err error
398418
defer func() {
399419
results <- err
400420
}()
421+
threadStartedAt := time.Now()
422+
threadLastLog := time.Now()
423+
401424
storageIt, err := storageTr.NodeIterator(nil)
402425
if err != nil {
403426
return
404427
}
428+
var processedNodes uint64
405429
for storageIt.Next(true) {
406430
storageTrieHash := storageIt.Hash()
407431
if storageTrieHash != (common.Hash{}) {
@@ -411,6 +435,13 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
411435
return
412436
}
413437
}
438+
processedNodes++
439+
if time.Since(threadLastLog) > 5*time.Minute {
440+
elapsedTotal := time.Since(startedAt)
441+
elapsedThread := time.Since(threadStartedAt)
442+
log.Info("traversing trie database - traversing storage trie taking long", "key", key, "elapsedTotal", elapsedTotal, "elapsedThread", elapsedThread, "processedNodes", processedNodes, "threadsRunning", threadsRunning.Load())
443+
threadLastLog = time.Now()
444+
}
414445
}
415446
err = storageIt.Error()
416447
if err != nil {
@@ -445,7 +476,7 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
445476
return err
446477
}
447478
if bloomExists {
448-
return RecoverPruning(p.config.Datadir, p.db)
479+
return RecoverPruning(p.config.Datadir, p.db, p.config.Threads)
449480
}
450481
// Retrieve all snapshot layers from the current HEAD.
451482
// In theory there are 128 difflayers + 1 disk layer present,
@@ -511,14 +542,14 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
511542
return err
512543
}
513544
} else {
514-
if err := dumpRawTrieDescendants(p.db, root, p.stateBloom); err != nil {
545+
if err := dumpRawTrieDescendants(p.db, root, p.stateBloom, &p.config); err != nil {
515546
return err
516547
}
517548
}
518549
}
519550
// Traverse the genesis, put all genesis state entries into the
520551
// bloom filter too.
521-
if err := extractGenesis(p.db, p.stateBloom); err != nil {
552+
if err := extractGenesis(p.db, p.stateBloom, &p.config); err != nil {
522553
return err
523554
}
524555

@@ -529,7 +560,7 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
529560
return err
530561
}
531562
log.Info("State bloom filter committed", "name", filterName, "roots", roots)
532-
return prune(p.snaptree, roots, p.db, p.stateBloom, filterName, start)
563+
return prune(p.snaptree, roots, p.db, p.stateBloom, filterName, start, p.config.Threads)
533564
}
534565

535566
// RecoverPruning will resume the pruning procedure during the system restart.
@@ -539,7 +570,7 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
539570
// pruning can be resumed. What's more if the bloom filter is constructed, the
540571
// pruning **has to be resumed**. Otherwise a lot of dangling nodes may be left
541572
// in the disk.
542-
func RecoverPruning(datadir string, db ethdb.Database) error {
573+
func RecoverPruning(datadir string, db ethdb.Database, threads int) error {
543574
exists, err := bloomFilterExists(datadir)
544575
if err != nil {
545576
return err
@@ -578,12 +609,12 @@ func RecoverPruning(datadir string, db ethdb.Database) error {
578609
}
579610
log.Info("Loaded state bloom filter", "path", stateBloomPath, "roots", stateBloomRoots)
580611

581-
return prune(snaptree, stateBloomRoots, db, stateBloom, stateBloomPath, time.Now())
612+
return prune(snaptree, stateBloomRoots, db, stateBloom, stateBloomPath, time.Now(), threads)
582613
}
583614

584615
// extractGenesis loads the genesis state and commits all the state entries
585616
// into the given bloomfilter.
586-
func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
617+
func extractGenesis(db ethdb.Database, stateBloom *stateBloom, config *Config) error {
587618
genesisHash := rawdb.ReadCanonicalHash(db, 0)
588619
if genesisHash == (common.Hash{}) {
589620
return errors.New("missing genesis hash")
@@ -593,7 +624,7 @@ func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
593624
return errors.New("missing genesis block")
594625
}
595626

596-
return dumpRawTrieDescendants(db, genesis.Root(), stateBloom)
627+
return dumpRawTrieDescendants(db, genesis.Root(), stateBloom, config)
597628
}
598629

599630
func bloomFilterPath(datadir string) string {

eth/backend.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) {
140140
}
141141
// Try to recover offline state pruning only in the hash-based scheme.
142142
if scheme == rawdb.HashScheme {
143-
if err := pruner.RecoverPruning(stack.ResolvePath(""), chainDb); err != nil {
143+
if err := pruner.RecoverPruning(stack.ResolvePath(""), chainDb, 1); err != nil {
144144
log.Error("Failed to recover state", "error", err)
145145
}
146146
}

0 commit comments

Comments
 (0)