@@ -24,8 +24,8 @@ import (
2424 "math"
2525 "os"
2626 "path/filepath"
27- "runtime"
2827 "sync"
28+ "sync/atomic"
2929 "time"
3030
3131 "github.com/ethereum/go-ethereum/common"
@@ -38,6 +38,7 @@ import (
3838 "github.com/ethereum/go-ethereum/params"
3939 "github.com/ethereum/go-ethereum/rlp"
4040 "github.com/ethereum/go-ethereum/trie"
41+ "github.com/ethereum/go-ethereum/trie/triedb/hashdb"
4142)
4243
4344const (
@@ -56,8 +57,10 @@ const (
5657
5758// Config includes all the configurations for pruning.
5859type Config struct {
59- Datadir string // The directory of the state database
60- BloomSize uint64 // The Megabytes of memory allocated to bloom-filter
60+ Datadir string // The directory of the state database
61+ BloomSize uint64 // The Megabytes of memory allocated to bloom-filter
62+ Threads int // The maximum number of threads spawned in dumpRawTrieDescendants and removeOtherRoots; NewPruner raises values <= 0 to 1
63+ CleanCacheSize int // The clean cache size, in Megabytes, of the hash-based trie database opened in dumpRawTrieDescendants (converted to bytes there)
6164}
6265
6366// Pruner is an offline tool to prune the stale state with the
@@ -107,6 +110,10 @@ func NewPruner(db ethdb.Database, config Config) (*Pruner, error) {
107110 if err != nil {
108111 return nil , err
109112 }
113+ // sanitize threads number, if set too low
114+ if config .Threads <= 0 {
115+ config .Threads = 1
116+ }
110117 return & Pruner {
111118 config : config ,
112119 chainHeader : headBlock .Header (),
@@ -124,7 +131,7 @@ func readStoredChainConfig(db ethdb.Database) *params.ChainConfig {
124131 return rawdb .ReadChainConfig (db , block0Hash )
125132}
126133
127- func removeOtherRoots (db ethdb.Database , rootsList []common.Hash , stateBloom * stateBloom ) error {
134+ func removeOtherRoots (db ethdb.Database , rootsList []common.Hash , stateBloom * stateBloom , threads int ) error {
128135 chainConfig := readStoredChainConfig (db )
129136 var genesisBlockNum uint64
130137 if chainConfig != nil {
@@ -139,7 +146,6 @@ func removeOtherRoots(db ethdb.Database, rootsList []common.Hash, stateBloom *st
139146 return errors .New ("failed to load head block" )
140147 }
141148 blockRange := headBlock .NumberU64 () - genesisBlockNum
142- threads := runtime .NumCPU ()
143149 var wg sync.WaitGroup
144150 errors := make (chan error , threads )
145151 for thread := 0 ; thread < threads ; thread ++ {
@@ -207,7 +213,7 @@ func removeOtherRoots(db ethdb.Database, rootsList []common.Hash, stateBloom *st
207213}
208214
209215// Arbitrum: snaptree and root are for the final snapshot kept
210- func prune (snaptree * snapshot.Tree , allRoots []common.Hash , maindb ethdb.Database , stateBloom * stateBloom , bloomPath string , start time.Time ) error {
216+ func prune (snaptree * snapshot.Tree , allRoots []common.Hash , maindb ethdb.Database , stateBloom * stateBloom , bloomPath string , start time.Time , threads int ) error {
211217 // Delete all stale trie nodes in the disk. With the help of state bloom
212218 // the trie nodes(and codes) belong to the active state will be filtered
213219 // out. A very small part of stale tries will also be filtered because of
@@ -297,7 +303,7 @@ func prune(snaptree *snapshot.Tree, allRoots []common.Hash, maindb ethdb.Databas
297303 }
298304
299305 // Clean up any false positives that are top-level state roots.
300- err := removeOtherRoots (maindb , allRoots , stateBloom )
306+ err := removeOtherRoots (maindb , allRoots , stateBloom , threads )
301307 if err != nil {
302308 return err
303309 }
@@ -333,8 +339,16 @@ func prune(snaptree *snapshot.Tree, allRoots []common.Hash, maindb ethdb.Databas
333339}
334340
335341// We assume state blooms do not need the value, only the key
336- func dumpRawTrieDescendants (db ethdb.Database , root common.Hash , output * stateBloom ) error {
337- sdb := state .NewDatabase (db )
342+ func dumpRawTrieDescendants (db ethdb.Database , root common.Hash , output * stateBloom , config * Config ) error {
343+ // Offline pruning is only supported in legacy hash based scheme.
344+ hashConfig := * hashdb .Defaults
345+ hashConfig .CleanCacheSize = config .CleanCacheSize * 1024 * 1024
346+ trieConfig := & trie.Config {
347+ Preimages : false ,
348+ HashDB : & hashConfig ,
349+ }
350+ sdb := state .NewDatabaseWithConfig (db , trieConfig )
351+ defer sdb .TrieDB ().Close ()
338352 tr , err := sdb .OpenTrie (root )
339353 if err != nil {
340354 return err
@@ -350,11 +364,12 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
350364 // To do so, we create a semaphore out of a channel's buffer.
351365 // Before launching a new goroutine, we acquire the semaphore by taking an entry from this channel.
352366 // This channel doubles as a mechanism for the background goroutine to report an error on release.
353- threads := runtime . NumCPU ()
367+ threads := config . Threads
354368 results := make (chan error , threads )
355369 for i := 0 ; i < threads ; i ++ {
356370 results <- nil
357371 }
372+ var threadsRunning atomic.Int32
358373
359374 for accountIt .Next (true ) {
360375 accountTrieHash := accountIt .Hash ()
@@ -385,7 +400,10 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
385400 output .Put (data .CodeHash , nil )
386401 }
387402 if data .Root != (common.Hash {}) {
388- storageTr , err := trie .NewStateTrie (trie .StorageTrieID (root , key , data .Root ), sdb .TrieDB ())
403+ // note: we are passing data.Root as stateRoot here, to skip the check for stateRoot existence in trie.newTrieReader,
404+ // we already check that when opening state trie and reading the account node
405+ trieID := trie .StorageTrieID (data .Root , key , data .Root )
406+ storageTr , err := trie .NewStateTrie (trieID , sdb .TrieDB ())
389407 if err != nil {
390408 return err
391409 }
@@ -394,14 +412,20 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
394412 return err
395413 }
396414 go func () {
415+ threadsRunning .Add (1 )
416+ defer threadsRunning .Add (- 1 )
397417 var err error
398418 defer func () {
399419 results <- err
400420 }()
421+ threadStartedAt := time .Now ()
422+ threadLastLog := time .Now ()
423+
401424 storageIt , err := storageTr .NodeIterator (nil )
402425 if err != nil {
403426 return
404427 }
428+ var processedNodes uint64
405429 for storageIt .Next (true ) {
406430 storageTrieHash := storageIt .Hash ()
407431 if storageTrieHash != (common.Hash {}) {
@@ -411,6 +435,13 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
411435 return
412436 }
413437 }
438+ processedNodes ++
439+ if time .Since (threadLastLog ) > 5 * time .Minute {
440+ elapsedTotal := time .Since (startedAt )
441+ elapsedThread := time .Since (threadStartedAt )
442+ log .Info ("traversing trie database - traversing storage trie taking long" , "key" , key , "elapsedTotal" , elapsedTotal , "elapsedThread" , elapsedThread , "processedNodes" , processedNodes , "threadsRunning" , threadsRunning .Load ())
443+ threadLastLog = time .Now ()
444+ }
414445 }
415446 err = storageIt .Error ()
416447 if err != nil {
@@ -445,7 +476,7 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
445476 return err
446477 }
447478 if bloomExists {
448- return RecoverPruning (p .config .Datadir , p .db )
479+ return RecoverPruning (p .config .Datadir , p .db , p . config . Threads )
449480 }
450481 // Retrieve all snapshot layers from the current HEAD.
451482 // In theory there are 128 difflayers + 1 disk layer present,
@@ -511,14 +542,14 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
511542 return err
512543 }
513544 } else {
514- if err := dumpRawTrieDescendants (p .db , root , p .stateBloom ); err != nil {
545+ if err := dumpRawTrieDescendants (p .db , root , p .stateBloom , & p . config ); err != nil {
515546 return err
516547 }
517548 }
518549 }
519550 // Traverse the genesis, put all genesis state entries into the
520551 // bloom filter too.
521- if err := extractGenesis (p .db , p .stateBloom ); err != nil {
552+ if err := extractGenesis (p .db , p .stateBloom , & p . config ); err != nil {
522553 return err
523554 }
524555
@@ -529,7 +560,7 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
529560 return err
530561 }
531562 log .Info ("State bloom filter committed" , "name" , filterName , "roots" , roots )
532- return prune (p .snaptree , roots , p .db , p .stateBloom , filterName , start )
563+ return prune (p .snaptree , roots , p .db , p .stateBloom , filterName , start , p . config . Threads )
533564}
534565
535566// RecoverPruning will resume the pruning procedure during the system restart.
@@ -539,7 +570,7 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
539570// pruning can be resumed. What's more if the bloom filter is constructed, the
540571// pruning **has to be resumed**. Otherwise a lot of dangling nodes may be left
541572// in the disk.
542- func RecoverPruning (datadir string , db ethdb.Database ) error {
573+ func RecoverPruning (datadir string , db ethdb.Database , threads int ) error {
543574 exists , err := bloomFilterExists (datadir )
544575 if err != nil {
545576 return err
@@ -578,12 +609,12 @@ func RecoverPruning(datadir string, db ethdb.Database) error {
578609 }
579610 log .Info ("Loaded state bloom filter" , "path" , stateBloomPath , "roots" , stateBloomRoots )
580611
581- return prune (snaptree , stateBloomRoots , db , stateBloom , stateBloomPath , time .Now ())
612+ return prune (snaptree , stateBloomRoots , db , stateBloom , stateBloomPath , time .Now (), threads )
582613}
583614
584615// extractGenesis loads the genesis state and commits all the state entries
585616// into the given bloomfilter.
586- func extractGenesis (db ethdb.Database , stateBloom * stateBloom ) error {
617+ func extractGenesis (db ethdb.Database , stateBloom * stateBloom , config * Config ) error {
587618 genesisHash := rawdb .ReadCanonicalHash (db , 0 )
588619 if genesisHash == (common.Hash {}) {
589620 return errors .New ("missing genesis hash" )
@@ -593,7 +624,7 @@ func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
593624 return errors .New ("missing genesis block" )
594625 }
595626
596- return dumpRawTrieDescendants (db , genesis .Root (), stateBloom )
627+ return dumpRawTrieDescendants (db , genesis .Root (), stateBloom , config )
597628}
598629
599630func bloomFilterPath (datadir string ) string {
0 commit comments