@@ -24,8 +24,8 @@ import (
24
24
"math"
25
25
"os"
26
26
"path/filepath"
27
- "runtime"
28
27
"sync"
28
+ "sync/atomic"
29
29
"time"
30
30
31
31
"github.com/ethereum/go-ethereum/common"
@@ -38,6 +38,7 @@ import (
38
38
"github.com/ethereum/go-ethereum/params"
39
39
"github.com/ethereum/go-ethereum/rlp"
40
40
"github.com/ethereum/go-ethereum/trie"
41
+ "github.com/ethereum/go-ethereum/trie/triedb/hashdb"
41
42
)
42
43
43
44
const (
@@ -56,8 +57,10 @@ const (
56
57
57
58
// Config holds the settings that control offline state pruning.
58
59
type Config struct {
59
- Datadir string // The directory of the state database
60
- BloomSize uint64 // The Megabytes of memory allocated to bloom-filter
60
+ Datadir string // Directory of the state database; also checked for an existing state bloom filter when pruning starts
61
+ BloomSize uint64 // Memory allocated to the bloom filter, in megabytes
62
+ Threads int // Maximum number of goroutines spawned by dumpRawTrieDescendants and removeOtherRoots; NewPruner raises values <= 0 to 1
63
+ CleanCacheSize int // Clean cache size, in megabytes, of the hash-based trie database opened by dumpRawTrieDescendants
61
64
}
62
65
63
66
// Pruner is an offline tool to prune the stale state with the
@@ -107,6 +110,10 @@ func NewPruner(db ethdb.Database, config Config) (*Pruner, error) {
107
110
if err != nil {
108
111
return nil , err
109
112
}
113
+ // sanitize threads number, if set too low
114
+ if config .Threads <= 0 {
115
+ config .Threads = 1
116
+ }
110
117
return & Pruner {
111
118
config : config ,
112
119
chainHeader : headBlock .Header (),
@@ -124,7 +131,7 @@ func readStoredChainConfig(db ethdb.Database) *params.ChainConfig {
124
131
return rawdb .ReadChainConfig (db , block0Hash )
125
132
}
126
133
127
- func removeOtherRoots (db ethdb.Database , rootsList []common.Hash , stateBloom * stateBloom ) error {
134
+ func removeOtherRoots (db ethdb.Database , rootsList []common.Hash , stateBloom * stateBloom , threads int ) error {
128
135
chainConfig := readStoredChainConfig (db )
129
136
var genesisBlockNum uint64
130
137
if chainConfig != nil {
@@ -139,7 +146,6 @@ func removeOtherRoots(db ethdb.Database, rootsList []common.Hash, stateBloom *st
139
146
return errors .New ("failed to load head block" )
140
147
}
141
148
blockRange := headBlock .NumberU64 () - genesisBlockNum
142
- threads := runtime .NumCPU ()
143
149
var wg sync.WaitGroup
144
150
errors := make (chan error , threads )
145
151
for thread := 0 ; thread < threads ; thread ++ {
@@ -207,7 +213,7 @@ func removeOtherRoots(db ethdb.Database, rootsList []common.Hash, stateBloom *st
207
213
}
208
214
209
215
// Arbitrum: snaptree and root are for the final snapshot kept
210
- func prune (snaptree * snapshot.Tree , allRoots []common.Hash , maindb ethdb.Database , stateBloom * stateBloom , bloomPath string , start time.Time ) error {
216
+ func prune (snaptree * snapshot.Tree , allRoots []common.Hash , maindb ethdb.Database , stateBloom * stateBloom , bloomPath string , start time.Time , threads int ) error {
211
217
// Delete all stale trie nodes in the disk. With the help of state bloom
212
218
// the trie nodes(and codes) belong to the active state will be filtered
213
219
// out. A very small part of stale tries will also be filtered because of
@@ -297,7 +303,7 @@ func prune(snaptree *snapshot.Tree, allRoots []common.Hash, maindb ethdb.Databas
297
303
}
298
304
299
305
// Clean up any false positives that are top-level state roots.
300
- err := removeOtherRoots (maindb , allRoots , stateBloom )
306
+ err := removeOtherRoots (maindb , allRoots , stateBloom , threads )
301
307
if err != nil {
302
308
return err
303
309
}
@@ -333,8 +339,16 @@ func prune(snaptree *snapshot.Tree, allRoots []common.Hash, maindb ethdb.Databas
333
339
}
334
340
335
341
// We assume state blooms do not need the value, only the key
336
- func dumpRawTrieDescendants (db ethdb.Database , root common.Hash , output * stateBloom ) error {
337
- sdb := state .NewDatabase (db )
342
+ func dumpRawTrieDescendants (db ethdb.Database , root common.Hash , output * stateBloom , config * Config ) error {
343
+ // Offline pruning is only supported in legacy hash based scheme.
344
+ hashConfig := * hashdb .Defaults
345
+ hashConfig .CleanCacheSize = config .CleanCacheSize * 1024 * 1024
346
+ trieConfig := & trie.Config {
347
+ Preimages : false ,
348
+ HashDB : & hashConfig ,
349
+ }
350
+ sdb := state .NewDatabaseWithConfig (db , trieConfig )
351
+ defer sdb .TrieDB ().Close ()
338
352
tr , err := sdb .OpenTrie (root )
339
353
if err != nil {
340
354
return err
@@ -350,11 +364,12 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
350
364
// To do so, we create a semaphore out of a channel's buffer.
351
365
// Before launching a new goroutine, we acquire the semaphore by taking an entry from this channel.
352
366
// This channel doubles as a mechanism for the background goroutine to report an error on release.
353
- threads := runtime . NumCPU ()
367
+ threads := config . Threads
354
368
results := make (chan error , threads )
355
369
for i := 0 ; i < threads ; i ++ {
356
370
results <- nil
357
371
}
372
+ var threadsRunning atomic.Int32
358
373
359
374
for accountIt .Next (true ) {
360
375
accountTrieHash := accountIt .Hash ()
@@ -385,7 +400,10 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
385
400
output .Put (data .CodeHash , nil )
386
401
}
387
402
if data .Root != (common.Hash {}) {
388
- storageTr , err := trie .NewStateTrie (trie .StorageTrieID (root , key , data .Root ), sdb .TrieDB ())
403
+ // note: we are passing data.Root as stateRoot here, to skip the check for stateRoot existence in trie.newTrieReader,
404
+ // we already check that when opening state trie and reading the account node
405
+ trieID := trie .StorageTrieID (data .Root , key , data .Root )
406
+ storageTr , err := trie .NewStateTrie (trieID , sdb .TrieDB ())
389
407
if err != nil {
390
408
return err
391
409
}
@@ -394,14 +412,20 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
394
412
return err
395
413
}
396
414
go func () {
415
+ threadsRunning .Add (1 )
416
+ defer threadsRunning .Add (- 1 )
397
417
var err error
398
418
defer func () {
399
419
results <- err
400
420
}()
421
+ threadStartedAt := time .Now ()
422
+ threadLastLog := time .Now ()
423
+
401
424
storageIt , err := storageTr .NodeIterator (nil )
402
425
if err != nil {
403
426
return
404
427
}
428
+ var processedNodes uint64
405
429
for storageIt .Next (true ) {
406
430
storageTrieHash := storageIt .Hash ()
407
431
if storageTrieHash != (common.Hash {}) {
@@ -411,6 +435,13 @@ func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBl
411
435
return
412
436
}
413
437
}
438
+ processedNodes ++
439
+ if time .Since (threadLastLog ) > 5 * time .Minute {
440
+ elapsedTotal := time .Since (startedAt )
441
+ elapsedThread := time .Since (threadStartedAt )
442
+ log .Info ("traversing trie database - traversing storage trie taking long" , "key" , key , "elapsedTotal" , elapsedTotal , "elapsedThread" , elapsedThread , "processedNodes" , processedNodes , "threadsRunning" , threadsRunning .Load ())
443
+ threadLastLog = time .Now ()
444
+ }
414
445
}
415
446
err = storageIt .Error ()
416
447
if err != nil {
@@ -445,7 +476,7 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
445
476
return err
446
477
}
447
478
if bloomExists {
448
- return RecoverPruning (p .config .Datadir , p .db )
479
+ return RecoverPruning (p .config .Datadir , p .db , p . config . Threads )
449
480
}
450
481
// Retrieve all snapshot layers from the current HEAD.
451
482
// In theory there are 128 difflayers + 1 disk layer present,
@@ -511,14 +542,14 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
511
542
return err
512
543
}
513
544
} else {
514
- if err := dumpRawTrieDescendants (p .db , root , p .stateBloom ); err != nil {
545
+ if err := dumpRawTrieDescendants (p .db , root , p .stateBloom , & p . config ); err != nil {
515
546
return err
516
547
}
517
548
}
518
549
}
519
550
// Traverse the genesis, put all genesis state entries into the
520
551
// bloom filter too.
521
- if err := extractGenesis (p .db , p .stateBloom ); err != nil {
552
+ if err := extractGenesis (p .db , p .stateBloom , & p . config ); err != nil {
522
553
return err
523
554
}
524
555
@@ -529,7 +560,7 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
529
560
return err
530
561
}
531
562
log .Info ("State bloom filter committed" , "name" , filterName , "roots" , roots )
532
- return prune (p .snaptree , roots , p .db , p .stateBloom , filterName , start )
563
+ return prune (p .snaptree , roots , p .db , p .stateBloom , filterName , start , p . config . Threads )
533
564
}
534
565
535
566
// RecoverPruning will resume the pruning procedure during the system restart.
@@ -539,7 +570,7 @@ func (p *Pruner) Prune(inputRoots []common.Hash) error {
539
570
// pruning can be resumed. What's more if the bloom filter is constructed, the
540
571
// pruning **has to be resumed**. Otherwise a lot of dangling nodes may be left
541
572
// in the disk.
542
- func RecoverPruning (datadir string , db ethdb.Database ) error {
573
+ func RecoverPruning (datadir string , db ethdb.Database , threads int ) error {
543
574
exists , err := bloomFilterExists (datadir )
544
575
if err != nil {
545
576
return err
@@ -578,12 +609,12 @@ func RecoverPruning(datadir string, db ethdb.Database) error {
578
609
}
579
610
log .Info ("Loaded state bloom filter" , "path" , stateBloomPath , "roots" , stateBloomRoots )
580
611
581
- return prune (snaptree , stateBloomRoots , db , stateBloom , stateBloomPath , time .Now ())
612
+ return prune (snaptree , stateBloomRoots , db , stateBloom , stateBloomPath , time .Now (), threads )
582
613
}
583
614
584
615
// extractGenesis loads the genesis state and commits all the state entries
585
616
// into the given bloomfilter.
586
- func extractGenesis (db ethdb.Database , stateBloom * stateBloom ) error {
617
+ func extractGenesis (db ethdb.Database , stateBloom * stateBloom , config * Config ) error {
587
618
genesisHash := rawdb .ReadCanonicalHash (db , 0 )
588
619
if genesisHash == (common.Hash {}) {
589
620
return errors .New ("missing genesis hash" )
@@ -593,7 +624,7 @@ func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
593
624
return errors .New ("missing genesis block" )
594
625
}
595
626
596
- return dumpRawTrieDescendants (db , genesis .Root (), stateBloom )
627
+ return dumpRawTrieDescendants (db , genesis .Root (), stateBloom , config )
597
628
}
598
629
599
630
func bloomFilterPath (datadir string ) string {
0 commit comments