Skip to content

Commit a11b4be

Browse files
authored
Revert "core/state/snapshot: simplify snapshot rebuild (ethereum#30772)" (ethereum#30810)
This reverts commit 2380012. The original pull request introduces a bug and some flaky tests are detected because of this flaw. ``` --- FAIL: TestRecoverSnapshotFromWipingCrash (0.27s) blockchain_snapshot_test.go:158: The disk layer is not integrated snapshot is not constructed {"pc":0,"op":88,"gas":"0x7148","gasCost":"0x2","memSize":0,"stack":[],"depth":1,"refund":0,"opName":"PC"} {"pc":1,"op":255,"gas":"0x7146","gasCost":"0x1db0","memSize":0,"stack":["0x0"],"depth":1,"refund":0,"opName":"SELFDESTRUCT"} {"output":"","gasUsed":"0x0"} {"output":"","gasUsed":"0x1db2"} {"pc":0,"op":116,"gas":"0x13498","gasCost":"0x3","memSize":0,"stack":[],"depth":1,"refund":0,"opName":"PUSH21"} ``` Before the original PR, the snapshot would block the function until the disk layer was fully generated under the following conditions: (a) explicitly required by users with `AsyncBuild = false`. (b) the snapshot was being fully rebuilt or *the disk layer generation had resumed*. Unfortunately, with the changes introduced in that PR, the snapshot no longer waits for disk layer generation to complete if the generation is resumed. It brings lots of uncertainty and breaks this tiny debug feature.
1 parent d7e7b54 commit a11b4be

File tree

1 file changed

+34
-14
lines changed

1 file changed

+34
-14
lines changed

core/state/snapshot/snapshot.go

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -206,24 +206,47 @@ func New(config Config, diskdb ethdb.KeyValueStore, triedb *triedb.Database, roo
206206
log.Warn("Snapshot maintenance disabled (syncing)")
207207
return snap, nil
208208
}
209+
// Create the building waiter iff the background generation is allowed
210+
if !config.NoBuild && !config.AsyncBuild {
211+
defer snap.waitBuild()
212+
}
209213
if err != nil {
210214
log.Warn("Failed to load snapshot", "err", err)
211-
if config.NoBuild {
212-
return nil, err
213-
}
214-
wait := snap.Rebuild(root)
215-
if !config.AsyncBuild {
216-
wait()
215+
if !config.NoBuild {
216+
snap.Rebuild(root)
217+
return snap, nil
217218
}
218-
return snap, nil
219+
return nil, err // Bail out the error, don't rebuild automatically.
219220
}
220221
// Existing snapshot loaded, seed all the layers
221-
for ; head != nil; head = head.Parent() {
222+
for head != nil {
222223
snap.layers[head.Root()] = head
224+
head = head.Parent()
223225
}
224226
return snap, nil
225227
}
226228

229+
// waitBuild blocks until the snapshot finishes rebuilding. This method is meant
230+
// to be used by tests to ensure we're testing what we believe we are.
231+
func (t *Tree) waitBuild() {
232+
// Find the rebuild termination channel
233+
var done chan struct{}
234+
235+
t.lock.RLock()
236+
for _, layer := range t.layers {
237+
if layer, ok := layer.(*diskLayer); ok {
238+
done = layer.genPending
239+
break
240+
}
241+
}
242+
t.lock.RUnlock()
243+
244+
// Wait until the snapshot is generated
245+
if done != nil {
246+
<-done
247+
}
248+
}
249+
227250
// Disable interrupts any pending snapshot generator, deletes all the snapshot
228251
// layers in memory and marks snapshots disabled globally. In order to resume
229252
// the snapshot functionality, the caller must invoke Rebuild.
@@ -665,9 +688,8 @@ func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
665688

666689
// Rebuild wipes all available snapshot data from the persistent database and
667690
// discard all caches and diff layers. Afterwards, it starts a new snapshot
668-
// generator with the given root hash. The returned function blocks until
669-
// regeneration is complete.
670-
func (t *Tree) Rebuild(root common.Hash) (wait func()) {
691+
// generator with the given root hash.
692+
func (t *Tree) Rebuild(root common.Hash) {
671693
t.lock.Lock()
672694
defer t.lock.Unlock()
673695

@@ -699,11 +721,9 @@ func (t *Tree) Rebuild(root common.Hash) (wait func()) {
699721
// Start generating a new snapshot from scratch on a background thread. The
700722
// generator will run a wiper first if there's not one running right now.
701723
log.Info("Rebuilding state snapshot")
702-
disk := generateSnapshot(t.diskdb, t.triedb, t.config.CacheSize, root)
703724
t.layers = map[common.Hash]snapshot{
704-
root: disk,
725+
root: generateSnapshot(t.diskdb, t.triedb, t.config.CacheSize, root),
705726
}
706-
return func() { <-disk.genPending }
707727
}
708728

709729
// AccountIterator creates a new account iterator for the specified root hash and

0 commit comments

Comments
 (0)