From 379672fc4bc788b25b250b28fa15eb411603b3fa Mon Sep 17 00:00:00 2001 From: Vitaly Terekhov Date: Wed, 11 Dec 2024 13:56:01 +0000 Subject: [PATCH] fix(collator): avoid 2 parallel shard collations after sync in some cases --- collator/src/manager/mod.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/collator/src/manager/mod.rs b/collator/src/manager/mod.rs index 9b3905fe9..0ade086f9 100644 --- a/collator/src/manager/mod.rs +++ b/collator/src/manager/mod.rs @@ -1335,6 +1335,8 @@ where ); } + Self::reset_collation_sync_status(&mut self.collation_sync_state.lock()); + // TODO: refactor this logic // replace last collated block id with last applied self.blocks_cache @@ -1894,6 +1896,20 @@ where } } + /// Reset collation status from `WaitForMasterStatus` to `AttemptsInProgress` for every shard. + /// + /// Use this method before resuming collation after sync to avoid ambiguous situations. + /// If any shard has collation status `WaitForMasterStatus` and sync was executed, then + /// when the master collation check finishes first, it enqueues one more resume for the shard, + /// so there will be two parallel collations for the shard, which causes a panic further on. + fn reset_collation_sync_status(guard: &mut CollationSyncState) { + for (_, collation_state) in guard.states.iter_mut() { + if collation_state.status == CollationStatus::WaitForMasterStatus { + collation_state.status = CollationStatus::AttemptsInProgress; + } + } + } + /// 1. Store collation status for current shard /// 2. Detect the next step: wait for master status, resume attempts, run master collation fn detect_next_collation_step(