From 6731ba9844379d972312d211a04493557623e722 Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 29 Oct 2024 10:05:26 +1000 Subject: [PATCH] Exit when snap sync needs user intervention --- crates/subspace-service/src/lib.rs | 5 +- .../src/sync_from_dsn/snap_sync.rs | 49 +++++++++++++++++-- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/crates/subspace-service/src/lib.rs b/crates/subspace-service/src/lib.rs index f35396a318..009a82ccfb 100644 --- a/crates/subspace-service/src/lib.rs +++ b/crates/subspace-service/src/lib.rs @@ -1113,7 +1113,10 @@ where Box::pin(async move { // Run snap-sync before DSN-sync. if config.sync == ChainSyncMode::Snap { - snap_sync_task.await; + if let Err(error) = snap_sync_task.await { + error!(%error, "Snap sync exited with a fatal error"); + return; + } } if let Err(error) = worker.await { diff --git a/crates/subspace-service/src/sync_from_dsn/snap_sync.rs b/crates/subspace-service/src/sync_from_dsn/snap_sync.rs index 752a0090d3..312e77eb34 100644 --- a/crates/subspace-service/src/sync_from_dsn/snap_sync.rs +++ b/crates/subspace-service/src/sync_from_dsn/snap_sync.rs @@ -12,7 +12,6 @@ use sc_consensus_subspace::archiver::{decode_block, SegmentHeadersStore}; use sc_network::{NetworkBlock, PeerId}; use sc_network_sync::service::network::NetworkServiceHandle; use sc_network_sync::SyncingService; -use sc_service::Error; use sp_api::ProvideRuntimeApi; use sp_blockchain::HeaderBackend; use sp_consensus::BlockOrigin; @@ -31,6 +30,35 @@ use subspace_networking::Node; use tokio::time::sleep; use tracing::{debug, error}; +/// Error type for snap sync. +#[derive(thiserror::Error, Debug)] +pub enum Error { + /// A fatal snap sync error which requires user intervention. + /// Most snap sync errors are non-fatal, because we can just continue with regular sync. + #[error("Snap Sync requires user action: {0}")] + SnapSyncImpossible(String), + + /// Substrate service error. + #[error(transparent)] + Sub(#[from] sc_service::Error), + + /// Substrate blockchain client error. + #[error(transparent)] + Client(#[from] sp_blockchain::Error), + + /// Other. + #[error("Snap sync error: {0}")] + Other(String), +} + +impl From for Error { + fn from(error: String) -> Self { + Error::Other(error) + } +} + +/// Run a snap sync, return an error if snap sync is impossible and user intervention is required. +/// Otherwise, just log the error and return `Ok(())` so that regular sync continues. #[allow(clippy::too_many_arguments)] pub(crate) async fn snap_sync( segment_headers_store: SegmentHeadersStore, @@ -43,7 +71,8 @@ pub(crate) async fn snap_sync( sync_service: Arc>, network_service_handle: NetworkServiceHandle, erasure_coding: ErasureCoding, -) where +) -> Result<(), Error> +where Block: BlockT, AS: AuxStore, Client: HeaderBackend @@ -81,6 +110,13 @@ pub(crate) async fn snap_sync( debug!("Snap sync finished successfully"); } Err(error) => { + // A fatal snap sync error requiring user intervention. The caller will log this error, + // so just return it to terminate the task. + if matches!(error, Error::SnapSyncImpossible(_)) { + return Err(error); + } + + // Other errors are non-fatal error!(%error, "Snap sync failed"); } } @@ -95,6 +131,8 @@ pub(crate) async fn snap_sync( } else { debug!("Snap sync can only work with genesis state, skipping"); } + + Ok(()) } // Get blocks from the last segment or from the segment containing the target block. @@ -166,11 +204,12 @@ where // We don't have the genesis state when we choose to snap sync. if target_segment_index <= SegmentIndex::ONE { - error!( + // The caller logs this error + return Err(Error::SnapSyncImpossible( "Snap sync is impossible - not enough archived history: \ wipe the DB folder and rerun with --sync=full" - ); - return Err("Snap sync is impossible - not enough archived history".into()); + .into(), + )); } // Identify all segment headers that would need to be reconstructed in order to get first