Skip to content

Commit

Permalink
feat: graceful and alertable relayer reorg detection (#5401)
Browse files Browse the repository at this point in the history
### Description

- Only panics in the validator if there is a reorg detected
- Mostly solves hyperlane-xyz/issues#1394
(does not have super clear alert conditions for a reorg occurring, this
should come later)
- Whereas before we would use `Result<Option<Vec<u8>>` as a return type
for metadata building, where the Result indicates some error occurred
(e.g. an RPC issue or something), Ok(Some(_)) meant that metadata
building was successful, and Ok(None) meant that no error occurred but
simply that fetching metadata wasn't successful -- we now have a new
`Metadata` enum:
```
#[derive(Clone, Debug)]
pub enum Metadata {
    /// Able to fetch metadata
    Ok(Vec<u8>),
    /// Unable to fetch metadata, but no error occurred
    CouldNotFetch,
    /// While building metadata, encountered something that should
    /// prohibit all metadata for the message from being built.
    /// Provides the reason for the refusal.
    Refused(String),
}
```
- When we are building metadata and encounter a reorg flag in a
validator that's used by the message, we handle this and return
`Metadata::MetadataBuildingRefused`. The aggregation ISM will propagate
this to make sure that we have end behavior of flatly refusing to build
metadata for a message even if a nested metadata builder encounters
this. If this happens, a new reprepare reason `MessageMetadataRefused`
is introduced that's used.
- e2e test is added to confirm this behavior

### Drive-by changes

<!--
Are there any minor or drive-by changes also included?
-->

### Related issues

<!--
- Fixes #[issue number here]
-->

### Backward compatibility

<!--
Are these changes backward compatible? Are there any infrastructure
implications, e.g. changes that would prohibit deploying older commits
using this infra tooling?

Yes/No
-->

### Testing

e2e, some unit

---------

Co-authored-by: Daniel Savu <[email protected]>
  • Loading branch information
tkporter and daniel-savu authored Feb 10, 2025
1 parent 23e1f94 commit dfd9e9f
Show file tree
Hide file tree
Showing 16 changed files with 242 additions and 119 deletions.
28 changes: 19 additions & 9 deletions rust/main/agents/relayer/src/msg/metadata/aggregation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use tracing::{info, instrument};

use hyperlane_core::{HyperlaneMessage, InterchainSecurityModule, ModuleType, H256, U256};

use super::{MessageMetadataBuilder, MetadataBuilder};
use super::{MessageMetadataBuilder, Metadata, MetadataBuilder};

/// Bytes used to store one member of the (start, end) range tuple
/// Copied from `AggregationIsmMetadata.sol`
Expand Down Expand Up @@ -119,11 +119,7 @@ impl AggregationIsmMetadataBuilder {
impl MetadataBuilder for AggregationIsmMetadataBuilder {
#[instrument(err, skip(self, message), ret)]
#[allow(clippy::blocks_in_conditions)] // TODO: `rustc` 1.80.1 clippy issue
async fn build(
&self,
ism_address: H256,
message: &HyperlaneMessage,
) -> eyre::Result<Option<Vec<u8>>> {
async fn build(&self, ism_address: H256, message: &HyperlaneMessage) -> eyre::Result<Metadata> {
const CTX: &str = "When fetching AggregationIsm metadata";
let ism = self.build_aggregation_ism(ism_address).await.context(CTX)?;
let (ism_addresses, threshold) = ism.modules_and_threshold(message).await.context(CTX)?;
Expand All @@ -136,6 +132,18 @@ impl MetadataBuilder for AggregationIsmMetadataBuilder {
)
.await;

// If any inner ISMs are refusing to build metadata, we propagate just the first refusal.
if let Some(first_refusal) = sub_modules_and_metas.iter().find_map(|result| {
result.as_ref().ok().and_then(|sub_module_and_meta| {
match &sub_module_and_meta.metadata {
Metadata::Refused(reason) => Some(Metadata::Refused(reason.clone())),
_ => None,
}
})
}) {
return Ok(first_refusal);
}

// Partitions things into
// 1. ok_sub_modules: ISMs with metadata with valid metadata
// 2. err_sub_modules: ISMs with invalid metadata
Expand All @@ -145,19 +153,21 @@ impl MetadataBuilder for AggregationIsmMetadataBuilder {
.enumerate()
.partition_map(|(index, (result, ism_address))| match result {
Ok(sub_module_and_meta) => match sub_module_and_meta.metadata {
Some(metadata) => Either::Left(IsmAndMetadata::new(
Metadata::Found(metadata) => Either::Left(IsmAndMetadata::new(
sub_module_and_meta.ism,
index,
metadata,
)),
None => Either::Right((*ism_address, Some(sub_module_and_meta.module_type))),
_ => Either::Right((*ism_address, Some(sub_module_and_meta.module_type))),
},
Err(_) => Either::Right((*ism_address, None)),
});
let maybe_aggregation_metadata =
Self::cheapest_valid_metas(ok_sub_modules, message, threshold, err_sub_modules)
.await
.map(|mut metas| Self::format_metadata(&mut metas, ism_addresses.len()));
.map_or(Metadata::CouldNotFetch, |mut metas| {
Metadata::Found(Self::format_metadata(&mut metas, ism_addresses.len()))
});
Ok(maybe_aggregation_metadata)
}
}
Expand Down
35 changes: 25 additions & 10 deletions rust/main/agents/relayer/src/msg/metadata/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ use crate::{
use async_trait::async_trait;
use derive_new::new;
use eyre::{Context, Result};
use hyperlane_base::db::{HyperlaneDb, HyperlaneRocksDB};
use hyperlane_base::{
db::{HyperlaneDb, HyperlaneRocksDB},
settings::CheckpointSyncerBuildError,
};
use hyperlane_base::{
settings::{ChainConf, CheckpointSyncerConf},
CheckpointSyncer, CoreMetrics, MultisigCheckpointSyncer,
Expand All @@ -44,17 +47,28 @@ pub enum MetadataBuilderError {
MaxDepthExceeded(u32),
}

#[derive(Clone, Debug)]
pub enum Metadata {
/// Able to fetch metadata
Found(Vec<u8>),
/// Unable to fetch metadata, but no error occurred
CouldNotFetch,
/// While building metadata, encountered something that should
/// prohibit all metadata for the message from being built.
/// Provides the reason for the refusal.
Refused(String),
}

#[derive(Debug)]
pub struct IsmWithMetadataAndType {
pub ism: Box<dyn InterchainSecurityModule>,
pub metadata: Option<Vec<u8>>,
pub metadata: Metadata,
pub module_type: ModuleType,
}

#[async_trait]
pub trait MetadataBuilder: Send + Sync {
async fn build(&self, ism_address: H256, message: &HyperlaneMessage)
-> Result<Option<Vec<u8>>>;
async fn build(&self, ism_address: H256, message: &HyperlaneMessage) -> Result<Metadata>;
}

/// Allows fetching the default ISM, caching the value for a period of time
Expand Down Expand Up @@ -190,11 +204,7 @@ impl Deref for MessageMetadataBuilder {
#[async_trait]
impl MetadataBuilder for MessageMetadataBuilder {
#[instrument(err, skip(self, message), fields(destination_domain=self.destination_domain().name()))]
async fn build(
&self,
ism_address: H256,
message: &HyperlaneMessage,
) -> Result<Option<Vec<u8>>> {
async fn build(&self, ism_address: H256, message: &HyperlaneMessage) -> Result<Metadata> {
self.build_ism_and_metadata(ism_address, message)
.await
.map(|ism_with_metadata| ism_with_metadata.metadata)
Expand Down Expand Up @@ -370,7 +380,7 @@ impl BaseMetadataBuilder {
message: &HyperlaneMessage,
validators: &[H256],
app_context: Option<String>,
) -> Result<MultisigCheckpointSyncer> {
) -> Result<MultisigCheckpointSyncer, CheckpointSyncerBuildError> {
let storage_locations = self
.origin_validator_announce
.get_announced_storage_locations(validators)
Expand Down Expand Up @@ -416,6 +426,11 @@ impl BaseMetadataBuilder {
checkpoint_syncers.insert(validator.into(), checkpoint_syncer.into());
break;
}
Err(CheckpointSyncerBuildError::ReorgEvent(reorg_event)) => {
// If a reorg event has been posted to a checkpoint syncer,
// we refuse to build
return Err(CheckpointSyncerBuildError::ReorgEvent(reorg_event));
}
Err(err) => {
debug!(
error=%err,
Expand Down
18 changes: 7 additions & 11 deletions rust/main/agents/relayer/src/msg/metadata/ccip_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize};
use serde_json::json;
use tracing::{info, instrument};

use super::{base::MessageMetadataBuilder, MetadataBuilder};
use super::{base::MessageMetadataBuilder, Metadata, MetadataBuilder};

#[derive(Serialize, Deserialize)]
struct OffchainResponse {
Expand All @@ -28,11 +28,7 @@ pub struct CcipReadIsmMetadataBuilder {
#[async_trait]
impl MetadataBuilder for CcipReadIsmMetadataBuilder {
#[instrument(err, skip(self, message))]
async fn build(
&self,
ism_address: H256,
message: &HyperlaneMessage,
) -> eyre::Result<Option<Vec<u8>>> {
async fn build(&self, ism_address: H256, message: &HyperlaneMessage) -> eyre::Result<Metadata> {
const CTX: &str = "When fetching CcipRead metadata";
let ism = self.build_ccip_read_ism(ism_address).await.context(CTX)?;

Expand All @@ -42,15 +38,15 @@ impl MetadataBuilder for CcipReadIsmMetadataBuilder {
let info: OffchainLookup = match response {
Ok(_) => {
info!("incorrectly configured getOffchainVerifyInfo, expected revert");
return Ok(None);
return Ok(Metadata::CouldNotFetch);
}
Err(raw_error) => {
let matching_regex = Regex::new(r"0x[[:xdigit:]]+")?;
if let Some(matching) = &matching_regex.captures(&raw_error.to_string()) {
OffchainLookup::decode(hex_decode(&matching[0][2..])?)?
} else {
info!("unable to parse custom error out of revert");
return Ok(None);
info!(?raw_error, "unable to parse custom error out of revert");
return Ok(Metadata::CouldNotFetch);
}
}
};
Expand Down Expand Up @@ -85,7 +81,7 @@ impl MetadataBuilder for CcipReadIsmMetadataBuilder {
Ok(result) => {
// remove leading 0x which hex_decode doesn't like
let metadata = hex_decode(&result.data[2..])?;
return Ok(Some(metadata));
return Ok(Metadata::Found(metadata));
}
Err(_err) => {
// try the next URL
Expand All @@ -94,6 +90,6 @@ impl MetadataBuilder for CcipReadIsmMetadataBuilder {
}

// No metadata endpoints or endpoints down
Ok(None)
Ok(Metadata::CouldNotFetch)
}
}
4 changes: 2 additions & 2 deletions rust/main/agents/relayer/src/msg/metadata/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ mod null_metadata;
mod routing;

use aggregation::AggregationIsmMetadataBuilder;
pub(crate) use base::MetadataBuilder;
pub(crate) use base::{
AppContextClassifier, BaseMetadataBuilder, IsmAwareAppContextClassifier, MessageMetadataBuilder,
AppContextClassifier, BaseMetadataBuilder, IsmAwareAppContextClassifier,
MessageMetadataBuilder, Metadata, MetadataBuilder,
};
use ccip_read::CcipReadIsmMetadataBuilder;
use null_metadata::NullMetadataBuilder;
Expand Down
30 changes: 19 additions & 11 deletions rust/main/agents/relayer/src/msg/metadata/multisig/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use derive_new::new;
use ethers::abi::Token;

use eyre::{Context, Result};
use hyperlane_base::settings::CheckpointSyncerBuildError;
use hyperlane_base::MultisigCheckpointSyncer;
use hyperlane_core::accumulator::merkle::Proof;
use hyperlane_core::{HyperlaneMessage, MultisigSignedCheckpoint, H256};
Expand All @@ -14,7 +15,7 @@ use tracing::{debug, info};

use crate::msg::metadata::base::MessageMetadataBuilder;

use crate::msg::metadata::MetadataBuilder;
use crate::msg::metadata::{Metadata, MetadataBuilder};

#[derive(new, AsRef, Deref)]
pub struct MultisigMetadata {
Expand Down Expand Up @@ -93,11 +94,7 @@ pub trait MultisigIsmMetadataBuilder: AsRef<MessageMetadataBuilder> + Send + Syn

#[async_trait]
impl<T: MultisigIsmMetadataBuilder> MetadataBuilder for T {
async fn build(
&self,
ism_address: H256,
message: &HyperlaneMessage,
) -> Result<Option<Vec<u8>>> {
async fn build(&self, ism_address: H256, message: &HyperlaneMessage) -> Result<Metadata> {
const CTX: &str = "When fetching MultisigIsm metadata";
let multisig_ism = self
.as_ref()
Expand All @@ -112,30 +109,41 @@ impl<T: MultisigIsmMetadataBuilder> MetadataBuilder for T {

if validators.is_empty() {
info!("Could not fetch metadata: No validator set found for ISM");
return Ok(None);
return Ok(Metadata::CouldNotFetch);
}

info!(hyp_message=?message, ?validators, threshold, "List of validators and threshold for message");

let checkpoint_syncer = self
let checkpoint_syncer = match self
.as_ref()
.build_checkpoint_syncer(message, &validators, self.as_ref().app_context.clone())
.await
.context(CTX)?;
{
Ok(syncer) => syncer,
Err(CheckpointSyncerBuildError::ReorgEvent(reorg_event)) => {
return Ok(Metadata::Refused(format!(
"A reorg event occurred {:?}",
reorg_event
)));
}
Err(e) => {
return Err(e).context(CTX);
}
};

if let Some(metadata) = self
.fetch_metadata(&validators, threshold, message, &checkpoint_syncer)
.await
.context(CTX)?
{
debug!(hyp_message=?message, ?metadata.checkpoint, "Found checkpoint with quorum");
Ok(Some(self.format_metadata(metadata)?))
Ok(Metadata::Found(self.format_metadata(metadata)?))
} else {
info!(
hyp_message=?message, ?validators, threshold, ism=%multisig_ism.address(),
"Could not fetch metadata: Unable to reach quorum"
);
Ok(None)
Ok(Metadata::CouldNotFetch)
}
}
}
6 changes: 3 additions & 3 deletions rust/main/agents/relayer/src/msg/metadata/null_metadata.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::MetadataBuilder;
use super::{Metadata, MetadataBuilder};
use async_trait::async_trait;
use derive_new::new;
use tracing::instrument;
Expand All @@ -16,7 +16,7 @@ impl MetadataBuilder for NullMetadataBuilder {
&self,
_ism_address: H256,
_message: &HyperlaneMessage,
) -> eyre::Result<Option<Vec<u8>>> {
Ok(Some(vec![]))
) -> eyre::Result<Metadata> {
Ok(Metadata::Found(vec![]))
}
}
8 changes: 2 additions & 6 deletions rust/main/agents/relayer/src/msg/metadata/routing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use eyre::Context;
use hyperlane_core::{HyperlaneMessage, H256};
use tracing::instrument;

use super::{MessageMetadataBuilder, MetadataBuilder};
use super::{MessageMetadataBuilder, Metadata, MetadataBuilder};

#[derive(Clone, Debug, new, Deref)]
pub struct RoutingIsmMetadataBuilder {
Expand All @@ -16,11 +16,7 @@ pub struct RoutingIsmMetadataBuilder {
impl MetadataBuilder for RoutingIsmMetadataBuilder {
#[instrument(err, skip(self, message), ret)]
#[allow(clippy::blocks_in_conditions)] // TODO: `rustc` 1.80.1 clippy issue
async fn build(
&self,
ism_address: H256,
message: &HyperlaneMessage,
) -> eyre::Result<Option<Vec<u8>>> {
async fn build(&self, ism_address: H256, message: &HyperlaneMessage) -> eyre::Result<Metadata> {
const CTX: &str = "When fetching RoutingIsm metadata";
let ism = self.build_routing_ism(ism_address).await.context(CTX)?;
let module = ism.route(message).await.context(CTX)?;
Expand Down
6 changes: 3 additions & 3 deletions rust/main/agents/relayer/src/msg/op_queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,12 +376,12 @@ pub mod test {

fn initialize_queue(broadcaster: &sync::broadcast::Sender<MessageRetryRequest>) -> OpQueue {
let (metrics, queue_metrics_label) = dummy_metrics_and_label();
let op_queue = OpQueue::new(

OpQueue::new(
metrics.clone(),
queue_metrics_label.clone(),
Arc::new(Mutex::new(broadcaster.subscribe())),
);
op_queue
)
}

fn generate_test_messages(
Expand Down
22 changes: 16 additions & 6 deletions rust/main/agents/relayer/src/msg/pending_message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument

use super::{
gas_payment::{GasPaymentEnforcer, GasPolicyStatus},
metadata::{BaseMetadataBuilder, MessageMetadataBuilder, MetadataBuilder},
metadata::{BaseMetadataBuilder, MessageMetadataBuilder, Metadata, MetadataBuilder},
};

pub const CONFIRM_DELAY: Duration = if cfg!(any(test, feature = "test-utils")) {
Expand Down Expand Up @@ -273,10 +273,20 @@ impl PendingOperation for PendingMessage {
return self.on_reprepare(Some(err), ReprepareReason::ErrorBuildingMetadata);
}
};
self.metadata = metadata.clone();

let Some(metadata) = metadata else {
return self.on_reprepare::<String>(None, ReprepareReason::CouldNotFetchMetadata);
let metadata_bytes = match metadata {
Metadata::Found(metadata_bytes) => {
self.metadata = Some(metadata_bytes.clone());
metadata_bytes
}
Metadata::CouldNotFetch => {
return self.on_reprepare::<String>(None, ReprepareReason::CouldNotFetchMetadata);
}
// If the metadata building is refused, we still allow it to be retried later.
Metadata::Refused(reason) => {
warn!(?reason, "Metadata building refused");
return self.on_reprepare::<String>(None, ReprepareReason::MessageMetadataRefused);
}
};

// Estimate transaction costs for the process call. If there are issues, it's
Expand All @@ -286,7 +296,7 @@ impl PendingOperation for PendingMessage {
let tx_cost_estimate = match self
.ctx
.destination_mailbox
.process_estimate_costs(&self.message, &metadata)
.process_estimate_costs(&self.message, &metadata_bytes)
.await
{
Ok(tx_cost_estimate) => tx_cost_estimate,
Expand Down Expand Up @@ -334,7 +344,7 @@ impl PendingOperation for PendingMessage {
}

self.submission_data = Some(Box::new(MessageSubmissionData {
metadata,
metadata: metadata_bytes,
gas_limit,
}));
PendingOperationResult::Success
Expand Down
Loading

0 comments on commit dfd9e9f

Please sign in to comment.