Skip to content

Commit

Permalink
Monitoring the indexer service (#1386)
Browse files Browse the repository at this point in the history
* Monitoring the indexer service

* Update response

* Update graphql api to cloud

* Rename

* Fix alarm
  • Loading branch information
yrong authored Feb 20, 2025
1 parent 51a29ad commit 55acf5a
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 66 deletions.
2 changes: 1 addition & 1 deletion web/packages/api/src/environment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ export const SNOWBRIDGE_ENV: { [id: string]: SnowbridgeEnvironment } = {
ASSET_HUB_URL: "https://assethub-polkadot.api.subscan.io",
BRIDGE_HUB_URL: "https://bridgehub-polkadot.api.subscan.io",
},
GRAPHQL_API_URL: "https://data.snowbridge.network/graphql",
GRAPHQL_API_URL: "https://snowbridge.squids.live/snowbridge-subsquid@v2/api/graphql",
},
},
westend_sepolia: {
Expand Down
61 changes: 33 additions & 28 deletions web/packages/api/src/status.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { Context } from "./index"
import { fetchBeaconSlot, fetchFinalityUpdate, fetchEstimatedDeliveryTime } from "./utils"
import { fetchBeaconSlot, fetchFinalityUpdate } from "./utils"
import { fetchEstimatedDeliveryTime } from "./subsquid"
import { Relayer, SourceType } from "./environment"
import { ApiPromise } from "@polkadot/api"
import { IGateway } from "@snowbridge/contract-types"
Expand Down Expand Up @@ -61,25 +62,37 @@ export type ChannelStatusInfo = {

export type Sovereign = { name: string; account: string; balance: bigint; type: SourceType }

/**
 * Status entry of the indexer service for a single chain.
 * `AllMetrics.indexerStatus` carries an array of these entries.
 */
export type IndexerServiceStatusInfo = {
// Name identifying the chain this entry belongs to.
chain: string
// Latency reported for the indexer on this chain.
// NOTE(review): the unit (blocks vs. seconds) is not visible here — confirm against the producer.
latency: number
}

/**
 * Bundle of all monitoring data gathered for one bridge environment.
 */
export type AllMetrics = {
// Name of the environment these metrics belong to.
name: string
// Overall bridge status.
bridgeStatus: BridgeStatusInfo
// One status entry per monitored channel.
channels: ChannelStatusInfo[]
// Sovereign accounts together with their balances (see the Sovereign type).
sovereigns: Sovereign[]
// Relayer entries, as declared in ./environment.
relayers: Relayer[]
// One entry per chain tracked by the indexer service.
indexerStatus: IndexerServiceStatusInfo[]
}

export type OperationStatus = {
toEthereum: {
outbound: OperatingMode;
};
outbound: OperatingMode
}
toPolkadot: {
beacon: OperatingMode;
inbound: OperatingMode;
outbound: OperatingMode;
};
beacon: OperatingMode
inbound: OperatingMode
outbound: OperatingMode
}
}
export async function getOperatingStatus({ gateway, bridgeHub }: { gateway: IGateway, bridgeHub: ApiPromise }): Promise<OperationStatus> {
export async function getOperatingStatus({
gateway,
bridgeHub,
}: {
gateway: IGateway
bridgeHub: ApiPromise
}): Promise<OperationStatus> {
const ethereumOperatingMode = await gateway.operatingMode()
const beaconOperatingMode = (
await bridgeHub.query.ethereumBeaconClient.operatingMode()
Expand All @@ -99,7 +112,7 @@ export async function getOperatingStatus({ gateway, bridgeHub }: { gateway: IGat
beacon: beaconOperatingMode as OperatingMode,
inbound: inboundOperatingMode as OperatingMode,
outbound: ethereumOperatingMode === 0n ? "Normal" : ("Halted" as OperatingMode),
}
},
}
}

Expand All @@ -117,14 +130,12 @@ export const bridgeStatusInfo = async (
context.ethereum(),
context.gateway(),
context.beefyClient(),
context.relaychain()
context.relaychain(),
])

// Beefy status
const latestBeefyBlock = Number(await beefyClient.latestBeefyBlock())
const latestPolkadotBlock = (
await relaychain.query.system.number()
).toPrimitive() as number
const latestPolkadotBlock = (await relaychain.query.system.number()).toPrimitive() as number
const latestBeaconSlot = await ethereum.getBlockNumber()
const latestFinalizedBeefyBlock = (
await relaychain.rpc.chain.getHeader(
Expand Down Expand Up @@ -156,9 +167,7 @@ export const bridgeStatusInfo = async (
)
const beaconBlockLatency = latestBeaconBlock.data.message.slot - latestBeaconBlockOnPolkadot
const beaconLatencySeconds = beaconBlockLatency * options.ethereumBlockTimeInSeconds
const latestBridgeHubBlock = (
await bridgeHub.query.system.number()
).toPrimitive() as number
const latestBridgeHubBlock = (await bridgeHub.query.system.number()).toPrimitive() as number
const previousBridgeHubBlock = await bridgeHub.query.system.blockHash(
latestBridgeHubBlock > options.toPolkadotCheckIntervalInBlock
? latestBridgeHubBlock - options.toPolkadotCheckIntervalInBlock
Expand Down Expand Up @@ -211,8 +220,7 @@ export const channelStatusInfo = async (
context.gateway(),
])

const [inbound_nonce_eth, outbound_nonce_eth] =
await gateway.channelNoncesOf(channelId)
const [inbound_nonce_eth, outbound_nonce_eth] = await gateway.channelNoncesOf(channelId)
const operatingMode = await gateway.channelOperatingModeOf(channelId)
const inbound_nonce_sub = (
await bridgeHub.query.ethereumInboundQueue.nonce(channelId)
Expand All @@ -222,16 +230,16 @@ export const channelStatusInfo = async (
).toPrimitive() as number

const latestEthereumBlock = await ethereum.getBlockNumber()
const [previous_inbound_nonce_eth, previous_outbound_nonce_eth] =
await gateway.channelNoncesOf(channelId, {
const [previous_inbound_nonce_eth, previous_outbound_nonce_eth] = await gateway.channelNoncesOf(
channelId,
{
blockTag:
latestEthereumBlock > options.toEthereumCheckIntervalInBlock
? latestEthereumBlock - options.toEthereumCheckIntervalInBlock
: 100,
})
const latestBridgeHubBlock = (
await bridgeHub.query.system.number()
).toPrimitive() as number
}
)
const latestBridgeHubBlock = (await bridgeHub.query.system.number()).toPrimitive() as number
const previousBridgeHubBlock = await bridgeHub.query.system.blockHash(
latestBridgeHubBlock > options.toPolkadotCheckIntervalInBlock
? latestBridgeHubBlock - options.toPolkadotCheckIntervalInBlock
Expand All @@ -248,10 +256,7 @@ export const channelStatusInfo = async (
let estimatedDeliveryTime: any
if (context.config.graphqlApiUrl) {
try {
estimatedDeliveryTime = await fetchEstimatedDeliveryTime(
context.config.graphqlApiUrl,
channelId
)
estimatedDeliveryTime = await fetchEstimatedDeliveryTime(channelId)
} catch (e: any) {
console.error("estimate api error:" + e.message)
}
Expand Down
40 changes: 39 additions & 1 deletion web/packages/api/src/subsquid.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
const graphqlApiUrl = process.env["GRAPHQL_API_URL"] || "https://data.snowbridge.network/graphql"
const graphqlApiUrl =
process.env["GRAPHQL_API_URL"] ||
"https://snowbridge.squids.live/snowbridge-subsquid@v2/api/graphql"
const graphqlQuerySize = process.env["GRAPHQL_QUERY_SIZE"] || "100"

/**
Expand Down Expand Up @@ -416,3 +418,39 @@ export const fetchToEthereumTransferById = async (id: string) => {
let result = await queryByGraphQL(query)
return result?.transferStatusToEthereums
}

/**
 * Query the most recently synced blocks on multiple chains.
 *
 * Example request and response:
curl -H 'Content-Type: application/json' \
-X POST -d \
'{ "query": "query { latestBlocks { height name } }" }' \
$graphqlApiUrl --no-progress-meter | jq "."
{
"data": {
"latestBlocks": [
{
"height": 8245566,
"name": "assethub"
},
{
"height": 4561260,
"name": "bridgehub"
},
{
"height": 21878012,
"name": "ethereum"
}
]
}
}
 *
 * @returns Whatever `queryByGraphQL` resolves to for the `latestBlocks` query,
 * passed through unchanged (the `latestBlocks` member is NOT unwrapped here).
 */
export const fetchLatestBlocksSynced = async () => {
    const latestBlocksQuery = `query { latestBlocks {
height
name
}}`
    const response = await queryByGraphQL(latestBlocksQuery)
    return response
}
14 changes: 0 additions & 14 deletions web/packages/api/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,20 +118,6 @@ export const fetchFinalityUpdate = async (
}
}

/**
 * POST a GraphQL query asking for the `elapse` value of a channel in both
 * directions (`toEthereumElapse` and `toPolkadotElapse`).
 *
 * @param graphqlUrl - Endpoint the query is sent to.
 * @param channelId - Channel id interpolated verbatim into both query fields.
 * @returns The `data` member of the parsed JSON response; `undefined` when the
 * parsed response is nullish or has no `data`.
 */
export const fetchEstimatedDeliveryTime = async (graphqlUrl: string, channelId: string) => {
    const query = `query { toEthereumElapse(channelId:"${channelId}") { elapse } toPolkadotElapse(channelId:"${channelId}") { elapse } }`
    const request = {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ query }),
    }
    const response = await fetch(graphqlUrl, request)
    const payload = await response.json()
    return payload?.data
}

export const getEventIndex = (id: string) => {
let parts = id.split("-")
let blockNumber = parseInt(parts[0])
Expand Down
42 changes: 41 additions & 1 deletion web/packages/operations/src/alarm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export enum AlarmReason {
ToPolkadotNoTransfer = "ToPolkadotNoTransfer",
ToEthereumChannelAttacked = "ToEthereumChannelAttacked",
ToPolkadotChannelAttacked = "ToPolkadotChannelAttacked",
IndexServiceStale = "IndexServiceStale",
}

export const InsufficientBalanceThreshold = {
Expand All @@ -44,7 +45,7 @@ export const BlockLatencyThreshold = {
// Syncing BEEFY finality update every 4 hours (1200 Ethereum blocks); leave some buffer here
ToEthereum: process.env["BlockLatencyToEthereum"]
? parseInt(process.env["BlockLatencyToEthereum"])
: 2400,
: 1800,
// Syncing beacon finality update every 6.4 minutes (64 Substrate blocks); leave some buffer here
ToPolkadot: process.env["BlockLatencyToPolkadot"]
? parseInt(process.env["BlockLatencyToPolkadot"])
Expand All @@ -70,6 +71,10 @@ export const AlarmEvaluationConfiguration = {
},
}

/**
 * Parse the indexer latency threshold from its raw environment value.
 *
 * An unset, empty or non-numeric value yields `fallback`. Without this guard a
 * malformed value would parse to NaN, and since `latency > NaN` is always false
 * the stale-indexer check would be silently disabled.
 *
 * @param raw - Raw environment value, if any.
 * @param fallback - Threshold used when `raw` is missing or not a number.
 * @returns The parsed base-10 integer, or `fallback`.
 */
function parseIndexerLatencyThreshold(raw: string | undefined, fallback: number = 150): number {
    if (!raw) {
        return fallback
    }
    const parsed = parseInt(raw, 10)
    return Number.isNaN(parsed) ? fallback : parsed
}

/**
 * Latency above which an indexer chain is considered stale. Overridable via the
 * `IndexerLatencyThreshold` environment variable; defaults to 150.
 * NOTE(review): same unit as `IndexerServiceStatusInfo.latency` — unit not visible here.
 */
export const IndexerLatencyThreshold = parseIndexerLatencyThreshold(
    process.env["IndexerLatencyThreshold"]
)

export const sendMetrics = async (metrics: status.AllMetrics) => {
let client = new CloudWatchClient({})
let metricData = []
Expand Down Expand Up @@ -290,6 +295,27 @@ export const sendMetrics = async (metrics: status.AllMetrics) => {
})
}
}
// Publish the indexer latency of every chain and record whether any of them
// exceeds IndexerLatencyThreshold. The loop deliberately does not stop at the
// first stale chain: every remaining chain still needs its IndexerLatency data
// point for this cycle.
let indexerStale = false
for (const status of metrics.indexerStatus) {
    metricData.push({
        MetricName: "IndexerLatency",
        Dimensions: [
            {
                Name: "ChainName",
                Value: status.chain,
            },
        ],
        Value: Number(status.latency),
    })
    if (status.latency > IndexerLatencyThreshold) {
        indexerStale = true
    }
}
// Single 0/1 metric: 1 when at least one chain is stale.
metricData.push({
    MetricName: AlarmReason.IndexServiceStale.toString(),
    Value: Number(indexerStale),
})
const command = new PutMetricDataCommand({
MetricData: metricData,
Namespace: CLOUD_WATCH_NAME_SPACE + "-" + metrics.name,
Expand Down Expand Up @@ -448,4 +474,18 @@ export const initializeAlarms = async () => {
...alarmCommandSharedInput,
})
await client.send(accountBalanceAlarm)

// Alarm for indexer service
let indexerAlarm = new PutMetricAlarmCommand({
AlarmName: AlarmReason.IndexServiceStale.toString() + "-" + name,
MetricName: AlarmReason.IndexServiceStale.toString(),
AlarmDescription: AlarmReason.IndexServiceStale.toString(),
Statistic: "Average",
ComparisonOperator: "GreaterThanThreshold",
AlarmActions: [BRIDGE_STALE_SNS_TOPIC],
EvaluationPeriods: 3,
Period: 1800,
...alarmCommandSharedInput,
})
await client.send(indexerAlarm)
}
Loading

0 comments on commit 55acf5a

Please sign in to comment.