Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: add dora metrics #225

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 49 additions & 1 deletion clients/consensus/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ import (
"time"

v1 "github.com/attestantio/go-eth2-client/api/v1"
"github.com/ethpandaops/dora/metrics"
"github.com/ethpandaops/ethwallclock"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/sirupsen/logrus"
"golang.org/x/exp/rand"
)
Expand All @@ -19,12 +22,16 @@ type Pool struct {
}

func NewPool(ctx context.Context, logger logrus.FieldLogger) *Pool {
return &Pool{
pool := &Pool{
ctx: ctx,
logger: logger,
clients: make([]*Client, 0),
chainState: newChainState(),
}

pool.registerMetrics()

return pool
}

func (pool *Pool) SubscribeFinalizedEvent(capacity int) *Subscription[*v1.Finality] {
Expand Down Expand Up @@ -97,3 +104,44 @@ func (pool *Pool) AwaitReadyEndpoint(ctx context.Context, clientType ClientType)
}
}
}

// registerMetrics creates the consensus pool gauges and hooks a refresh
// callback into the metrics package.
//
// Four gauges are created through promauto: the total number of clients in
// the pool and how many of them are flagged online, syncing and optimistic.
// The gauges are not updated when a client changes; instead the callback
// handed to metrics.AddPreCollectFn recounts from pool.clients every time it
// is invoked (presumably right before each metrics collection — confirm
// against the metrics package).
//
// NOTE(review): the callback reads pool.clients and the per-client flags
// without any synchronization visible in this function — confirm that this is
// safe with respect to whatever mutates them.
func (pool *Pool) registerMetrics() {
	// Small factory so each gauge is declared on a single line.
	newGauge := func(name, help string) prometheus.Gauge {
		return promauto.NewGauge(prometheus.GaugeOpts{
			Name: name,
			Help: help,
		})
	}

	totalGauge := newGauge("dora_cl_pool_clients", "Number of consensus clients")
	onlineGauge := newGauge("dora_cl_pool_clients_online", "Number of consensus clients online")
	syncingGauge := newGauge("dora_cl_pool_clients_syncing", "Number of consensus clients syncing")
	optimisticGauge := newGauge("dora_cl_pool_clients_optimistic", "Number of consensus clients optimistic")

	refresh := func() {
		var onlineCount, syncingCount, optimisticCount int

		// The flags are independent of each other, so one client may be
		// counted in several buckets.
		for _, c := range pool.clients {
			if c.isOnline {
				onlineCount++
			}
			if c.isSyncing {
				syncingCount++
			}
			if c.isOptimistic {
				optimisticCount++
			}
		}

		totalGauge.Set(float64(len(pool.clients)))
		onlineGauge.Set(float64(onlineCount))
		syncingGauge.Set(float64(syncingCount))
		optimisticGauge.Set(float64(optimisticCount))
	}

	metrics.AddPreCollectFn(refresh)
}
41 changes: 40 additions & 1 deletion clients/execution/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import (
"math/rand/v2"
"time"

"github.com/ethpandaops/dora/metrics"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/sirupsen/logrus"
)

Expand All @@ -17,12 +20,16 @@ type Pool struct {
}

func NewPool(ctx context.Context, logger logrus.FieldLogger) *Pool {
return &Pool{
pool := &Pool{
ctx: ctx,
logger: logger,
clients: make([]*Client, 0),
chainState: newChainState(),
}

pool.registerMetrics()

return pool
}

func (pool *Pool) GetChainState() *ChainState {
Expand Down Expand Up @@ -92,3 +99,35 @@ func (pool *Pool) AwaitReadyEndpoint(ctx context.Context, clientType ClientType)
}
}
}

// registerMetrics creates the execution pool gauges and hooks a refresh
// callback into the metrics package.
//
// Three gauges are created through promauto: the total number of clients in
// the pool and how many of them are flagged online and syncing. The gauges
// are not updated when a client changes; instead the callback handed to
// metrics.AddPreCollectFn recounts from pool.clients every time it is invoked
// (presumably right before each metrics collection — confirm against the
// metrics package).
//
// NOTE(review): the callback reads pool.clients and the per-client flags
// without any synchronization visible in this function — confirm that this is
// safe with respect to whatever mutates them.
func (pool *Pool) registerMetrics() {
	// Small factory so each gauge is declared on a single line.
	newGauge := func(name, help string) prometheus.Gauge {
		return promauto.NewGauge(prometheus.GaugeOpts{
			Name: name,
			Help: help,
		})
	}

	totalGauge := newGauge("dora_el_pool_clients", "Number of execution clients")
	onlineGauge := newGauge("dora_el_pool_clients_online", "Number of execution clients online")
	syncingGauge := newGauge("dora_el_pool_clients_syncing", "Number of execution clients syncing")

	refresh := func() {
		var onlineCount, syncingCount int

		// The flags are independent of each other, so one client may be
		// counted in both buckets.
		for _, c := range pool.clients {
			if c.isOnline {
				onlineCount++
			}
			if c.isSyncing {
				syncingCount++
			}
		}

		totalGauge.Set(float64(len(pool.clients)))
		onlineGauge.Set(float64(onlineCount))
		syncingGauge.Set(float64(syncingCount))
	}

	metrics.AddPreCollectFn(refresh)
}
13 changes: 13 additions & 0 deletions cmd/dora-explorer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"github.com/ethpandaops/dora/db"
"github.com/ethpandaops/dora/handlers"
"github.com/ethpandaops/dora/metrics"
"github.com/ethpandaops/dora/services"
"github.com/ethpandaops/dora/static"
"github.com/ethpandaops/dora/types"
Expand Down Expand Up @@ -66,6 +67,13 @@ func main() {
}
}

if cfg.Metrics.Enabled && !cfg.Metrics.Public {
err = metrics.StartMetricsServer(logger.WithField("module", "metrics"), cfg.Metrics.Host, cfg.Metrics.Port)
if err != nil {
logger.Fatalf("error starting metrics server: %v", err)
}
}

err = services.GlobalBeaconService.StartService()
if err != nil {
logger.Fatalf("error starting beacon service: %v", err)
Expand Down Expand Up @@ -176,6 +184,11 @@ func startFrontend(webserver *http.Server) {
// add pprof handler
router.PathPrefix("/debug/pprof/").Handler(http.DefaultServeMux)
router.HandleFunc("/debug/cache", handlers.DebugCache).Methods("GET")
router.Handle("/debug/metrics", metrics.GetMetricsHandler())
}

if utils.Config.Metrics.Enabled && utils.Config.Metrics.Public {
router.Handle("/metrics", metrics.GetMetricsHandler())
}

if utils.Config.Frontend.Debug {
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ require (

require (
github.com/ipfs/go-cid v0.4.1 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/libp2p/go-buffer-pool v0.1.0 // indirect
github.com/minio/highwayhash v1.0.2 // indirect
github.com/mr-tron/base58 v1.2.0 // indirect
Expand Down Expand Up @@ -116,7 +117,7 @@ require (
github.com/mmcloughlin/addchain v0.4.0 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.20.0 // indirect
github.com/prometheus/client_golang v1.20.5 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,8 @@ github.com/pressly/goose/v3 v3.24.1 h1:bZmxRco2uy5uu5Ng1MMVEfYsFlrMJI+e/VMXHQ3C4
github.com/pressly/goose/v3 v3.24.1/go.mod h1:rEWreU9uVtt0DHCyLzF9gRcWiiTF/V+528DV+4DORug=
github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI=
github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
Expand Down
4 changes: 4 additions & 0 deletions indexer/beacon/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ func (c *Client) emitBlockLogEntry(slot phase0.Slot, root phase0.Root, source st

if isNew {
c.logger.Infof("received block %v:%v [0x%x] %v %v fork: %v", chainState.EpochOfSlot(slot), slot, root[:], source, processingTimesStr, forkId)

c.indexer.metrics.blockLoadDuration.Observe(float64(processingTimes[0].Milliseconds()))
c.indexer.metrics.blockProcessDuration.Observe(float64(processingTimes[1].Milliseconds()))
c.indexer.metrics.blockStoreDuration.Observe(float64(processingTimes[2].Milliseconds()))
} else {
c.logger.Debugf("received known block %v:%v [0x%x] %v %v fork: %v", chainState.EpochOfSlot(slot), slot, root[:], source, processingTimesStr, forkId)
}
Expand Down
33 changes: 10 additions & 23 deletions indexer/beacon/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,18 @@ type CacheDebugStats struct {
BlockSize uint64
}
EpochCache struct {
StatsMap CacheDebugMapSize
StateMap CacheDebugMapSize
StatsFull uint64
StatsPrecalc uint64
StatsPruned uint64
StateLoaded uint64
VotesCacheLen uint64
VotesCacheHit uint64
VotesCacheMiss uint64
StatsMap CacheDebugMapSize
StateMap CacheDebugMapSize
StatsFull uint64
StatsPrecalc uint64
StatsPruned uint64
StateLoaded uint64
VotesCacheLen uint64
}
ForkCache struct {
ForkMap CacheDebugMapSize
ParentIdCacheLen uint64
ParentIdCacheHit uint64
ParentIdCacheMiss uint64
ParentIdsCacheLen uint64
ParentIdsCacheHit uint64
ParentIdsCacheMiss uint64
ForkMap CacheDebugMapSize
ParentIdCacheLen uint64
ParentIdsCacheLen uint64
}
ValidatorCache struct {
Validators uint64
Expand Down Expand Up @@ -129,8 +123,6 @@ func (indexer *Indexer) getEpochCacheDebugStats(cacheStats *CacheDebugStats) {
}

cacheStats.EpochCache.VotesCacheLen = uint64(indexer.epochCache.votesCache.Len())
cacheStats.EpochCache.VotesCacheHit = indexer.epochCache.votesCacheHit
cacheStats.EpochCache.VotesCacheMiss = indexer.epochCache.votesCacheMiss
}

func (indexer *Indexer) getForkCacheDebugStats(cacheStats *CacheDebugStats) {
Expand All @@ -143,12 +135,7 @@ func (indexer *Indexer) getForkCacheDebugStats(cacheStats *CacheDebugStats) {
}

cacheStats.ForkCache.ParentIdCacheLen = uint64(indexer.forkCache.parentIdCache.Len())
cacheStats.ForkCache.ParentIdCacheHit = indexer.forkCache.parentIdCacheHit
cacheStats.ForkCache.ParentIdCacheMiss = indexer.forkCache.parentIdCacheMiss

cacheStats.ForkCache.ParentIdsCacheLen = uint64(indexer.forkCache.parentIdsCache.Len())
cacheStats.ForkCache.ParentIdsCacheHit = indexer.forkCache.parentIdsCacheHit
cacheStats.ForkCache.ParentIdsCacheMiss = indexer.forkCache.parentIdsCacheMiss
}

func (indexer *Indexer) getValidatorCacheDebugStats(cacheStats *CacheDebugStats) {
Expand Down
4 changes: 1 addition & 3 deletions indexer/beacon/epochcache.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@ type epochCache struct {
syncCache []phase0.ValidatorIndex // global sync committee cache for reuse if matching
precomputeLock sync.Mutex // mutex to prevent concurrent precomputing of epoch stats

votesCache *lru.Cache[epochVotesKey, *EpochVotes] // cache for epoch vote aggregations
votesCacheHit uint64
votesCacheMiss uint64
votesCache *lru.Cache[epochVotesKey, *EpochVotes] // cache for epoch vote aggregations
}

// newEpochCache creates & returns a new instance of epochCache.
Expand Down
4 changes: 4 additions & 0 deletions indexer/beacon/epochstate.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,15 @@ func (s *epochState) loadState(ctx context.Context, client *Client, cache *epoch

s.stateRoot = blockHeader.Message.StateRoot

t1 := time.Now()
resState, err := LoadBeaconState(ctx, client, blockHeader.Message.StateRoot)
if err != nil {
return nil, err
}

client.indexer.metrics.epochStateLoadDuration.Observe(float64(time.Since(t1).Milliseconds()))
client.indexer.metrics.epochStateLoadCount.Inc()

err = s.processState(resState, cache)
if err != nil {
return nil, err
Expand Down
11 changes: 10 additions & 1 deletion indexer/beacon/epochstats.go
Original file line number Diff line number Diff line change
Expand Up @@ -412,23 +412,32 @@ func (es *EpochStats) processState(indexer *Indexer, validatorSet []*phase0.Vali
DutiesSSZ: packedSsz,
}

t1dur := time.Since(t1)
t1 = time.Now()

err = db.RunDBTransaction(func(tx *sqlx.Tx) error {
return db.InsertUnfinalizedDuty(dbDuty, tx)
})
if err != nil {
indexer.logger.WithError(err).Errorf("failed storing epoch %v stats (%v / %v) to unfinalized duties", es.epoch, es.dependentRoot.String(), es.dependentState.stateRoot.String())
}

t2dur := time.Since(t1)

es.isInDb = true

indexer.metrics.epochStatsProcessDuration.Observe(float64(t1dur.Milliseconds()))
indexer.metrics.epochStatsStoreDuration.Observe(float64(t2dur.Milliseconds()))
indexer.metrics.epochStatsPackedSize.Observe(float64(len(packedSsz)))

indexer.logger.Infof(
"processed epoch %v stats (root: %v / state: %v, validators: %v/%v, %v ms), %v bytes",
es.epoch,
es.dependentRoot.String(),
es.dependentState.stateRoot.String(),
values.ActiveValidators,
len(validatorSet),
time.Since(t1).Milliseconds(),
(t1dur + t2dur).Milliseconds(),
len(packedSsz),
)

Expand Down
6 changes: 4 additions & 2 deletions indexer/beacon/epochvotes.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ func (indexer *Indexer) aggregateEpochVotes(epoch phase0.Epoch, chainState *cons

votesKey := getEpochVotesKey(epoch, targetRoot, blocks[len(blocks)-1].Root, uint8(len(blocks)), votesWithValues, votesWithPrecalc)
if cachedVotes, isOk := indexer.epochCache.votesCache.Get(votesKey); isOk {
indexer.epochCache.votesCacheHit++
indexer.metrics.epochCacheVotesCacheHit.Inc()
return cachedVotes
}

votes := indexer.aggregateEpochVotesAndActivity(epoch, chainState, blocks, epochStats)
indexer.epochCache.votesCacheMiss++
indexer.metrics.epochCacheVotesCacheMiss.Inc()

return votes
}
Expand Down Expand Up @@ -233,6 +233,8 @@ func (indexer *Indexer) aggregateEpochVotesAndActivity(epoch phase0.Epoch, chain
indexer.logger.Debugf("aggregated epoch %v votes in %v (blocks: %v) [0x%x]", epoch, time.Since(t1), len(blocks), votesKey[:])
indexer.epochCache.votesCache.Add(votesKey, votes)

indexer.metrics.epochVoteAggregateDuration.Observe(float64(time.Since(t1).Milliseconds()))

return votes
}

Expand Down
9 changes: 8 additions & 1 deletion indexer/beacon/finalization.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,15 @@ func (indexer *Indexer) finalizeEpoch(epoch phase0.Epoch, justifiedRoot phase0.R
indexer.blockCache.removeBlock(block)
}

t3dur := time.Since(t1)

indexer.metrics.finalizationLoadDuration.Observe(float64(t1loading.Milliseconds()))
indexer.metrics.finalizationProcessDuration.Observe(float64(t1dur.Milliseconds()))
indexer.metrics.finalizationStoreDuration.Observe(float64(t2dur.Milliseconds()))
indexer.metrics.finalizationCleanDuration.Observe(float64(t3dur.Milliseconds()))

// log summary
indexer.logger.Infof("completed epoch %v finalization (process: %v ms, load: %v s, write: %v ms, clean: %v ms)", epoch, t1dur.Milliseconds(), t1loading.Seconds(), t2dur.Milliseconds(), time.Since(t1).Milliseconds())
indexer.logger.Infof("completed epoch %v finalization (process: %v ms, load: %v s, write: %v ms, clean: %v ms)", epoch, t1dur.Milliseconds(), t1loading.Seconds(), t2dur.Milliseconds(), t3dur.Milliseconds())
indexer.logger.Infof("epoch %v blocks: %v canonical, %v orphaned", epoch, len(canonicalBlocks), len(orphanedBlocks))
if epochStatsValues != nil {
indexer.logger.Infof("epoch %v stats: %v validators (%v ETH)", epoch, epochStatsValues.ActiveValidators, epochStatsValues.EffectiveBalance/EtherGweiFactor)
Expand Down
Loading
Loading