Skip to content

Commit 9653906

Browse files
Merge pull request #6172 from onflow/janez/expose-transaction-metrics
Add grpc endpoint to EN for transaction execution metrics
2 parents 72ac1d1 + 2464067 commit 9653906

File tree

19 files changed

+984
-41
lines changed

19 files changed

+984
-41
lines changed

cmd/execution_builder.go

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ import (
4949
"github.com/onflow/flow-go/engine/execution/checker"
5050
"github.com/onflow/flow-go/engine/execution/computation"
5151
"github.com/onflow/flow-go/engine/execution/computation/committer"
52+
txmetrics "github.com/onflow/flow-go/engine/execution/computation/metrics"
5253
"github.com/onflow/flow-go/engine/execution/ingestion"
5354
"github.com/onflow/flow-go/engine/execution/ingestion/fetcher"
5455
"github.com/onflow/flow-go/engine/execution/ingestion/loader"
@@ -127,7 +128,7 @@ type ExecutionNode struct {
127128

128129
ingestionUnit *engine.Unit
129130

130-
collector module.ExecutionMetrics
131+
collector *metrics.ExecutionCollector
131132
executionState state.ExecutionState
132133
followerState protocol.FollowerState
133134
committee hotstuff.DynamicCommittee
@@ -160,6 +161,7 @@ type ExecutionNode struct {
160161
executionDataTracker tracker.Storage
161162
blobService network.BlobService
162163
blobserviceDependable *module.ProxiedReadyDoneAware
164+
metricsProvider txmetrics.TransactionExecutionMetricsProvider
163165
}
164166

165167
func (builder *ExecutionNodeBuilder) LoadComponentsAndModules() {
@@ -228,6 +230,7 @@ func (builder *ExecutionNodeBuilder) LoadComponentsAndModules() {
228230
Component("block data upload manager", exeNode.LoadBlockUploaderManager).
229231
Component("GCP block data uploader", exeNode.LoadGCPBlockDataUploader).
230232
Component("S3 block data uploader", exeNode.LoadS3BlockDataUploader).
233+
Component("transaction execution metrics", exeNode.LoadTransactionExecutionMetrics).
231234
Component("provider engine", exeNode.LoadProviderEngine).
232235
Component("checker engine", exeNode.LoadCheckerEngine).
233236
Component("ingestion engine", exeNode.LoadIngestionEngine).
@@ -544,10 +547,27 @@ func (exeNode *ExecutionNode) LoadProviderEngine(
544547

545548
vmCtx := fvm.NewContext(opts...)
546549

550+
var collector module.ExecutionMetrics
551+
collector = exeNode.collector
552+
if exeNode.exeConf.transactionExecutionMetricsEnabled {
553+
// inject the transaction execution metrics
554+
collector = exeNode.collector.WithTransactionCallback(
555+
func(dur time.Duration, stats module.TransactionExecutionResultStats, info module.TransactionExecutionResultInfo) {
556+
exeNode.metricsProvider.Collect(
557+
info.BlockID,
558+
info.BlockHeight,
559+
txmetrics.TransactionExecutionMetrics{
560+
TransactionID: info.TransactionID,
561+
ExecutionTime: dur,
562+
ExecutionEffortWeights: stats.ComputationIntensities,
563+
})
564+
})
565+
}
566+
547567
ledgerViewCommitter := committer.NewLedgerViewCommitter(exeNode.ledgerStorage, node.Tracer)
548568
manager, err := computation.New(
549569
node.Logger,
550-
exeNode.collector,
570+
collector,
551571
node.Tracer,
552572
node.Me,
553573
node.State,
@@ -1130,6 +1150,24 @@ func (exeNode *ExecutionNode) LoadScriptsEngine(node *NodeConfig) (module.ReadyD
11301150
return exeNode.scriptsEng, nil
11311151
}
11321152

1153+
func (exeNode *ExecutionNode) LoadTransactionExecutionMetrics(
1154+
node *NodeConfig,
1155+
) (module.ReadyDoneAware, error) {
1156+
lastFinalizedHeader := node.LastFinalizedHeader
1157+
1158+
metricsProvider := txmetrics.NewTransactionExecutionMetricsProvider(
1159+
node.Logger,
1160+
exeNode.executionState,
1161+
node.Storage.Headers,
1162+
lastFinalizedHeader.Height,
1163+
exeNode.exeConf.transactionExecutionMetricsBufferSize,
1164+
)
1165+
1166+
node.ProtocolEvents.AddConsumer(metricsProvider)
1167+
exeNode.metricsProvider = metricsProvider
1168+
return metricsProvider, nil
1169+
}
1170+
11331171
func (exeNode *ExecutionNode) LoadConsensusCommittee(
11341172
node *NodeConfig,
11351173
) (
@@ -1331,6 +1369,7 @@ func (exeNode *ExecutionNode) LoadGrpcServer(
13311369
exeNode.results,
13321370
exeNode.txResults,
13331371
node.Storage.Commits,
1372+
exeNode.metricsProvider,
13341373
node.RootChainID,
13351374
signature.NewBlockSignerDecoder(exeNode.committee),
13361375
exeNode.exeConf.apiRatelimits,

cmd/execution_config.go

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -25,35 +25,37 @@ import (
2525

2626
// ExecutionConfig contains the configs for starting up execution nodes
2727
type ExecutionConfig struct {
28-
rpcConf rpc.Config
29-
triedir string
30-
executionDataDir string
31-
registerDir string
32-
mTrieCacheSize uint32
33-
transactionResultsCacheSize uint
34-
checkpointDistance uint
35-
checkpointsToKeep uint
36-
chunkDataPackDir string
37-
chunkDataPackCacheSize uint
38-
chunkDataPackRequestsCacheSize uint32
39-
requestInterval time.Duration
40-
extensiveLog bool
41-
pauseExecution bool
42-
chunkDataPackQueryTimeout time.Duration
43-
chunkDataPackDeliveryTimeout time.Duration
44-
enableBlockDataUpload bool
45-
gcpBucketName string
46-
s3BucketName string
47-
apiRatelimits map[string]int
48-
apiBurstlimits map[string]int
49-
executionDataAllowedPeers string
50-
executionDataPrunerHeightRangeTarget uint64
51-
executionDataPrunerThreshold uint64
52-
blobstoreRateLimit int
53-
blobstoreBurstLimit int
54-
chunkDataPackRequestWorkers uint
55-
maxGracefulStopDuration time.Duration
56-
importCheckpointWorkerCount int
28+
rpcConf rpc.Config
29+
triedir string
30+
executionDataDir string
31+
registerDir string
32+
mTrieCacheSize uint32
33+
transactionResultsCacheSize uint
34+
checkpointDistance uint
35+
checkpointsToKeep uint
36+
chunkDataPackDir string
37+
chunkDataPackCacheSize uint
38+
chunkDataPackRequestsCacheSize uint32
39+
requestInterval time.Duration
40+
extensiveLog bool
41+
pauseExecution bool
42+
chunkDataPackQueryTimeout time.Duration
43+
chunkDataPackDeliveryTimeout time.Duration
44+
enableBlockDataUpload bool
45+
gcpBucketName string
46+
s3BucketName string
47+
apiRatelimits map[string]int
48+
apiBurstlimits map[string]int
49+
executionDataAllowedPeers string
50+
executionDataPrunerHeightRangeTarget uint64
51+
executionDataPrunerThreshold uint64
52+
blobstoreRateLimit int
53+
blobstoreBurstLimit int
54+
chunkDataPackRequestWorkers uint
55+
maxGracefulStopDuration time.Duration
56+
importCheckpointWorkerCount int
57+
transactionExecutionMetricsEnabled bool
58+
transactionExecutionMetricsBufferSize uint
5759

5860
// evm tracing configuration
5961
evmTracingEnabled bool
@@ -122,6 +124,8 @@ func (exeConf *ExecutionConfig) SetupFlags(flags *pflag.FlagSet) {
122124
flags.IntVar(&exeConf.blobstoreBurstLimit, "blobstore-burst-limit", 0, "outgoing burst limit for Execution Data blobstore")
123125
flags.DurationVar(&exeConf.maxGracefulStopDuration, "max-graceful-stop-duration", stop.DefaultMaxGracefulStopDuration, "the maximum amount of time stop control will wait for ingestion engine to gracefully shutdown before crashing")
124126
flags.IntVar(&exeConf.importCheckpointWorkerCount, "import-checkpoint-worker-count", 10, "number of workers to import checkpoint file during bootstrap")
127+
flags.BoolVar(&exeConf.transactionExecutionMetricsEnabled, "tx-execution-metrics", true, "enable collection of transaction execution metrics")
128+
flags.UintVar(&exeConf.transactionExecutionMetricsBufferSize, "tx-execution-metrics-buffer-size", 200, "buffer size for transaction execution metrics. The buffer size is the number of blocks that are kept in memory by the metrics provider engine")
125129
flags.BoolVar(&exeConf.evmTracingEnabled, "evm-tracing-enabled", false, "enable EVM tracing, when set it will generate traces and upload them to the GCP bucket provided by the --evm-traces-gcp-bucket. Warning: this might affect speed of execution")
126130
flags.StringVar(&exeConf.evmTracesGCPBucket, "evm-traces-gcp-bucket", "", "define GCP bucket name used for uploading EVM traces, must be used in combination with --evm-tracing-enabled. if left empty the upload step is skipped")
127131

engine/access/mock/execution_api_client.go

Lines changed: 37 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

engine/access/mock/execution_api_server.go

Lines changed: 30 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

engine/execution/computation/computer/computer.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,6 @@ func (e *blockComputer) queueTransactionRequests(
256256
i == len(collection.Transactions)-1)
257257
txnIndex += 1
258258
}
259-
260259
}
261260

262261
systemCtx := fvm.NewContextFromParent(
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
package metrics
2+
3+
import (
4+
"sync"
5+
6+
"github.com/rs/zerolog"
7+
8+
"github.com/onflow/flow-go/model/flow"
9+
"github.com/onflow/flow-go/module/component"
10+
"github.com/onflow/flow-go/module/irrecoverable"
11+
)
12+
13+
type collector struct {
14+
log zerolog.Logger
15+
16+
collection chan metrics
17+
18+
mu sync.Mutex
19+
20+
lowestAvailableHeight uint64
21+
blocksAtHeight map[uint64]map[flow.Identifier]struct{}
22+
metrics map[flow.Identifier][]TransactionExecutionMetrics
23+
}
24+
25+
func newCollector(
26+
log zerolog.Logger,
27+
lowestAvailableHeight uint64,
28+
) *collector {
29+
return &collector{
30+
log: log,
31+
lowestAvailableHeight: lowestAvailableHeight,
32+
33+
collection: make(chan metrics, 1000),
34+
blocksAtHeight: make(map[uint64]map[flow.Identifier]struct{}),
35+
metrics: make(map[flow.Identifier][]TransactionExecutionMetrics),
36+
}
37+
}
38+
39+
// Collect should never block because it's called from the execution
40+
func (c *collector) Collect(
41+
blockId flow.Identifier,
42+
blockHeight uint64,
43+
t TransactionExecutionMetrics,
44+
) {
45+
select {
46+
case c.collection <- metrics{
47+
TransactionExecutionMetrics: t,
48+
blockHeight: blockHeight,
49+
blockId: blockId,
50+
}:
51+
default:
52+
c.log.Warn().
53+
Uint64("height", blockHeight).
54+
Msg("dropping metrics because the collection channel is full")
55+
}
56+
}
57+
58+
func (c *collector) metricsCollectorWorker(
59+
ctx irrecoverable.SignalerContext,
60+
ready component.ReadyFunc,
61+
) {
62+
ready()
63+
64+
for {
65+
select {
66+
case <-ctx.Done():
67+
return
68+
case m := <-c.collection:
69+
c.collect(m.blockId, m.blockHeight, m.TransactionExecutionMetrics)
70+
}
71+
}
72+
}
73+
74+
func (c *collector) collect(
75+
blockId flow.Identifier,
76+
blockHeight uint64,
77+
t TransactionExecutionMetrics,
78+
) {
79+
c.mu.Lock()
80+
defer c.mu.Unlock()
81+
82+
if blockHeight <= c.lowestAvailableHeight {
83+
c.log.Warn().
84+
Uint64("height", blockHeight).
85+
Uint64("lowestAvailableHeight", c.lowestAvailableHeight).
86+
Msg("received metrics for a block that is older or equal than the most recent block")
87+
return
88+
}
89+
90+
if _, ok := c.blocksAtHeight[blockHeight]; !ok {
91+
c.blocksAtHeight[blockHeight] = make(map[flow.Identifier]struct{})
92+
}
93+
c.blocksAtHeight[blockHeight][blockId] = struct{}{}
94+
c.metrics[blockId] = append(c.metrics[blockId], t)
95+
}
96+
97+
// Pop returns the metrics for the given finalized block at the given height
98+
// and clears all data up to the given height.
99+
func (c *collector) Pop(height uint64, finalizedBlockId flow.Identifier) []TransactionExecutionMetrics {
100+
c.mu.Lock()
101+
defer c.mu.Unlock()
102+
103+
if height <= c.lowestAvailableHeight {
104+
c.log.Warn().
105+
Uint64("height", height).
106+
Stringer("finalizedBlockId", finalizedBlockId).
107+
Msg("requested metrics for a finalizedBlockId that is older or equal than the most recent finalizedBlockId")
108+
return nil
109+
}
110+
111+
// only return metrics for finalized block
112+
metrics := c.metrics[finalizedBlockId]
113+
114+
c.advanceTo(height)
115+
116+
return metrics
117+
}
118+
119+
// advanceTo moves the latest height to the given height
120+
// all data at lower heights will be deleted
121+
func (c *collector) advanceTo(height uint64) {
122+
for c.lowestAvailableHeight < height {
123+
blocks := c.blocksAtHeight[c.lowestAvailableHeight]
124+
for block := range blocks {
125+
delete(c.metrics, block)
126+
}
127+
delete(c.blocksAtHeight, c.lowestAvailableHeight)
128+
c.lowestAvailableHeight++
129+
}
130+
}

0 commit comments

Comments
 (0)