@@ -172,6 +172,18 @@ func (r kvAllocBenchEventRunner) run(ctx context.Context, c cluster.Cluster, t t
 	t.Status("running kv workload", runCmd)
 	return c.RunE(ctx, option.WithNodes(c.WorkloadNode()), runCmd)
 }
+
+// registerAllocationBench registers allocation benchmark tests that measure load
+// balancing quality across the cluster under various workload patterns. The tests
+// run multiple samples (iterations of a scenario) and report metrics for CPU
+// imbalance, write I/O imbalance, and rebalancing cost.
+//
+//   - ops=skew: Mixes different operation types with different rates (e.g., high-rate
+//     small operations vs low-rate large operations). Each workload has different
+//     resource characteristics but similar aggregate resource usage.
+//   - access=skew: Uses the same operation type but with a skewed (power-law) key
+//     access distribution, where certain keys are accessed much more frequently
+//     than others.
 func registerAllocationBench(r registry.Registry) {
 	specTemplates := []allocationBenchSpec{
 		// TODO(kvoli): Add a background event runner and implement events for
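To make the access=skew pattern above concrete: the sketch below draws keys from a power-law distribution using Go's math/rand Zipf generator. The exponent, offset, and key-space size here are illustrative assumptions, not parameters taken from the actual workload.

package main

import (
	"fmt"
	"math/rand"
)

func main() {
	// Power-law key picker: exponent s=1.1 (must be > 1), offset v=1,
	// keys in [0, 999999]. All parameters are illustrative only.
	r := rand.New(rand.NewSource(42))
	zipf := rand.NewZipf(r, 1.1, 1, 1_000_000-1)

	counts := make(map[uint64]int)
	for i := 0; i < 100_000; i++ {
		counts[zipf.Uint64()]++
	}
	// With a skewed distribution, a handful of hot keys dominate the draws.
	fmt.Printf("key 0 was drawn %d times out of 100000\n", counts[0])
}

Raising the exponent s concentrates more traffic onto fewer keys, which is exactly what makes the allocator's rebalancing job harder in this scenario.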
@@ -279,6 +291,10 @@ func registerAllocationBenchSpec(r registry.Registry, allocSpec allocationBenchS
 	})
 }
 
+// setupAllocationBench initializes the cluster for an allocation benchmark run.
+// It starts the CockroachDB nodes with the specified load-based rebalancing mode,
+// sets up Prometheus and Grafana for metrics collection, and returns a stat
+// collector and cleanup function.
 func setupAllocationBench(
 	ctx context.Context, t test.Test, c cluster.Cluster, spec allocationBenchSpec,
 ) (clusterstats.StatCollector, func(context.Context)) {
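As a rough illustration of the collector-plus-cleanup shape this function returns, here is a minimal sketch of the pattern; every name and body below is a placeholder, not the roachtest or clusterstats API.

package main

import (
	"context"
	"fmt"
)

// statCollector stands in for clusterstats.StatCollector; illustrative only.
type statCollector struct{ promURL string }

// setupBench mirrors the shape described above: configure the cluster, start
// metrics collection, and hand back a cleanup closure for the caller to defer.
func setupBench(ctx context.Context) (statCollector, func(context.Context)) {
	collector := statCollector{promURL: "http://127.0.0.1:9090"} // assumed endpoint
	cleanup := func(ctx context.Context) {
		fmt.Println("shutting down prometheus/grafana") // placeholder teardown
	}
	return collector, cleanup
}

func main() {
	ctx := context.Background()
	collector, cleanup := setupBench(ctx)
	defer cleanup(ctx)
	fmt.Println("scraping", collector.promURL)
}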
@@ -339,6 +355,45 @@ func runAllocationBenchEvent(
 	return load.run.run(ctx, c, t)
 }
 
+// runAllocationBench runs the allocation benchmark test. The test measures load
+// balancing quality across the cluster by collecting metrics over multiple sample runs.
+//
+// Metrics are computed by querying Prometheus every 10 seconds (clusterstats/collector.go:
+// defaultScrapeInterval), where each data point aggregates over a 5-minute rolling window via
+// [5m] range vector selectors (rebalance_stats.go: cpuStat, ioWriteStat). This means each
+// 10-second data point represents the average over the previous 5 minutes. After a 10-minute
+// warmup, metrics are collected during the workload (default 30 minutes). At each 10-second
+// data point, the (max - min) imbalance across nodes is calculated, then all these values are
+// averaged to produce the final metric.
+//
+// Metrics reported to roachperf:
+//
+// 1. cpu(%): Average of (max - min) CPU utilization across nodes per 5-minute interval.
+//    This measures how well CPU load is balanced. Lower values indicate more evenly
+//    distributed CPU load. For example, if at an interval nodes have CPU utilizations of
+//    [30%, 35%, 40%, 25%, 30%], the spread is calculated as 40% - 25% = 15%: the highest
+//    CPU node is 15 percentage points above the lowest for that interval.
+//
+// 2. write(%): Average of (max - min) write disk I/O across nodes per 5-minute
+//    interval. Write throughput (MB/s) is normalized by 400 MB/s. This measures how
+//    well write load is balanced. Lower values indicate more evenly distributed I/O.
+//    For example, if at an interval nodes have write throughput of [200 MB/s, 250 MB/s,
+//    300 MB/s, 150 MB/s, 200 MB/s], the spread is 300 - 150 = 150 MB/s. Normalized by
+//    400 MB/s, this becomes 150 / 400 = 37.5%.
+//
+// 3. cost(gb): Total GB of data moved during rebalancing, computed as the difference
+//    between cumulative rebalance snapshot bytes at the end vs the start of recording.
+//    Lower values indicate more efficient rebalancing, but some rebalancing is
+//    necessary to achieve good load distribution. For example, if the cumulative
+//    rebalance bytes are 10 GB at the start and 15 GB at the end, cost(gb) = 5.
+//
+// The test runs multiple samples (default: 5) and selects the "middle run" to
+// export: the sample with the minimum sum of pairwise distances to all other
+// samples across all metrics (see findMinDistanceClusterStatRun). Standard
+// deviation metrics (std_*) are computed over the values of each metric across
+// all sample runs (e.g., std_cpu(%) is the standard deviation of cpu(%) values
+// from all 5 runs). These indicate how consistent results are across runs and
+// give a sense of worst/best-case outcomes.
 func runAllocationBench(
 	ctx context.Context, t test.Test, c cluster.Cluster, spec allocationBenchSpec,
 ) {
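The aggregation described in the comment above boils down to a small amount of arithmetic. Here is a minimal sketch, assuming per-node values have already been aligned per scrape interval; it is illustrative, not the clusterstats implementation.

package main

import "fmt"

// avgSpread computes the average over time of (max - min) across nodes,
// mirroring the imbalance aggregation described above. samples[t][n] is
// node n's value (e.g., CPU %) at scrape interval t; each interval is
// assumed to have at least one node sample.
func avgSpread(samples [][]float64) float64 {
	if len(samples) == 0 {
		return 0
	}
	total := 0.0
	for _, perNode := range samples {
		lo, hi := perNode[0], perNode[0]
		for _, v := range perNode[1:] {
			if v < lo {
				lo = v
			}
			if v > hi {
				hi = v
			}
		}
		total += hi - lo
	}
	return total / float64(len(samples))
}

func main() {
	// Single interval matching the cpu(%) example above: spread = 40 - 25 = 15.
	fmt.Println(avgSpread([][]float64{{30, 35, 40, 25, 30}}))
}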
@@ -400,6 +455,10 @@ func runAllocationBench(
 	}
 }
 
+// runAllocationBenchSample runs a single sample of the allocation benchmark.
+// It executes the workload events, then collects and aggregates metrics from
+// Prometheus for the recording period (after warmup). Returns the collected
+// statistics including cpu(%), write(%), and cost(gb) metrics.
 func runAllocationBenchSample(
 	ctx context.Context,
 	t test.Test,
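Each sample run produces one vector of metric values; the "middle run" selection described earlier then picks the most representative one. A minimal sketch of that idea, assuming plain Euclidean distance over the raw metric values (the real findMinDistanceClusterStatRun may normalize or weight metrics differently):

package main

import (
	"fmt"
	"math"
)

// pickMiddleRun returns the index of the sample whose metric vector has the
// minimum summed Euclidean distance to all other samples, i.e. the most
// "representative" run. All vectors are assumed to have equal length.
func pickMiddleRun(runs [][]float64) int {
	best, bestSum := 0, math.Inf(1)
	for i := range runs {
		sum := 0.0
		for j := range runs {
			if i == j {
				continue
			}
			d := 0.0
			for k := range runs[i] {
				diff := runs[i][k] - runs[j][k]
				d += diff * diff
			}
			sum += math.Sqrt(d)
		}
		if sum < bestSum {
			best, bestSum = i, sum
		}
	}
	return best
}

func main() {
	// Each row: one sample's [cpu(%), write(%), cost(gb)] values (made up).
	runs := [][]float64{
		{15, 37, 5}, {14, 40, 6}, {22, 55, 9}, {15, 38, 5}, {16, 41, 6},
	}
	fmt.Println("middle run index:", pickMiddleRun(runs))
}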