Skip to content

Commit afd72c3

Browse files
committed
feat(a/b): add support scripts for A/B visualization
Add scripts for combining and plotting the results of A/B runs. Move all A/B-related scripts to the tools/ab directory.

Signed-off-by: Egor Lazarchuk <[email protected]>
1 parent bf5d9da commit afd72c3

File tree

4 files changed

+186
-6
lines changed

4 files changed

+186
-6
lines changed

tools/ab_test.py renamed to tools/ab/ab_test.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,8 @@
3434
# pylint:disable=wrong-import-position
3535
from framework.ab_test import binary_ab_test, check_regression
3636
from framework.properties import global_props
37-
from host_tools.metrics import (
38-
emit_raw_emf,
39-
format_with_reduced_unit,
40-
get_metrics_logger,
41-
)
37+
from host_tools.metrics import (emit_raw_emf, format_with_reduced_unit,
38+
get_metrics_logger)
4239

4340
# Performance tests that are known to be unstable and exhibit variances of up to 60% of the mean
4441
IGNORED = [

tools/ab/combine.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
"""Combine failed A/B-test results into one JSON file per performance test.

Walks a directory of failed A/B runs, parses every ``ab.json`` it finds and
groups the entries by the ``performance_test`` field, writing one
``<test_name>.json`` file per test type in the current directory.
"""

import argparse
import json
import os
from pathlib import Path

# Test names as they appear in the "performance_test" field of ab.json
# entries.  NOTE(review): plot.py matches on "test_network_tcp_throughput",
# so the throughput name is aligned with it (the original used
# "test_network_throughput", which plot.py would never match).
BLOCK = "test_block_performance"
NET_THROUGHPUT = "test_network_tcp_throughput"
NET_LATENCY = "test_network_latency"


def collect(root):
    """Return a dict mapping each known test name to its list of entries.

    `root` is the directory (str or Path) containing failed A/B runs;
    every ``ab.json`` found anywhere under it is parsed.  Entries whose
    test name is not one of the known constants are ignored.
    """
    groups = {BLOCK: [], NET_THROUGHPUT: [], NET_LATENCY: []}
    for dirpath, _dirnames, filenames in os.walk(root):
        if "ab.json" not in filenames:
            continue
        path = Path(dirpath) / "ab.json"
        print(path)
        # ab.json holds a bare JSON fragment (a list); wrap it into a
        # complete document before parsing.
        data = json.loads('{"data":' + path.read_text() + "}")
        for entry in data["data"]:
            test = entry["performance_test"]
            # A plain membership test replaces the original match statement:
            # `case BLOCK:` with a bare name is a capture pattern in Python,
            # not a comparison, and was a SyntaxError as written.
            if test in groups:
                groups[test].append(entry)
    return groups


def main():
    """Parse CLI arguments, combine the runs and write one file per test."""
    parser = argparse.ArgumentParser(
        description="Combine A/B test fails into groups per test type"
    )
    parser.add_argument(
        "path",
        help="Path to the directory with failed A/B runs",
        type=Path,
    )
    args = parser.parse_args()

    groups = collect(args.path)
    for test, entries in groups.items():
        # plot.py reads the entries from the "fails" key; the original
        # inconsistently used "results" for the network files.
        with open(f"{test}.json", "w") as f:
            json.dump({"fails": entries}, f, indent=2, sort_keys=True)


if __name__ == "__main__":
    main()

tools/ab/plot.py

+138
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import argparse
2+
import json
3+
import os
4+
from enum import Enum
5+
6+
import matplotlib.pyplot as plt
7+
import numpy as np
8+
9+
plt.style.use("dark_background")
10+
11+
12+
def clamp(min_v, max_v, v):
    """Restrict *v* to the inclusive range [min_v, max_v]."""
    if v < min_v:
        return min_v
    if v > max_v:
        return max_v
    return v
14+
15+
16+
def lerp(color_a, color_b, t):
    """Interpolate two RGB color tuples at parameter *t*.

    Each channel is blended independently and clamped to [0, 1] (the
    absolute value guards against a negative intermediate when *t* lies
    outside [0, 1]).
    """

    def channel(a, b):
        value = abs(a * (1.0 - t) + b * t)
        return max(0.0, min(1.0, value))

    return tuple(channel(a, b) for a, b in zip(color_a, color_b))
22+
23+
24+
# RGB color triples (matplotlib style, channels in [0, 1]).
GREY = (0.5, 0.5, 0.5)
GREEN = (0.1, 0.8, 0.1)
RED = (0.8, 0.0, 0.1)

# Default mapping of diff sign to bar color: positive diffs are drawn
# green, negative ones red.  NOTE(review): the plotting loop below
# reassigns these module-level names when it encounters latency data.
POSITIVE_COLOR = GREEN
NEGATIVE_COLOR = RED
30+
31+
32+
class DataType(Enum):
    """Kind of performance test a combined A/B results file holds."""

    Block = "block"
    Net = "net"
    NetLatency = "net_latency"
36+
37+
38+
parser = argparse.ArgumentParser(description="Plot results of A/B test")
39+
parser.add_argument("path", type=str)
40+
args = parser.parse_args()
41+
42+
paths = [f"{args.path}/{f}" for f in os.listdir(args.path)]
43+
for path in paths:
44+
print(f"processing: {path}")
45+
with open(path) as f:
46+
fails = json.load(f)["fails"]
47+
48+
if not fails:
49+
print(f"skipping {path}. No data present")
50+
continue
51+
52+
instances = set()
53+
host_kernels = set()
54+
aggregated = {}
55+
56+
match fails[0]["performance_test"]:
57+
case "test_block_performance":
58+
data_type = DataType.Block
59+
case "test_network_tcp_throughput":
60+
data_type = DataType.Net
61+
case "test_network_latency":
62+
data_type = DataType.NetLatency
63+
case _:
64+
print("unknown data type. skipping")
65+
continue
66+
67+
for fail in fails:
68+
instances.add(fail["instance"])
69+
host_kernels.add(fail["host_kernel"])
70+
71+
if data_type == DataType.Block:
72+
tag = (
73+
fail["instance"],
74+
fail["host_kernel"],
75+
fail["guest_kernel"],
76+
fail["fio_mode"],
77+
fail["vcpus"],
78+
fail["io_engine"],
79+
)
80+
elif data_type == DataType.Net:
81+
tag = (
82+
fail["instance"],
83+
fail["host_kernel"],
84+
fail["guest_kernel"],
85+
fail["mode"],
86+
fail["vcpus"],
87+
)
88+
elif data_type == DataType.NetLatency:
89+
tag = (
90+
fail["instance"],
91+
fail["host_kernel"],
92+
fail["guest_kernel"],
93+
)
94+
POSITIVE_COLOR = RED
95+
NEGATIVE_COLOR = GREEN
96+
97+
if tag not in aggregated:
98+
aggregated[tag] = []
99+
aggregated[tag].append(fail["diff"])
100+
101+
for instance in sorted(instances):
102+
fig, ax = plt.subplots(len(host_kernels), figsize=(16, 11))
103+
if len(host_kernels) == 1:
104+
ax = [ax]
105+
fig.tight_layout(pad=8.0)
106+
107+
for i, host_kernel in enumerate(sorted(host_kernels)):
108+
data = []
109+
for key, value in aggregated.items():
110+
if key[0] == instance and key[1] == host_kernel:
111+
label = "\n".join(key[2:])
112+
values = np.array(value)
113+
mean = np.mean(values)
114+
std = np.std(values)
115+
data.append((label, mean, std))
116+
data.sort()
117+
labels = np.array([t[0] for t in data])
118+
means = np.array([t[1] for t in data])
119+
errors = np.array([t[2] for t in data])
120+
colors = [
121+
(
122+
lerp(GREY, POSITIVE_COLOR, t)
123+
if 0.0 < t
124+
else lerp(GREY, NEGATIVE_COLOR, -t)
125+
)
126+
for t in [m / 100.0 for m in means]
127+
]
128+
129+
bar = ax[i].bar(labels, means, yerr=errors, color=colors, ecolor="white")
130+
bar_labels = [f"{m:.2f} / {s:.2f}" for (m, s) in zip(means, errors)]
131+
ax[i].bar_label(bar, labels=bar_labels)
132+
ax[i].set_ylabel("Percentage of change: mean / std")
133+
ax[i].grid(color="grey", linestyle="-.", linewidth=0.5, alpha=0.5)
134+
ax[i].set_title(
135+
f"{data_type}\nInstance: {instance}\nHost kernel: {host_kernel}",
136+
)
137+
138+
plt.savefig(f"{args.path}/{data_type}_{instance}.png")

tools/devtool

+1-1
Original file line numberDiff line numberDiff line change
@@ -760,7 +760,7 @@ cmd_test() {
760760
test_script="./tools/test.sh"
761761

762762
if [ $do_ab_test -eq 1 ]; then
763-
test_script="./tools/ab_test.py"
763+
test_script="./tools/ab/ab_test.py"
764764
fi
765765

766766
# Testing (running Firecracker via the jailer) needs root access,

0 commit comments

Comments
 (0)