Skip to content

Commit d618c6b

Browse files
tautschnig and karkhaz committed
Run performance comparison in CI using Kani's Benchcomp
Use proof harnesses in AWS C Common as (initial) benchmark to guard against performance regressions introduced in PRs. See the "Summary" behind any of the executions of https://github.com/model-checking/kani/actions/workflows/bench.yml for examples of what the output will look like. Co-authored-by: Kareem Khazem <[email protected]>
1 parent 270e82c commit d618c6b

File tree

3 files changed

+353
-0
lines changed

3 files changed

+353
-0
lines changed

Diff for: .github/workflows/benchcomp-config.yaml

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Compare CBMC performance of selected benchmarks across two versions of CBMC,
# erroring out on regression. This config file is to be used together with the
# set-up in performance.yaml, because it assumes that there are two CBMC
# checkouts in the 'old' and 'new' directories; benchcomp compares the
# performance of these two checkouts.
#
# This configuration file is based on Benchcomp's perf-regression.yaml that
# ships with Kani.

# Two variants of the same benchmark suite: identical except for which CBMC
# build ('old' vs 'new') is placed at the front of PATH.
variants:
  aws-c-common@old:
    config:
      command_line: cd verification/cbmc/proofs && export PATH=/home/runner/work/cbmc/cbmc/old/build/bin:$PATH &&
        ./run-cbmc-proofs.py
      directory: /home/runner/work/cbmc/cbmc/aws-c-common.git
      env: {}
  aws-c-common@new:
    config:
      command_line: cd verification/cbmc/proofs && export PATH=/home/runner/work/cbmc/cbmc/new/build/bin:$PATH &&
        ./run-cbmc-proofs.py
      directory: /home/runner/work/cbmc/cbmc/aws-c-common.git
      env: {}

# Run both variants of the suite; benchcomp-parse_cbmc.py turns each Litani
# run.json into the metrics that are compared below.
run:
  suites:
    aws-c-common:
      parser:
        command: /home/runner/work/cbmc/cbmc/new/.github/workflows/benchcomp-parse_cbmc.py
      variants:
      - aws-c-common@old
      - aws-c-common@new

visualize:
- type: dump_yaml
  out_file: '-'

- type: dump_markdown_results_table
  out_file: '-'
  extra_columns:

    # For these two metrics, display the difference between old and new and
    # embolden if the absolute difference is more than 10% of the old value
    number_vccs:
    - column_name: diff old → new
      text: >
        lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
        else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
        + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
        + str(b["aws-c-common@new"] - b["aws-c-common@old"])
        + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
    number_program_steps:
    - column_name: diff old → new
      text: >
        lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
        else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
        + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
        + str(b["aws-c-common@new"] - b["aws-c-common@old"])
        + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")

    # For 'runtime' metrics, display the % change from old to new, emboldening
    # cells whose absolute change is >50%
    solver_runtime:
    - column_name: "% change old → new"
      text: >
        lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
        else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
        + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
        + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
        + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
    verification_time:
    - column_name: "% change old → new"
      text: >
        lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
        else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
        + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
        + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
        + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
    symex_runtime:
    - column_name: "% change old → new"
      text: >
        lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
        else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
        + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
        + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
        + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")

    # For success metric, display some text if success has changed
    success:
    - column_name: change
      text: >
        lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
        else "❌ newly failing" if b["aws-c-common@old"]
        else "✅ newly passing"

# Fail the job when any of the checks below detects a regression.
- type: error_on_regression
  variant_pairs: [[aws-c-common@old, aws-c-common@new]]
  checks:
  - metric: success
    # Compare the old and new variants of each benchmark. The
    # benchmark has regressed if the lambda returns true.
    test: "lambda old, new: False if not old else not new"
  - metric: solver_runtime
    # Ignore runs shorter than 10s (too noisy); otherwise a >1.5x slowdown
    # counts as a regression.
    test: "lambda old, new: False if new < 10 else new/old > 1.5"
  - metric: symex_runtime
    test: "lambda old, new: False if new < 10 else new/old > 1.5"
Diff for: .github/workflows/benchcomp-parse_cbmc.py

+137
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#!/usr/bin/env python3
2+
# Copyright Kani Contributors
3+
# SPDX-License-Identifier: Apache-2.0 OR MIT
4+
5+
6+
import json
7+
import logging
8+
import os
9+
import pathlib
10+
import re
11+
import sys
12+
import textwrap
13+
14+
import yaml
15+
16+
17+
def get_description():
    """Return the one-line human-readable description of this parser."""
    description = "Read CBMC statistics from a Litani run.json file.\n"
    return description
21+
22+
23+
def _get_metrics():
24+
return {
25+
"solver_runtime": {
26+
"pat": re.compile(r"Runtime Solver: (?P<value>[-e\d\.]+)s"),
27+
"parse": float,
28+
},
29+
"removed_program_steps": {
30+
"pat": re.compile(r"slicing removed (?P<value>\d+) assignments"),
31+
"parse": int,
32+
},
33+
"number_program_steps": {
34+
"pat": re.compile(r"size of program expression: (?P<value>\d+) steps"),
35+
"parse": int,
36+
},
37+
"number_vccs": {
38+
"pat": re.compile(
39+
r"Generated \d+ VCC\(s\), (?P<value>\d+) remaining after simplification"),
40+
"parse": int,
41+
},
42+
"symex_runtime": {
43+
"pat": re.compile(r"Runtime Symex: (?P<value>[-e\d\.]+)s"),
44+
"parse": float,
45+
},
46+
"success": {
47+
"pat": re.compile(r"VERIFICATION:- (?P<value>\w+)"),
48+
"parse": lambda v: v == "SUCCESSFUL",
49+
},
50+
}
51+
52+
53+
def get_metrics():
    """Return the public metric schema exposed to benchcomp.

    This is _get_metrics() stripped of implementation detail: the per-metric
    "pat"/"parse" machinery is removed, as is the "removed_program_steps"
    entry, which exists only to adjust the number_program_steps metric.
    """
    schema = {}
    for name, info in _get_metrics().items():
        if name == "removed_program_steps":
            continue
        schema[name] = {
            key: value for key, value in info.items()
            if key not in ("pat", "parse")
        }
    return schema
64+
65+
66+
def get_run(root_dir):
    """Return the parsed contents of the first run.json found under root_dir.

    Terminates the program with an error when no run.json exists anywhere
    below root_dir.
    """
    for candidate in pathlib.Path(root_dir).rglob("run.json"):
        with open(candidate) as stream:
            return json.load(stream)
    logging.error("No run.json found in %s", root_dir)
    sys.exit(1)
72+
73+
74+
def main(root_dir):
    """Extract per-benchmark CBMC metrics from the Litani run under root_dir.

    Returns a dict with two keys: "metrics" (the schema of metrics that
    benchcomp should expect, from get_metrics()) and "benchmarks" (mapping
    "<project>::<pipeline>" names to their metric values).
    """
    root_dir = pathlib.Path(root_dir)
    run = get_run(root_dir)
    metrics = _get_metrics()
    benchmarks = {}
    for pipe in run["pipelines"]:
        for stage in pipe["ci_stages"]:
            # Only the "test" stage contains the verification runs we want.
            if stage["name"] != "test":
                continue
            for job in stage["jobs"]:
                # Skip jobs that are not plain CBMC verification invocations:
                # non-cbmc commands, coverage runs, and property listings.
                if not job["wrapper_arguments"]["command"].startswith("cbmc "):
                    continue
                if "cover" in job["wrapper_arguments"]["command"]:
                    continue
                if "--show-properties" in job["wrapper_arguments"]["command"]:
                    continue

                bench_name = f"{run['project']}::{pipe['name']}"
                if not job["complete"]:
                    logging.warning(
                        "Found an incomplete CBMC benchmark '%s'",
                        bench_name)
                    continue

                # Seed the benchmark with metrics taken from the Litani job
                # record itself; the rest are scraped from CBMC's stdout.
                benchmarks[bench_name] = {
                    "metrics": {
                        "success": job["outcome"] == "success",
                        "verification_time": job["duration_ms"],
                    }}

                for line in job["stdout"]:
                    for metric, metric_info in metrics.items():
                        m = metric_info["pat"].search(line)
                        if not m:
                            continue

                        parse = metric_info["parse"]
                        try:
                            # CBMC prints out some metrics more than once, e.g.
                            # "Solver" and "decision procedure". Add those
                            # values together
                            benchmarks[bench_name]["metrics"][metric] += parse(m["value"])
                        except (KeyError, TypeError):
                            # First occurrence of this metric for this
                            # benchmark: just store it.
                            benchmarks[bench_name]["metrics"][metric] = parse(m["value"])
                        # At most one metric is taken from any given line.
                        break

    # number_program_steps is scraped before slicing; subtract the steps
    # slicing removed so the reported count matches what the solver saw.
    # removed_program_steps is internal and is dropped afterwards.
    for bench_name, bench_info in benchmarks.items():
        try:
            n_steps = bench_info["metrics"]["number_program_steps"]
            rm_steps = bench_info["metrics"]["removed_program_steps"]
            bench_info["metrics"]["number_program_steps"] = n_steps - rm_steps
            bench_info["metrics"].pop("removed_program_steps", None)
        except KeyError:
            # One or both step counts were never printed; leave as-is.
            pass

    return {
        "metrics": get_metrics(),
        "benchmarks": benchmarks,
    }
133+
134+
135+
if __name__ == "__main__":
    # Parse the Litani run in the current working directory and emit the
    # benchcomp-compatible result as YAML on stdout.
    print(yaml.dump(main(os.getcwd()), default_flow_style=False))

Diff for: .github/workflows/performance.yaml

+111
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# Run performance benchmarks comparing two different CBMC versions.
name: Performance Benchmarking
on:
  push:
    branches: [ develop ]
  pull_request:
    branches: [ develop ]

jobs:
  perf-benchcomp:
    runs-on: ubuntu-20.04
    steps:
      # Decide which two commits to compare: for pushes, the before/after of
      # the push; for pull requests, the PR base and head.
      - name: Save push event HEAD and HEAD~ to environment variables
        if: ${{ github.event_name == 'push' }}
        run: |
          echo "NEW_REF=${{ github.event.after}}" | tee -a "$GITHUB_ENV"
          echo "OLD_REF=${{ github.event.before }}" | tee -a "$GITHUB_ENV"

      - name: Save pull request HEAD and base to environment variables
        if: ${{ contains(fromJSON('["pull_request", "pull_request_target"]'), github.event_name) }}
        run: |
          echo "OLD_REF=${{ github.event.pull_request.base.sha }}" | tee -a "$GITHUB_ENV"
          echo "NEW_REF=${{ github.event.pull_request.head.sha }}" | tee -a "$GITHUB_ENV"

      - name: Check out CBMC (old variant)
        uses: actions/checkout@v4
        with:
          submodules: recursive
          path: ./old
          ref: ${{ env.OLD_REF }}
          fetch-depth: 2

      - name: Check out CBMC (new variant)
        uses: actions/checkout@v4
        with:
          submodules: recursive
          path: ./new
          ref: ${{ env.NEW_REF }}
          fetch-depth: 1

      - name: Fetch dependencies
        env:
          # This is needed in addition to -yq to prevent apt-get from asking for
          # user input
          DEBIAN_FRONTEND: noninteractive
        run: |
          sudo apt-get update
          sudo apt-get install --no-install-recommends -yq cmake ninja-build gcc g++ maven flex bison ccache

      - name: Prepare ccache
        uses: actions/cache/restore@v4
        with:
          path: .ccache
          key: ${{ runner.os }}-20.04-Release-${{ github.ref }}-${{ github.sha }}-PR
          restore-keys: |
            ${{ runner.os }}-20.04-Release-${{ github.ref }}
            ${{ runner.os }}-20.04-Release
      - name: ccache environment
        run: echo "CCACHE_DIR=$PWD/.ccache" >> $GITHUB_ENV
      - name: Zero ccache stats and limit in size
        run: ccache -z --max-size=500M

      # Build both checkouts in Release mode with the same compilers so the
      # performance comparison is apples-to-apples.
      - name: Configure using CMake (new variant)
        run: cmake -S new/ -B new/build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -Dsat_impl="minisat2;cadical"
      - name: ccache environment (new variant)
        run: echo "CCACHE_BASEDIR=$PWD/new" >> $GITHUB_ENV
      - name: Build with Ninja (new variant)
        run: ninja -C new/build -j2
      - name: Print ccache stats (new variant)
        run: ccache -s

      - name: Configure using CMake (old variant)
        run: cmake -S old/ -B old/build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -Dsat_impl="minisat2;cadical"
      - name: ccache environment (old variant)
        run: echo "CCACHE_BASEDIR=$PWD/old" >> $GITHUB_ENV
      - name: Build with Ninja (old variant)
        run: ninja -C old/build -j2
      - name: Print ccache stats (old variant)
        run: ccache -s

      # Benchcomp ships with Kani; clone it rather than vendoring a copy.
      - name: Obtain benchcomp
        run: git clone --depth 1 https://github.com/model-checking/kani kani.git

      # The benchmark suite is AWS C Common's proof harnesses, driven by
      # Litani (installed from its released .deb).
      - name: Fetch benchmarks
        run: |
          git clone --depth 1 https://github.com/awslabs/aws-c-common aws-c-common.git
          sudo apt-get install --no-install-recommends -yq gnuplot graphviz
          curl -L --remote-name \
            https://github.com/awslabs/aws-build-accumulator/releases/download/1.29.0/litani-1.29.0.deb
          sudo dpkg -i litani-1.29.0.deb

      - name: Run benchcomp
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            run
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            collate

      # The markdown table lands in the job summary page.
      - name: Perf Regression Results Table
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            visualize --only dump_markdown_results_table >> "$GITHUB_STEP_SUMMARY"

      # error_on_regression runs here and fails the job on a regression.
      - name: Run other visualizations
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            visualize --except dump_markdown_results_table

0 commit comments

Comments
 (0)