
Commit 37df391

Authored by ianayl, pbalcer, and lukaszstolarczuk
[CI][Benchmark] Merge new benchmarking script markdown output implementation (#17662)
In continuation of the effort outlined in #17545 (comment), this PR merges further changes introduced in #17229. Specifically, it merges the changes related to markdown output generation intended for creating summaries on user PRs.

Note: I am relying on this PR having its commits squashed during merge (which should be the default behavior for intel/llvm).

Co-authored-by: Piotr Balcer <[email protected]>
Co-authored-by: Łukasz Stolarczuk <[email protected]>
1 parent 73c148e

3 files changed: +44 −29 lines

Diff for: devops/scripts/benchmarks/main.py (+18 −10)

@@ -274,14 +274,18 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
 
     if options.output_markdown:
         markdown_content = generate_markdown(
-            this_name, chart_data, options.output_markdown
+            this_name, chart_data, failures, options.output_markdown
         )
 
-        with open("benchmark_results.md", "w") as file:
+        md_path = options.output_directory
+        if options.output_directory is None:
+            md_path = os.getcwd()
+
+        with open(os.path.join(md_path, "benchmark_results.md"), "w") as file:
             file.write(markdown_content)
 
         print(
-            f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md"
+            f"Markdown with benchmark results has been written to {md_path}/benchmark_results.md"
        )
 
     saved_name = save_name if save_name is not None else this_name

@@ -385,12 +389,6 @@ def validate_and_parse_env_args(env_args):
         help="Regex pattern to filter benchmarks by name.",
         default=None,
     )
-    parser.add_argument(
-        "--epsilon",
-        type=float,
-        help="Threshold to consider change of performance significant",
-        default=options.epsilon,
-    )
     parser.add_argument(
         "--verbose", help="Print output of all the commands.", action="store_true"
     )

@@ -419,6 +417,12 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument(
         "--output-html", help="Create HTML output", action="store_true", default=False
     )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        help="Location for output files, if --output-html or --output-markdown was specified.",
+        default=None,
+    )
     parser.add_argument(
         "--dry-run",
         help="Do not run any actual benchmarks",

@@ -491,7 +495,6 @@ def validate_and_parse_env_args(env_args):
     options.sycl = args.sycl
     options.iterations = args.iterations
     options.timeout = args.timeout
-    options.epsilon = args.epsilon
     options.ur = args.ur
     options.ur_adapter = args.adapter
     options.exit_on_failure = args.exit_on_failure

@@ -515,6 +518,11 @@ def validate_and_parse_env_args(env_args):
     if args.compute_runtime is not None:
         options.build_compute_runtime = True
         options.compute_runtime_tag = args.compute_runtime
+    if args.output_dir is not None:
+        if not os.path.isdir(args.output_dir):
+            parser.error("Specified --output-dir is not a valid path")
+        options.output_directory = os.path.abspath(args.output_dir)
+
 
     benchmark_filter = re.compile(args.filter) if args.filter else None
 
Diff for: devops/scripts/benchmarks/options.py (+1 −2)

@@ -32,10 +32,9 @@ class Options:
     compare_max: int = 10  # average/median over how many results
     output_markdown: MarkdownSize = MarkdownSize.SHORT
     output_html: bool = False
+    output_directory: str = None
     dry_run: bool = False
-    # these two should probably be merged into one setting
     stddev_threshold: float = 0.02
-    epsilon: float = 0.02
     iterations_stddev: int = 5
     build_compute_runtime: bool = False
     extra_ld_libraries: list[str] = field(default_factory=list)
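With epsilon removed, options.stddev_threshold (default 0.02) is the single knob that decides whether a delta is reported as significant. Below is a hedged sketch of that check, assuming oln.diff in the summary-table code is the ratio of the compared result to the baseline; the sample numbers are invented.

```python
STDDEV_THRESHOLD = 0.02  # mirrors the Options.stddev_threshold default of 2%


def is_significant(result: float, baseline: float) -> bool:
    # Mirrors `delta = oln.diff - 1` followed by `abs(delta) > options.stddev_threshold`,
    # assuming oln.diff is the result-to-baseline ratio.
    delta = result / baseline - 1
    return abs(delta) > STDDEV_THRESHOLD


print(is_significant(103.0, 100.0))  # True: +3.00% exceeds the 2% threshold
print(is_significant(101.5, 100.0))  # False: +1.50% stays within it
```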

Diff for: devops/scripts/benchmarks/output_markdown.py (+25 −17)

@@ -79,7 +79,7 @@ def get_improved_regressed_summary(is_improved: bool, rows_count: int):
         "\n<details>\n"
         "<summary>\n"
         f"{title} {rows_count} "
-        f"(threshold {options.epsilon*100:.2f}%)\n"
+        f"(threshold {options.stddev_threshold*100:.2f}%)\n"
         "</summary>\n\n"
     )
 

@@ -138,17 +138,6 @@ def generate_markdown_details(
         env_dict = res.env
         command = res.command
 
-        # If data is collected from already saved results,
-        # the content is parsed as strings
-        if isinstance(res.env, str):
-            # Since the scripts would be used solely on data prepared
-            # by our scripts, this should be safe
-            # However, maybe needs an additional blessing
-            # https://docs.python.org/3/library/ast.html#ast.literal_eval
-            env_dict = ast.literal_eval(res.env)
-        if isinstance(res.command, str):
-            command = ast.literal_eval(res.command)
-
         section = (
             "\n<details>\n"
             f"<summary>{res.label}</summary>\n\n"

@@ -179,7 +168,7 @@ def generate_markdown_details(
         return "\nBenchmark details contain too many chars to display\n"
 
 
-def generate_summary_table_and_chart(
+def generate_summary_table(
     chart_data: dict[str, list[Result]], baseline_name: str, markdown_size: MarkdownSize
 ):
     summary_table = get_chart_markdown_header(

@@ -276,7 +265,7 @@ def generate_summary_table_and_chart(
             delta = oln.diff - 1
             oln.row += f" {delta*100:.2f}%"
 
-            if abs(delta) > options.epsilon:
+            if abs(delta) > options.stddev_threshold:
                 if delta > 0:
                     improved_rows.append(oln.row + " | \n")
                 else:

@@ -374,10 +363,27 @@ def generate_summary_table_and_chart(
         return "\n# Summary\n" "Benchmark output is too large to display\n\n"
 
 
+def generate_failures_section(failures: dict[str, str]) -> str:
+    if not failures:
+        return ""
+
+    section = "\n# Failures\n"
+    section += "| Name | Failure |\n"
+    section += "|---|---|\n"
+
+    for name, failure in failures.items():
+        section += f"| {name} | {failure} |\n"
+
+    return section
+
+
 def generate_markdown(
-    name: str, chart_data: dict[str, list[Result]], markdown_size: MarkdownSize
+    name: str,
+    chart_data: dict[str, list[Result]],
+    failures: dict[str, str],
+    markdown_size: MarkdownSize,
 ):
-    (summary_line, summary_table) = generate_summary_table_and_chart(
+    (summary_line, summary_table) = generate_summary_table(
         chart_data, name, markdown_size
     )
 

@@ -396,4 +402,6 @@ def generate_markdown(
     )
     generated_markdown += "\n# Details\n" f"{markdown_details}\n"
 
-    return generated_markdown
+    failures_section = generate_failures_section(failures)
+
+    return failures_section + generated_markdown
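To illustrate the new failures section, the function from the diff above is reproduced standalone and fed a made-up failures dict; the benchmark name and message are invented, and the resulting table is prepended to the markdown report.

```python
def generate_failures_section(failures: dict[str, str]) -> str:
    # Same logic as in output_markdown.py: skip the section entirely when nothing failed.
    if not failures:
        return ""

    section = "\n# Failures\n"
    section += "| Name | Failure |\n"
    section += "|---|---|\n"

    for name, failure in failures.items():
        section += f"| {name} | {failure} |\n"

    return section


# Hypothetical input and the markdown it renders to:
print(generate_failures_section({"some_benchmark SubmitKernel": "verification failed"}))
#
# # Failures
# | Name | Failure |
# |---|---|
# | some_benchmark SubmitKernel | verification failed |
```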
