Commit b6c9927

[tuner] add the output file (nod-ai#815)
This PR addresses the task described in nod-ai#806. It adds a separate output file to the tuner that summarizes the most important information, such as the top candidates (dispatch and model) and the paths to their specification files. It also fixes the incorrect reporting of the compilation success rate in the log.

Signed-off-by: Bangtian Liu <[email protected]>
1 parent 04d383b commit b6c9927
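
For orientation, the summary output introduced below works by attaching a second logging.FileHandler to the tuner's logger, so INFO-level results land in a compact summary file next to the full run log. A minimal sketch of that pattern, using made-up logger, file, and message names rather than the tuner's own objects:

import logging
from pathlib import Path

# Illustrative names only; the tuner wires this up inside main(), as shown in the diff below.
logger = logging.getLogger("tuning_demo")
logger.setLevel(logging.DEBUG)

summary_handler = logging.FileHandler(Path("summary.log"))
summary_handler.setLevel(logging.INFO)  # keep DEBUG-level progress out of the summary
summary_handler.setFormatter(
    logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
)
logger.addHandler(summary_handler)

logger.debug("per-candidate compile details")       # not written to summary.log
logger.info("Top dispatch candidates: [3, 7, 12]")  # written to summary.log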

File tree

6 files changed: +62 -21 lines changed

tuner/examples/simple/simple_tuner.py
tuner/tuner/candidate_gen.py
tuner/tuner/dispatch_constraints.py
tuner/tuner/dispatch_parser.py
tuner/tuner/libtuner.py
tuner/tuner/libtuner_test.py

tuner/examples/simple/simple_tuner.py (+23 -5)

@@ -80,7 +80,7 @@ def main():
     stop_after_phase: str = args.stop_after

     print("Setup logging")
-    libtuner.setup_logging(args, path_config)
+    root_logger = libtuner.setup_logging(args, path_config)
     print(path_config.run_log, end="\n\n")

     if not args.dry_run:
@@ -93,8 +93,15 @@ def main():
         args.simple_model_benchmark_flags_file
     )

+    summary_log_file = path_config.base_dir / "summary.log"
+    summary_handler = logging.FileHandler(summary_log_file)
+    summary_handler.setLevel(logging.INFO)
+    summary_handler.setFormatter(
+        logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+    )
     print("Generating candidate tuning specs...")
-    with TunerContext() as tuner_context:
+    with TunerContext(logger=root_logger) as tuner_context:
+        tuner_context.logger.addHandler(summary_handler)
         simple_tuner = SimpleTuner(tuner_context)
         candidates = libtuner.generate_candidate_specs(
             args, path_config, candidate_trackers, simple_tuner
@@ -113,7 +120,9 @@ def main():
         if stop_after_phase == libtuner.ExecutionPhases.compile_dispatches:
             return

-        print("Benchmarking compiled dispatch candidates...")
+        message = "Benchmarking compiled dispatch candidates..."
+        print(message)
+        logging.info(message)
         simple_tuner.benchmark_flags = ["--input=1", "--benchmark_repetitions=3"]
         top_candidates = libtuner.benchmark(
             args,
@@ -123,6 +132,9 @@ def main():
             simple_tuner,
             args.simple_num_dispatch_candidates,
         )
+        logging.info(f"Top dispatch candidates: {top_candidates}")
+        for id in top_candidates:
+            logging.info(f"{candidate_trackers[id].spec_path.resolve()}")
         if stop_after_phase == libtuner.ExecutionPhases.benchmark_dispatches:
             return

@@ -140,7 +152,9 @@ def main():
         if stop_after_phase == libtuner.ExecutionPhases.compile_models:
             return

-        print("Benchmarking compiled model candidates...")
+        message = "Benchmarking compiled model candidates..."
+        print(message)
+        logging.info(message)
         simple_tuner.benchmark_flags = model_benchmark_flags
         simple_tuner.benchmark_timeout = 60
         top_model_candidates = libtuner.benchmark(
@@ -151,8 +165,12 @@ def main():
             simple_tuner,
             args.simple_num_model_candidates,
         )
-
+        logging.info(f"Top model candidates: {top_model_candidates}")
+        for id in top_model_candidates:
+            logging.info(f"{candidate_trackers[id].spec_path.resolve()}")
         print(f"Top model candidates: {top_model_candidates}")

         print("Check the detailed execution logs in:")
         print(path_config.run_log.resolve())
+        print("Check the summary in:")
+        print(summary_log_file.resolve())

tuner/tuner/candidate_gen.py (+2 -2)

@@ -195,12 +195,12 @@ def generate_configs_and_td_specs(
     ):
         if i >= limit:
             break
-        tune_logger.info(f"Solution #{i+1}: {config}")
+        tune_logger.debug(f"Solution #{i+1}: {config}")
         td_spec_module = dispatch_tuner.get_td_spec(input_module, config)
         assert td_spec_module, "Failed to generate transform dialect spec"
         config_specs.append(td_spec_module)

-    tune_logger.info(f"Generated {len(config_specs)} tuning specs")
+    tune_logger.debug(f"Generated {len(config_specs)} tuning specs")
     return config_specs

tuner/tuner/dispatch_constraints.py (+1 -1)

@@ -376,7 +376,7 @@ def generate_solutions(
         codegen_pipeline,
     )
     M, N, K = problem_size.MNK
-    tuner_ctx.logger.info(f"{M},{N},{K}")
+    tuner_ctx.logger.debug(f"{M},{N},{K}")
     m_vars = [z3.Int(f"m{i}") for i in range(len(M))]
     n_vars = [z3.Int(f"n{i}") for i in range(len(N))]
     k_vars = [z3.Int(f"k{i}") for i in range(len(K))]

tuner/tuner/dispatch_parser.py (+1 -1)

@@ -17,7 +17,7 @@ def parse_mlir(mlir_text: str, ctx: TunerContext) -> ir.Module:
     mlir_module = None
     try:
         mlir_module = ir.Module.parse(mlir_text, ctx.mlir_ctx)
-        ctx.logger.info("MLIR parsing successful!")
+        ctx.logger.debug("MLIR parsing successful!")
     except ir.MLIRError as e:
         ctx.logger.error(f"Error parsing MLIR: {e}")
         raise RuntimeError(f"Error parsing MLIR: {e}")

tuner/tuner/libtuner.py (+21 -10)

@@ -92,7 +92,7 @@ def _name_base_dir(self) -> Path:
         base_dir = Path(f"./tuning_{timestamp}")
         return base_dir

-    def _set_run_log(self, run_log: Path):
+    def set_run_log(self, run_log: Path):
         object.__setattr__(self, "run_log", run_log)

     def get_candidate_spec_filename(self, candidate_id: int) -> str:
@@ -334,10 +334,10 @@ def parse_arguments(
     return parser.parse_args()


-def setup_logging(args: argparse.Namespace, path_config: PathConfig):
+def setup_logging(args: argparse.Namespace, path_config: PathConfig) -> logging.Logger:
     log_file_name = f"autotune_{args.input_file.stem}.log"
     run_log_path = path_config.base_dir / log_file_name
-    path_config._set_run_log(run_log_path)
+    path_config.set_run_log(run_log_path)

     # Create file handler for logging to a file
     if path_config.run_log is None:
@@ -384,7 +384,9 @@ def format(self, record):
     # Log all arguments
     logging.debug(f"Input Arguments:")
     for arg, value in vars(args).items():
-        tune_logger.info(f"{arg}: {value}")
+        logging.debug(f"{arg}: {value}")
+
+    return logging.getLogger()


 def handle_error(
@@ -717,10 +719,18 @@ def generate_candidate_specs(
         tune_logger.exception("Error in candidate_gen.py:")
         raise

-    logging.info(f"Generated [{len(candidates) - 1}] candidates")
+    logging.debug(f"Generated [{len(candidates) - 1}] candidates")
     return candidates


+def get_compilation_success_rate(compiled_candiates: list[Optional[int]]) -> float:
+    if not compiled_candiates:
+        return 0.0
+    successful_candidates = [c for c in compiled_candiates if c is not None]
+    success_rate = float(len(successful_candidates)) / float(len(compiled_candiates))
+    return success_rate
+
+
 def collision_handler(index_hash_list: list[tuple[int, str]]) -> tuple[bool, list[int]]:
     """If a collision is found, generate a list of new indexes. If no collision, `unique_indexes = []`"""
     # Check if candidate produces tbe same .vmfb
@@ -800,11 +810,11 @@ def compile(
     compiled_candidates = multiprocess_progress_wrapper(
         num_worker=num_worker, task_list=task_list, function=run_iree_compile_command
     )
-    compiled_candidates = [c for c in compiled_candidates if c is not None]
-    success_rate = float(len(compiled_candidates)) / float(len(candidates))
-    logging.info(
+    success_rate = get_compilation_success_rate(compiled_candidates)
+    logging.debug(
         f"Successfully compiled [{len(compiled_candidates)}] candidates. Success rate: {success_rate:.2f}"
     )
+    compiled_candidates = [c for c in compiled_candidates if c is not None]

     # Remove duplicate vmfbs from the candidate list.
     compiled_candidate_hashes = []
@@ -818,7 +828,7 @@ def compile(
     if collision_detected:
         compiled_candidates = unique_compiled_candidates

-    logging.info(f"Produced [{len(compiled_candidates)}] unique vmfbs")
+    logging.debug(f"Produced [{len(compiled_candidates)}] unique vmfbs")
     return compiled_candidates


@@ -875,7 +885,8 @@ def get_speedup(result: BenchmarkResult) -> float:
             speedup = f"{round(get_speedup(r) * 100, 2)}% of baseline"
         else:
             speedup = "baseline unavailable"
-        logging.info(f"Candidate {r.candidate_id} time: {r.time:.2f} ({speedup})")
+        result = f"Candidate {r.candidate_id} time: {r.time:.2f} ms ({speedup})"
+        logging.info(result)
     return best_results
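
A note on the success-rate fix above: get_compilation_success_rate counts the None entries for failed compilations before they are filtered out, instead of computing the rate from the already-filtered list as the old code did. A quick usage sketch with made-up results (the import path is assumed from the repo layout; the tests below exercise the same cases):

from tuner import libtuner  # import path assumed, not taken from this commit

compiled = [0, None, 2, None, 4]  # two of the five candidates failed to compile
rate = libtuner.get_compilation_success_rate(compiled)
print(f"Success rate: {rate:.2f}")  # prints 0.60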

tuner/tuner/libtuner_test.py (+14 -2)

@@ -6,8 +6,6 @@

 import argparse
 import math
-import pytest
-import json
 from subprocess import CompletedProcess
 from unittest.mock import call, patch, MagicMock
 from . import libtuner
@@ -176,6 +174,20 @@ def test_validate_devices_with_invalid_device() -> None:
     assert expected_call in mock_handle_error.call_args_list


+def test_get_compilation_success_rate():
+    compiled_candidates = [0, None, 2, None, 4]
+    assert libtuner.get_compilation_success_rate(compiled_candidates) == 3.0 / 5.0
+
+    compiled_candidates = [0, 1, 2, 3, 4]
+    assert libtuner.get_compilation_success_rate(compiled_candidates) == 1.0
+
+    compiled_candidates = [None, None, None]
+    assert libtuner.get_compilation_success_rate(compiled_candidates) == 0.0
+
+    compiled_candidates = []
+    assert libtuner.get_compilation_success_rate(compiled_candidates) == 0.0
+
+
 def test_select_best_benchmark_results() -> None:
     candidate_results = [
         libtuner.BenchmarkResult(1, 0.5, "hip://0"),
