From 93c32f65bfc325766d029ae288692b8bb2e8f37e Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 18 Dec 2024 15:58:19 -0800 Subject: [PATCH 01/46] add build directory to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 2f368a47..de66ab4d 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,6 @@ out/ # - adds friction to CI # (at the cost of reproducible builds) uv.lock + +# https://docs.astral.sh/uv/concepts/projects/layout/#the-build-directory +build/ From d419691b546a1fcd6a8fd972ec5accaed4d2e718 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 18 Dec 2024 15:58:43 -0800 Subject: [PATCH 02/46] add psutil to dependencies --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 796b5820..c2566bff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,8 @@ dependencies = [ "z3-solver==4.12.6.0", "eth_hash[pysha3]>=0.7.0", "rich>=13.9.4", - "xxhash>=3.5.0" + "xxhash>=3.5.0", + "psutil>=6.1.0", ] dynamic = ["version"] From 57151cff76b6f333e92aff0f5014e896e24e366f Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 18 Dec 2024 15:59:08 -0800 Subject: [PATCH 03/46] add PopenFuture and PopenExecutor --- src/halmos/processes.py | 196 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 src/halmos/processes.py diff --git a/src/halmos/processes.py b/src/halmos/processes.py new file mode 100644 index 00000000..7f0fe920 --- /dev/null +++ b/src/halmos/processes.py @@ -0,0 +1,196 @@ +import concurrent.futures +import contextlib +import subprocess +import threading +import time +from subprocess import PIPE, Popen + +import psutil + + +class PopenFuture(concurrent.futures.Future): + cmd: list[str] + process: subprocess.Popen | None + stdout: str | None + stderr: str | None + returncode: int | None + start_time: float | None + end_time: float | None + _exception: Exception | None + + def __init__(self, cmd: list[str]): + super().__init__() + self.cmd = cmd + self.process = None + self.stdout = None + self.stderr = None + self.returncode = None + self.start_time = None + self.end_time = None + self._exception = None + + def start(self): + """Starts the subprocess and immediately returns.""" + + def run(): + try: + self.start_time = time.time() + self.process = Popen(self.cmd, stdout=PIPE, stderr=PIPE, text=True) + + # blocks until the process terminates + self.stdout, self.stderr = self.process.communicate() + self.end_time = time.time() + self.returncode = self.process.returncode + except Exception as e: + self._exception = e + finally: + self.set_result((self.stdout, self.stderr, self.returncode)) + + # avoid daemon threads because they can cause issues during shutdown + # we don't expect them to actually prevent halmos from terminating, + # as long as the underlying processes are terminated (either by natural + # causes or by forceful termination) + threading.Thread(target=run, daemon=False).start() + + return self + + def cancel(self): + """Attempts to terminate and then kill the process and its children.""" + if self.is_running(): + self.process.terminate() + + # use psutil to kill the entire process tree (including children) + try: + parent_process = psutil.Process(self.process.pid) + processes = parent_process.children(recursive=True) + processes.append(parent_process) + + # ask politely to terminate first + for process in processes: + process.terminate() + + # give them some time to terminate + time.sleep(0.1) + + # after grace period, force kill + for process in processes: + if process.is_running(): + process.kill() + + except psutil.NoSuchProcess: + # process already terminated, nothing to do + pass + + def exception(self) -> Exception | None: + """Returns any exception raised during the process.""" + + return self._exception + + def result(self, timeout=None) -> tuple[str | None, str | None, int]: + """Blocks until the process is finished and returns the result (stdout, stderr, returncode). + + Can raise TimeoutError or some Exception raised during execution""" + + return super().result(timeout=timeout) + + def done(self): + """Returns True if the process has finished.""" + + return super().done() + + def is_running(self): + """Returns True if the process is still running. + + Returns False before start() and after termination.""" + + return self.process and self.process.poll() is None + + +class PopenExecutor(concurrent.futures.Executor): + """ + An executor that runs commands in subprocesses. + + Simple implementation that has no concept of max workers or pending futures. + + The explicit goal is to support killing running subprocesses. + """ + + def __init__(self): + self._futures: list[PopenFuture] = list() + self._shutdown = threading.Event() + self._lock = threading.Lock() + + def submit(self, command: list[str]): + if self._shutdown.is_set(): + raise RuntimeError("Cannot submit to a shutdown executor.") + + future = PopenFuture(command) + with self._lock: + self._futures.append(future) + future.start() + return future + + def shutdown(self, wait=True): + self._shutdown.set() + + # we have no concept of pending futures, + # therefore no cancellation of pending futures + if wait: + self._join() + + # if asked for immediate shutdown we cancel everything + else: + with self._lock: + for future in self._futures: + future.cancel() + + def map(self, fn, *iterables, timeout=None, chunksize=1): + raise NotImplementedError() + + def _join(self): + """Wait until all futures are finished or cancelled.""" + + # submitting new futures after join() would be bad, + # so we make this internal and only call it from shutdown() + with contextlib.suppress(concurrent.futures.CancelledError): + for future in list(self._futures): + future.result() + + +def main(): + # example usage + + with PopenExecutor() as executor: + # Submit multiple commands + commands = [ + "sleep 1", + "sleep 10", + "echo hello", + ] + + futures = [executor.submit(command.split()) for command in commands] + + def done_callback(future: PopenFuture): + stdout, stderr, exitcode = future.result() + cmd = " ".join(future.cmd) + elapsed = future.end_time - future.start_time + print( + f"{cmd}\n" + f" exitcode={exitcode}\n" + f" stdout={stdout.strip()}\n" + f" stderr={stderr.strip()}\n" + f" elapsed={elapsed:.2f}s" + ) + executor.shutdown(wait=False) + + for future in futures: + future.add_done_callback(done_callback) + + # exiting the context manager will shutdown the executor with wait=True + # so no new futures can be submitted + # the first call to done_callback will cause the remaining futures to be cancelled + # (and the underlying processes to be terminated) + + +if __name__ == "__main__": + main() From 771dea6621d378085f429cf6225c51d1caf535d2 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 16 Jan 2025 15:48:22 -0800 Subject: [PATCH 04/46] WIP: add traces.py --- src/halmos/__main__.py | 421 +++++++++++++--------------------------- src/halmos/processes.py | 8 +- src/halmos/traces.py | 162 ++++++++++++++++ 3 files changed, 303 insertions(+), 288 deletions(-) create mode 100644 src/halmos/traces.py diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index b6daa409..bfba44ed 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0 import gc -import io import json import logging import os @@ -13,7 +12,6 @@ import traceback import uuid from collections import Counter -from concurrent.futures import ThreadPoolExecutor from copy import deepcopy from dataclasses import asdict, dataclass from datetime import timedelta @@ -21,17 +19,10 @@ from importlib import metadata from z3 import ( - Z3_OP_CONCAT, BitVec, - BitVecNumRef, - BitVecRef, - Bool, CheckSatResult, - Context, ModelRef, Solver, - is_app, - is_bv, sat, set_option, unknown, @@ -58,6 +49,7 @@ warn_code, ) from .mapper import BuildOut, DeployAddressMapper, Mapper +from .processes import PopenExecutor, PopenFuture from .sevm import ( EMPTY_BALANCE, EVM, @@ -72,7 +64,6 @@ CallContext, CallOutput, Contract, - EventLog, Exec, FailCheatcode, Message, @@ -83,14 +74,13 @@ jumpid_str, mnemonic, ) +from .traces import render_trace, rendered_trace from .utils import ( NamedTimer, address, - byte_length, color_error, con, create_solver, - cyan, green, hexify, indent_text, @@ -198,147 +188,6 @@ def mk_solver(args: HalmosConfig, logic="QF_AUFBV", ctx=None, assertion=False): return create_solver(logic, ctx, timeout, args.solver_max_memory) -def rendered_initcode(context: CallContext) -> str: - message = context.message - data = message.data - - initcode_str = "" - args_str = "" - - if ( - isinstance(data, BitVecRef) - and is_app(data) - and data.decl().kind() == Z3_OP_CONCAT - ): - children = [arg for arg in data.children()] - if isinstance(children[0], BitVecNumRef): - initcode_str = hex(children[0].as_long()) - args_str = ", ".join(map(str, children[1:])) - else: - initcode_str = hexify(data) - - return f"{initcode_str}({cyan(args_str)})" - - -def render_output(context: CallContext, file=sys.stdout) -> None: - output = context.output - returndata_str = "0x" - failed = output.error is not None - - if not failed and context.is_stuck(): - return - - data = output.data - if data is not None: - is_create = context.message.is_create() - if hasattr(data, "unwrap"): - data = data.unwrap() - - returndata_str = ( - f"<{byte_length(data)} bytes of code>" - if (is_create and not failed) - else hexify(data) - ) - - ret_scheme = context.output.return_scheme - ret_scheme_str = f"{cyan(mnemonic(ret_scheme))} " if ret_scheme is not None else "" - error_str = f" (error: {repr(output.error)})" if failed else "" - - color = red if failed else green - indent = context.depth * " " - print( - f"{indent}{color('↩ ')}{ret_scheme_str}{color(returndata_str)}{color(error_str)}", - file=file, - ) - - -def rendered_log(log: EventLog) -> str: - opcode_str = f"LOG{len(log.topics)}" - topics = [ - f"{cyan(f'topic{i}')}={hexify(topic)}" for i, topic in enumerate(log.topics) - ] - data_str = f"{cyan('data')}={hexify(log.data)}" - args_str = ", ".join(topics + [data_str]) - - return f"{opcode_str}({args_str})" - - -def rendered_trace(context: CallContext) -> str: - with io.StringIO() as output: - render_trace(context, file=output) - return output.getvalue() - - -def rendered_calldata(calldata: ByteVec, contract_name: str | None = None) -> str: - if not calldata: - return "0x" - - if len(calldata) < 4: - return hexify(calldata) - - if len(calldata) == 4: - return f"{hexify(calldata.unwrap(), contract_name)}()" - - selector = calldata[:4].unwrap() - args = calldata[4:].unwrap() - return f"{hexify(selector, contract_name)}({hexify(args)})" - - -def render_trace(context: CallContext, file=sys.stdout) -> None: - message = context.message - addr = unbox_int(message.target) - addr_str = str(addr) if is_bv(addr) else hex(addr) - # check if we have a contract name for this address in our deployment mapper - addr_str = DeployAddressMapper().get_deployed_contract(addr_str) - - value = unbox_int(message.value) - value_str = f" (value: {value})" if is_bv(value) or value > 0 else "" - - call_scheme_str = f"{cyan(mnemonic(message.call_scheme))} " - indent = context.depth * " " - - if message.is_create(): - # TODO: select verbosity level to render full initcode - # initcode_str = rendered_initcode(context) - - try: - if context.output.error is None: - target = hex(int(str(message.target))) - bytecode = context.output.data.unwrap().hex() - contract_name = Mapper().get_by_bytecode(bytecode).contract_name - - DeployAddressMapper().add_deployed_contract(target, contract_name) - addr_str = contract_name - except Exception: - # TODO: print in debug mode - ... - - initcode_str = f"<{byte_length(message.data)} bytes of initcode>" - print( - f"{indent}{call_scheme_str}{addr_str}::{initcode_str}{value_str}", file=file - ) - - else: - calldata = rendered_calldata(message.data, addr_str) - call_str = f"{addr_str}::{calldata}" - static_str = yellow(" [static]") if message.is_static else "" - print(f"{indent}{call_scheme_str}{call_str}{static_str}{value_str}", file=file) - - log_indent = (context.depth + 1) * " " - for trace_element in context.trace: - if isinstance(trace_element, CallContext): - render_trace(trace_element, file=file) - elif isinstance(trace_element, EventLog): - print(f"{log_indent}{rendered_log(trace_element)}", file=file) - else: - raise HalmosException(f"unexpected trace element: {trace_element}") - - render_output(context, file=file) - - if context.depth == 1: - print(file=file) - - def deploy_test( creation_hexcode: str, deployed_hexcode: str, @@ -567,7 +416,7 @@ def is_global_fail_set(context: CallContext) -> bool: return hevm_fail or any(is_global_fail_set(x) for x in context.subcalls()) -def run( +def run_test( setup_ex: Exec, abi: dict, fun_info: FunctionInfo, @@ -578,10 +427,18 @@ def run( if args.verbose >= 1: print(f"Executing {funname}") + # + # prepare test dump directory if needed + # + dump_dirname = f"/tmp/{funname}-{uuid.uuid4().hex}" + should_dump = args.dump_smt_queries or args.solver_command + if should_dump and not os.path.isdir(dump_dirname): + os.makedirs(dump_dirname) + print(f"Generating SMT queries in {dump_dirname}") # - # calldata + # prepare calldata # sevm = SEVM(args, fun_info) @@ -636,20 +493,21 @@ def run( (logs, steps) = (sevm.logs, sevm.steps) - # check assertion violations normal = 0 + potential = 0 models: list[ModelWithContext] = [] stuck = [] - thread_pool = ThreadPoolExecutor(max_workers=args.solver_threads) - future_models = [] + thread_pool = PopenExecutor(max_workers=args.solver_threads) + futures = [] counterexamples = [] - unsat_cores = [] + unsat_cores: list[list] = [] traces: dict[int, str] = {} exec_cache: dict[int, Exec] = {} - def future_callback(future_model): - m = future_model.result() + def future_callback(future: PopenFuture): + # XXX stdout, stderr, exitcode + m = future.result() models.append(m) model, index, result = m.model, m.index, m.result @@ -688,16 +546,19 @@ def future_callback(future_model): print(f"# {index + 1}") print(exec) - # initialize with default value in case we don't enter the loop body - idx = -1 + # + # consume the sevm.run() generator + # (actually triggers path exploration) + # - for idx, ex in enumerate(exs): + path_id = 0 # default value in case we don't enter the loop body + for path_id, ex in enumerate(exs): # cache exec in case we need to print it later if args.print_failed_states: - exec_cache[idx] = ex + exec_cache[path_id] = ex if args.verbose >= VERBOSITY_TRACE_PATHS: - print(f"Path #{idx+1}:") + print(f"Path #{path_id}:") print(indent_text(hexify(ex.path))) print("\nTrace:") @@ -706,22 +567,28 @@ def future_callback(future_model): output = ex.context.output error_output = output.error if ex.is_panic_of(args.panic_error_codes) or is_global_fail_set(ex.context): + potential += 1 + if args.verbose >= 1: - print(f"Found potential path (id: {idx+1})") + print(f"Found potential path (id: {path_id})") panic_code = unbox_int(output.data[4:36].unwrap()) print(f"Panic(0x{panic_code:02x}) {error_output}") + # we don't know yet if this will lead to a counterexample + # so we save the rendered trace here and potentially print it later + # if a valid counterexample is found if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: - traces[idx] = rendered_trace(ex.context) + traces[path_id] = rendered_trace(ex.context) - query = ex.path.to_smt2(args) + query: SMTQuery = ex.path.to_smt2(args) - future_model = thread_pool.submit( - gen_model_from_sexpr, - GenModelArgs(args, idx, query, unsat_cores, dump_dirname), - ) - future_model.add_done_callback(future_callback) - future_models.append(future_model) + dump(query, args, dump_filename=f"{dump_dirname}/{path_id}.smt2") + + # if the query contains an unsat-core, it is unsat; no need to run the solver + if check_unsat_cores(query, unsat_cores): + if args.verbose >= 1: + print(" Already proven unsat") + continue elif ex.context.is_stuck(): debug(f"Potential error path (id: {idx+1})") @@ -733,28 +600,27 @@ def future_callback(future_model): elif not error_output: if args.print_success_states: - print(f"# {idx+1}") + print(f"# {path_id}") print(ex) normal += 1 # print post-states if args.print_states: - print(f"# {idx+1}") + print(f"# {path_id}") print(ex) # 0 width is unlimited - if args.width and idx >= args.width: - warn( - f"{funsig}: incomplete execution due to the specified limit: --width {args.width}" - ) + if args.width and path_id >= args.width: + msg = "incomplete execution due to the specified limit" + warn(f"{funsig}: {msg}: --width {args.width}") break - num_execs = idx + 1 + num_execs = path_id timer.create_subtimer("models") - if future_models and args.verbose >= 1: + if potential > 0 and args.verbose >= 1: print( - f"# of potential paths involving assertion violations: {len(future_models)} / {num_execs} (--solver-threads {args.solver_threads})" + f"# of potential paths involving assertion violations: {potential} / {num_execs} (--solver-threads {args.solver_threads})" ) # display assertion solving progress @@ -762,8 +628,8 @@ def future_callback(future_model): while True: if args.early_exit and len(counterexamples) > 0: break - done = sum(fm.done() for fm in future_models) - total = len(future_models) + done = sum(fm.done() for fm in futures) + total = potential if done == total: break elapsed = timedelta(seconds=int(timer.elapsed())) @@ -805,11 +671,11 @@ def future_callback(future_model): f"{passfail} {funsig} (paths: {num_execs}, {time_info}, bounds: [{', '.join([str(x) for x in dyn_params])}])" ) - for idx, _, err in stuck: + for path_id, _, err in stuck: warn_code(INTERNAL_ERROR, f"Encountered {err}") if args.print_blocked_states: - print(f"\nPath #{idx+1}") - print(traces[idx], end="") + print(f"\nPath #{path_id}") + print(traces[path_id], end="") if logs.bounded_loops: warn_code( @@ -874,7 +740,7 @@ class RunArgs: build_out_map: dict -def run_sequential(run_args: RunArgs) -> list[TestResult]: +def run_contract(run_args: RunArgs) -> list[TestResult]: BuildOut().set_build_out(run_args.build_out_map) args = run_args.args @@ -909,7 +775,9 @@ def run_sequential(run_args: RunArgs) -> list[TestResult]: test_config = with_devdoc(args, funsig, run_args.contract_json) solver = mk_solver(test_config) debug(f"{test_config.formatted_layers()}") - test_result = run(setup_ex, run_args.abi, fun_info, test_config, solver) + test_result = run_test( + setup_ex, run_args.abi, fun_info, test_config, solver + ) except Exception as err: print(f"{color_error('[ERROR]')} {funsig}") error(f"{type(err).__name__}: {err}") @@ -932,13 +800,13 @@ def run_sequential(run_args: RunArgs) -> list[TestResult]: @dataclass(frozen=True) class GenModelArgs: args: HalmosConfig - idx: int + path_id: int sexpr: SMTQuery known_unsat_cores: list[list] dump_dirname: str | None = None -def parse_unsat_core(output) -> list | None: +def parse_unsat_core(output) -> list[str] | None: # parsing example: # unsat # (error "the context is unsatisfiable") # <-- this line is optional @@ -955,93 +823,88 @@ def parse_unsat_core(output) -> list | None: return None -def solve( +def dump( query: SMTQuery, args: HalmosConfig, dump_filename: str | None = None ) -> tuple[CheckSatResult, PotentialModel | None, list | None]: - if args.dump_smt_queries or args.solver_command: - if not dump_filename: - dump_filename = f"/tmp/{uuid.uuid4().hex}.smt2" - - # for each implication assertion, `(assert (=> |id| c))`, in query.smtlib, - # generate a corresponding named assertion, `(assert (! |id| :named ))`. - # see `svem.Path.to_smt2()` for more details. - if args.cache_solver: - named_assertions = "".join( - [ - f"(assert (! |{assert_id}| :named <{assert_id}>))\n" - for assert_id in query.assertions - ] - ) + if not dump_filename: + dump_filename = f"/tmp/{uuid.uuid4().hex}.smt2" + + if args.verbose >= 1: + debug(f"Writing SMT query to {dump_filename}") + + # for each implication assertion, `(assert (=> |id| c))`, in query.smtlib, + # generate a corresponding named assertion, `(assert (! |id| :named ))`. + # see `svem.Path.to_smt2()` for more details. + if args.cache_solver: + named_assertions = "".join( + [ + f"(assert (! |{assert_id}| :named <{assert_id}>))\n" + for assert_id in query.assertions + ] + ) with open(dump_filename, "w") as f: - if args.verbose >= 1: - debug(f"Writing SMT query to {dump_filename}") - if args.cache_solver: - f.write("(set-option :produce-unsat-cores true)\n") + f.write("(set-option :produce-unsat-cores true)\n") f.write("(set-logic QF_AUFBV)\n") f.write(query.smtlib) - if args.cache_solver: - f.write(named_assertions) + f.write(named_assertions) f.write("(check-sat)\n") f.write("(get-model)\n") - if args.cache_solver: - f.write("(get-unsat-core)\n") + f.write("(get-unsat-core)\n") - if args.solver_command: - if args.verbose >= 1: - debug(" Checking with external solver process") - debug(f" {args.solver_command} {dump_filename} >{dump_filename}.out") + else: + with open(dump_filename, "w") as f: + f.write("(set-logic QF_AUFBV)\n") + f.write(query.smtlib) + f.write("(check-sat)\n") + f.write("(get-model)\n") - # solver_timeout_assertion == 0 means no timeout, - # which translates to timeout_seconds=None for subprocess.run - timeout_seconds = None - if timeout_millis := args.solver_timeout_assertion: - timeout_seconds = timeout_millis / 1000 - cmd = args.solver_command.split() + [dump_filename] - try: - res_str = subprocess.run( - cmd, capture_output=True, text=True, timeout=timeout_seconds - ).stdout.strip() - res_str_head = res_str.split("\n", 1)[0] +@dataclass(frozen=True) +class SolverOutput: + result: CheckSatResult + model: PotentialModel | None = None + unsat_core: list[str] | None = None - with open(f"{dump_filename}.out", "w") as f: - f.write(res_str) - if args.verbose >= 1: - debug(f" {res_str_head}") +def solve(smt2_filename: str, args: HalmosConfig) -> SolverOutput: + if args.verbose >= 1: + debug(" Checking with external solver process") + debug(f" {args.solver_command} {smt2_filename} >{smt2_filename}.out") - if res_str_head == "unsat": - unsat_core = parse_unsat_core(res_str) if args.cache_solver else None - return unsat, None, unsat_core - elif res_str_head == "sat": - return sat, PotentialModel(f"{dump_filename}.out", args), None - else: - return unknown, None, None - except subprocess.TimeoutExpired: - return unknown, None, None + # solver_timeout_assertion == 0 means no timeout, + # which translates to timeout_seconds=None for subprocess.run + timeout_seconds = None + if timeout_millis := args.solver_timeout_assertion: + timeout_seconds = timeout_millis / 1000 - else: - ctx = Context() - solver = mk_solver(args, ctx=ctx, assertion=True) - solver.from_string(query.smtlib) - if args.cache_solver: - solver.set(unsat_core=True) - ids = [Bool(f"{x}", ctx) for x in query.assertions] - result = solver.check(*ids) + cmd = args.solver_command.split() + [smt2_filename] + try: + # XXX submit to PopenExecutor, process output in callback + res_str = subprocess.run( + cmd, capture_output=True, text=True, timeout=timeout_seconds + ).stdout.strip() + res_str_head = res_str.split("\n", 1)[0] + + with open(f"{smt2_filename}.out", "w") as f: + f.write(res_str) + + if args.verbose >= 1: + debug(f" {res_str_head}") + + if res_str_head == "unsat": + unsat_core = parse_unsat_core(res_str) if args.cache_solver else None + return SolverOutput(result=unsat, unsat_core=unsat_core) + elif res_str_head == "sat": + model = PotentialModel(f"{smt2_filename}.out", args) + return SolverOutput(result=sat, model=model) else: - result = solver.check() - model = PotentialModel(solver.model(), args) if result == sat else None - unsat_core = ( - [str(core) for core in solver.unsat_core()] - if args.cache_solver and result == unsat - else None - ) - solver.reset() - return result, model, unsat_core + return SolverOutput(result=unknown) + except subprocess.TimeoutExpired: + return SolverOutput(result=unknown) -def check_unsat_cores(query, unsat_cores) -> bool: +def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool: # return true if the given query contains any given unsat core for unsat_core in unsat_cores: if all(core in query.assertions for core in unsat_core): @@ -1050,34 +913,20 @@ def check_unsat_cores(query, unsat_cores) -> bool: def gen_model_from_sexpr(fn_args: GenModelArgs) -> ModelWithContext: - args, idx, sexpr = fn_args.args, fn_args.idx, fn_args.sexpr + args, path_id, sexpr = fn_args.args, fn_args.path_id, fn_args.sexpr - dump_dirname = fn_args.dump_dirname - dump_filename = f"{dump_dirname}/{idx+1}.smt2" - should_dump = args.dump_smt_queries or args.solver_command - if should_dump and not os.path.isdir(dump_dirname): - os.makedirs(dump_dirname) - print(f"Generating SMT queries in {dump_dirname}") + res, model, unsat_core = solve(sexpr, args) - if args.verbose >= 1: - print(f"Checking path condition (path id: {idx+1})") - - if check_unsat_cores(sexpr, fn_args.known_unsat_cores): - # if the given query contains an unsat-core, it is unsat; no need to run the solver. - if args.verbose >= 1: - print(" Already proven unsat") - return package_result(None, idx, unsat, None, args) + # XXX fix refinement - res, model, unsat_core = solve(sexpr, args, dump_filename) - - if res == sat and not model.is_valid: - if args.verbose >= 1: - print(" Checking again with refinement") + # if res == sat and not model.is_valid: + # if args.verbose >= 1: + # print(" Checking again with refinement") - refined_filename = dump_filename.replace(".smt2", ".refined.smt2") - res, model, unsat_core = solve(refine(sexpr), args, refined_filename) + # refined_filename = dump_filename.replace(".smt2", ".refined.smt2") + # res, model, unsat_core = solve(refine(sexpr), args, refined_filename) - return package_result(model, idx, res, unsat_core, args) + return package_result(model, path_id, res, unsat_core, args) def refine(query: SMTQuery) -> SMTQuery: @@ -1482,7 +1331,7 @@ def on_signal(signum, frame): build_out_map, ) - test_results = run_sequential(run_args) + test_results = run_contract(run_args) num_passed = sum(r.exitcode == 0 for r in test_results) num_failed = num_found - num_passed diff --git a/src/halmos/processes.py b/src/halmos/processes.py index 7f0fe920..1396c799 100644 --- a/src/halmos/processes.py +++ b/src/halmos/processes.py @@ -115,11 +115,13 @@ class PopenExecutor(concurrent.futures.Executor): The explicit goal is to support killing running subprocesses. """ - def __init__(self): + def __init__(self, max_workers: int = 1): self._futures: list[PopenFuture] = list() self._shutdown = threading.Event() self._lock = threading.Lock() + # TODO: support max_workers + def submit(self, command: list[str]): if self._shutdown.is_set(): raise RuntimeError("Cannot submit to a shutdown executor.") @@ -130,7 +132,9 @@ def submit(self, command: list[str]): future.start() return future - def shutdown(self, wait=True): + def shutdown(self, wait=True, cancel_futures=False): + # TODO: support max_workers + self._shutdown.set() # we have no concept of pending futures, diff --git a/src/halmos/traces.py b/src/halmos/traces.py new file mode 100644 index 00000000..53dc1bd2 --- /dev/null +++ b/src/halmos/traces.py @@ -0,0 +1,162 @@ +import io +import sys + +from z3 import Z3_OP_CONCAT, BitVecNumRef, BitVecRef, is_app + +from halmos.bytevec import ByteVec +from halmos.exceptions import HalmosException +from halmos.sevm import CallContext, EventLog +from halmos.utils import ( + DeployAddressMapper, + Mapper, + byte_length, + cyan, + green, + hexify, + is_bv, + mnemonic, + red, + unbox_int, + yellow, +) + + +def rendered_initcode(context: CallContext) -> str: + message = context.message + data = message.data + + initcode_str = "" + args_str = "" + + if ( + isinstance(data, BitVecRef) + and is_app(data) + and data.decl().kind() == Z3_OP_CONCAT + ): + children = [arg for arg in data.children()] + if isinstance(children[0], BitVecNumRef): + initcode_str = hex(children[0].as_long()) + args_str = ", ".join(map(str, children[1:])) + else: + initcode_str = hexify(data) + + return f"{initcode_str}({cyan(args_str)})" + + +def render_output(context: CallContext, file=sys.stdout) -> None: + output = context.output + returndata_str = "0x" + failed = output.error is not None + + if not failed and context.is_stuck(): + return + + data = output.data + if data is not None: + is_create = context.message.is_create() + if hasattr(data, "unwrap"): + data = data.unwrap() + + returndata_str = ( + f"<{byte_length(data)} bytes of code>" + if (is_create and not failed) + else hexify(data) + ) + + ret_scheme = context.output.return_scheme + ret_scheme_str = f"{cyan(mnemonic(ret_scheme))} " if ret_scheme is not None else "" + error_str = f" (error: {repr(output.error)})" if failed else "" + + color = red if failed else green + indent = context.depth * " " + print( + f"{indent}{color('↩ ')}{ret_scheme_str}{color(returndata_str)}{color(error_str)}", + file=file, + ) + + +def rendered_log(log: EventLog) -> str: + opcode_str = f"LOG{len(log.topics)}" + topics = [ + f"{cyan(f'topic{i}')}={hexify(topic)}" for i, topic in enumerate(log.topics) + ] + data_str = f"{cyan('data')}={hexify(log.data)}" + args_str = ", ".join(topics + [data_str]) + + return f"{opcode_str}({args_str})" + + +def rendered_trace(context: CallContext) -> str: + with io.StringIO() as output: + render_trace(context, file=output) + return output.getvalue() + + +def rendered_calldata(calldata: ByteVec, contract_name: str | None = None) -> str: + if not calldata: + return "0x" + + if len(calldata) < 4: + return hexify(calldata) + + if len(calldata) == 4: + return f"{hexify(calldata.unwrap(), contract_name)}()" + + selector = calldata[:4].unwrap() + args = calldata[4:].unwrap() + return f"{hexify(selector, contract_name)}({hexify(args)})" + + +def render_trace(context: CallContext, file=sys.stdout) -> None: + message = context.message + addr = unbox_int(message.target) + addr_str = str(addr) if is_bv(addr) else hex(addr) + # check if we have a contract name for this address in our deployment mapper + addr_str = DeployAddressMapper().get_deployed_contract(addr_str) + + value = unbox_int(message.value) + value_str = f" (value: {value})" if is_bv(value) or value > 0 else "" + + call_scheme_str = f"{cyan(mnemonic(message.call_scheme))} " + indent = context.depth * " " + + if message.is_create(): + # TODO: select verbosity level to render full initcode + # initcode_str = rendered_initcode(context) + + try: + if context.output.error is None: + target = hex(int(str(message.target))) + bytecode = context.output.data.unwrap().hex() + contract_name = Mapper().get_by_bytecode(bytecode).contract_name + + DeployAddressMapper().add_deployed_contract(target, contract_name) + addr_str = contract_name + except Exception: + # TODO: print in debug mode + ... + + initcode_str = f"<{byte_length(message.data)} bytes of initcode>" + print( + f"{indent}{call_scheme_str}{addr_str}::{initcode_str}{value_str}", file=file + ) + + else: + calldata = rendered_calldata(message.data, addr_str) + call_str = f"{addr_str}::{calldata}" + static_str = yellow(" [static]") if message.is_static else "" + print(f"{indent}{call_scheme_str}{call_str}{static_str}{value_str}", file=file) + + log_indent = (context.depth + 1) * " " + for trace_element in context.trace: + if isinstance(trace_element, CallContext): + render_trace(trace_element, file=file) + elif isinstance(trace_element, EventLog): + print(f"{log_indent}{rendered_log(trace_element)}", file=file) + else: + raise HalmosException(f"unexpected trace element: {trace_element}") + + render_output(context, file=file) + + if context.depth == 1: + print(file=file) From 076caae23dd1f8a2c30ee2b9c014b2dbcc2d1cc8 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 16 Jan 2025 15:53:48 -0800 Subject: [PATCH 05/46] XXX sort data into contract, function and path contexts --- src/halmos/__main__.py | 508 +++++++++++++++++++++++------------------ 1 file changed, 288 insertions(+), 220 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index bfba44ed..d9dd1ab0 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -13,7 +13,7 @@ import uuid from collections import Counter from copy import deepcopy -from dataclasses import asdict, dataclass +from dataclasses import asdict, dataclass, field from datetime import timedelta from enum import Enum from importlib import metadata @@ -110,6 +110,138 @@ VERBOSITY_TRACE_CONSTRUCTOR = 5 +@dataclass +class PotentialModel: + model: AnyModel + is_valid: bool + + def __init__(self, model: ModelRef | str, args: HalmosConfig) -> None: + # convert model into string to avoid pickling errors for z3 (ctypes) objects containing pointers + self.model = ( + to_str_model(model, args.print_full_model) + if isinstance(model, ModelRef) + else model + ) + self.is_valid = is_model_valid(model) + + def __str__(self) -> str: + # expected to be a filename + if isinstance(self.model, str): + return f"see {self.model}" + + formatted = [f"\n {decl} = {val}" for decl, val in self.model.items()] + return "".join(sorted(formatted)) if formatted else "∅" + + +@dataclass(frozen=True) +class ContractContext: + # config with contract-specific overrides + args: HalmosConfig + + # name of this contract + name: str + + # signatures of test functions to run + funsigs: list[str] + + # data parsed from the build output for this contract + creation_hexcode: str + deployed_hexcode: str + abi: dict + method_identifiers: dict[str, str] + contract_json: dict + libs: dict + + # TODO: check if this is really a contract-level variable + build_out_map: dict + + +@dataclass(frozen=True) +class FunctionContext: + # config with function-specific overrides + args: HalmosConfig + + # function name, signature, and selector + info: FunctionInfo + + # solver using the function-specific config + solver: Solver + + # backlink to the parent contract context + contract_ctx: ContractContext + + # optional starting state + setup_ex: Exec | None = None + + # dump directory for this function (generated in __post_init__) + dump_dirname: str = field(init=False) + + def __post_init__(self): + object.__setattr__( + self, "dump_dirname", f"/tmp/{self.info.name}-{uuid.uuid4().hex}" + ) + + +@dataclass +class PathContext: + # id of this path + path_id: int + + # path execution object + ex: Exec + + # SMT query + query: SMTQuery + + # backlink to the parent function context + fun_ctx: FunctionContext + + # filename for this path (generated in __post_init__) + dump_filename: str = field(init=False) + + def __post_init__(self): + object.__setattr__( + self, "dump_filename", f"{self.fun_ctx.dump_dirname}/{self.path_id}.smt2" + ) + + +# XXX remove ModelWithContext +@dataclass(frozen=True) +class ModelWithContext: + # can be a filename containing the model or a dict with variable assignments + model: PotentialModel | None + index: int + result: CheckSatResult + unsat_core: list | None + + +@dataclass(frozen=True) +class TestResult: + name: str # test function name + exitcode: int + num_models: int = None + models: list[ModelWithContext] = None + num_paths: tuple[int, int, int] = None # number of paths: [total, success, blocked] + time: tuple[int, int, int] = None # time: [total, paths, models] + num_bounded_loops: int = None # number of incomplete loops + + +@dataclass(frozen=True) +class SolverOutput: + result: CheckSatResult + model: PotentialModel | None = None + unsat_core: list[str] | None = None + + +class Exitcode(Enum): + PASS = 0 + COUNTEREXAMPLE = 1 + TIMEOUT = 2 + STUCK = 3 + REVERT_ALL = 4 + EXCEPTION = 5 + + def with_devdoc(args: HalmosConfig, fn_sig: str, contract_json: dict) -> HalmosConfig: devdoc = parse_devdoc(fn_sig, contract_json) if not devdoc: @@ -181,21 +313,16 @@ def mk_this() -> Address: return con_addr(FOUNDRY_TEST) -def mk_solver(args: HalmosConfig, logic="QF_AUFBV", ctx=None, assertion=False): - timeout = ( - args.solver_timeout_assertion if assertion else args.solver_timeout_branching +def mk_solver(args: HalmosConfig, logic="QF_AUFBV", ctx=None): + return create_solver( + logic=logic, + ctx=ctx, + timeout=args.solver_timeout_branching, + max_memory=args.solver_max_memory, ) - return create_solver(logic, ctx, timeout, args.solver_max_memory) -def deploy_test( - creation_hexcode: str, - deployed_hexcode: str, - sevm: SEVM, - args: HalmosConfig, - libs: dict, - solver: Solver, -) -> Exec: +def deploy_test(ctx: ContractContext, sevm: SEVM) -> Exec: this = mk_this() message = Message( target=this, @@ -213,12 +340,12 @@ def deploy_test( block=mk_block(), context=CallContext(message=message), pgm=None, # to be added - path=Path(solver), + path=Path(ctx.solver), ) # deploy libraries and resolve library placeholders in hexcode - (creation_hexcode, deployed_hexcode) = ex.resolve_libs( - creation_hexcode, deployed_hexcode, libs + (creation_hexcode, _) = ex.resolve_libs( + ctx.creation_hexcode, ctx.deployed_hexcode, ctx.libs ) # test contract creation bytecode @@ -232,9 +359,9 @@ def deploy_test( if len(exs) != 1: raise ValueError(f"constructor: # of paths: {len(exs)}") - ex = exs[0] + [ex] = exs - if args.verbose >= VERBOSITY_TRACE_CONSTRUCTOR: + if ctx.args.verbose >= VERBOSITY_TRACE_CONSTRUCTOR: print("Constructor trace:") render_trace(ex.context) @@ -258,71 +385,72 @@ def deploy_test( return ex -def setup( - creation_hexcode: str, - deployed_hexcode: str, - abi: dict, - setup_info: FunctionInfo, - args: HalmosConfig, - libs: dict, - solver: Solver, -) -> Exec: +def setup(ctx: FunctionContext) -> Exec: setup_timer = NamedTimer("setup") setup_timer.create_subtimer("decode") + args, setup_info = ctx.args, ctx.info sevm = SEVM(args, setup_info) - setup_ex = deploy_test(creation_hexcode, deployed_hexcode, sevm, args, libs, solver) + setup_ex = deploy_test(ctx, sevm) setup_timer.create_subtimer("run") setup_sig = setup_info.sig - if setup_sig: - # TODO: dyn_params may need to be passed to mk_calldata in run() - calldata, dyn_params = mk_calldata(abi, setup_info, args) - setup_ex.path.process_dyn_params(dyn_params) - - parent_message = setup_ex.message() - setup_ex.context = CallContext( - message=Message( - target=parent_message.target, - caller=parent_message.caller, - origin=parent_message.origin, - value=0, - data=calldata, - call_scheme=EVM.CALL, - ), - ) + if not setup_sig: + if args.statistics: + print(setup_timer.report()) + return setup_ex - setup_exs_all = sevm.run(setup_ex) - setup_exs_no_error = [] + setup_timer.create_subtimer("run") - for idx, setup_ex in enumerate(setup_exs_all): - if args.verbose >= VERBOSITY_TRACE_SETUP: - print(f"{setup_sig} trace #{idx+1}:") - render_trace(setup_ex.context) + # TODO: dyn_params may need to be passed to mk_calldata in run() + calldata, dyn_params = mk_calldata(ctx.abi, setup_info, args) + setup_ex.path.process_dyn_params(dyn_params) + + parent_message = setup_ex.message() + setup_ex.context = CallContext( + message=Message( + target=parent_message.target, + caller=parent_message.caller, + origin=parent_message.origin, + value=0, + data=calldata, + call_scheme=EVM.CALL, + ), + ) - if not (err := setup_ex.context.output.error): - setup_exs_no_error.append((setup_ex, setup_ex.path.to_smt2(args))) + setup_exs_all = sevm.run(setup_ex) + setup_exs_no_error = [] - else: - opcode = setup_ex.current_opcode() - if opcode not in [EVM.REVERT, EVM.INVALID]: - warn_code( - INTERNAL_ERROR, - f"in {setup_sig}, executing {mnemonic(opcode)} failed with: {err}", - ) + for idx, setup_ex in enumerate(setup_exs_all): + if args.verbose >= VERBOSITY_TRACE_SETUP: + print(f"{setup_sig} trace #{idx+1}:") + render_trace(setup_ex.context) - # only render the trace if we didn't already do it - if ( - args.verbose < VERBOSITY_TRACE_SETUP - and args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE - ): - print(f"{setup_sig} trace:") - render_trace(setup_ex.context) + if not (err := setup_ex.context.output.error): + setup_exs_no_error.append((setup_ex, setup_ex.path.to_smt2(args))) - setup_exs = [] + else: + opcode = setup_ex.current_opcode() + if opcode not in [EVM.REVERT, EVM.INVALID]: + warn_code( + INTERNAL_ERROR, + f"in {setup_sig}, executing {mnemonic(opcode)} failed with: {err}", + ) - if len(setup_exs_no_error) > 1: + # only render the trace if we didn't already do it + if VERBOSITY_TRACE_COUNTEREXAMPLE <= args.verbose < VERBOSITY_TRACE_SETUP: + print(f"{setup_sig} trace:") + render_trace(setup_ex.context) + + setup_exs = [] + + match len(setup_exs_no_error): + case 0: + pass + case 1: + setup_exs.append(setup_exs_no_error[0][0]) + case _: for setup_ex, query in setup_exs_no_error: res, _, _ = solve(query, args) if res != unsat: @@ -330,28 +458,24 @@ def setup( if len(setup_exs) > 1: break - elif len(setup_exs_no_error) == 1: - setup_exs.append(setup_exs_no_error[0][0]) - - if len(setup_exs) == 0: + match len(setup_exs): + case 0: raise HalmosException(f"No successful path found in {setup_sig}") - - if len(setup_exs) > 1: + case n if n > 1: debug("\n".join(map(str, setup_exs))) - raise HalmosException(f"Multiple paths were found in {setup_sig}") - setup_ex = setup_exs[0] + [setup_ex] = setup_exs - if args.print_setup_states: - print(setup_ex) + if args.print_setup_states: + print(setup_ex) - if sevm.logs.bounded_loops: - warn_code( - LOOP_BOUND, - f"{setup_sig}: paths have not been fully explored due to the loop unrolling bound: {args.loop}", - ) - debug("\n".join(jumpid_str(x) for x in sevm.logs.bounded_loops)) + if sevm.logs.bounded_loops: + warn_code( + LOOP_BOUND, + f"{setup_sig}: paths have not been fully explored due to the loop unrolling bound: {args.loop}", + ) + debug("\n".join(jumpid_str(x) for x in sevm.logs.bounded_loops)) if args.statistics: print(setup_timer.report()) @@ -359,70 +483,14 @@ def setup( return setup_ex -@dataclass -class PotentialModel: - model: AnyModel - is_valid: bool - - def __init__(self, model: ModelRef | str, args: HalmosConfig) -> None: - # convert model into string to avoid pickling errors for z3 (ctypes) objects containing pointers - self.model = ( - to_str_model(model, args.print_full_model) - if isinstance(model, ModelRef) - else model - ) - self.is_valid = is_model_valid(model) - - def __str__(self) -> str: - # expected to be a filename - if isinstance(self.model, str): - return f"see {self.model}" - - formatted = [f"\n {decl} = {val}" for decl, val in self.model.items()] - return "".join(sorted(formatted)) if formatted else "∅" - - -@dataclass(frozen=True) -class ModelWithContext: - # can be a filename containing the model or a dict with variable assignments - model: PotentialModel | None - index: int - result: CheckSatResult - unsat_core: list | None - - -@dataclass(frozen=True) -class TestResult: - name: str # test function name - exitcode: int - num_models: int = None - models: list[ModelWithContext] = None - num_paths: tuple[int, int, int] = None # number of paths: [total, success, blocked] - time: tuple[int, int, int] = None # time: [total, paths, models] - num_bounded_loops: int = None # number of incomplete loops - - -class Exitcode(Enum): - PASS = 0 - COUNTEREXAMPLE = 1 - TIMEOUT = 2 - STUCK = 3 - REVERT_ALL = 4 - EXCEPTION = 5 - - def is_global_fail_set(context: CallContext) -> bool: hevm_fail = isinstance(context.output.error, FailCheatcode) return hevm_fail or any(is_global_fail_set(x) for x in context.subcalls()) -def run_test( - setup_ex: Exec, - abi: dict, - fun_info: FunctionInfo, - args: HalmosConfig, - solver: Solver, -) -> TestResult: +def run_test(ctx: FunctionContext) -> TestResult: + args = ctx.args + fun_info = ctx.info funname, funsig = fun_info.name, fun_info.sig if args.verbose >= 1: print(f"Executing {funname}") @@ -431,7 +499,7 @@ def run_test( # prepare test dump directory if needed # - dump_dirname = f"/tmp/{funname}-{uuid.uuid4().hex}" + dump_dirname = ctx.dump_dirname should_dump = args.dump_smt_queries or args.solver_command if should_dump and not os.path.isdir(dump_dirname): os.makedirs(dump_dirname) @@ -441,11 +509,12 @@ def run_test( # prepare calldata # + setup_ex = ctx.setup_ex sevm = SEVM(args, fun_info) - path = Path(solver) + path = Path(ctx.solver) path.extend_path(setup_ex.path) - cd, dyn_params = mk_calldata(abi, fun_info, args) + cd, dyn_params = mk_calldata(ctx.abi, fun_info, args) path.process_dyn_params(dyn_params) message = Message( @@ -582,7 +651,14 @@ def future_callback(future: PopenFuture): query: SMTQuery = ex.path.to_smt2(args) - dump(query, args, dump_filename=f"{dump_dirname}/{path_id}.smt2") + path_ctx = PathContext( + path_id=path_id, + ex=ex, + query=query, + fun_info=ctx, + ) + + dump(path_ctx) # if the query contains an unsat-core, it is unsat; no need to run the solver if check_unsat_cores(query, unsat_cores): @@ -616,11 +692,15 @@ def future_callback(future: PopenFuture): break num_execs = path_id + + # the name is a bit misleading: this timer only starts after the exploration phase is complete + # but it's possible that solvers have already been running for a while timer.create_subtimer("models") if potential > 0 and args.verbose >= 1: print( - f"# of potential paths involving assertion violations: {potential} / {num_execs} (--solver-threads {args.solver_threads})" + f"# of potential paths involving assertion violations: {potential} / {num_execs}" + f" (--solver-threads {args.solver_threads})" ) # display assertion solving progress @@ -721,63 +801,51 @@ def extract_setup(methodIdentifiers: dict[str, str]) -> FunctionInfo: return FunctionInfo(setup_name, setup_sig, setup_selector) -@dataclass(frozen=True) -class RunArgs: - # signatures of test functions to run - funsigs: list[str] - - # code of the current contract - creation_hexcode: str - deployed_hexcode: str +def run_contract(ctx: ContractContext) -> list[TestResult]: + BuildOut().set_build_out(ctx.build_out_map) - abi: dict - methodIdentifiers: dict[str, str] - - args: HalmosConfig - contract_json: dict - libs: dict - - build_out_map: dict - - -def run_contract(run_args: RunArgs) -> list[TestResult]: - BuildOut().set_build_out(run_args.build_out_map) - - args = run_args.args - setup_info = extract_setup(run_args.methodIdentifiers) + args = ctx.args + setup_info = extract_setup(ctx.method_identifiers) try: - setup_config = with_devdoc(args, setup_info.sig, run_args.contract_json) + setup_config = with_devdoc(args, setup_info.sig, ctx.contract_json) setup_solver = mk_solver(setup_config) - setup_ex = setup( - run_args.creation_hexcode, - run_args.deployed_hexcode, - run_args.abi, - setup_info, - setup_config, - run_args.libs, - setup_solver, + setup_ctx = FunctionContext( + args=setup_config, + info=setup_info, + solver=setup_solver, + contract_ctx=ctx, ) + + setup_ex = setup(setup_ctx) except Exception as err: error(f"{setup_info.sig} failed: {type(err).__name__}: {err}") if args.debug: traceback.print_exc() + # reset any remaining solver states from the default context setup_solver.reset() + return [] test_results = [] - for funsig in run_args.funsigs: - fun_info = FunctionInfo( - funsig.split("(")[0], funsig, run_args.methodIdentifiers[funsig] - ) + for funsig in ctx.funsigs: + selector = ctx.method_identifiers[funsig] + fun_info = FunctionInfo(funsig.split("(")[0], funsig, selector) try: - test_config = with_devdoc(args, funsig, run_args.contract_json) + test_config = with_devdoc(args, funsig, ctx.contract_json) solver = mk_solver(test_config) debug(f"{test_config.formatted_layers()}") - test_result = run_test( - setup_ex, run_args.abi, fun_info, test_config, solver + + test_ctx = FunctionContext( + args=test_config, + info=fun_info, + solver=solver, + contract_ctx=ctx, + setup_ex=setup_ex, ) + + test_result = run_test(test_ctx) except Exception as err: print(f"{color_error('[ERROR]')} {funsig}") error(f"{type(err).__name__}: {err}") @@ -824,10 +892,11 @@ def parse_unsat_core(output) -> list[str] | None: def dump( - query: SMTQuery, args: HalmosConfig, dump_filename: str | None = None + path_ctx: PathContext, ) -> tuple[CheckSatResult, PotentialModel | None, list | None]: - if not dump_filename: - dump_filename = f"/tmp/{uuid.uuid4().hex}.smt2" + args = path_ctx.fun_ctx.args + query = path_ctx.query + dump_filename = path_ctx.dump_filename if args.verbose >= 1: debug(f"Writing SMT query to {dump_filename}") @@ -860,13 +929,6 @@ def dump( f.write("(get-model)\n") -@dataclass(frozen=True) -class SolverOutput: - result: CheckSatResult - model: PotentialModel | None = None - unsat_core: list[str] | None = None - - def solve(smt2_filename: str, args: HalmosConfig) -> SolverOutput: if args.verbose >= 1: debug(" Checking with external solver process") @@ -884,22 +946,27 @@ def solve(smt2_filename: str, args: HalmosConfig) -> SolverOutput: res_str = subprocess.run( cmd, capture_output=True, text=True, timeout=timeout_seconds ).stdout.strip() - res_str_head = res_str.split("\n", 1)[0] + # save solver output to file with open(f"{smt2_filename}.out", "w") as f: f.write(res_str) + # extract the first line (we expect sat/unsat/unknown) + newline_idx = res_str.find("\n") + res_str_head = res_str[:newline_idx] if newline_idx != -1 else res_str if args.verbose >= 1: debug(f" {res_str_head}") - if res_str_head == "unsat": - unsat_core = parse_unsat_core(res_str) if args.cache_solver else None - return SolverOutput(result=unsat, unsat_core=unsat_core) - elif res_str_head == "sat": - model = PotentialModel(f"{smt2_filename}.out", args) - return SolverOutput(result=sat, model=model) - else: - return SolverOutput(result=unknown) + match res_str_head: + case "unsat": + unsat_core = parse_unsat_core(res_str) if args.cache_solver else None + return SolverOutput(result=unsat, unsat_core=unsat_core) + case "sat": + model = PotentialModel(f"{smt2_filename}.out", args) + return SolverOutput(result=sat, model=model) + case _: + return SolverOutput(result=unknown) + except subprocess.TimeoutExpired: return SolverOutput(result=unknown) @@ -1319,26 +1386,27 @@ def on_signal(signum, frame): # support for `/// @custom:halmos` annotations contract_args = with_natspec(args, contract_name, natspec) - run_args = RunArgs( - funsigs, - creation_hexcode, - deployed_hexcode, - abi, - methodIdentifiers, - contract_args, - contract_json, - libs, - build_out_map, + contract_ctx = ContractContext( + args=contract_args, + name=contract_name, + creation_hexcode=creation_hexcode, + deployed_hexcode=deployed_hexcode, + abi=abi, + method_identifiers=methodIdentifiers, + contract_json=contract_json, + libs=libs, + build_out_map=build_out_map, ) - test_results = run_contract(run_args) - + test_results = run_contract(contract_ctx) num_passed = sum(r.exitcode == 0 for r in test_results) num_failed = num_found - num_passed print( - f"Symbolic test result: {num_passed} passed; " - f"{num_failed} failed; {contract_timer.report()}" + "Symbolic test result: " + f"{num_passed} passed; " + f"{num_failed} failed; " + f"{contract_timer.report()}" ) total_found += num_found From 926e733affe7c5445a8181f36f28563376fbc683 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 16 Jan 2025 15:57:47 -0800 Subject: [PATCH 06/46] XXX early exit wired actually wired --- src/halmos/__main__.py | 323 +++++++++++++++++++++------------------- src/halmos/processes.py | 37 +++-- src/halmos/traces.py | 6 +- 3 files changed, 193 insertions(+), 173 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index d9dd1ab0..07bf3e66 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -176,13 +176,38 @@ class FunctionContext: # dump directory for this function (generated in __post_init__) dump_dirname: str = field(init=False) + # path-specific queries are submitted to this function-specific executor + thread_pool: PopenExecutor = field(default_factory=PopenExecutor) + + # list of solver outputs for this function + _solver_outputs: list["SolverOutput"] = field(default_factory=list) + + # list of counterexamples for this function + counterexamples: list[PotentialModel] = field(default_factory=list) + + # list of unsat cores for this function + unsat_cores: list[list] = field(default_factory=list) + + # map from path id to trace + traces: dict[int, str] = field(default_factory=dict) + + # map from path id to execution + exec_cache: dict[int, Exec] = field(default_factory=dict) + def __post_init__(self): object.__setattr__( self, "dump_dirname", f"/tmp/{self.info.name}-{uuid.uuid4().hex}" ) + def add_solver_output(self, solver_output: "SolverOutput"): + self._solver_outputs.append(solver_output) -@dataclass + @property + def solver_outputs(self): + return self._solver_outputs + + +@dataclass(frozen=True) class PathContext: # id of this path path_id: int @@ -205,14 +230,20 @@ def __post_init__(self): ) -# XXX remove ModelWithContext @dataclass(frozen=True) -class ModelWithContext: - # can be a filename containing the model or a dict with variable assignments - model: PotentialModel | None - index: int +class SolverOutput: + # solver result result: CheckSatResult - unsat_core: list | None + + # we don't backlink to the parent path context to avoid extra + # references to Exec objects past the lifetime of the path + path_id: int + + # solver model + model: PotentialModel | None = None + + # optional unsat core + unsat_core: list[str] | None = None @dataclass(frozen=True) @@ -220,19 +251,12 @@ class TestResult: name: str # test function name exitcode: int num_models: int = None - models: list[ModelWithContext] = None + models: list[SolverOutput] = None num_paths: tuple[int, int, int] = None # number of paths: [total, success, blocked] time: tuple[int, int, int] = None # time: [total, paths, models] num_bounded_loops: int = None # number of incomplete loops -@dataclass(frozen=True) -class SolverOutput: - result: CheckSatResult - model: PotentialModel | None = None - unsat_core: list[str] | None = None - - class Exitcode(Enum): PASS = 0 COUNTEREXAMPLE = 1 @@ -322,7 +346,7 @@ def mk_solver(args: HalmosConfig, logic="QF_AUFBV", ctx=None): ) -def deploy_test(ctx: ContractContext, sevm: SEVM) -> Exec: +def deploy_test(ctx: FunctionContext, sevm: SEVM) -> Exec: this = mk_this() message = Message( target=this, @@ -344,8 +368,9 @@ def deploy_test(ctx: ContractContext, sevm: SEVM) -> Exec: ) # deploy libraries and resolve library placeholders in hexcode + contract_ctx = ctx.contract_ctx (creation_hexcode, _) = ex.resolve_libs( - ctx.creation_hexcode, ctx.deployed_hexcode, ctx.libs + contract_ctx.creation_hexcode, contract_ctx.deployed_hexcode, contract_ctx.libs ) # test contract creation bytecode @@ -404,7 +429,7 @@ def setup(ctx: FunctionContext) -> Exec: setup_timer.create_subtimer("run") # TODO: dyn_params may need to be passed to mk_calldata in run() - calldata, dyn_params = mk_calldata(ctx.abi, setup_info, args) + calldata, dyn_params = mk_calldata(ctx.contract_ctx.abi, setup_info, args) setup_ex.path.process_dyn_params(dyn_params) parent_message = setup_ex.message() @@ -514,7 +539,7 @@ def run_test(ctx: FunctionContext) -> TestResult: path = Path(ctx.solver) path.extend_path(setup_ex.path) - cd, dyn_params = mk_calldata(ctx.abi, fun_info, args) + cd, dyn_params = mk_calldata(ctx.contract_ctx.abi, fun_info, args) path.process_dyn_params(dyn_params) message = Message( @@ -560,61 +585,10 @@ def run_test(ctx: FunctionContext) -> TestResult: ) ) - (logs, steps) = (sevm.logs, sevm.steps) - normal = 0 potential = 0 - models: list[ModelWithContext] = [] stuck = [] - thread_pool = PopenExecutor(max_workers=args.solver_threads) - futures = [] - counterexamples = [] - unsat_cores: list[list] = [] - traces: dict[int, str] = {} - exec_cache: dict[int, Exec] = {} - - def future_callback(future: PopenFuture): - # XXX stdout, stderr, exitcode - m = future.result() - models.append(m) - - model, index, result = m.model, m.index, m.result - - # retrieve cached exec and clear the cache entry - exec = exec_cache.pop(index, None) - - if result == unsat: - if m.unsat_core: - unsat_cores.append(m.unsat_core) - return - - # model could be an empty dict here - if model is not None: - if model.is_valid: - print(red(f"Counterexample: {model}")) - counterexamples.append(model) - else: - warn_code( - COUNTEREXAMPLE_INVALID, - f"Counterexample (potentially invalid): {model}", - ) - counterexamples.append(model) - else: - warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}") - - if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: - print( - f"Trace #{index + 1}:" - if args.verbose == VERBOSITY_TRACE_PATHS - else "Trace:" - ) - print(traces[index], end="") - - if args.print_failed_states: - print(f"# {index + 1}") - print(exec) - # # consume the sevm.run() generator # (actually triggers path exploration) @@ -624,7 +598,7 @@ def future_callback(future: PopenFuture): for path_id, ex in enumerate(exs): # cache exec in case we need to print it later if args.print_failed_states: - exec_cache[path_id] = ex + ctx.exec_cache[path_id] = ex if args.verbose >= VERBOSITY_TRACE_PATHS: print(f"Path #{path_id}:") @@ -647,7 +621,7 @@ def future_callback(future: PopenFuture): # so we save the rendered trace here and potentially print it later # if a valid counterexample is found if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: - traces[path_id] = rendered_trace(ex.context) + ctx.traces[path_id] = rendered_trace(ex.context) query: SMTQuery = ex.path.to_smt2(args) @@ -655,17 +629,22 @@ def future_callback(future: PopenFuture): path_id=path_id, ex=ex, query=query, - fun_info=ctx, + fun_ctx=ctx, ) dump(path_ctx) # if the query contains an unsat-core, it is unsat; no need to run the solver - if check_unsat_cores(query, unsat_cores): + if check_unsat_cores(query, ctx.unsat_cores): if args.verbose >= 1: print(" Already proven unsat") continue + solve(path_ctx) + + # XXX handle refinement + # XXX handle timeout + elif ex.context.is_stuck(): debug(f"Potential error path (id: {idx+1})") res, _, _ = solve(ex.path.to_smt2(args), args) @@ -706,9 +685,9 @@ def future_callback(future: PopenFuture): # display assertion solving progress if not args.no_status or args.early_exit: while True: - if args.early_exit and len(counterexamples) > 0: + if args.early_exit and len(ctx.counterexamples) > 0: break - done = sum(fm.done() for fm in futures) + done = sum(fm.done() for fm in ctx.thread_pool.futures) total = potential if done == total: break @@ -717,11 +696,11 @@ def future_callback(future: PopenFuture): time.sleep(0.1) if args.early_exit: - thread_pool.shutdown(wait=False, cancel_futures=True) + ctx.thread_pool.shutdown(wait=False, cancel_futures=True) else: - thread_pool.shutdown(wait=True) + ctx.thread_pool.shutdown(wait=True) - counter = Counter(str(m.result) for m in models) + counter = Counter(str(m.result) for m in ctx.solver_outputs) if counter["sat"] > 0: passfail = red("[FAIL]") exitcode = Exitcode.COUNTEREXAMPLE.value @@ -746,16 +725,19 @@ def future_callback(future: PopenFuture): timer.stop() time_info = timer.report(include_subtimers=args.statistics) - # print result + # print test result print( - f"{passfail} {funsig} (paths: {num_execs}, {time_info}, bounds: [{', '.join([str(x) for x in dyn_params])}])" + f"{passfail} {funsig} (paths: {num_execs}, {time_info}, " + f"bounds: [{', '.join([str(x) for x in dyn_params])}])" ) for path_id, _, err in stuck: warn_code(INTERNAL_ERROR, f"Encountered {err}") if args.print_blocked_states: print(f"\nPath #{path_id}") - print(traces[path_id], end="") + print(ctx.traces[path_id], end="") + + (logs, steps) = (sevm.logs, sevm.steps) if logs.bounded_loops: warn_code( @@ -771,13 +753,13 @@ def future_callback(future: PopenFuture): # return test result if args.minimal_json_output: - return TestResult(funsig, exitcode, len(counterexamples)) + return TestResult(funsig, exitcode, len(ctx.counterexamples)) else: return TestResult( funsig, exitcode, - len(counterexamples), - counterexamples, + len(ctx.counterexamples), + ctx.counterexamples, (num_execs, normal, len(stuck)), (timer.elapsed(), timer["paths"].elapsed(), timer["models"].elapsed()), len(logs.bounded_loops), @@ -929,46 +911,95 @@ def dump( f.write("(get-model)\n") -def solve(smt2_filename: str, args: HalmosConfig) -> SolverOutput: +def solver_callback(future: PopenFuture): + """Invoked by a PopenFuture when it is finished.""" + + path_ctx: PathContext = future.metadata["path_ctx"] + path_id = path_ctx.path_id + args = path_ctx.fun_ctx.args + smt2_filename = path_ctx.dump_filename + stdout, stderr, returncode = future.result() + + # save solver output to file + with open(f"{smt2_filename}.out", "w") as f: + f.write(stdout) + + # extract the first line (we expect sat/unsat/unknown) + newline_idx = stdout.find("\n") + first_line = stdout[:newline_idx] if newline_idx != -1 else stdout + if args.verbose >= 1: + debug(f" {first_line}") + + solver_output = None + match first_line: + case "unsat": + unsat_core = parse_unsat_core(stdout) if args.cache_solver else None + solver_output = SolverOutput(unsat, path_id, unsat_core=unsat_core) + case "sat": + model = PotentialModel(f"{smt2_filename}.out", args) + solver_output = SolverOutput(sat, path_id, model=model) + case _: + solver_output = SolverOutput(unknown, path_id) + + fun_ctx = path_ctx.fun_ctx + fun_ctx.add_solver_output(solver_output) + + model, result = solver_output.model, solver_output.result + + # retrieve cached exec and clear the cache entry + exec = fun_ctx.exec_cache.pop(path_id, None) + + if result == unsat: + if solver_output.unsat_core: + fun_ctx.unsat_cores.append(solver_output.unsat_core) + return + + # model could be an empty dict here + if model is not None: + if model.is_valid: + print(red(f"Counterexample: {model}")) + fun_ctx.counterexamples.append(model) + else: + warn_code( + COUNTEREXAMPLE_INVALID, + f"Counterexample (potentially invalid): {model}", + ) + fun_ctx.counterexamples.append(model) + else: + warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}") + + if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: + print( + f"Trace #{path_id}:" if args.verbose == VERBOSITY_TRACE_PATHS else "Trace:" + ) + print(fun_ctx.traces[path_id], end="") + + if args.print_failed_states: + print(f"# {path_id}") + print(exec) + + +def solve(ctx: PathContext): + args = ctx.fun_ctx.args + smt2_filename = ctx.dump_filename + if args.verbose >= 1: debug(" Checking with external solver process") - debug(f" {args.solver_command} {smt2_filename} >{smt2_filename}.out") + debug(f" {args.solver_command} {smt2_filename} > {smt2_filename}.out") + + # XXX fix timeout # solver_timeout_assertion == 0 means no timeout, # which translates to timeout_seconds=None for subprocess.run - timeout_seconds = None - if timeout_millis := args.solver_timeout_assertion: - timeout_seconds = timeout_millis / 1000 + # timeout_seconds = None + # if timeout_millis := args.solver_timeout_assertion: + # timeout_seconds = timeout_millis / 1000 cmd = args.solver_command.split() + [smt2_filename] - try: - # XXX submit to PopenExecutor, process output in callback - res_str = subprocess.run( - cmd, capture_output=True, text=True, timeout=timeout_seconds - ).stdout.strip() - - # save solver output to file - with open(f"{smt2_filename}.out", "w") as f: - f.write(res_str) - - # extract the first line (we expect sat/unsat/unknown) - newline_idx = res_str.find("\n") - res_str_head = res_str[:newline_idx] if newline_idx != -1 else res_str - if args.verbose >= 1: - debug(f" {res_str_head}") - - match res_str_head: - case "unsat": - unsat_core = parse_unsat_core(res_str) if args.cache_solver else None - return SolverOutput(result=unsat, unsat_core=unsat_core) - case "sat": - model = PotentialModel(f"{smt2_filename}.out", args) - return SolverOutput(result=sat, model=model) - case _: - return SolverOutput(result=unknown) - - except subprocess.TimeoutExpired: - return SolverOutput(result=unknown) + thread_pool: PopenExecutor = ctx.fun_ctx.thread_pool + future = PopenFuture(cmd, metadata={"path_ctx": ctx}) + future.add_done_callback(solver_callback) + thread_pool.submit(future) def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool: @@ -979,23 +1010,6 @@ def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool: return False -def gen_model_from_sexpr(fn_args: GenModelArgs) -> ModelWithContext: - args, path_id, sexpr = fn_args.args, fn_args.path_id, fn_args.sexpr - - res, model, unsat_core = solve(sexpr, args) - - # XXX fix refinement - - # if res == sat and not model.is_valid: - # if args.verbose >= 1: - # print(" Checking again with refinement") - - # refined_filename = dump_filename.replace(".smt2", ".refined.smt2") - # res, model, unsat_core = solve(refine(sexpr), args, refined_filename) - - return package_result(model, path_id, res, unsat_core, args) - - def refine(query: SMTQuery) -> SMTQuery: smtlib = query.smtlib @@ -1018,27 +1032,27 @@ def refine(query: SMTQuery) -> SMTQuery: return SMTQuery(smtlib, query.assertions) -def package_result( - model: PotentialModel | None, - idx: int, - result: CheckSatResult, - unsat_core: list | None, - args: HalmosConfig, -) -> ModelWithContext: - if result == unsat: - if args.verbose >= 1: - print(f" Invalid path; ignored (path id: {idx+1})") - return ModelWithContext(None, idx, result, unsat_core) +# def package_result( +# model: PotentialModel | None, +# idx: int, +# result: CheckSatResult, +# unsat_core: list | None, +# args: HalmosConfig, +# ) -> ModelWithContext: +# if result == unsat: +# if args.verbose >= 1: +# print(f" Invalid path; ignored (path id: {idx+1})") +# return ModelWithContext(None, idx, result, unsat_core) - if result == sat: - if args.verbose >= 1: - print(f" Valid path; counterexample generated (path id: {idx+1})") - return ModelWithContext(model, idx, result, None) +# if result == sat: +# if args.verbose >= 1: +# print(f" Valid path; counterexample generated (path id: {idx+1})") +# return ModelWithContext(model, idx, result, None) - else: - if args.verbose >= 1: - print(f" Timeout (path id: {idx+1})") - return ModelWithContext(None, idx, result, None) +# else: +# if args.verbose >= 1: +# print(f" Timeout (path id: {idx+1})") +# return ModelWithContext(None, idx, result, None) def is_model_valid(model: ModelRef | str) -> bool: @@ -1389,6 +1403,7 @@ def on_signal(signum, frame): contract_ctx = ContractContext( args=contract_args, name=contract_name, + funsigs=funsigs, creation_hexcode=creation_hexcode, deployed_hexcode=deployed_hexcode, abi=abi, diff --git a/src/halmos/processes.py b/src/halmos/processes.py index 1396c799..ca117860 100644 --- a/src/halmos/processes.py +++ b/src/halmos/processes.py @@ -16,9 +16,10 @@ class PopenFuture(concurrent.futures.Future): returncode: int | None start_time: float | None end_time: float | None + metadata: dict | None _exception: Exception | None - def __init__(self, cmd: list[str]): + def __init__(self, cmd: list[str], metadata: dict | None = None): super().__init__() self.cmd = cmd self.process = None @@ -27,6 +28,7 @@ def __init__(self, cmd: list[str]): self.returncode = None self.start_time = None self.end_time = None + self.metadata = metadata self._exception = None def start(self): @@ -122,18 +124,23 @@ def __init__(self, max_workers: int = 1): # TODO: support max_workers - def submit(self, command: list[str]): + @property + def futures(self): + return self._futures + + def submit(self, future: PopenFuture): + """Accepts an unstarted PopenFuture and schedules it for execution.""" + if self._shutdown.is_set(): raise RuntimeError("Cannot submit to a shutdown executor.") - future = PopenFuture(command) with self._lock: self._futures.append(future) future.start() return future def shutdown(self, wait=True, cancel_futures=False): - # TODO: support max_workers + # TODO: support max_workers / pending futures self._shutdown.set() @@ -162,18 +169,8 @@ def _join(self): def main(): - # example usage - with PopenExecutor() as executor: - # Submit multiple commands - commands = [ - "sleep 1", - "sleep 10", - "echo hello", - ] - - futures = [executor.submit(command.split()) for command in commands] - + # example usage def done_callback(future: PopenFuture): stdout, stderr, exitcode = future.result() cmd = " ".join(future.cmd) @@ -187,8 +184,18 @@ def done_callback(future: PopenFuture): ) executor.shutdown(wait=False) + # Submit multiple commands + commands = [ + "sleep 1", + "sleep 10", + "echo hello", + ] + + futures = [PopenFuture(command.split()) for command in commands] + for future in futures: future.add_done_callback(done_callback) + executor.submit(future) # exiting the context manager will shutdown the executor with wait=True # so no new futures can be submitted diff --git a/src/halmos/traces.py b/src/halmos/traces.py index 53dc1bd2..c5d58bc8 100644 --- a/src/halmos/traces.py +++ b/src/halmos/traces.py @@ -5,16 +5,14 @@ from halmos.bytevec import ByteVec from halmos.exceptions import HalmosException -from halmos.sevm import CallContext, EventLog +from halmos.mapper import DeployAddressMapper, Mapper +from halmos.sevm import CallContext, EventLog, mnemonic from halmos.utils import ( - DeployAddressMapper, - Mapper, byte_length, cyan, green, hexify, is_bv, - mnemonic, red, unbox_int, yellow, From e72b36d81fef163c1b44aecf643b7fc3e5d9354d Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 16 Jan 2025 15:59:31 -0800 Subject: [PATCH 07/46] add triage for solver models --- src/halmos/__main__.py | 112 ++++++++++++++++++++++++---------------- src/halmos/processes.py | 3 ++ 2 files changed, 71 insertions(+), 44 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 07bf3e66..75037d1c 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -182,8 +182,11 @@ class FunctionContext: # list of solver outputs for this function _solver_outputs: list["SolverOutput"] = field(default_factory=list) - # list of counterexamples for this function - counterexamples: list[PotentialModel] = field(default_factory=list) + # list of valid counterexamples for this function + valid_counterexamples: list[PotentialModel] = field(default_factory=list) + + # list of potentially invalid counterexamples for this function + invalid_counterexamples: list[PotentialModel] = field(default_factory=list) # list of unsat cores for this function unsat_cores: list[list] = field(default_factory=list) @@ -201,6 +204,53 @@ def __post_init__(self): def add_solver_output(self, solver_output: "SolverOutput"): self._solver_outputs.append(solver_output) + args = self.args + path_id = solver_output.path_id + model, result = solver_output.model, solver_output.result + + # retrieve cached exec and clear the cache entry + exec = self.exec_cache.pop(path_id, None) + + if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: + id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else "" + print(f"Trace{id_str}:") + print(self.traces[path_id], end="") + + if args.print_failed_states: + print(f"# {path_id}") + print(exec) + + if self.thread_pool.is_shutdown(): + # if the thread pool is in the process of shutting down, + # we want to stop processing remaining models/timeouts/errors, etc. + return + + if result == unsat: + if solver_output.unsat_core: + self.unsat_cores.append(solver_output.unsat_core) + return + + self.add_model(model, result) + + def add_model(self, model: PotentialModel | None, result: CheckSatResult): + """Triage a new model, potentially adding it to the counterexamples list""" + + # model could be an empty dict here, so compare to None explicitly + if model is None: + warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}") + return + + if model.is_valid: + print(red(f"Counterexample: {model}")) + self.valid_counterexamples.append(model) + + # we have a valid counterexample, so we are eligible for early exit + if self.args.early_exit: + self.thread_pool.shutdown(wait=False) + else: + warn_str = f"Counterexample (potentially invalid): {model}" + warn_code(COUNTEREXAMPLE_INVALID, warn_str) + self.invalid_counterexamples.append(model) @property def solver_outputs(self): @@ -682,10 +732,13 @@ def run_test(ctx: FunctionContext) -> TestResult: f" (--solver-threads {args.solver_threads})" ) + # # display assertion solving progress + # + if not args.no_status or args.early_exit: while True: - if args.early_exit and len(ctx.counterexamples) > 0: + if args.early_exit and ctx.valid_counterexamples: break done = sum(fm.done() for fm in ctx.thread_pool.futures) total = potential @@ -695,10 +748,14 @@ def run_test(ctx: FunctionContext) -> TestResult: sevm.status.update(f"[{elapsed}] solving queries: {done} / {total}") time.sleep(0.1) - if args.early_exit: - ctx.thread_pool.shutdown(wait=False, cancel_futures=True) - else: - ctx.thread_pool.shutdown(wait=True) + ctx.thread_pool.shutdown(wait=True) + + timer.stop() + time_info = timer.report(include_subtimers=args.statistics) + + # + # print test result + # counter = Counter(str(m.result) for m in ctx.solver_outputs) if counter["sat"] > 0: @@ -752,14 +809,15 @@ def run_test(ctx: FunctionContext) -> TestResult: json.dump(steps, json_file) # return test result + num_cexes = len(ctx.valid_counterexamples) + len(ctx.invalid_counterexamples) if args.minimal_json_output: - return TestResult(funsig, exitcode, len(ctx.counterexamples)) + return TestResult(funsig, exitcode, num_cexes) else: return TestResult( funsig, exitcode, - len(ctx.counterexamples), - ctx.counterexamples, + num_cexes, + ctx.valid_counterexamples + ctx.invalid_counterexamples, (num_execs, normal, len(stuck)), (timer.elapsed(), timer["paths"].elapsed(), timer["models"].elapsed()), len(logs.bounded_loops), @@ -944,40 +1002,6 @@ def solver_callback(future: PopenFuture): fun_ctx = path_ctx.fun_ctx fun_ctx.add_solver_output(solver_output) - model, result = solver_output.model, solver_output.result - - # retrieve cached exec and clear the cache entry - exec = fun_ctx.exec_cache.pop(path_id, None) - - if result == unsat: - if solver_output.unsat_core: - fun_ctx.unsat_cores.append(solver_output.unsat_core) - return - - # model could be an empty dict here - if model is not None: - if model.is_valid: - print(red(f"Counterexample: {model}")) - fun_ctx.counterexamples.append(model) - else: - warn_code( - COUNTEREXAMPLE_INVALID, - f"Counterexample (potentially invalid): {model}", - ) - fun_ctx.counterexamples.append(model) - else: - warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}") - - if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: - print( - f"Trace #{path_id}:" if args.verbose == VERBOSITY_TRACE_PATHS else "Trace:" - ) - print(fun_ctx.traces[path_id], end="") - - if args.print_failed_states: - print(f"# {path_id}") - print(exec) - def solve(ctx: PathContext): args = ctx.fun_ctx.args diff --git a/src/halmos/processes.py b/src/halmos/processes.py index ca117860..04c95c47 100644 --- a/src/halmos/processes.py +++ b/src/halmos/processes.py @@ -139,6 +139,9 @@ def submit(self, future: PopenFuture): future.start() return future + def is_shutdown(self) -> bool: + return self._shutdown.is_set() + def shutdown(self, wait=True, cancel_futures=False): # TODO: support max_workers / pending futures From 45d9e67168d9d2f71c8a1d155086bd6a7cb6960e Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 20 Dec 2024 14:54:21 -0800 Subject: [PATCH 08/46] add basic smtlib parsing --- src/halmos/smtlib.py | 92 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_halmos.py | 3 +- tests/test_smtlib.py | 87 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 src/halmos/smtlib.py create mode 100644 tests/test_smtlib.py diff --git a/src/halmos/smtlib.py b/src/halmos/smtlib.py new file mode 100644 index 00000000..f20b2ca8 --- /dev/null +++ b/src/halmos/smtlib.py @@ -0,0 +1,92 @@ +import re +from dataclasses import dataclass + +from halmos.logs import logger + +# Regular expression for capturing halmos variables +halmos_pattern = re.compile( + r""" + \(\s*define-fun\s+ # Match "(define-fun" + \|?(halmos_[^ |]+)\|?\s+ # Capture the full variable name, optionally wrapped in "|" + \(\)\s+\(_\s+([^ ]+)\s+ # Capture the SMTLIB type (e.g., "BitVec 256") + (\d+)\)\s+ # Capture the bit-width or type argument + ( # Group for the value + \#b[01]+ # Binary value (e.g., "#b1010") + |\#x[0-9a-fA-F]+ # Hexadecimal value (e.g., "#xFF") + |\(_\s+bv\d+\s+\d+\) # Decimal value (e.g., "(_ bv42 256)") + ) + """, + re.VERBOSE, +) + + +@dataclass +class ModelVariable: + full_name: str + variable_name: str + solidity_type: str + smt_type: str + size_bits: int + value: int + + +def parse_file(file_path: str) -> dict: + with open(file_path) as file: + return parse_string(file.read()) + + +def parse_const_value(value: str) -> int: + if value.startswith("#b"): + return int(value[2:], 2) + + if value.startswith("#x"): + return int(value[2:], 16) + + # we may have a group like (_ bv123 256) + tokens = value.split() + for token in tokens: + if token.startswith("bv"): + return int(token[2:]) + + raise ValueError(f"unknown value format: {value}") + + +def parse_match(match: re.Match) -> ModelVariable: + full_name = match.group(1).strip() + smt_type = f"{match.group(2)} {match.group(3)}" + size_bits = int(match.group(3)) + value = parse_const_value(match.group(4)) + + # Extract name and typename from the variable name + parts = full_name.split("_") + variable_name = parts[1] + solidity_type = parts[2] + + return ModelVariable( + full_name=full_name, + variable_name=variable_name, + solidity_type=solidity_type, + smt_type=smt_type, + size_bits=size_bits, + value=value, + ) + + +def parse_string(smtlib_str: str) -> dict[str, ModelVariable]: + model_variables: dict[str, ModelVariable] = {} + + # use a regex to find all the variables + # for now we explicitly don't try to properly parse the smtlib output + # because of idiosyncrasies of different solvers: + # - ignores the initial sat/unsat on the first line + # - ignores the occasional `(model)` command used by yices, stp, cvc4, etc. + + for match in halmos_pattern.finditer(smtlib_str): + try: + variable = parse_match(match) + model_variables[variable.full_name] = variable + except Exception as e: + logger.error(f"error parsing smtlib string '{match.string.strip()}': {e}") + raise e + + return model_variables diff --git a/tests/test_halmos.py b/tests/test_halmos.py index 3cd40d41..a8aba028 100644 --- a/tests/test_halmos.py +++ b/tests/test_halmos.py @@ -3,9 +3,10 @@ import pytest -from halmos.__main__ import _main, rendered_calldata +from halmos.__main__ import _main from halmos.bytevec import ByteVec from halmos.sevm import con +from halmos.traces import rendered_calldata @pytest.mark.parametrize( diff --git a/tests/test_smtlib.py b/tests/test_smtlib.py new file mode 100644 index 00000000..0e3d2aaa --- /dev/null +++ b/tests/test_smtlib.py @@ -0,0 +1,87 @@ +from halmos.smtlib import ModelVariable, parse_string + + +def test_smtlib_z3_bv_output(): + full_name = "halmos_y_uint256_043cfd7_01" + smtlib_str = f""" + (define-fun {full_name} () (_ BitVec 256) + #x0000000000000000000000000000000000000000000000000000000000000000) + """ + model = parse_string(smtlib_str) + + assert model[full_name] == ModelVariable( + full_name=full_name, + variable_name="y", + solidity_type="uint256", + smt_type="BitVec 256", + size_bits=256, + value=0, + ) + + +# note that yices only produces output like this with --smt2-model-format +# otherwise we get something like (= x #b00000100) +def test_smtlib_yices_binary_output(): + full_name = "halmos_z_uint256_cabf047_02" + smtlib_str = f""" + (define-fun + {full_name} + () + (_ BitVec 256) + #b1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) + """ + model = parse_string(smtlib_str) + assert model[full_name] == ModelVariable( + full_name=full_name, + variable_name="z", + solidity_type="uint256", + smt_type="BitVec 256", + size_bits=256, + value=1 << 255, + ) + + +def test_smtlib_yices_decimal_output(): + full_name = "halmos_z_uint256_11ce021_08" + val = 57896044618658097711785492504343953926634992332820282019728792003956564819968 + smtlib_str = f""" + (define-fun {full_name} () (_ BitVec 256) (_ bv{val} 256)) + """ + model = parse_string(smtlib_str) + assert model[full_name] == ModelVariable( + full_name=full_name, + variable_name="z", + solidity_type="uint256", + smt_type="BitVec 256", + size_bits=256, + value=val, + ) + + +def test_smtlib_stp_output(): + full_name = "halmos_x_uint8_043cfd7_01" + + # we should tolerate: + # - the extra (model) command + # - duplicate variable names + # - the initial `sat` result + # - the `|` around the variable name + # - the space in `( define-fun ...)` + smtlib_str = f""" + sat + (model + ( define-fun |{full_name}| () (_ BitVec 8) #x04 ) + ) + (model + ( define-fun |{full_name}| () (_ BitVec 8) #x04 ) + ) + """ + model = parse_string(smtlib_str) + assert model[full_name] == ModelVariable( + full_name=full_name, + variable_name="x", + solidity_type="uint8", + smt_type="BitVec 8", + size_bits=8, + value=4, + ) From 1bdafec2e201d3fb6f895d6e8f9f7db0482ab8e7 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 20 Dec 2024 17:53:09 -0800 Subject: [PATCH 09/46] actually parse external solver output using smtlib --- src/halmos/__main__.py | 92 ++++++++++++------------------------------ src/halmos/smtlib.py | 44 +++++++++++--------- 2 files changed, 51 insertions(+), 85 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 75037d1c..981b6dda 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -21,7 +21,6 @@ from z3 import ( BitVec, CheckSatResult, - ModelRef, Solver, sat, set_option, @@ -74,6 +73,7 @@ jumpid_str, mnemonic, ) +from .smtlib import ModelVariables, parse_string from .traces import render_trace, rendered_trace from .utils import ( NamedTimer, @@ -90,9 +90,6 @@ yellow, ) -StrModel = dict[str, str] -AnyModel = StrModel | str - # Python version >=3.8.14, >=3.9.14, >=3.10.7, or >=3.11 if hasattr(sys, "set_int_max_str_digits"): sys.set_int_max_str_digits(0) @@ -112,26 +109,27 @@ @dataclass class PotentialModel: - model: AnyModel - is_valid: bool - - def __init__(self, model: ModelRef | str, args: HalmosConfig) -> None: - # convert model into string to avoid pickling errors for z3 (ctypes) objects containing pointers - self.model = ( - to_str_model(model, args.print_full_model) - if isinstance(model, ModelRef) - else model - ) - self.is_valid = is_model_valid(model) + model: ModelVariables | str | None def __str__(self) -> str: # expected to be a filename if isinstance(self.model, str): return f"see {self.model}" - formatted = [f"\n {decl} = {val}" for decl, val in self.model.items()] + formatted = [] + for v in self.model.values(): + stringified = stringify(v.full_name, v.value) + formatted.append(f"\n {v.full_name} = {stringified}") return "".join(sorted(formatted)) if formatted else "∅" + @property + def is_valid(self) -> bool: + if self.model is None: + return False + + needs_refinement = any(name.startswith("f_evm_") for name in self.model) + return not needs_refinement + @dataclass(frozen=True) class ContractContext: @@ -527,6 +525,7 @@ def setup(ctx: FunctionContext) -> Exec: setup_exs.append(setup_exs_no_error[0][0]) case _: for setup_ex, query in setup_exs_no_error: + # XXX fix with new interface res, _, _ = solve(query, args) if res != unsat: setup_exs.append(setup_ex) @@ -690,7 +689,13 @@ def run_test(ctx: FunctionContext) -> TestResult: print(" Already proven unsat") continue - solve(path_ctx) + try: + solve(path_ctx) + except RuntimeError: + # XXX use more specific exception + # XXX notify sevm that we're done + # if the thread pool is shut down, here + break # XXX handle refinement # XXX handle timeout @@ -736,6 +741,8 @@ def run_test(ctx: FunctionContext) -> TestResult: # display assertion solving progress # + # XXX clashes with the exploration progress status + if not args.no_status or args.early_exit: while True: if args.early_exit and ctx.valid_counterexamples: @@ -994,7 +1001,7 @@ def solver_callback(future: PopenFuture): unsat_core = parse_unsat_core(stdout) if args.cache_solver else None solver_output = SolverOutput(unsat, path_id, unsat_core=unsat_core) case "sat": - model = PotentialModel(f"{smt2_filename}.out", args) + model = PotentialModel(parse_string(stdout)) solver_output = SolverOutput(sat, path_id, model=model) case _: solver_output = SolverOutput(unknown, path_id) @@ -1056,55 +1063,6 @@ def refine(query: SMTQuery) -> SMTQuery: return SMTQuery(smtlib, query.assertions) -# def package_result( -# model: PotentialModel | None, -# idx: int, -# result: CheckSatResult, -# unsat_core: list | None, -# args: HalmosConfig, -# ) -> ModelWithContext: -# if result == unsat: -# if args.verbose >= 1: -# print(f" Invalid path; ignored (path id: {idx+1})") -# return ModelWithContext(None, idx, result, unsat_core) - -# if result == sat: -# if args.verbose >= 1: -# print(f" Valid path; counterexample generated (path id: {idx+1})") -# return ModelWithContext(model, idx, result, None) - -# else: -# if args.verbose >= 1: -# print(f" Timeout (path id: {idx+1})") -# return ModelWithContext(None, idx, result, None) - - -def is_model_valid(model: ModelRef | str) -> bool: - # TODO: evaluate the path condition against the given model after excluding f_evm_* symbols, - # since the f_evm_* symbols may still appear in valid models. - - # model is a filename, containing solver output - if isinstance(model, str): - with open(model) as f: - for line in f: - if "f_evm_" in line: - return False - return True - - # z3 model object - else: - return all(not str(decl).startswith("f_evm_") for decl in model) - - -def to_str_model(model: ModelRef, print_full_model: bool) -> StrModel: - def select(var): - name = str(var) - return name.startswith("p_") or name.startswith("halmos_") - - select_model = filter(select, model) if not print_full_model else model - return {str(decl): stringify(str(decl), model[decl]) for decl in select_model} - - def get_contract_type( ast_nodes: list, contract_name: str ) -> tuple[str | None, str | None]: diff --git a/src/halmos/smtlib.py b/src/halmos/smtlib.py index f20b2ca8..61eb23fe 100644 --- a/src/halmos/smtlib.py +++ b/src/halmos/smtlib.py @@ -3,6 +3,19 @@ from halmos.logs import logger + +@dataclass +class ModelVariable: + full_name: str + variable_name: str + solidity_type: str + smt_type: str + size_bits: int + value: int + + +ModelVariables = dict[str, ModelVariable] + # Regular expression for capturing halmos variables halmos_pattern = re.compile( r""" @@ -20,21 +33,6 @@ ) -@dataclass -class ModelVariable: - full_name: str - variable_name: str - solidity_type: str - smt_type: str - size_bits: int - value: int - - -def parse_file(file_path: str) -> dict: - with open(file_path) as file: - return parse_string(file.read()) - - def parse_const_value(value: str) -> int: if value.startswith("#b"): return int(value[2:], 2) @@ -51,7 +49,7 @@ def parse_const_value(value: str) -> int: raise ValueError(f"unknown value format: {value}") -def parse_match(match: re.Match) -> ModelVariable: +def _parse_match(match: re.Match) -> ModelVariable: full_name = match.group(1).strip() smt_type = f"{match.group(2)} {match.group(3)}" size_bits = int(match.group(3)) @@ -72,7 +70,12 @@ def parse_match(match: re.Match) -> ModelVariable: ) -def parse_string(smtlib_str: str) -> dict[str, ModelVariable]: +def parse_string(smtlib_str: str) -> ModelVariables: + """Expects a whole smtlib model output file, as produced by a solver + in response to a `(check-sat)` + `(get-model)` command. + + Extracts halmos variables and returns them grouped by their full name""" + model_variables: dict[str, ModelVariable] = {} # use a regex to find all the variables @@ -83,10 +86,15 @@ def parse_string(smtlib_str: str) -> dict[str, ModelVariable]: for match in halmos_pattern.finditer(smtlib_str): try: - variable = parse_match(match) + variable = _parse_match(match) model_variables[variable.full_name] = variable except Exception as e: logger.error(f"error parsing smtlib string '{match.string.strip()}': {e}") raise e return model_variables + + +def parse_file(file_path: str) -> ModelVariables: + with open(file_path) as file: + return parse_string(file.read()) From db346c094ec2006c17c564d0335f588c4e231037 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 16 Jan 2025 16:00:23 -0800 Subject: [PATCH 10/46] fix counterexample output --- src/halmos/__main__.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 981b6dda..075a00ca 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -118,7 +118,8 @@ def __str__(self) -> str: formatted = [] for v in self.model.values(): - stringified = stringify(v.full_name, v.value) + # TODO: ideally we would avoid wrapping with con() here + stringified = stringify(v.full_name, con(v.value)) formatted.append(f"\n {v.full_name} = {stringified}") return "".join(sorted(formatted)) if formatted else "∅" @@ -239,7 +240,7 @@ def add_model(self, model: PotentialModel | None, result: CheckSatResult): return if model.is_valid: - print(red(f"Counterexample: {model}")) + # we don't print the model here because this may be called from multiple threads self.valid_counterexamples.append(model) # we have a valid counterexample, so we are eligible for early exit @@ -764,6 +765,12 @@ def run_test(ctx: FunctionContext) -> TestResult: # print test result # + for model in ctx.valid_counterexamples: + print(red(f"Counterexample: {model}")) + + if args.early_exit: + break + counter = Counter(str(m.result) for m in ctx.solver_outputs) if counter["sat"] > 0: passfail = red("[FAIL]") @@ -977,7 +984,10 @@ def dump( def solver_callback(future: PopenFuture): - """Invoked by a PopenFuture when it is finished.""" + """Invoked by a PopenFuture when it is finished. + + Note that this may be invoked by different threads + (in particular, not the thread that scheduled the future)""" path_ctx: PathContext = future.metadata["path_ctx"] path_id = path_ctx.path_id @@ -1027,10 +1037,9 @@ def solve(ctx: PathContext): # timeout_seconds = timeout_millis / 1000 cmd = args.solver_command.split() + [smt2_filename] - thread_pool: PopenExecutor = ctx.fun_ctx.thread_pool future = PopenFuture(cmd, metadata={"path_ctx": ctx}) future.add_done_callback(solver_callback) - thread_pool.submit(future) + ctx.fun_ctx.thread_pool.submit(future) def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool: From bfe126dd412bfcd5f7e8b6d09204025f1868101b Mon Sep 17 00:00:00 2001 From: karmacoma Date: Tue, 7 Jan 2025 17:10:55 -0800 Subject: [PATCH 11/46] make z3 the default --solver-command (so we always dump queries) --- src/halmos/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/halmos/config.py b/src/halmos/config.py index 2d28e70f..dcf567ca 100644 --- a/src/halmos/config.py +++ b/src/halmos/config.py @@ -441,7 +441,7 @@ class Config: solver_command: str = arg( help="use the given command when invoking the solver", - global_default=None, + global_default="z3", metavar="COMMAND", group=solver, ) From cdf4e16f8190ceb3d9e7badbae0b5f27bc7e97ef Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 16 Jan 2025 16:02:59 -0800 Subject: [PATCH 12/46] bring back refinement --- src/halmos/__main__.py | 310 +++++++++++++++++++++++------------------ 1 file changed, 177 insertions(+), 133 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 075a00ca..a45f285c 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -12,6 +12,7 @@ import traceback import uuid from collections import Counter +from concurrent.futures import Future, ThreadPoolExecutor from copy import deepcopy from dataclasses import asdict, dataclass, field from datetime import timedelta @@ -110,6 +111,7 @@ @dataclass class PotentialModel: model: ModelVariables | str | None + is_valid: bool def __str__(self) -> str: # expected to be a filename @@ -123,14 +125,6 @@ def __str__(self) -> str: formatted.append(f"\n {v.full_name} = {stringified}") return "".join(sorted(formatted)) if formatted else "∅" - @property - def is_valid(self) -> bool: - if self.model is None: - return False - - needs_refinement = any(name.startswith("f_evm_") for name in self.model) - return not needs_refinement - @dataclass(frozen=True) class ContractContext: @@ -175,8 +169,11 @@ class FunctionContext: # dump directory for this function (generated in __post_init__) dump_dirname: str = field(init=False) + # function-level thread pool that drives assertion solving + thread_pool: ThreadPoolExecutor = field(init=False) + # path-specific queries are submitted to this function-specific executor - thread_pool: PopenExecutor = field(default_factory=PopenExecutor) + solver_executor: PopenExecutor = field(default_factory=PopenExecutor) # list of solver outputs for this function _solver_outputs: list["SolverOutput"] = field(default_factory=list) @@ -197,59 +194,14 @@ class FunctionContext: exec_cache: dict[int, Exec] = field(default_factory=dict) def __post_init__(self): - object.__setattr__( - self, "dump_dirname", f"/tmp/{self.info.name}-{uuid.uuid4().hex}" - ) - - def add_solver_output(self, solver_output: "SolverOutput"): - self._solver_outputs.append(solver_output) - args = self.args - path_id = solver_output.path_id - model, result = solver_output.model, solver_output.result - - # retrieve cached exec and clear the cache entry - exec = self.exec_cache.pop(path_id, None) - - if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: - id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else "" - print(f"Trace{id_str}:") - print(self.traces[path_id], end="") - - if args.print_failed_states: - print(f"# {path_id}") - print(exec) - - if self.thread_pool.is_shutdown(): - # if the thread pool is in the process of shutting down, - # we want to stop processing remaining models/timeouts/errors, etc. - return - - if result == unsat: - if solver_output.unsat_core: - self.unsat_cores.append(solver_output.unsat_core) - return - - self.add_model(model, result) + dirname = f"/tmp/{self.info.name}-{uuid.uuid4().hex}" + object.__setattr__(self, "dump_dirname", dirname) - def add_model(self, model: PotentialModel | None, result: CheckSatResult): - """Triage a new model, potentially adding it to the counterexamples list""" - - # model could be an empty dict here, so compare to None explicitly - if model is None: - warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}") - return - - if model.is_valid: - # we don't print the model here because this may be called from multiple threads - self.valid_counterexamples.append(model) - - # we have a valid counterexample, so we are eligible for early exit - if self.args.early_exit: - self.thread_pool.shutdown(wait=False) - else: - warn_str = f"Counterexample (potentially invalid): {model}" - warn_code(COUNTEREXAMPLE_INVALID, warn_str) - self.invalid_counterexamples.append(model) + thread_pool = ThreadPoolExecutor( + max_workers=self.args.solver_threads, + thread_name_prefix=f"{self.info.name}-", + ) + object.__setattr__(self, "thread_pool", thread_pool) @property def solver_outputs(self): @@ -273,9 +225,23 @@ class PathContext: # filename for this path (generated in __post_init__) dump_filename: str = field(init=False) + is_refined: bool = False + def __post_init__(self): - object.__setattr__( - self, "dump_filename", f"{self.fun_ctx.dump_dirname}/{self.path_id}.smt2" + refined_str = ".refined" if self.is_refined else "" + dirname = self.fun_ctx.dump_dirname + filename = os.path.join(dirname, f"{self.path_id}{refined_str}.smt2") + + # use __setattr__ because this is a frozen dataclass + object.__setattr__(self, "dump_filename", filename) + + def refine(self) -> "PathContext": + return PathContext( + path_id=self.path_id, + ex=self.ex, + query=refine(self.query), + fun_ctx=self.fun_ctx, + is_refined=True, ) @@ -294,6 +260,30 @@ class SolverOutput: # optional unsat core unsat_core: list[str] | None = None + @staticmethod + def from_result( + stdout: str, stderr: str, returncode: int, path_ctx: PathContext + ) -> "SolverOutput": + # extract the first line (we expect sat/unsat/unknown) + newline_idx = stdout.find("\n") + first_line = stdout[:newline_idx] if newline_idx != -1 else stdout + + args = path_ctx.fun_ctx.args + path_id = path_ctx.path_id + if args.verbose >= 1: + debug(f" {first_line}") + + match first_line: + case "unsat": + unsat_core = parse_unsat_core(stdout) if args.cache_solver else None + return SolverOutput(unsat, path_id, unsat_core=unsat_core) + case "sat": + is_valid = is_model_valid(stdout) + model = PotentialModel(model=parse_string(stdout), is_valid=is_valid) + return SolverOutput(sat, path_id, model=model) + case _: + return SolverOutput(unknown, path_id) + @dataclass(frozen=True) class TestResult: @@ -526,9 +516,8 @@ def setup(ctx: FunctionContext) -> Exec: setup_exs.append(setup_exs_no_error[0][0]) case _: for setup_ex, query in setup_exs_no_error: - # XXX fix with new interface - res, _, _ = solve(query, args) - if res != unsat: + solver_output = solve_low_level(query, args) + if solver_output.result != unsat: setup_exs.append(setup_ex) if len(setup_exs) > 1: break @@ -645,6 +634,7 @@ def run_test(ctx: FunctionContext) -> TestResult: # path_id = 0 # default value in case we don't enter the loop body + submitted_futures = [] for path_id, ex in enumerate(exs): # cache exec in case we need to print it later if args.print_failed_states: @@ -682,21 +672,13 @@ def run_test(ctx: FunctionContext) -> TestResult: fun_ctx=ctx, ) - dump(path_ctx) + def log_future_result(future: Future): + if e := future.exception(): + error(f"encountered exception during assertion solving: {e}") - # if the query contains an unsat-core, it is unsat; no need to run the solver - if check_unsat_cores(query, ctx.unsat_cores): - if args.verbose >= 1: - print(" Already proven unsat") - continue - - try: - solve(path_ctx) - except RuntimeError: - # XXX use more specific exception - # XXX notify sevm that we're done - # if the thread pool is shut down, here - break + solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx) + solve_future.add_done_callback(log_future_result) + submitted_futures.append(solve_future) # XXX handle refinement # XXX handle timeout @@ -742,13 +724,9 @@ def run_test(ctx: FunctionContext) -> TestResult: # display assertion solving progress # - # XXX clashes with the exploration progress status - if not args.no_status or args.early_exit: while True: - if args.early_exit and ctx.valid_counterexamples: - break - done = sum(fm.done() for fm in ctx.thread_pool.futures) + done = sum(fm.done() for fm in submitted_futures) total = potential if done == total: break @@ -919,15 +897,6 @@ def run_contract(ctx: ContractContext) -> list[TestResult]: return test_results -@dataclass(frozen=True) -class GenModelArgs: - args: HalmosConfig - path_id: int - sexpr: SMTQuery - known_unsat_cores: list[list] - dump_dirname: str | None = None - - def parse_unsat_core(output) -> list[str] | None: # parsing example: # unsat @@ -983,50 +952,27 @@ def dump( f.write("(get-model)\n") -def solver_callback(future: PopenFuture): - """Invoked by a PopenFuture when it is finished. +def is_model_valid(solver_stdout: str) -> bool: + # TODO: evaluate the path condition against the given model after excluding f_evm_* symbols, + # since the f_evm_* symbols may still appear in valid models. - Note that this may be invoked by different threads - (in particular, not the thread that scheduled the future)""" + return "f_evm_" not in solver_stdout - path_ctx: PathContext = future.metadata["path_ctx"] - path_id = path_ctx.path_id - args = path_ctx.fun_ctx.args - smt2_filename = path_ctx.dump_filename - stdout, stderr, returncode = future.result() - - # save solver output to file - with open(f"{smt2_filename}.out", "w") as f: - f.write(stdout) - # extract the first line (we expect sat/unsat/unknown) - newline_idx = stdout.find("\n") - first_line = stdout[:newline_idx] if newline_idx != -1 else stdout - if args.verbose >= 1: - debug(f" {first_line}") - - solver_output = None - match first_line: - case "unsat": - unsat_core = parse_unsat_core(stdout) if args.cache_solver else None - solver_output = SolverOutput(unsat, path_id, unsat_core=unsat_core) - case "sat": - model = PotentialModel(parse_string(stdout)) - solver_output = SolverOutput(sat, path_id, model=model) - case _: - solver_output = SolverOutput(unknown, path_id) +def solve_low_level(path_ctx: PathContext) -> SolverOutput: + """Invokes an external solver process to solve the given query. - fun_ctx = path_ctx.fun_ctx - fun_ctx.add_solver_output(solver_output) + Can raise TimeoutError or some Exception raised during execution""" + fun_ctx, smt2_filename = path_ctx.fun_ctx, path_ctx.dump_filename + args = fun_ctx.args -def solve(ctx: PathContext): - args = ctx.fun_ctx.args - smt2_filename = ctx.dump_filename + # make sure the smt2 file has been written + dump(path_ctx) if args.verbose >= 1: - debug(" Checking with external solver process") - debug(f" {args.solver_command} {smt2_filename} > {smt2_filename}.out") + print(" Checking with external solver process") + print(f" {args.solver_command} {smt2_filename} > {smt2_filename}.out") # XXX fix timeout @@ -1037,9 +983,107 @@ def solve(ctx: PathContext): # timeout_seconds = timeout_millis / 1000 cmd = args.solver_command.split() + [smt2_filename] - future = PopenFuture(cmd, metadata={"path_ctx": ctx}) - future.add_done_callback(solver_callback) - ctx.fun_ctx.thread_pool.submit(future) + future = PopenFuture(cmd, metadata={"path_ctx": path_ctx}) + + # XXX avoiding callbacks now + # future.add_done_callback(solver_callback) + + fun_ctx.solver_executor.submit(future) + + # block until the external solver returns, times out, is interrupted, fails, etc. + stdout, stderr, returncode = future.result() + + # save solver stdout to file + with open(f"{smt2_filename}.out", "w") as f: + f.write(stdout) + + # save solver stderr to file (only if there is an error) + if stderr: + with open(f"{smt2_filename}.err", "w") as f: + f.write(stderr) + + return SolverOutput.from_result(stdout, stderr, returncode, path_ctx) + + +# XXX used to be gen_model_from_sexpr +def solve_end_to_end(ctx: PathContext) -> None: + """Synchronously resolves a query in a given context, which may result in 0, 1 or multiple solver invocations. + + - may result in 0 invocations if the query contains a known unsat core (hence the need for the context) + - may result in exactly 1 invocation if the query is unsat, or sat with a valid model + - may result in multiple invocations if the query is sat and the model is invalid (needs refinement) + + If this produces a model, it _should_ be valid. + """ + fun_ctx, path_id, query = ctx.fun_ctx, ctx.path_id, ctx.query + args, unsat_cores = fun_ctx.args, fun_ctx.unsat_cores + + verbose = print if args.verbose >= 1 else lambda *args, **kwargs: None + verbose(f"Checking path condition {path_id=}") + + # if the query contains an unsat-core, it is unsat; no need to run the solver + if check_unsat_cores(query, unsat_cores): + verbose(" Already proven unsat") + return SolverOutput(unsat, path_id) + + solver_output = solve_low_level(ctx) + result, model = solver_output.result, solver_output.model + + # if the ctx is already refined, we don't need to solve again + if result == sat and not model.is_valid and not ctx.is_refined: + verbose(" Checking again with refinement") + + refined_ctx = ctx.refine() + + if refined_ctx.query.smtlib != query.smtlib: + # recurse with the refined query + return solve_end_to_end(refined_ctx) + else: + verbose(" Refinement did not change the query, no need to solve again") + + # + # we are done solving, process and triage the result + # + + # retrieve cached exec and clear the cache entry + exec = fun_ctx.exec_cache.pop(path_id, None) + + if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: + id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else "" + print(f"Trace{id_str}:") + print(fun_ctx.traces[path_id], end="") + + if args.print_failed_states: + print(f"# {path_id}") + print(exec) + + if fun_ctx.solver_executor.is_shutdown(): + # if the thread pool is in the process of shutting down, + # we want to stop processing remaining models/timeouts/errors, etc. + return + + if result == unsat: + if solver_output.unsat_core: + fun_ctx.unsat_cores.append(solver_output.unsat_core) + return + + # model could be an empty dict here, so compare to None explicitly + if model is None: + warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}") + return + + if model.is_valid: + # we don't print the model here because this may be called from multiple threads + fun_ctx.valid_counterexamples.append(model) + + # we have a valid counterexample, so we are eligible for early exit + if args.early_exit: + fun_ctx.solver_executor.shutdown(wait=False) + else: + # XXX avoid printing here + warn_str = f"Counterexample (potentially invalid): {model}" + warn_code(COUNTEREXAMPLE_INVALID, warn_str) + fun_ctx.invalid_counterexamples.append(model) def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool: From 175ede373537a5c758933036c56cf08b5ceb227f Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 8 Jan 2025 16:22:28 -0800 Subject: [PATCH 13/46] fix solve invocation in setup --- src/halmos/__main__.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index a45f285c..f46d8abb 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -484,17 +484,14 @@ def setup(ctx: FunctionContext) -> Exec: ) setup_exs_all = sevm.run(setup_ex) - setup_exs_no_error = [] + setup_exs_no_error: list[PathContext] = [] - for idx, setup_ex in enumerate(setup_exs_all): + for path_id, setup_ex in enumerate(setup_exs_all): if args.verbose >= VERBOSITY_TRACE_SETUP: - print(f"{setup_sig} trace #{idx+1}:") + print(f"{setup_sig} trace #{path_id}:") render_trace(setup_ex.context) - if not (err := setup_ex.context.output.error): - setup_exs_no_error.append((setup_ex, setup_ex.path.to_smt2(args))) - - else: + if err := setup_ex.context.output.error: opcode = setup_ex.current_opcode() if opcode not in [EVM.REVERT, EVM.INVALID]: warn_code( @@ -507,7 +504,16 @@ def setup(ctx: FunctionContext) -> Exec: print(f"{setup_sig} trace:") render_trace(setup_ex.context) - setup_exs = [] + else: + path_ctx = PathContext( + path_id=path_id, + ex=setup_ex, + query=setup_ex.path.to_smt2(args), + fun_ctx=ctx, + ) + setup_exs_no_error.append(path_ctx) + + setup_exs: list[Exec] = [] match len(setup_exs_no_error): case 0: @@ -515,10 +521,10 @@ def setup(ctx: FunctionContext) -> Exec: case 1: setup_exs.append(setup_exs_no_error[0][0]) case _: - for setup_ex, query in setup_exs_no_error: - solver_output = solve_low_level(query, args) + for path_ctx in setup_exs_no_error: + solver_output = solve_low_level(path_ctx) if solver_output.result != unsat: - setup_exs.append(setup_ex) + setup_exs.append(path_ctx.ex) if len(setup_exs) > 1: break From cf85376d7f7f28c62451d5e4119f5206a956f4b1 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 16 Jan 2025 16:03:39 -0800 Subject: [PATCH 14/46] bring back timeouts, clean children after timeout --- src/halmos/__main__.py | 43 +++++++++++--------------------- src/halmos/processes.py | 55 +++++++++++++++++++++++------------------ 2 files changed, 46 insertions(+), 52 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index f46d8abb..0ad73710 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -49,7 +49,7 @@ warn_code, ) from .mapper import BuildOut, DeployAddressMapper, Mapper -from .processes import PopenExecutor, PopenFuture +from .processes import PopenExecutor, PopenFuture, TimeoutExpired from .sevm import ( EMPTY_BALANCE, EVM, @@ -176,7 +176,7 @@ class FunctionContext: solver_executor: PopenExecutor = field(default_factory=PopenExecutor) # list of solver outputs for this function - _solver_outputs: list["SolverOutput"] = field(default_factory=list) + solver_outputs: list["SolverOutput"] = field(default_factory=list) # list of valid counterexamples for this function valid_counterexamples: list[PotentialModel] = field(default_factory=list) @@ -203,10 +203,6 @@ def __post_init__(self): ) object.__setattr__(self, "thread_pool", thread_pool) - @property - def solver_outputs(self): - return self._solver_outputs - @dataclass(frozen=True) class PathContext: @@ -680,15 +676,12 @@ def run_test(ctx: FunctionContext) -> TestResult: def log_future_result(future: Future): if e := future.exception(): - error(f"encountered exception during assertion solving: {e}") + error(f"encountered exception during assertion solving: {e!r}") solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx) solve_future.add_done_callback(log_future_result) submitted_futures.append(solve_future) - # XXX handle refinement - # XXX handle timeout - elif ex.context.is_stuck(): debug(f"Potential error path (id: {idx+1})") res, _, _ = solve(ex.path.to_smt2(args), args) @@ -749,12 +742,6 @@ def log_future_result(future: Future): # print test result # - for model in ctx.valid_counterexamples: - print(red(f"Counterexample: {model}")) - - if args.early_exit: - break - counter = Counter(str(m.result) for m in ctx.solver_outputs) if counter["sat"] > 0: passfail = red("[FAIL]") @@ -980,24 +967,21 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput: print(" Checking with external solver process") print(f" {args.solver_command} {smt2_filename} > {smt2_filename}.out") - # XXX fix timeout - # solver_timeout_assertion == 0 means no timeout, # which translates to timeout_seconds=None for subprocess.run - # timeout_seconds = None - # if timeout_millis := args.solver_timeout_assertion: - # timeout_seconds = timeout_millis / 1000 + timeout_seconds = t / 1000 if (t := args.solver_timeout_assertion) else None cmd = args.solver_command.split() + [smt2_filename] - future = PopenFuture(cmd, metadata={"path_ctx": path_ctx}) - - # XXX avoiding callbacks now - # future.add_done_callback(solver_callback) + future = PopenFuture(cmd, timeout=timeout_seconds) + # starts the subprocess asynchronously fun_ctx.solver_executor.submit(future) # block until the external solver returns, times out, is interrupted, fails, etc. - stdout, stderr, returncode = future.result() + try: + stdout, stderr, returncode = future.result() + except TimeoutExpired: + return SolverOutput(result=unknown, path_id=path_ctx.path_id) # save solver stdout to file with open(f"{smt2_filename}.out", "w") as f: @@ -1068,6 +1052,9 @@ def solve_end_to_end(ctx: PathContext) -> None: # we want to stop processing remaining models/timeouts/errors, etc. return + # keep track of the solver outputs, so that we can display PASS/FAIL/TIMEOUT/ERROR later + fun_ctx.solver_outputs.append(solver_output) + if result == unsat: if solver_output.unsat_core: fun_ctx.unsat_cores.append(solver_output.unsat_core) @@ -1079,14 +1066,14 @@ def solve_end_to_end(ctx: PathContext) -> None: return if model.is_valid: - # we don't print the model here because this may be called from multiple threads + print(red(f"Counterexample: {model}")) + fun_ctx.valid_counterexamples.append(model) # we have a valid counterexample, so we are eligible for early exit if args.early_exit: fun_ctx.solver_executor.shutdown(wait=False) else: - # XXX avoid printing here warn_str = f"Counterexample (potentially invalid): {model}" warn_code(COUNTEREXAMPLE_INVALID, warn_str) fun_ctx.invalid_counterexamples.append(model) diff --git a/src/halmos/processes.py b/src/halmos/processes.py index 04c95c47..1eff7389 100644 --- a/src/halmos/processes.py +++ b/src/halmos/processes.py @@ -3,32 +3,32 @@ import subprocess import threading import time -from subprocess import PIPE, Popen +from subprocess import PIPE, Popen, TimeoutExpired import psutil class PopenFuture(concurrent.futures.Future): cmd: list[str] + timeout: float | None # in seconds, None means no timeout process: subprocess.Popen | None stdout: str | None stderr: str | None returncode: int | None start_time: float | None end_time: float | None - metadata: dict | None _exception: Exception | None - def __init__(self, cmd: list[str], metadata: dict | None = None): + def __init__(self, cmd: list[str], timeout: float | None = None): super().__init__() self.cmd = cmd + self.timeout = timeout self.process = None self.stdout = None self.stderr = None self.returncode = None self.start_time = None self.end_time = None - self.metadata = metadata self._exception = None def start(self): @@ -40,9 +40,14 @@ def run(): self.process = Popen(self.cmd, stdout=PIPE, stderr=PIPE, text=True) # blocks until the process terminates - self.stdout, self.stderr = self.process.communicate() + self.stdout, self.stderr = self.process.communicate( + timeout=self.timeout + ) self.end_time = time.time() self.returncode = self.process.returncode + except TimeoutExpired as e: + self._exception = e + self.cancel() except Exception as e: self._exception = e finally: @@ -58,30 +63,32 @@ def run(): def cancel(self): """Attempts to terminate and then kill the process and its children.""" - if self.is_running(): - self.process.terminate() + if not self.is_running(): + return - # use psutil to kill the entire process tree (including children) - try: - parent_process = psutil.Process(self.process.pid) - processes = parent_process.children(recursive=True) - processes.append(parent_process) + self.process.terminate() + + # use psutil to kill the entire process tree (including children) + try: + parent_process = psutil.Process(self.process.pid) + processes = parent_process.children(recursive=True) + processes.append(parent_process) - # ask politely to terminate first - for process in processes: - process.terminate() + # ask politely to terminate first + for process in processes: + process.terminate() - # give them some time to terminate - time.sleep(0.1) + # give them some time to terminate + time.sleep(0.5) - # after grace period, force kill - for process in processes: - if process.is_running(): - process.kill() + # after grace period, force kill + for process in processes: + if process.is_running(): + process.kill() - except psutil.NoSuchProcess: - # process already terminated, nothing to do - pass + except psutil.NoSuchProcess: + # process already terminated, nothing to do + pass def exception(self) -> Exception | None: """Returns any exception raised during the process.""" From ea3d493e847e4b5809e0135eedbc260618905bcc Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 8 Jan 2025 17:21:47 -0800 Subject: [PATCH 15/46] migrate build output parsing function in build.py --- src/halmos/__main__.py | 176 ++--------------------------------------- src/halmos/build.py | 174 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 168 deletions(-) create mode 100644 src/halmos/build.py diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 0ad73710..f5a7f309 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -29,6 +29,13 @@ unsat, ) +from .build import ( + build_output_iterator, + import_libs, + parse_build_out, + parse_devdoc, + parse_natspec, +) from .bytevec import ByteVec from .calldata import FunctionInfo, get_abi, mk_calldata from .config import Config as HalmosConfig @@ -39,7 +46,6 @@ COUNTEREXAMPLE_UNKNOWN, INTERNAL_ERROR, LOOP_BOUND, - PARSING_ERROR, REVERT_ALL, debug, error, @@ -48,7 +54,7 @@ warn, warn_code, ) -from .mapper import BuildOut, DeployAddressMapper, Mapper +from .mapper import BuildOut, DeployAddressMapper from .processes import PopenExecutor, PopenFuture, TimeoutExpired from .sevm import ( EMPTY_BALANCE, @@ -1109,172 +1115,6 @@ def refine(query: SMTQuery) -> SMTQuery: return SMTQuery(smtlib, query.assertions) -def get_contract_type( - ast_nodes: list, contract_name: str -) -> tuple[str | None, str | None]: - for node in ast_nodes: - if node["nodeType"] == "ContractDefinition" and node["name"] == contract_name: - abstract = "abstract " if node.get("abstract") else "" - contract_type = abstract + node["contractKind"] - natspec = node.get("documentation") - return contract_type, natspec - - return None, None - - -def parse_build_out(args: HalmosConfig) -> dict: - result = {} # compiler version -> source filename -> contract name -> (json, type) - - out_path = os.path.join(args.root, args.forge_build_out) - if not os.path.exists(out_path): - raise FileNotFoundError( - f"The build output directory `{out_path}` does not exist" - ) - - for sol_dirname in os.listdir(out_path): # for each source filename - if not sol_dirname.endswith(".sol"): - continue - - sol_path = os.path.join(out_path, sol_dirname) - if not os.path.isdir(sol_path): - continue - - for json_filename in os.listdir(sol_path): # for each contract name - try: - if not json_filename.endswith(".json"): - continue - if json_filename.startswith("."): - continue - - json_path = os.path.join(sol_path, json_filename) - with open(json_path, encoding="utf8") as f: - json_out = json.load(f) - - # cut off compiler version number as well - contract_name = json_filename.split(".")[0] - ast_nodes = json_out["ast"]["nodes"] - contract_type, natspec = get_contract_type(ast_nodes, contract_name) - - # can happen to solidity files for multiple reasons: - # - import only (like console2.log) - # - defines only structs or enums - # - defines only free functions - # - ... - if contract_type is None: - debug(f"Skipped {json_filename}, no contract definition found") - continue - - compiler_version = json_out["metadata"]["compiler"]["version"] - result.setdefault(compiler_version, {}) - result[compiler_version].setdefault(sol_dirname, {}) - contract_map = result[compiler_version][sol_dirname] - - if contract_name in contract_map: - raise ValueError( - "duplicate contract names in the same file", - contract_name, - sol_dirname, - ) - - contract_map[contract_name] = (json_out, contract_type, natspec) - parse_symbols(args, contract_map, contract_name) - - except Exception as err: - warn_code( - PARSING_ERROR, - f"Skipped {json_filename} due to parsing failure: {type(err).__name__}: {err}", - ) - if args.debug: - traceback.print_exc() - continue - - return result - - -def parse_symbols(args: HalmosConfig, contract_map: dict, contract_name: str) -> None: - try: - json_out = contract_map[contract_name][0] - bytecode = json_out["bytecode"]["object"] - contract_mapping_info = Mapper().get_or_create(contract_name) - contract_mapping_info.bytecode = bytecode - - Mapper().parse_ast(json_out["ast"]) - - except Exception: - debug(f"error parsing symbols for contract {contract_name}") - debug(traceback.format_exc()) - - # we parse symbols as best effort, don't propagate exceptions - pass - - -def parse_devdoc(funsig: str, contract_json: dict) -> str | None: - try: - return contract_json["metadata"]["output"]["devdoc"]["methods"][funsig][ - "custom:halmos" - ] - except KeyError: - return None - - -def parse_natspec(natspec: dict) -> str: - # This parsing scheme is designed to handle: - # - # - multiline tags: - # /// @custom:halmos --x - # /// --y - # - # - multiple tags: - # /// @custom:halmos --x - # /// @custom:halmos --y - # - # - tags that start in the middle of line: - # /// blah blah @custom:halmos --x - # /// --y - # - # In all the above examples, this scheme returns "--x (whitespaces) --y" - isHalmosTag = False - result = "" - for item in re.split(r"(@\S+)", natspec.get("text", "")): - if item == "@custom:halmos": - isHalmosTag = True - elif re.match(r"^@\S", item): - isHalmosTag = False - elif isHalmosTag: - result += item - return result.strip() - - -def import_libs(build_out_map: dict, hexcode: str, linkReferences: dict) -> dict: - libs = {} - - for filepath in linkReferences: - file_name = filepath.split("/")[-1] - - for lib_name in linkReferences[filepath]: - (lib_json, _, _) = build_out_map[file_name][lib_name] - lib_hexcode = lib_json["deployedBytecode"]["object"] - - # in bytes, multiply indices by 2 and offset 0x - placeholder_index = linkReferences[filepath][lib_name][0]["start"] * 2 + 2 - placeholder = hexcode[placeholder_index : placeholder_index + 40] - - libs[f"{filepath}:{lib_name}"] = { - "placeholder": placeholder, - "hexcode": lib_hexcode, - } - - return libs - - -def build_output_iterator(build_out: dict): - for compiler_version in sorted(build_out): - build_out_map = build_out[compiler_version] - for filename in sorted(build_out_map): - for contract_name in sorted(build_out_map[filename]): - yield (build_out_map, filename, contract_name) - - def contract_regex(args): if args.contract: return f"^{args.contract}$" diff --git a/src/halmos/build.py b/src/halmos/build.py new file mode 100644 index 00000000..b432826d --- /dev/null +++ b/src/halmos/build.py @@ -0,0 +1,174 @@ +import json +import os +import re +import traceback + +from halmos.config import Config as HalmosConfig +from halmos.logs import PARSING_ERROR, debug, warn_code +from halmos.mapper import Mapper + + +def get_contract_type( + ast_nodes: list, contract_name: str +) -> tuple[str | None, str | None]: + for node in ast_nodes: + if node["nodeType"] == "ContractDefinition" and node["name"] == contract_name: + abstract = "abstract " if node.get("abstract") else "" + contract_type = abstract + node["contractKind"] + natspec = node.get("documentation") + return contract_type, natspec + + return None, None + + +def parse_build_out(args: HalmosConfig) -> dict: + result = {} # compiler version -> source filename -> contract name -> (json, type) + + out_path = os.path.join(args.root, args.forge_build_out) + if not os.path.exists(out_path): + raise FileNotFoundError( + f"The build output directory `{out_path}` does not exist" + ) + + for sol_dirname in os.listdir(out_path): # for each source filename + if not sol_dirname.endswith(".sol"): + continue + + sol_path = os.path.join(out_path, sol_dirname) + if not os.path.isdir(sol_path): + continue + + for json_filename in os.listdir(sol_path): # for each contract name + try: + if not json_filename.endswith(".json"): + continue + if json_filename.startswith("."): + continue + + json_path = os.path.join(sol_path, json_filename) + with open(json_path, encoding="utf8") as f: + json_out = json.load(f) + + # cut off compiler version number as well + contract_name = json_filename.split(".")[0] + ast_nodes = json_out["ast"]["nodes"] + contract_type, natspec = get_contract_type(ast_nodes, contract_name) + + # can happen to solidity files for multiple reasons: + # - import only (like console2.log) + # - defines only structs or enums + # - defines only free functions + # - ... + if contract_type is None: + debug(f"Skipped {json_filename}, no contract definition found") + continue + + compiler_version = json_out["metadata"]["compiler"]["version"] + result.setdefault(compiler_version, {}) + result[compiler_version].setdefault(sol_dirname, {}) + contract_map = result[compiler_version][sol_dirname] + + if contract_name in contract_map: + raise ValueError( + "duplicate contract names in the same file", + contract_name, + sol_dirname, + ) + + contract_map[contract_name] = (json_out, contract_type, natspec) + parse_symbols(args, contract_map, contract_name) + + except Exception as err: + warn_code( + PARSING_ERROR, + f"Skipped {json_filename} due to parsing failure: {type(err).__name__}: {err}", + ) + if args.debug: + traceback.print_exc() + continue + + return result + + +def parse_symbols(args: HalmosConfig, contract_map: dict, contract_name: str) -> None: + try: + json_out = contract_map[contract_name][0] + bytecode = json_out["bytecode"]["object"] + contract_mapping_info = Mapper().get_or_create(contract_name) + contract_mapping_info.bytecode = bytecode + + Mapper().parse_ast(json_out["ast"]) + + except Exception: + debug(f"error parsing symbols for contract {contract_name}") + debug(traceback.format_exc()) + + # we parse symbols as best effort, don't propagate exceptions + pass + + +def parse_devdoc(funsig: str, contract_json: dict) -> str | None: + try: + return contract_json["metadata"]["output"]["devdoc"]["methods"][funsig][ + "custom:halmos" + ] + except KeyError: + return None + + +def parse_natspec(natspec: dict) -> str: + # This parsing scheme is designed to handle: + # + # - multiline tags: + # /// @custom:halmos --x + # /// --y + # + # - multiple tags: + # /// @custom:halmos --x + # /// @custom:halmos --y + # + # - tags that start in the middle of line: + # /// blah blah @custom:halmos --x + # /// --y + # + # In all the above examples, this scheme returns "--x (whitespaces) --y" + isHalmosTag = False + result = "" + for item in re.split(r"(@\S+)", natspec.get("text", "")): + if item == "@custom:halmos": + isHalmosTag = True + elif re.match(r"^@\S", item): + isHalmosTag = False + elif isHalmosTag: + result += item + return result.strip() + + +def import_libs(build_out_map: dict, hexcode: str, linkReferences: dict) -> dict: + libs = {} + + for filepath in linkReferences: + file_name = filepath.split("/")[-1] + + for lib_name in linkReferences[filepath]: + (lib_json, _, _) = build_out_map[file_name][lib_name] + lib_hexcode = lib_json["deployedBytecode"]["object"] + + # in bytes, multiply indices by 2 and offset 0x + placeholder_index = linkReferences[filepath][lib_name][0]["start"] * 2 + 2 + placeholder = hexcode[placeholder_index : placeholder_index + 40] + + libs[f"{filepath}:{lib_name}"] = { + "placeholder": placeholder, + "hexcode": lib_hexcode, + } + + return libs + + +def build_output_iterator(build_out: dict): + for compiler_version in sorted(build_out): + build_out_map = build_out[compiler_version] + for filename in sorted(build_out_map): + for contract_name in sorted(build_out_map[filename]): + yield (build_out_map, filename, contract_name) From 223923153713d2b2549d3f2fed94d70cf5db6819 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 8 Jan 2025 17:39:11 -0800 Subject: [PATCH 16/46] move solving-related functions to solve.py --- src/halmos/__main__.py | 426 ++-------------------------------------- src/halmos/constants.py | 4 + src/halmos/solve.py | 413 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 433 insertions(+), 410 deletions(-) create mode 100644 src/halmos/constants.py create mode 100644 src/halmos/solve.py diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index f5a7f309..5625fe93 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -10,22 +10,17 @@ import sys import time import traceback -import uuid from collections import Counter -from concurrent.futures import Future, ThreadPoolExecutor +from concurrent.futures import Future from copy import deepcopy -from dataclasses import asdict, dataclass, field +from dataclasses import asdict, dataclass from datetime import timedelta from enum import Enum from importlib import metadata from z3 import ( BitVec, - CheckSatResult, - Solver, - sat, set_option, - unknown, unsat, ) @@ -40,10 +35,14 @@ from .calldata import FunctionInfo, get_abi, mk_calldata from .config import Config as HalmosConfig from .config import arg_parser, default_config, resolve_config_files, toml_parser +from .constants import ( + VERBOSITY_TRACE_CONSTRUCTOR, + VERBOSITY_TRACE_COUNTEREXAMPLE, + VERBOSITY_TRACE_PATHS, + VERBOSITY_TRACE_SETUP, +) from .exceptions import HalmosException from .logs import ( - COUNTEREXAMPLE_INVALID, - COUNTEREXAMPLE_UNKNOWN, INTERNAL_ERROR, LOOP_BOUND, REVERT_ALL, @@ -51,11 +50,9 @@ error, logger, logger_unique, - warn, warn_code, ) from .mapper import BuildOut, DeployAddressMapper -from .processes import PopenExecutor, PopenFuture, TimeoutExpired from .sevm import ( EMPTY_BALANCE, EVM, @@ -80,7 +77,14 @@ jumpid_str, mnemonic, ) -from .smtlib import ModelVariables, parse_string +from .solve import ( + ContractContext, + FunctionContext, + PathContext, + SolverOutput, + solve_end_to_end, + solve_low_level, +) from .traces import render_trace, rendered_trace from .utils import ( NamedTimer, @@ -92,7 +96,6 @@ hexify, indent_text, red, - stringify, unbox_int, yellow, ) @@ -108,184 +111,6 @@ if sys.stdout.encoding != "utf-8": sys.stdout.reconfigure(encoding="utf-8") -VERBOSITY_TRACE_COUNTEREXAMPLE = 2 -VERBOSITY_TRACE_SETUP = 3 -VERBOSITY_TRACE_PATHS = 4 -VERBOSITY_TRACE_CONSTRUCTOR = 5 - - -@dataclass -class PotentialModel: - model: ModelVariables | str | None - is_valid: bool - - def __str__(self) -> str: - # expected to be a filename - if isinstance(self.model, str): - return f"see {self.model}" - - formatted = [] - for v in self.model.values(): - # TODO: ideally we would avoid wrapping with con() here - stringified = stringify(v.full_name, con(v.value)) - formatted.append(f"\n {v.full_name} = {stringified}") - return "".join(sorted(formatted)) if formatted else "∅" - - -@dataclass(frozen=True) -class ContractContext: - # config with contract-specific overrides - args: HalmosConfig - - # name of this contract - name: str - - # signatures of test functions to run - funsigs: list[str] - - # data parsed from the build output for this contract - creation_hexcode: str - deployed_hexcode: str - abi: dict - method_identifiers: dict[str, str] - contract_json: dict - libs: dict - - # TODO: check if this is really a contract-level variable - build_out_map: dict - - -@dataclass(frozen=True) -class FunctionContext: - # config with function-specific overrides - args: HalmosConfig - - # function name, signature, and selector - info: FunctionInfo - - # solver using the function-specific config - solver: Solver - - # backlink to the parent contract context - contract_ctx: ContractContext - - # optional starting state - setup_ex: Exec | None = None - - # dump directory for this function (generated in __post_init__) - dump_dirname: str = field(init=False) - - # function-level thread pool that drives assertion solving - thread_pool: ThreadPoolExecutor = field(init=False) - - # path-specific queries are submitted to this function-specific executor - solver_executor: PopenExecutor = field(default_factory=PopenExecutor) - - # list of solver outputs for this function - solver_outputs: list["SolverOutput"] = field(default_factory=list) - - # list of valid counterexamples for this function - valid_counterexamples: list[PotentialModel] = field(default_factory=list) - - # list of potentially invalid counterexamples for this function - invalid_counterexamples: list[PotentialModel] = field(default_factory=list) - - # list of unsat cores for this function - unsat_cores: list[list] = field(default_factory=list) - - # map from path id to trace - traces: dict[int, str] = field(default_factory=dict) - - # map from path id to execution - exec_cache: dict[int, Exec] = field(default_factory=dict) - - def __post_init__(self): - dirname = f"/tmp/{self.info.name}-{uuid.uuid4().hex}" - object.__setattr__(self, "dump_dirname", dirname) - - thread_pool = ThreadPoolExecutor( - max_workers=self.args.solver_threads, - thread_name_prefix=f"{self.info.name}-", - ) - object.__setattr__(self, "thread_pool", thread_pool) - - -@dataclass(frozen=True) -class PathContext: - # id of this path - path_id: int - - # path execution object - ex: Exec - - # SMT query - query: SMTQuery - - # backlink to the parent function context - fun_ctx: FunctionContext - - # filename for this path (generated in __post_init__) - dump_filename: str = field(init=False) - - is_refined: bool = False - - def __post_init__(self): - refined_str = ".refined" if self.is_refined else "" - dirname = self.fun_ctx.dump_dirname - filename = os.path.join(dirname, f"{self.path_id}{refined_str}.smt2") - - # use __setattr__ because this is a frozen dataclass - object.__setattr__(self, "dump_filename", filename) - - def refine(self) -> "PathContext": - return PathContext( - path_id=self.path_id, - ex=self.ex, - query=refine(self.query), - fun_ctx=self.fun_ctx, - is_refined=True, - ) - - -@dataclass(frozen=True) -class SolverOutput: - # solver result - result: CheckSatResult - - # we don't backlink to the parent path context to avoid extra - # references to Exec objects past the lifetime of the path - path_id: int - - # solver model - model: PotentialModel | None = None - - # optional unsat core - unsat_core: list[str] | None = None - - @staticmethod - def from_result( - stdout: str, stderr: str, returncode: int, path_ctx: PathContext - ) -> "SolverOutput": - # extract the first line (we expect sat/unsat/unknown) - newline_idx = stdout.find("\n") - first_line = stdout[:newline_idx] if newline_idx != -1 else stdout - - args = path_ctx.fun_ctx.args - path_id = path_ctx.path_id - if args.verbose >= 1: - debug(f" {first_line}") - - match first_line: - case "unsat": - unsat_core = parse_unsat_core(stdout) if args.cache_solver else None - return SolverOutput(unsat, path_id, unsat_core=unsat_core) - case "sat": - is_valid = is_model_valid(stdout) - model = PotentialModel(model=parse_string(stdout), is_valid=is_valid) - return SolverOutput(sat, path_id, model=model) - case _: - return SolverOutput(unknown, path_id) - @dataclass(frozen=True) class TestResult: @@ -896,225 +721,6 @@ def run_contract(ctx: ContractContext) -> list[TestResult]: return test_results -def parse_unsat_core(output) -> list[str] | None: - # parsing example: - # unsat - # (error "the context is unsatisfiable") # <-- this line is optional - # (<41702> <37030> <36248> <47880>) - # result: - # [41702, 37030, 36248, 47880] - pattern = r"unsat\s*(\(\s*error\s+[^)]*\)\s*)?\(\s*((<[0-9]+>\s*)*)\)" - match = re.search(pattern, output) - if match: - result = [re.sub(r"<([0-9]+)>", r"\1", name) for name in match.group(2).split()] - return result - else: - warn(f"error in parsing unsat core: {output}") - return None - - -def dump( - path_ctx: PathContext, -) -> tuple[CheckSatResult, PotentialModel | None, list | None]: - args = path_ctx.fun_ctx.args - query = path_ctx.query - dump_filename = path_ctx.dump_filename - - if args.verbose >= 1: - debug(f"Writing SMT query to {dump_filename}") - - # for each implication assertion, `(assert (=> |id| c))`, in query.smtlib, - # generate a corresponding named assertion, `(assert (! |id| :named ))`. - # see `svem.Path.to_smt2()` for more details. - if args.cache_solver: - named_assertions = "".join( - [ - f"(assert (! |{assert_id}| :named <{assert_id}>))\n" - for assert_id in query.assertions - ] - ) - - with open(dump_filename, "w") as f: - f.write("(set-option :produce-unsat-cores true)\n") - f.write("(set-logic QF_AUFBV)\n") - f.write(query.smtlib) - f.write(named_assertions) - f.write("(check-sat)\n") - f.write("(get-model)\n") - f.write("(get-unsat-core)\n") - - else: - with open(dump_filename, "w") as f: - f.write("(set-logic QF_AUFBV)\n") - f.write(query.smtlib) - f.write("(check-sat)\n") - f.write("(get-model)\n") - - -def is_model_valid(solver_stdout: str) -> bool: - # TODO: evaluate the path condition against the given model after excluding f_evm_* symbols, - # since the f_evm_* symbols may still appear in valid models. - - return "f_evm_" not in solver_stdout - - -def solve_low_level(path_ctx: PathContext) -> SolverOutput: - """Invokes an external solver process to solve the given query. - - Can raise TimeoutError or some Exception raised during execution""" - - fun_ctx, smt2_filename = path_ctx.fun_ctx, path_ctx.dump_filename - args = fun_ctx.args - - # make sure the smt2 file has been written - dump(path_ctx) - - if args.verbose >= 1: - print(" Checking with external solver process") - print(f" {args.solver_command} {smt2_filename} > {smt2_filename}.out") - - # solver_timeout_assertion == 0 means no timeout, - # which translates to timeout_seconds=None for subprocess.run - timeout_seconds = t / 1000 if (t := args.solver_timeout_assertion) else None - - cmd = args.solver_command.split() + [smt2_filename] - future = PopenFuture(cmd, timeout=timeout_seconds) - - # starts the subprocess asynchronously - fun_ctx.solver_executor.submit(future) - - # block until the external solver returns, times out, is interrupted, fails, etc. - try: - stdout, stderr, returncode = future.result() - except TimeoutExpired: - return SolverOutput(result=unknown, path_id=path_ctx.path_id) - - # save solver stdout to file - with open(f"{smt2_filename}.out", "w") as f: - f.write(stdout) - - # save solver stderr to file (only if there is an error) - if stderr: - with open(f"{smt2_filename}.err", "w") as f: - f.write(stderr) - - return SolverOutput.from_result(stdout, stderr, returncode, path_ctx) - - -# XXX used to be gen_model_from_sexpr -def solve_end_to_end(ctx: PathContext) -> None: - """Synchronously resolves a query in a given context, which may result in 0, 1 or multiple solver invocations. - - - may result in 0 invocations if the query contains a known unsat core (hence the need for the context) - - may result in exactly 1 invocation if the query is unsat, or sat with a valid model - - may result in multiple invocations if the query is sat and the model is invalid (needs refinement) - - If this produces a model, it _should_ be valid. - """ - fun_ctx, path_id, query = ctx.fun_ctx, ctx.path_id, ctx.query - args, unsat_cores = fun_ctx.args, fun_ctx.unsat_cores - - verbose = print if args.verbose >= 1 else lambda *args, **kwargs: None - verbose(f"Checking path condition {path_id=}") - - # if the query contains an unsat-core, it is unsat; no need to run the solver - if check_unsat_cores(query, unsat_cores): - verbose(" Already proven unsat") - return SolverOutput(unsat, path_id) - - solver_output = solve_low_level(ctx) - result, model = solver_output.result, solver_output.model - - # if the ctx is already refined, we don't need to solve again - if result == sat and not model.is_valid and not ctx.is_refined: - verbose(" Checking again with refinement") - - refined_ctx = ctx.refine() - - if refined_ctx.query.smtlib != query.smtlib: - # recurse with the refined query - return solve_end_to_end(refined_ctx) - else: - verbose(" Refinement did not change the query, no need to solve again") - - # - # we are done solving, process and triage the result - # - - # retrieve cached exec and clear the cache entry - exec = fun_ctx.exec_cache.pop(path_id, None) - - if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: - id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else "" - print(f"Trace{id_str}:") - print(fun_ctx.traces[path_id], end="") - - if args.print_failed_states: - print(f"# {path_id}") - print(exec) - - if fun_ctx.solver_executor.is_shutdown(): - # if the thread pool is in the process of shutting down, - # we want to stop processing remaining models/timeouts/errors, etc. - return - - # keep track of the solver outputs, so that we can display PASS/FAIL/TIMEOUT/ERROR later - fun_ctx.solver_outputs.append(solver_output) - - if result == unsat: - if solver_output.unsat_core: - fun_ctx.unsat_cores.append(solver_output.unsat_core) - return - - # model could be an empty dict here, so compare to None explicitly - if model is None: - warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}") - return - - if model.is_valid: - print(red(f"Counterexample: {model}")) - - fun_ctx.valid_counterexamples.append(model) - - # we have a valid counterexample, so we are eligible for early exit - if args.early_exit: - fun_ctx.solver_executor.shutdown(wait=False) - else: - warn_str = f"Counterexample (potentially invalid): {model}" - warn_code(COUNTEREXAMPLE_INVALID, warn_str) - fun_ctx.invalid_counterexamples.append(model) - - -def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool: - # return true if the given query contains any given unsat core - for unsat_core in unsat_cores: - if all(core in query.assertions for core in unsat_core): - return True - return False - - -def refine(query: SMTQuery) -> SMTQuery: - smtlib = query.smtlib - - # replace uninterpreted abstraction with actual symbols for assertion solving - smtlib = re.sub( - r"\(declare-fun f_evm_(bvmul)_([0-9]+) \(\(_ BitVec \2\) \(_ BitVec \2\)\) \(_ BitVec \2\)\)", - r"(define-fun f_evm_\1_\2 ((x (_ BitVec \2)) (y (_ BitVec \2))) (_ BitVec \2) (\1 x y))", - smtlib, - ) - - # replace `(f_evm_bvudiv_N x y)` with `(ite (= y (_ bv0 N)) (_ bv0 N) (bvudiv x y))` - # similarly for bvurem, bvsdiv, and bvsrem - # NOTE: (bvudiv x (_ bv0 N)) is *defined* to (bvneg (_ bv1 N)); while (div x 0) is undefined - smtlib = re.sub( - r"\(declare-fun f_evm_(bvudiv|bvurem|bvsdiv|bvsrem)_([0-9]+) \(\(_ BitVec \2\) \(_ BitVec \2\)\) \(_ BitVec \2\)\)", - r"(define-fun f_evm_\1_\2 ((x (_ BitVec \2)) (y (_ BitVec \2))) (_ BitVec \2) (ite (= y (_ bv0 \2)) (_ bv0 \2) (\1 x y)))", - smtlib, - ) - - return SMTQuery(smtlib, query.assertions) - - def contract_regex(args): if args.contract: return f"^{args.contract}$" diff --git a/src/halmos/constants.py b/src/halmos/constants.py new file mode 100644 index 00000000..f4583ae9 --- /dev/null +++ b/src/halmos/constants.py @@ -0,0 +1,4 @@ +VERBOSITY_TRACE_COUNTEREXAMPLE = 2 +VERBOSITY_TRACE_SETUP = 3 +VERBOSITY_TRACE_PATHS = 4 +VERBOSITY_TRACE_CONSTRUCTOR = 5 diff --git a/src/halmos/solve.py b/src/halmos/solve.py new file mode 100644 index 00000000..e234b455 --- /dev/null +++ b/src/halmos/solve.py @@ -0,0 +1,413 @@ +import os +import re +import uuid +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass, field + +from z3 import CheckSatResult, Solver, sat, unknown, unsat + +from halmos.calldata import FunctionInfo +from halmos.config import Config as HalmosConfig +from halmos.constants import VERBOSITY_TRACE_COUNTEREXAMPLE, VERBOSITY_TRACE_PATHS +from halmos.logs import ( + COUNTEREXAMPLE_INVALID, + COUNTEREXAMPLE_UNKNOWN, + debug, + warn, + warn_code, +) +from halmos.processes import PopenExecutor, PopenFuture, TimeoutExpired +from halmos.sevm import Exec, SMTQuery +from halmos.smtlib import ModelVariables, parse_string +from halmos.utils import con, red, stringify + + +@dataclass(frozen=True) +class PotentialModel: + model: ModelVariables | str | None + is_valid: bool + + def __str__(self) -> str: + # expected to be a filename + if isinstance(self.model, str): + return f"see {self.model}" + + formatted = [] + for v in self.model.values(): + # TODO: ideally we would avoid wrapping with con() here + stringified = stringify(v.full_name, con(v.value)) + formatted.append(f"\n {v.full_name} = {stringified}") + return "".join(sorted(formatted)) if formatted else "∅" + + +@dataclass(frozen=True) +class ContractContext: + # config with contract-specific overrides + args: HalmosConfig + + # name of this contract + name: str + + # signatures of test functions to run + funsigs: list[str] + + # data parsed from the build output for this contract + creation_hexcode: str + deployed_hexcode: str + abi: dict + method_identifiers: dict[str, str] + contract_json: dict + libs: dict + + # TODO: check if this is really a contract-level variable + build_out_map: dict + + +@dataclass(frozen=True) +class FunctionContext: + # config with function-specific overrides + args: HalmosConfig + + # function name, signature, and selector + info: FunctionInfo + + # solver using the function-specific config + solver: Solver + + # backlink to the parent contract context + contract_ctx: ContractContext + + # optional starting state + setup_ex: Exec | None = None + + # dump directory for this function (generated in __post_init__) + dump_dirname: str = field(init=False) + + # function-level thread pool that drives assertion solving + thread_pool: ThreadPoolExecutor = field(init=False) + + # path-specific queries are submitted to this function-specific executor + solver_executor: PopenExecutor = field(default_factory=PopenExecutor) + + # list of solver outputs for this function + solver_outputs: list["SolverOutput"] = field(default_factory=list) + + # list of valid counterexamples for this function + valid_counterexamples: list[PotentialModel] = field(default_factory=list) + + # list of potentially invalid counterexamples for this function + invalid_counterexamples: list[PotentialModel] = field(default_factory=list) + + # list of unsat cores for this function + unsat_cores: list[list] = field(default_factory=list) + + # map from path id to trace + traces: dict[int, str] = field(default_factory=dict) + + # map from path id to execution + exec_cache: dict[int, Exec] = field(default_factory=dict) + + def __post_init__(self): + dirname = f"/tmp/{self.info.name}-{uuid.uuid4().hex}" + object.__setattr__(self, "dump_dirname", dirname) + + thread_pool = ThreadPoolExecutor( + max_workers=self.args.solver_threads, + thread_name_prefix=f"{self.info.name}-", + ) + object.__setattr__(self, "thread_pool", thread_pool) + + +@dataclass(frozen=True) +class PathContext: + # id of this path + path_id: int + + # path execution object + ex: Exec + + # SMT query + query: SMTQuery + + # backlink to the parent function context + fun_ctx: FunctionContext + + # filename for this path (generated in __post_init__) + dump_filename: str = field(init=False) + + is_refined: bool = False + + def __post_init__(self): + refined_str = ".refined" if self.is_refined else "" + dirname = self.fun_ctx.dump_dirname + filename = os.path.join(dirname, f"{self.path_id}{refined_str}.smt2") + + # use __setattr__ because this is a frozen dataclass + object.__setattr__(self, "dump_filename", filename) + + def refine(self) -> "PathContext": + return PathContext( + path_id=self.path_id, + ex=self.ex, + query=refine(self.query), + fun_ctx=self.fun_ctx, + is_refined=True, + ) + + +@dataclass(frozen=True) +class SolverOutput: + # solver result + result: CheckSatResult + + # we don't backlink to the parent path context to avoid extra + # references to Exec objects past the lifetime of the path + path_id: int + + # solver model + model: PotentialModel | None = None + + # optional unsat core + unsat_core: list[str] | None = None + + @staticmethod + def from_result( + stdout: str, stderr: str, returncode: int, path_ctx: PathContext + ) -> "SolverOutput": + # extract the first line (we expect sat/unsat/unknown) + newline_idx = stdout.find("\n") + first_line = stdout[:newline_idx] if newline_idx != -1 else stdout + + args = path_ctx.fun_ctx.args + path_id = path_ctx.path_id + if args.verbose >= 1: + debug(f" {first_line}") + + match first_line: + case "unsat": + unsat_core = parse_unsat_core(stdout) if args.cache_solver else None + return SolverOutput(unsat, path_id, unsat_core=unsat_core) + case "sat": + is_valid = is_model_valid(stdout) + model = PotentialModel(model=parse_string(stdout), is_valid=is_valid) + return SolverOutput(sat, path_id, model=model) + case _: + return SolverOutput(unknown, path_id) + + +def parse_unsat_core(output) -> list[str] | None: + # parsing example: + # unsat + # (error "the context is unsatisfiable") # <-- this line is optional + # (<41702> <37030> <36248> <47880>) + # result: + # [41702, 37030, 36248, 47880] + pattern = r"unsat\s*(\(\s*error\s+[^)]*\)\s*)?\(\s*((<[0-9]+>\s*)*)\)" + match = re.search(pattern, output) + if match: + result = [re.sub(r"<([0-9]+)>", r"\1", name) for name in match.group(2).split()] + return result + else: + warn(f"error in parsing unsat core: {output}") + return None + + +def dump( + path_ctx: PathContext, +) -> tuple[CheckSatResult, PotentialModel | None, list | None]: + args = path_ctx.fun_ctx.args + query = path_ctx.query + dump_filename = path_ctx.dump_filename + + if args.verbose >= 1: + debug(f"Writing SMT query to {dump_filename}") + + # for each implication assertion, `(assert (=> |id| c))`, in query.smtlib, + # generate a corresponding named assertion, `(assert (! |id| :named ))`. + # see `svem.Path.to_smt2()` for more details. + if args.cache_solver: + named_assertions = "".join( + [ + f"(assert (! |{assert_id}| :named <{assert_id}>))\n" + for assert_id in query.assertions + ] + ) + + with open(dump_filename, "w") as f: + f.write("(set-option :produce-unsat-cores true)\n") + f.write("(set-logic QF_AUFBV)\n") + f.write(query.smtlib) + f.write(named_assertions) + f.write("(check-sat)\n") + f.write("(get-model)\n") + f.write("(get-unsat-core)\n") + + else: + with open(dump_filename, "w") as f: + f.write("(set-logic QF_AUFBV)\n") + f.write(query.smtlib) + f.write("(check-sat)\n") + f.write("(get-model)\n") + + +def is_model_valid(solver_stdout: str) -> bool: + # TODO: evaluate the path condition against the given model after excluding f_evm_* symbols, + # since the f_evm_* symbols may still appear in valid models. + + return "f_evm_" not in solver_stdout + + +def solve_low_level(path_ctx: PathContext) -> SolverOutput: + """Invokes an external solver process to solve the given query. + + Can raise TimeoutError or some Exception raised during execution""" + + fun_ctx, smt2_filename = path_ctx.fun_ctx, path_ctx.dump_filename + args = fun_ctx.args + + # make sure the smt2 file has been written + dump(path_ctx) + + if args.verbose >= 1: + print(" Checking with external solver process") + print(f" {args.solver_command} {smt2_filename} > {smt2_filename}.out") + + # solver_timeout_assertion == 0 means no timeout, + # which translates to timeout_seconds=None for subprocess.run + timeout_seconds = t / 1000 if (t := args.solver_timeout_assertion) else None + + cmd = args.solver_command.split() + [smt2_filename] + future = PopenFuture(cmd, timeout=timeout_seconds) + + # starts the subprocess asynchronously + fun_ctx.solver_executor.submit(future) + + # block until the external solver returns, times out, is interrupted, fails, etc. + try: + stdout, stderr, returncode = future.result() + except TimeoutExpired: + return SolverOutput(result=unknown, path_id=path_ctx.path_id) + + # save solver stdout to file + with open(f"{smt2_filename}.out", "w") as f: + f.write(stdout) + + # save solver stderr to file (only if there is an error) + if stderr: + with open(f"{smt2_filename}.err", "w") as f: + f.write(stderr) + + return SolverOutput.from_result(stdout, stderr, returncode, path_ctx) + + +def solve_end_to_end(ctx: PathContext) -> None: + """Synchronously resolves a query in a given context, which may result in 0, 1 or multiple solver invocations. + + - may result in 0 invocations if the query contains a known unsat core (hence the need for the context) + - may result in exactly 1 invocation if the query is unsat, or sat with a valid model + - may result in multiple invocations if the query is sat and the model is invalid (needs refinement) + + If this produces a model, it _should_ be valid. + """ + fun_ctx, path_id, query = ctx.fun_ctx, ctx.path_id, ctx.query + args, unsat_cores = fun_ctx.args, fun_ctx.unsat_cores + + verbose = print if args.verbose >= 1 else lambda *args, **kwargs: None + verbose(f"Checking path condition {path_id=}") + + # if the query contains an unsat-core, it is unsat; no need to run the solver + if check_unsat_cores(query, unsat_cores): + verbose(" Already proven unsat") + return SolverOutput(unsat, path_id) + + solver_output = solve_low_level(ctx) + result, model = solver_output.result, solver_output.model + + # if the ctx is already refined, we don't need to solve again + if result == sat and not model.is_valid and not ctx.is_refined: + verbose(" Checking again with refinement") + + refined_ctx = ctx.refine() + + if refined_ctx.query.smtlib != query.smtlib: + # recurse with the refined query + return solve_end_to_end(refined_ctx) + else: + verbose(" Refinement did not change the query, no need to solve again") + + # + # we are done solving, process and triage the result + # + + # retrieve cached exec and clear the cache entry + exec = fun_ctx.exec_cache.pop(path_id, None) + + if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: + id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else "" + print(f"Trace{id_str}:") + print(fun_ctx.traces[path_id], end="") + + if args.print_failed_states: + print(f"# {path_id}") + print(exec) + + if fun_ctx.solver_executor.is_shutdown(): + # if the thread pool is in the process of shutting down, + # we want to stop processing remaining models/timeouts/errors, etc. + return + + # keep track of the solver outputs, so that we can display PASS/FAIL/TIMEOUT/ERROR later + fun_ctx.solver_outputs.append(solver_output) + + if result == unsat: + if solver_output.unsat_core: + fun_ctx.unsat_cores.append(solver_output.unsat_core) + return + + # model could be an empty dict here, so compare to None explicitly + if model is None: + warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}") + return + + if model.is_valid: + print(red(f"Counterexample: {model}")) + + fun_ctx.valid_counterexamples.append(model) + + # we have a valid counterexample, so we are eligible for early exit + if args.early_exit: + fun_ctx.solver_executor.shutdown(wait=False) + else: + warn_str = f"Counterexample (potentially invalid): {model}" + warn_code(COUNTEREXAMPLE_INVALID, warn_str) + fun_ctx.invalid_counterexamples.append(model) + + +def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool: + # return true if the given query contains any given unsat core + for unsat_core in unsat_cores: + if all(core in query.assertions for core in unsat_core): + return True + return False + + +def refine(query: SMTQuery) -> SMTQuery: + smtlib = query.smtlib + + # replace uninterpreted abstraction with actual symbols for assertion solving + smtlib = re.sub( + r"\(declare-fun f_evm_(bvmul)_([0-9]+) \(\(_ BitVec \2\) \(_ BitVec \2\)\) \(_ BitVec \2\)\)", + r"(define-fun f_evm_\1_\2 ((x (_ BitVec \2)) (y (_ BitVec \2))) (_ BitVec \2) (\1 x y))", + smtlib, + ) + + # replace `(f_evm_bvudiv_N x y)` with `(ite (= y (_ bv0 N)) (_ bv0 N) (bvudiv x y))` + # similarly for bvurem, bvsdiv, and bvsrem + # NOTE: (bvudiv x (_ bv0 N)) is *defined* to (bvneg (_ bv1 N)); while (div x 0) is undefined + smtlib = re.sub( + r"\(declare-fun f_evm_(bvudiv|bvurem|bvsdiv|bvsrem)_([0-9]+) \(\(_ BitVec \2\) \(_ BitVec \2\)\) \(_ BitVec \2\)\)", + r"(define-fun f_evm_\1_\2 ((x (_ BitVec \2)) (y (_ BitVec \2))) (_ BitVec \2) (ite (= y (_ bv0 \2)) (_ bv0 \2) (\1 x y)))", + smtlib, + ) + + return SMTQuery(smtlib, query.assertions) From 19c772fed1742e1947e4e6785396dfdc0e1a21da Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 9 Jan 2025 10:28:31 -0800 Subject: [PATCH 17/46] fix setup_exs_no_error access pattern --- src/halmos/__main__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 5625fe93..733b7faf 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -342,11 +342,11 @@ def setup(ctx: FunctionContext) -> Exec: setup_exs: list[Exec] = [] - match len(setup_exs_no_error): - case 0: + match setup_exs_no_error: + case []: pass - case 1: - setup_exs.append(setup_exs_no_error[0][0]) + case [path_ctx]: + setup_exs.append(path_ctx.ex) case _: for path_ctx in setup_exs_no_error: solver_output = solve_low_level(path_ctx) From 5d1a24fbe7db0d5b06384eda739197ad3be33944 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 9 Jan 2025 10:29:00 -0800 Subject: [PATCH 18/46] merge smtlib.py into solve.py --- src/halmos/smtlib.py | 100 ----------------------- src/halmos/solve.py | 104 +++++++++++++++++++++++- tests/{test_smtlib.py => test_solve.py} | 10 +-- 3 files changed, 106 insertions(+), 108 deletions(-) delete mode 100644 src/halmos/smtlib.py rename tests/{test_smtlib.py => test_solve.py} (91%) diff --git a/src/halmos/smtlib.py b/src/halmos/smtlib.py deleted file mode 100644 index 61eb23fe..00000000 --- a/src/halmos/smtlib.py +++ /dev/null @@ -1,100 +0,0 @@ -import re -from dataclasses import dataclass - -from halmos.logs import logger - - -@dataclass -class ModelVariable: - full_name: str - variable_name: str - solidity_type: str - smt_type: str - size_bits: int - value: int - - -ModelVariables = dict[str, ModelVariable] - -# Regular expression for capturing halmos variables -halmos_pattern = re.compile( - r""" - \(\s*define-fun\s+ # Match "(define-fun" - \|?(halmos_[^ |]+)\|?\s+ # Capture the full variable name, optionally wrapped in "|" - \(\)\s+\(_\s+([^ ]+)\s+ # Capture the SMTLIB type (e.g., "BitVec 256") - (\d+)\)\s+ # Capture the bit-width or type argument - ( # Group for the value - \#b[01]+ # Binary value (e.g., "#b1010") - |\#x[0-9a-fA-F]+ # Hexadecimal value (e.g., "#xFF") - |\(_\s+bv\d+\s+\d+\) # Decimal value (e.g., "(_ bv42 256)") - ) - """, - re.VERBOSE, -) - - -def parse_const_value(value: str) -> int: - if value.startswith("#b"): - return int(value[2:], 2) - - if value.startswith("#x"): - return int(value[2:], 16) - - # we may have a group like (_ bv123 256) - tokens = value.split() - for token in tokens: - if token.startswith("bv"): - return int(token[2:]) - - raise ValueError(f"unknown value format: {value}") - - -def _parse_match(match: re.Match) -> ModelVariable: - full_name = match.group(1).strip() - smt_type = f"{match.group(2)} {match.group(3)}" - size_bits = int(match.group(3)) - value = parse_const_value(match.group(4)) - - # Extract name and typename from the variable name - parts = full_name.split("_") - variable_name = parts[1] - solidity_type = parts[2] - - return ModelVariable( - full_name=full_name, - variable_name=variable_name, - solidity_type=solidity_type, - smt_type=smt_type, - size_bits=size_bits, - value=value, - ) - - -def parse_string(smtlib_str: str) -> ModelVariables: - """Expects a whole smtlib model output file, as produced by a solver - in response to a `(check-sat)` + `(get-model)` command. - - Extracts halmos variables and returns them grouped by their full name""" - - model_variables: dict[str, ModelVariable] = {} - - # use a regex to find all the variables - # for now we explicitly don't try to properly parse the smtlib output - # because of idiosyncrasies of different solvers: - # - ignores the initial sat/unsat on the first line - # - ignores the occasional `(model)` command used by yices, stp, cvc4, etc. - - for match in halmos_pattern.finditer(smtlib_str): - try: - variable = _parse_match(match) - model_variables[variable.full_name] = variable - except Exception as e: - logger.error(f"error parsing smtlib string '{match.string.strip()}': {e}") - raise e - - return model_variables - - -def parse_file(file_path: str) -> ModelVariables: - with open(file_path) as file: - return parse_string(file.read()) diff --git a/src/halmos/solve.py b/src/halmos/solve.py index e234b455..27dba415 100644 --- a/src/halmos/solve.py +++ b/src/halmos/solve.py @@ -13,15 +13,44 @@ COUNTEREXAMPLE_INVALID, COUNTEREXAMPLE_UNKNOWN, debug, + error, warn, warn_code, ) from halmos.processes import PopenExecutor, PopenFuture, TimeoutExpired from halmos.sevm import Exec, SMTQuery -from halmos.smtlib import ModelVariables, parse_string from halmos.utils import con, red, stringify +@dataclass +class ModelVariable: + full_name: str + variable_name: str + solidity_type: str + smt_type: str + size_bits: int + value: int + + +ModelVariables = dict[str, ModelVariable] + +# Regular expression for capturing halmos variables +halmos_var_pattern = re.compile( + r""" + \(\s*define-fun\s+ # Match "(define-fun" + \|?(halmos_[^ |]+)\|?\s+ # Capture the full variable name, optionally wrapped in "|" + \(\)\s+\(_\s+([^ ]+)\s+ # Capture the SMTLIB type (e.g., "BitVec 256") + (\d+)\)\s+ # Capture the bit-width or type argument + ( # Group for the value + \#b[01]+ # Binary value (e.g., "#b1010") + |\#x[0-9a-fA-F]+ # Hexadecimal value (e.g., "#xFF") + |\(_\s+bv\d+\s+\d+\) # Decimal value (e.g., "(_ bv42 256)") + ) + """, + re.VERBOSE, +) + + @dataclass(frozen=True) class PotentialModel: model: ModelVariables | str | None @@ -189,13 +218,82 @@ def from_result( return SolverOutput(unsat, path_id, unsat_core=unsat_core) case "sat": is_valid = is_model_valid(stdout) - model = PotentialModel(model=parse_string(stdout), is_valid=is_valid) + model = PotentialModel(model=parse_model_str(stdout), is_valid=is_valid) return SolverOutput(sat, path_id, model=model) case _: return SolverOutput(unknown, path_id) -def parse_unsat_core(output) -> list[str] | None: +def parse_const_value(value: str) -> int: + match value[:2]: + case "#b": + return int(value[2:], 2) + case "#x": + return int(value[2:], 16) + case "bv": + return int(value[2:]) + case _: + # we may have a group like (_ bv123 256) + tokens = value.split() + for token in tokens: + if token.startswith("bv"): + return int(token[2:]) + + raise ValueError(f"unknown value format: {value}") + + +def _parse_halmos_var_match(match: re.Match) -> ModelVariable: + full_name = match.group(1).strip() + smt_type = f"{match.group(2)} {match.group(3)}" + size_bits = int(match.group(3)) + value = parse_const_value(match.group(4)) + + # Extract name and typename from the variable name + parts = full_name.split("_") + variable_name = parts[1] + solidity_type = parts[2] + + return ModelVariable( + full_name=full_name, + variable_name=variable_name, + solidity_type=solidity_type, + smt_type=smt_type, + size_bits=size_bits, + value=value, + ) + + +def parse_model_str(smtlib_str: str) -> ModelVariables: + """Expects a whole smtlib model output file, as produced by a solver + in response to a `(check-sat)` + `(get-model)` command. + + Extracts halmos variables and returns them grouped by their full name""" + + model_variables: dict[str, ModelVariable] = {} + + # use a regex to find all the variables + # for now we explicitly don't try to properly parse the smtlib output + # because of idiosyncrasies of different solvers: + # - ignores the initial sat/unsat on the first line + # - ignores the occasional `(model)` command used by yices, stp, cvc4, etc. + + for match in halmos_var_pattern.finditer(smtlib_str): + try: + variable = _parse_halmos_var_match(match) + model_variables[variable.full_name] = variable + except Exception as e: + error(f"error parsing smtlib string '{match.string.strip()}': {e}") + raise e + + return model_variables + + +def parse_model_file(file_path: str) -> ModelVariables: + with open(file_path) as file: + return parse_model_str(file.read()) + + +def parse_unsat_core(output: str) -> list[str] | None: # parsing example: # unsat # (error "the context is unsatisfiable") # <-- this line is optional diff --git a/tests/test_smtlib.py b/tests/test_solve.py similarity index 91% rename from tests/test_smtlib.py rename to tests/test_solve.py index 0e3d2aaa..601b9f56 100644 --- a/tests/test_smtlib.py +++ b/tests/test_solve.py @@ -1,4 +1,4 @@ -from halmos.smtlib import ModelVariable, parse_string +from halmos.solve import ModelVariable, parse_model_str def test_smtlib_z3_bv_output(): @@ -7,7 +7,7 @@ def test_smtlib_z3_bv_output(): (define-fun {full_name} () (_ BitVec 256) #x0000000000000000000000000000000000000000000000000000000000000000) """ - model = parse_string(smtlib_str) + model = parse_model_str(smtlib_str) assert model[full_name] == ModelVariable( full_name=full_name, @@ -30,7 +30,7 @@ def test_smtlib_yices_binary_output(): (_ BitVec 256) #b1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) """ - model = parse_string(smtlib_str) + model = parse_model_str(smtlib_str) assert model[full_name] == ModelVariable( full_name=full_name, variable_name="z", @@ -47,7 +47,7 @@ def test_smtlib_yices_decimal_output(): smtlib_str = f""" (define-fun {full_name} () (_ BitVec 256) (_ bv{val} 256)) """ - model = parse_string(smtlib_str) + model = parse_model_str(smtlib_str) assert model[full_name] == ModelVariable( full_name=full_name, variable_name="z", @@ -76,7 +76,7 @@ def test_smtlib_stp_output(): ( define-fun |{full_name}| () (_ BitVec 8) #x04 ) ) """ - model = parse_string(smtlib_str) + model = parse_model_str(smtlib_str) assert model[full_name] == ModelVariable( full_name=full_name, variable_name="x", From b1ebdc686d7c45a7012ad231ff8c2007b2444a41 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 9 Jan 2025 11:20:25 -0800 Subject: [PATCH 19/46] adjust halmos_var_pattern to support p_ prefix --- src/halmos/solve.py | 16 ++++++++-------- tests/test_solve.py | 43 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/src/halmos/solve.py b/src/halmos/solve.py index 27dba415..e5d321a8 100644 --- a/src/halmos/solve.py +++ b/src/halmos/solve.py @@ -37,14 +37,14 @@ class ModelVariable: # Regular expression for capturing halmos variables halmos_var_pattern = re.compile( r""" - \(\s*define-fun\s+ # Match "(define-fun" - \|?(halmos_[^ |]+)\|?\s+ # Capture the full variable name, optionally wrapped in "|" - \(\)\s+\(_\s+([^ ]+)\s+ # Capture the SMTLIB type (e.g., "BitVec 256") - (\d+)\)\s+ # Capture the bit-width or type argument - ( # Group for the value - \#b[01]+ # Binary value (e.g., "#b1010") - |\#x[0-9a-fA-F]+ # Hexadecimal value (e.g., "#xFF") - |\(_\s+bv\d+\s+\d+\) # Decimal value (e.g., "(_ bv42 256)") + \(\s*define-fun\s+ # Match "(define-fun" + \|?((?:halmos_|p_)[^ |]+)\|?\s+ # Capture either halmos_* or p_*, optionally wrapped in "|" + \(\)\s+\(_\s+([^ ]+)\s+ # Capture the SMTLIB type (e.g., "BitVec 256") + (\d+)\)\s+ # Capture the bit-width or type argument + ( # Group for the value + \#b[01]+ # Binary value (e.g., "#b1010") + |\#x[0-9a-fA-F]+ # Hexadecimal value (e.g., "#xFF") + |\(_\s+bv\d+\s+\d+\) # Decimal value (e.g., "(_ bv42 256)") ) """, re.VERBOSE, diff --git a/tests/test_solve.py b/tests/test_solve.py index 601b9f56..7f9d76cb 100644 --- a/tests/test_solve.py +++ b/tests/test_solve.py @@ -1,8 +1,16 @@ +import pytest + from halmos.solve import ModelVariable, parse_model_str -def test_smtlib_z3_bv_output(): - full_name = "halmos_y_uint256_043cfd7_01" +@pytest.mark.parametrize( + "full_name", + [ + "halmos_y_uint256_043cfd7_01", + "p_y_uint256_043cfd7_01", + ], +) +def test_smtlib_z3_bv_output(full_name): smtlib_str = f""" (define-fun {full_name} () (_ BitVec 256) #x0000000000000000000000000000000000000000000000000000000000000000) @@ -21,8 +29,14 @@ def test_smtlib_z3_bv_output(): # note that yices only produces output like this with --smt2-model-format # otherwise we get something like (= x #b00000100) -def test_smtlib_yices_binary_output(): - full_name = "halmos_z_uint256_cabf047_02" +@pytest.mark.parametrize( + "full_name", + [ + "halmos_z_uint256_cabf047_02", + "p_z_uint256_cabf047_02", + ], +) +def test_smtlib_yices_binary_output(full_name): smtlib_str = f""" (define-fun {full_name} @@ -41,8 +55,14 @@ def test_smtlib_yices_binary_output(): ) -def test_smtlib_yices_decimal_output(): - full_name = "halmos_z_uint256_11ce021_08" +@pytest.mark.parametrize( + "full_name", + [ + "halmos_z_uint256_11ce021_08", + "p_z_uint256_11ce021_08", + ], +) +def test_smtlib_yices_decimal_output(full_name): val = 57896044618658097711785492504343953926634992332820282019728792003956564819968 smtlib_str = f""" (define-fun {full_name} () (_ BitVec 256) (_ bv{val} 256)) @@ -58,9 +78,14 @@ def test_smtlib_yices_decimal_output(): ) -def test_smtlib_stp_output(): - full_name = "halmos_x_uint8_043cfd7_01" - +@pytest.mark.parametrize( + "full_name", + [ + "halmos_x_uint8_043cfd7_01", + "p_x_uint8_043cfd7_01", + ], +) +def test_smtlib_stp_output(full_name): # we should tolerate: # - the extra (model) command # - duplicate variable names From d847464e6febbd19bcbf000b45d1fb6308534ecb Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 9 Jan 2025 15:47:11 -0800 Subject: [PATCH 20/46] speed up early exit --- src/halmos/processes.py | 16 +++++++++------- src/halmos/solve.py | 8 ++------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/halmos/processes.py b/src/halmos/processes.py index 1eff7389..15a614b6 100644 --- a/src/halmos/processes.py +++ b/src/halmos/processes.py @@ -66,8 +66,6 @@ def cancel(self): if not self.is_running(): return - self.process.terminate() - # use psutil to kill the entire process tree (including children) try: parent_process = psutil.Process(self.process.pid) @@ -78,8 +76,9 @@ def cancel(self): for process in processes: process.terminate() - # give them some time to terminate - time.sleep(0.5) + # termination grace period + with contextlib.suppress(TimeoutExpired): + parent_process.wait(timeout=0.5) # after grace period, force kill for process in processes: @@ -161,9 +160,12 @@ def shutdown(self, wait=True, cancel_futures=False): # if asked for immediate shutdown we cancel everything else: - with self._lock: - for future in self._futures: - future.cancel() + with self._lock, concurrent.futures.ThreadPoolExecutor() as executor: + # kick off all cancellations in parallel + cancel_tasks = [executor.submit(f.cancel) for f in self._futures] + + # wait for them to finish + concurrent.futures.wait(cancel_tasks) def map(self, fn, *iterables, timeout=None, chunksize=1): raise NotImplementedError() diff --git a/src/halmos/solve.py b/src/halmos/solve.py index e5d321a8..d46fad74 100644 --- a/src/halmos/solve.py +++ b/src/halmos/solve.py @@ -10,7 +10,6 @@ from halmos.config import Config as HalmosConfig from halmos.constants import VERBOSITY_TRACE_COUNTEREXAMPLE, VERBOSITY_TRACE_PATHS from halmos.logs import ( - COUNTEREXAMPLE_INVALID, COUNTEREXAMPLE_UNKNOWN, debug, error, @@ -19,7 +18,7 @@ ) from halmos.processes import PopenExecutor, PopenFuture, TimeoutExpired from halmos.sevm import Exec, SMTQuery -from halmos.utils import con, red, stringify +from halmos.utils import con, stringify @dataclass @@ -468,16 +467,13 @@ def solve_end_to_end(ctx: PathContext) -> None: return if model.is_valid: - print(red(f"Counterexample: {model}")) - fun_ctx.valid_counterexamples.append(model) # we have a valid counterexample, so we are eligible for early exit if args.early_exit: + debug(f"Shutting down {fun_ctx.info.name}'s solver executor") fun_ctx.solver_executor.shutdown(wait=False) else: - warn_str = f"Counterexample (potentially invalid): {model}" - warn_code(COUNTEREXAMPLE_INVALID, warn_str) fun_ctx.invalid_counterexamples.append(model) From 03ed9489a8f40fc003d16cd3d97e256c57aece1d Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 9 Jan 2025 15:55:28 -0800 Subject: [PATCH 21/46] bring back model output in solve_end_to_end --- src/halmos/solve.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/halmos/solve.py b/src/halmos/solve.py index d46fad74..b93485b4 100644 --- a/src/halmos/solve.py +++ b/src/halmos/solve.py @@ -10,6 +10,7 @@ from halmos.config import Config as HalmosConfig from halmos.constants import VERBOSITY_TRACE_COUNTEREXAMPLE, VERBOSITY_TRACE_PATHS from halmos.logs import ( + COUNTEREXAMPLE_INVALID, COUNTEREXAMPLE_UNKNOWN, debug, error, @@ -18,7 +19,7 @@ ) from halmos.processes import PopenExecutor, PopenFuture, TimeoutExpired from halmos.sevm import Exec, SMTQuery -from halmos.utils import con, stringify +from halmos.utils import con, red, stringify @dataclass @@ -467,6 +468,7 @@ def solve_end_to_end(ctx: PathContext) -> None: return if model.is_valid: + print(red(f"Counterexample: {model}")) fun_ctx.valid_counterexamples.append(model) # we have a valid counterexample, so we are eligible for early exit @@ -474,6 +476,9 @@ def solve_end_to_end(ctx: PathContext) -> None: debug(f"Shutting down {fun_ctx.info.name}'s solver executor") fun_ctx.solver_executor.shutdown(wait=False) else: + warn_str = f"Counterexample (potentially invalid): {model}" + warn_code(COUNTEREXAMPLE_INVALID, warn_str) + fun_ctx.invalid_counterexamples.append(model) From a6fd0671c5aa4fb5602c764a32b86ff6930a7538 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 9 Jan 2025 16:50:12 -0800 Subject: [PATCH 22/46] shutdown executors on signal --- src/halmos/__main__.py | 3 +++ src/halmos/processes.py | 20 ++++++++++++++++++++ src/halmos/solve.py | 10 +++++++++- 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 733b7faf..31cf18fb 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -53,6 +53,7 @@ warn_code, ) from .mapper import BuildOut, DeployAddressMapper +from .processes import ExecutorRegistry from .sevm import ( EMPTY_BALANCE, EVM, @@ -821,6 +822,8 @@ def _main(_args=None) -> MainResult: # def on_exit(exitcode: int) -> MainResult: + ExecutorRegistry().shutdown_all() + result = MainResult(exitcode, test_results_map) if args.json_output: diff --git a/src/halmos/processes.py b/src/halmos/processes.py index 15a614b6..83009942 100644 --- a/src/halmos/processes.py +++ b/src/halmos/processes.py @@ -3,11 +3,31 @@ import subprocess import threading import time +import weakref from subprocess import PIPE, Popen, TimeoutExpired import psutil +class ExecutorRegistry: + _instance = None + + # Singleton pattern + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._executors = weakref.WeakSet() + return cls._instance + + def register(self, executor): + self._executors.add(executor) + + def shutdown_all(self): + print("Shutting down all executors") + for ex in list(self._executors): + ex.shutdown(wait=False) + + class PopenFuture(concurrent.futures.Future): cmd: list[str] timeout: float | None # in seconds, None means no timeout diff --git a/src/halmos/solve.py b/src/halmos/solve.py index b93485b4..6532a00e 100644 --- a/src/halmos/solve.py +++ b/src/halmos/solve.py @@ -17,7 +17,12 @@ warn, warn_code, ) -from halmos.processes import PopenExecutor, PopenFuture, TimeoutExpired +from halmos.processes import ( + ExecutorRegistry, + PopenExecutor, + PopenFuture, + TimeoutExpired, +) from halmos.sevm import Exec, SMTQuery from halmos.utils import con, red, stringify @@ -146,6 +151,9 @@ def __post_init__(self): ) object.__setattr__(self, "thread_pool", thread_pool) + # register the solver executor to be shutdown on exit + ExecutorRegistry().register(self.solver_executor) + @dataclass(frozen=True) class PathContext: From f41a3cbe35be432d9ec50fd2024d844f73d9a696 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 16 Jan 2025 15:41:13 -0800 Subject: [PATCH 23/46] disable ExecutorRegistry().shutdown_all() for now --- src/halmos/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 31cf18fb..0016a1c3 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -53,7 +53,6 @@ warn_code, ) from .mapper import BuildOut, DeployAddressMapper -from .processes import ExecutorRegistry from .sevm import ( EMPTY_BALANCE, EVM, @@ -822,7 +821,8 @@ def _main(_args=None) -> MainResult: # def on_exit(exitcode: int) -> MainResult: - ExecutorRegistry().shutdown_all() + # disabling this for now because it causes an assertion violation in z3 + # ExecutorRegistry().shutdown_all() result = MainResult(exitcode, test_results_map) From 7b38cdce649543e66ea591cff373164316acd2d3 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 16 Jan 2025 16:16:57 -0800 Subject: [PATCH 24/46] fix bad merge --- src/halmos/__main__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 0016a1c3..8769b14c 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -50,6 +50,7 @@ error, logger, logger_unique, + warn, warn_code, ) from .mapper import BuildOut, DeployAddressMapper @@ -292,8 +293,6 @@ def setup(ctx: FunctionContext) -> Exec: print(setup_timer.report()) return setup_ex - setup_timer.create_subtimer("run") - # TODO: dyn_params may need to be passed to mk_calldata in run() calldata, dyn_params = mk_calldata(ctx.contract_ctx.abi, setup_info, args) setup_ex.path.process_dyn_params(dyn_params) From 0125a3d5ad155a4c4e05da5af1cbf5605d2eb708 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 17 Jan 2025 11:00:55 -0800 Subject: [PATCH 25/46] fix z3 thread safety issue (--print-failed-states disabled for now) --- src/halmos/__main__.py | 72 +++++++++++++++++++++++++--- src/halmos/sevm.py | 1 + src/halmos/solve.py | 106 +++++++++++------------------------------ 3 files changed, 95 insertions(+), 84 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 8769b14c..9ff83a98 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -43,6 +43,8 @@ ) from .exceptions import HalmosException from .logs import ( + COUNTEREXAMPLE_INVALID, + COUNTEREXAMPLE_UNKNOWN, INTERNAL_ERROR, LOOP_BOUND, REVERT_ALL, @@ -460,6 +462,62 @@ def run_test(ctx: FunctionContext) -> TestResult: potential = 0 stuck = [] + def solve_end_to_end_callback(future: Future): + # beware: this function may be called from threads other than the main thread, + # so we must be careful to avoid referencing any z3 objects / contexts + + if e := future.exception(): + error(f"encountered exception during assertion solving: {e!r}") + + # + # we are done solving, process and triage the result + # + + solver_output = future.result() + result, model = solver_output.result, solver_output.model + + if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: + id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else "" + print(f"Trace{id_str}:") + print(ctx.traces[path_id], end="") + + # unsafe, can't print an Exec from a different thread + # if args.print_failed_states: + # print(f"# {path_id}") + # print(exec) + + if ctx.solver_executor.is_shutdown(): + # if the thread pool is in the process of shutting down, + # we want to stop processing remaining models/timeouts/errors, etc. + return + + # keep track of the solver outputs, so that we can display PASS/FAIL/TIMEOUT/ERROR later + ctx.solver_outputs.append(solver_output) + + if result == unsat: + if solver_output.unsat_core: + ctx.unsat_cores.append(solver_output.unsat_core) + return + + # model could be an empty dict here, so compare to None explicitly + if model is None: + warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}") + return + + if model.is_valid: + print(red(f"Counterexample: {model}")) + ctx.valid_counterexamples.append(model) + + # we have a valid counterexample, so we are eligible for early exit + if args.early_exit: + debug(f"Shutting down {ctx.info.name}'s solver executor") + ctx.solver_executor.shutdown(wait=False) + else: + warn_str = f"Counterexample (potentially invalid): {model}" + warn_code(COUNTEREXAMPLE_INVALID, warn_str) + + ctx.invalid_counterexamples.append(model) + # # consume the sevm.run() generator # (actually triggers path exploration) @@ -497,19 +555,19 @@ def run_test(ctx: FunctionContext) -> TestResult: query: SMTQuery = ex.path.to_smt2(args) + # beware: because this object crosses thread boundaries, we must be careful to + # avoid any reference to z3 objects path_ctx = PathContext( + args=args, path_id=path_id, - ex=ex, query=query, - fun_ctx=ctx, + dump_dirname=dump_dirname, + unsat_cores=ctx.unsat_cores, + solver_executor=ctx.solver_executor, ) - def log_future_result(future: Future): - if e := future.exception(): - error(f"encountered exception during assertion solving: {e!r}") - solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx) - solve_future.add_done_callback(log_future_result) + solve_future.add_done_callback(solve_end_to_end_callback) submitted_futures.append(solve_future) elif ex.context.is_stuck(): diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py index fbfe35a8..efd9961c 100644 --- a/src/halmos/sevm.py +++ b/src/halmos/sevm.py @@ -484,6 +484,7 @@ def push(self, v: Word) -> None: else: if not (eq(v.sort(), BitVecSort256) or is_bool(v)): raise ValueError(v) + self.stack.append(simplify(v)) def pop(self) -> Word: diff --git a/src/halmos/solve.py b/src/halmos/solve.py index 6532a00e..9554bc7a 100644 --- a/src/halmos/solve.py +++ b/src/halmos/solve.py @@ -8,14 +8,10 @@ from halmos.calldata import FunctionInfo from halmos.config import Config as HalmosConfig -from halmos.constants import VERBOSITY_TRACE_COUNTEREXAMPLE, VERBOSITY_TRACE_PATHS from halmos.logs import ( - COUNTEREXAMPLE_INVALID, - COUNTEREXAMPLE_UNKNOWN, debug, error, warn, - warn_code, ) from halmos.processes import ( ExecutorRegistry, @@ -24,7 +20,7 @@ TimeoutExpired, ) from halmos.sevm import Exec, SMTQuery -from halmos.utils import con, red, stringify +from halmos.utils import hexify @dataclass @@ -68,9 +64,8 @@ def __str__(self) -> str: formatted = [] for v in self.model.values(): - # TODO: ideally we would avoid wrapping with con() here - stringified = stringify(v.full_name, con(v.value)) - formatted.append(f"\n {v.full_name} = {stringified}") + # TODO: parse type and render accordingly + formatted.append(f"\n {v.full_name} = {hexify(v.value)}") return "".join(sorted(formatted)) if formatted else "∅" @@ -157,17 +152,22 @@ def __post_init__(self): @dataclass(frozen=True) class PathContext: + args: HalmosConfig + # id of this path path_id: int - # path execution object - ex: Exec - # SMT query query: SMTQuery - # backlink to the parent function context - fun_ctx: FunctionContext + # the solver executor is shared across all paths in the same function + # we just hold a reference to it + solver_executor: PopenExecutor + + # list of unsat cores (inherited from parent function context) + unsat_cores: list[list] + + dump_dirname: str # filename for this path (generated in __post_init__) dump_filename: str = field(init=False) @@ -176,18 +176,21 @@ class PathContext: def __post_init__(self): refined_str = ".refined" if self.is_refined else "" - dirname = self.fun_ctx.dump_dirname - filename = os.path.join(dirname, f"{self.path_id}{refined_str}.smt2") + filename = os.path.join(self.dump_dirname, f"{self.path_id}{refined_str}.smt2") # use __setattr__ because this is a frozen dataclass object.__setattr__(self, "dump_filename", filename) def refine(self) -> "PathContext": return PathContext( + # inherited + args=self.args, path_id=self.path_id, - ex=self.ex, + solver_executor=self.solver_executor, + unsat_cores=self.unsat_cores, + dump_dirname=self.dump_dirname, + # refined query=refine(self.query), - fun_ctx=self.fun_ctx, is_refined=True, ) @@ -215,8 +218,7 @@ def from_result( newline_idx = stdout.find("\n") first_line = stdout[:newline_idx] if newline_idx != -1 else stdout - args = path_ctx.fun_ctx.args - path_id = path_ctx.path_id + args, path_id = path_ctx.args, path_ctx.path_id if args.verbose >= 1: debug(f" {first_line}") @@ -321,9 +323,7 @@ def parse_unsat_core(output: str) -> list[str] | None: def dump( path_ctx: PathContext, ) -> tuple[CheckSatResult, PotentialModel | None, list | None]: - args = path_ctx.fun_ctx.args - query = path_ctx.query - dump_filename = path_ctx.dump_filename + args, query, dump_filename = path_ctx.args, path_ctx.query, path_ctx.dump_filename if args.verbose >= 1: debug(f"Writing SMT query to {dump_filename}") @@ -368,8 +368,7 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput: Can raise TimeoutError or some Exception raised during execution""" - fun_ctx, smt2_filename = path_ctx.fun_ctx, path_ctx.dump_filename - args = fun_ctx.args + args, smt2_filename = path_ctx.args, path_ctx.dump_filename # make sure the smt2 file has been written dump(path_ctx) @@ -386,7 +385,7 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput: future = PopenFuture(cmd, timeout=timeout_seconds) # starts the subprocess asynchronously - fun_ctx.solver_executor.submit(future) + path_ctx.solver_executor.submit(future) # block until the external solver returns, times out, is interrupted, fails, etc. try: @@ -406,7 +405,7 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput: return SolverOutput.from_result(stdout, stderr, returncode, path_ctx) -def solve_end_to_end(ctx: PathContext) -> None: +def solve_end_to_end(ctx: PathContext) -> SolverOutput: """Synchronously resolves a query in a given context, which may result in 0, 1 or multiple solver invocations. - may result in 0 invocations if the query contains a known unsat core (hence the need for the context) @@ -415,14 +414,13 @@ def solve_end_to_end(ctx: PathContext) -> None: If this produces a model, it _should_ be valid. """ - fun_ctx, path_id, query = ctx.fun_ctx, ctx.path_id, ctx.query - args, unsat_cores = fun_ctx.args, fun_ctx.unsat_cores + path_id, query = ctx.path_id, ctx.query - verbose = print if args.verbose >= 1 else lambda *args, **kwargs: None + verbose = print if ctx.args.verbose >= 1 else lambda *args, **kwargs: None verbose(f"Checking path condition {path_id=}") # if the query contains an unsat-core, it is unsat; no need to run the solver - if check_unsat_cores(query, unsat_cores): + if check_unsat_cores(query, ctx.unsat_cores): verbose(" Already proven unsat") return SolverOutput(unsat, path_id) @@ -441,53 +439,7 @@ def solve_end_to_end(ctx: PathContext) -> None: else: verbose(" Refinement did not change the query, no need to solve again") - # - # we are done solving, process and triage the result - # - - # retrieve cached exec and clear the cache entry - exec = fun_ctx.exec_cache.pop(path_id, None) - - if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: - id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else "" - print(f"Trace{id_str}:") - print(fun_ctx.traces[path_id], end="") - - if args.print_failed_states: - print(f"# {path_id}") - print(exec) - - if fun_ctx.solver_executor.is_shutdown(): - # if the thread pool is in the process of shutting down, - # we want to stop processing remaining models/timeouts/errors, etc. - return - - # keep track of the solver outputs, so that we can display PASS/FAIL/TIMEOUT/ERROR later - fun_ctx.solver_outputs.append(solver_output) - - if result == unsat: - if solver_output.unsat_core: - fun_ctx.unsat_cores.append(solver_output.unsat_core) - return - - # model could be an empty dict here, so compare to None explicitly - if model is None: - warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}") - return - - if model.is_valid: - print(red(f"Counterexample: {model}")) - fun_ctx.valid_counterexamples.append(model) - - # we have a valid counterexample, so we are eligible for early exit - if args.early_exit: - debug(f"Shutting down {fun_ctx.info.name}'s solver executor") - fun_ctx.solver_executor.shutdown(wait=False) - else: - warn_str = f"Counterexample (potentially invalid): {model}" - warn_code(COUNTEREXAMPLE_INVALID, warn_str) - - fun_ctx.invalid_counterexamples.append(model) + return solver_output def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool: From 51657bace279cb6fa0f9cb88f545f752eda30081 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 17 Jan 2025 11:16:48 -0800 Subject: [PATCH 26/46] fix setup() with multiple paths --- src/halmos/__main__.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 9ff83a98..6ea3db08 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -312,7 +312,7 @@ def setup(ctx: FunctionContext) -> Exec: ) setup_exs_all = sevm.run(setup_ex) - setup_exs_no_error: list[PathContext] = [] + setup_exs_no_error: list[Exec] = [] for path_id, setup_ex in enumerate(setup_exs_all): if args.verbose >= VERBOSITY_TRACE_SETUP: @@ -333,26 +333,29 @@ def setup(ctx: FunctionContext) -> Exec: render_trace(setup_ex.context) else: - path_ctx = PathContext( - path_id=path_id, - ex=setup_ex, - query=setup_ex.path.to_smt2(args), - fun_ctx=ctx, - ) - setup_exs_no_error.append(path_ctx) + setup_exs_no_error.append(setup_ex) setup_exs: list[Exec] = [] match setup_exs_no_error: case []: pass - case [path_ctx]: - setup_exs.append(path_ctx.ex) + case [ex]: + setup_exs.append(ex) case _: - for path_ctx in setup_exs_no_error: + for path_id, ex in enumerate(setup_exs_no_error): + path_ctx = PathContext( + args=args, + path_id=path_id, + query=ex.path.to_smt2(args), + solver_executor=ctx.solver_executor, + unsat_cores=ctx.unsat_cores, + dump_dirname=ctx.dump_dirname, + ) + solver_output = solve_low_level(path_ctx) if solver_output.result != unsat: - setup_exs.append(path_ctx.ex) + setup_exs.append(ex) if len(setup_exs) > 1: break From 01dcb3b18cec242d6dc3127cee43af80dd65fdd8 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 17 Jan 2025 14:46:37 -0800 Subject: [PATCH 27/46] share a SolvingContext between FunctionContext and PathContext --- src/halmos/__main__.py | 17 +++++------ src/halmos/solve.py | 69 ++++++++++++++++++------------------------ 2 files changed, 37 insertions(+), 49 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 6ea3db08..d1cf22ee 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -348,9 +348,7 @@ def setup(ctx: FunctionContext) -> Exec: args=args, path_id=path_id, query=ex.path.to_smt2(args), - solver_executor=ctx.solver_executor, - unsat_cores=ctx.unsat_cores, - dump_dirname=ctx.dump_dirname, + solving_ctx=ctx.solving_ctx, ) solver_output = solve_low_level(path_ctx) @@ -400,11 +398,12 @@ def run_test(ctx: FunctionContext) -> TestResult: # prepare test dump directory if needed # - dump_dirname = ctx.dump_dirname + dump_dirname = ctx.solving_ctx.dump_dirname should_dump = args.dump_smt_queries or args.solver_command if should_dump and not os.path.isdir(dump_dirname): + if args.verbose >= 1: + print(f"Generating SMT queries in {dump_dirname}") os.makedirs(dump_dirname) - print(f"Generating SMT queries in {dump_dirname}") # # prepare calldata @@ -489,7 +488,7 @@ def solve_end_to_end_callback(future: Future): # print(f"# {path_id}") # print(exec) - if ctx.solver_executor.is_shutdown(): + if ctx.solving_ctx.executor.is_shutdown(): # if the thread pool is in the process of shutting down, # we want to stop processing remaining models/timeouts/errors, etc. return @@ -499,7 +498,7 @@ def solve_end_to_end_callback(future: Future): if result == unsat: if solver_output.unsat_core: - ctx.unsat_cores.append(solver_output.unsat_core) + ctx.append_unsat_core(solver_output.unsat_core) return # model could be an empty dict here, so compare to None explicitly @@ -564,9 +563,7 @@ def solve_end_to_end_callback(future: Future): args=args, path_id=path_id, query=query, - dump_dirname=dump_dirname, - unsat_cores=ctx.unsat_cores, - solver_executor=ctx.solver_executor, + solving_ctx=ctx.solving_ctx, ) solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx) diff --git a/src/halmos/solve.py b/src/halmos/solve.py index 9554bc7a..87382882 100644 --- a/src/halmos/solve.py +++ b/src/halmos/solve.py @@ -92,6 +92,18 @@ class ContractContext: build_out_map: dict +@dataclass(frozen=True) +class SolvingContext: + # directory for dumping solver files + dump_dirname: str + + # shared solver executor for all paths in the same function + executor: PopenExecutor = field(default_factory=PopenExecutor) + + # list of unsat cores + unsat_cores: list[list] = field(default_factory=list) + + @dataclass(frozen=True) class FunctionContext: # config with function-specific overrides @@ -109,15 +121,12 @@ class FunctionContext: # optional starting state setup_ex: Exec | None = None - # dump directory for this function (generated in __post_init__) - dump_dirname: str = field(init=False) + # function-level solving context + solving_ctx: SolvingContext = field(init=False) # function-level thread pool that drives assertion solving thread_pool: ThreadPoolExecutor = field(init=False) - # path-specific queries are submitted to this function-specific executor - solver_executor: PopenExecutor = field(default_factory=PopenExecutor) - # list of solver outputs for this function solver_outputs: list["SolverOutput"] = field(default_factory=list) @@ -127,9 +136,6 @@ class FunctionContext: # list of potentially invalid counterexamples for this function invalid_counterexamples: list[PotentialModel] = field(default_factory=list) - # list of unsat cores for this function - unsat_cores: list[list] = field(default_factory=list) - # map from path id to trace traces: dict[int, str] = field(default_factory=dict) @@ -138,7 +144,8 @@ class FunctionContext: def __post_init__(self): dirname = f"/tmp/{self.info.name}-{uuid.uuid4().hex}" - object.__setattr__(self, "dump_dirname", dirname) + solving_ctx = SolvingContext(dump_dirname=dirname) + object.__setattr__(self, "solving_ctx", solving_ctx) thread_pool = ThreadPoolExecutor( max_workers=self.args.solver_threads, @@ -147,49 +154,33 @@ def __post_init__(self): object.__setattr__(self, "thread_pool", thread_pool) # register the solver executor to be shutdown on exit - ExecutorRegistry().register(self.solver_executor) + ExecutorRegistry().register(solving_ctx.executor) + + def append_unsat_core(self, unsat_core: list[str]) -> None: + self.solving_ctx.unsat_cores.append(unsat_core) @dataclass(frozen=True) class PathContext: args: HalmosConfig - - # id of this path path_id: int - - # SMT query + solving_ctx: SolvingContext query: SMTQuery - - # the solver executor is shared across all paths in the same function - # we just hold a reference to it - solver_executor: PopenExecutor - - # list of unsat cores (inherited from parent function context) - unsat_cores: list[list] - - dump_dirname: str - - # filename for this path (generated in __post_init__) - dump_filename: str = field(init=False) - is_refined: bool = False - def __post_init__(self): + @property + def dump_filename(self) -> str: refined_str = ".refined" if self.is_refined else "" - filename = os.path.join(self.dump_dirname, f"{self.path_id}{refined_str}.smt2") - - # use __setattr__ because this is a frozen dataclass - object.__setattr__(self, "dump_filename", filename) + filename = os.path.join( + self.solving_ctx.dump_dirname, f"{self.path_id}{refined_str}.smt2" + ) + return filename def refine(self) -> "PathContext": return PathContext( - # inherited args=self.args, path_id=self.path_id, - solver_executor=self.solver_executor, - unsat_cores=self.unsat_cores, - dump_dirname=self.dump_dirname, - # refined + solving_ctx=self.solving_ctx, query=refine(self.query), is_refined=True, ) @@ -385,7 +376,7 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput: future = PopenFuture(cmd, timeout=timeout_seconds) # starts the subprocess asynchronously - path_ctx.solver_executor.submit(future) + path_ctx.solving_ctx.executor.submit(future) # block until the external solver returns, times out, is interrupted, fails, etc. try: @@ -420,7 +411,7 @@ def solve_end_to_end(ctx: PathContext) -> SolverOutput: verbose(f"Checking path condition {path_id=}") # if the query contains an unsat-core, it is unsat; no need to run the solver - if check_unsat_cores(query, ctx.unsat_cores): + if check_unsat_cores(query, ctx.solving_ctx.unsat_cores): verbose(" Already proven unsat") return SolverOutput(unsat, path_id) From 85ca04c4b5018cde16ac3e1e084830541650e0cb Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 17 Jan 2025 16:19:46 -0800 Subject: [PATCH 28/46] use tempfile and path to manage smt2 files (hopefully works on windows too) --- src/halmos/__main__.py | 11 ------- src/halmos/solve.py | 68 ++++++++++++++++++++++++++---------------- 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index d1cf22ee..0bcd9466 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -394,17 +394,6 @@ def run_test(ctx: FunctionContext) -> TestResult: if args.verbose >= 1: print(f"Executing {funname}") - # - # prepare test dump directory if needed - # - - dump_dirname = ctx.solving_ctx.dump_dirname - should_dump = args.dump_smt_queries or args.solver_command - if should_dump and not os.path.isdir(dump_dirname): - if args.verbose >= 1: - print(f"Generating SMT queries in {dump_dirname}") - os.makedirs(dump_dirname) - # # prepare calldata # diff --git a/src/halmos/solve.py b/src/halmos/solve.py index 87382882..b9e91768 100644 --- a/src/halmos/solve.py +++ b/src/halmos/solve.py @@ -1,8 +1,8 @@ -import os import re -import uuid from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass, field +from pathlib import Path +from tempfile import TemporaryDirectory from z3 import CheckSatResult, Solver, sat, unknown, unsat @@ -95,7 +95,7 @@ class ContractContext: @dataclass(frozen=True) class SolvingContext: # directory for dumping solver files - dump_dirname: str + dump_dir: TemporaryDirectory # shared solver executor for all paths in the same function executor: PopenExecutor = field(default_factory=PopenExecutor) @@ -122,6 +122,7 @@ class FunctionContext: setup_ex: Exec | None = None # function-level solving context + # the FunctionContext initializes and owns the SolvingContext solving_ctx: SolvingContext = field(init=False) # function-level thread pool that drives assertion solving @@ -143,8 +144,25 @@ class FunctionContext: exec_cache: dict[int, Exec] = field(default_factory=dict) def __post_init__(self): - dirname = f"/tmp/{self.info.name}-{uuid.uuid4().hex}" - solving_ctx = SolvingContext(dump_dirname=dirname) + args = self.args + + # create a temporary directory for dumping solver files + prefix = ( + f"{self.info.name}-" + if self.info.name + else f"{self.contract_ctx.name}-deploy-" + ) + + # if the user explicitly enabled dumping, we don't want to delete the directory on exit + delete = not self.args.dump_smt_queries + dump_dir = TemporaryDirectory( + prefix=prefix, ignore_cleanup_errors=True, delete=delete + ) + + if args.verbose >= 1 or args.dump_smt_queries: + print(f"Generating SMT queries in {dump_dir.name}") + + solving_ctx = SolvingContext(dump_dir=dump_dir) object.__setattr__(self, "solving_ctx", solving_ctx) thread_pool = ThreadPoolExecutor( @@ -169,12 +187,11 @@ class PathContext: is_refined: bool = False @property - def dump_filename(self) -> str: + def dump_file(self) -> Path: refined_str = ".refined" if self.is_refined else "" - filename = os.path.join( - self.solving_ctx.dump_dirname, f"{self.path_id}{refined_str}.smt2" - ) - return filename + filename = f"{self.path_id}{refined_str}.smt2" + + return Path(self.solving_ctx.dump_dir.name) / filename def refine(self) -> "PathContext": return PathContext( @@ -314,10 +331,10 @@ def parse_unsat_core(output: str) -> list[str] | None: def dump( path_ctx: PathContext, ) -> tuple[CheckSatResult, PotentialModel | None, list | None]: - args, query, dump_filename = path_ctx.args, path_ctx.query, path_ctx.dump_filename + args, query, dump_file = path_ctx.args, path_ctx.query, path_ctx.dump_file if args.verbose >= 1: - debug(f"Writing SMT query to {dump_filename}") + debug(f"Writing SMT query to {dump_file}") # for each implication assertion, `(assert (=> |id| c))`, in query.smtlib, # generate a corresponding named assertion, `(assert (! |id| :named ))`. @@ -330,21 +347,20 @@ def dump( ] ) - with open(dump_filename, "w") as f: - f.write("(set-option :produce-unsat-cores true)\n") - f.write("(set-logic QF_AUFBV)\n") - f.write(query.smtlib) - f.write(named_assertions) - f.write("(check-sat)\n") - f.write("(get-model)\n") - f.write("(get-unsat-core)\n") + dump_file.write_text( + "(set-option :produce-unsat-cores true)\n" + "(set-logic QF_AUFBV)\n" + f"{query.smtlib}\n" + f"{named_assertions}" + "(check-sat)\n" + "(get-model)\n" + "(get-unsat-core)\n" + ) else: - with open(dump_filename, "w") as f: - f.write("(set-logic QF_AUFBV)\n") - f.write(query.smtlib) - f.write("(check-sat)\n") - f.write("(get-model)\n") + dump_file.write_text( + f"(set-logic QF_AUFBV)\n{query.smtlib}\n(check-sat)\n(get-model)\n" + ) def is_model_valid(solver_stdout: str) -> bool: @@ -359,7 +375,7 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput: Can raise TimeoutError or some Exception raised during execution""" - args, smt2_filename = path_ctx.args, path_ctx.dump_filename + args, smt2_filename = path_ctx.args, str(path_ctx.dump_file) # make sure the smt2 file has been written dump(path_ctx) From 5831899c90c758d2b2ac0df0a08457db3626d52b Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 17 Jan 2025 16:41:01 -0800 Subject: [PATCH 29/46] TemporaryDirectory 3.11 compatibility support --- src/halmos/solve.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/halmos/solve.py b/src/halmos/solve.py index b9e91768..7518c238 100644 --- a/src/halmos/solve.py +++ b/src/halmos/solve.py @@ -150,14 +150,18 @@ def __post_init__(self): prefix = ( f"{self.info.name}-" if self.info.name - else f"{self.contract_ctx.name}-deploy-" + else f"{self.contract_ctx.name}-constructor-" ) # if the user explicitly enabled dumping, we don't want to delete the directory on exit delete = not self.args.dump_smt_queries - dump_dir = TemporaryDirectory( - prefix=prefix, ignore_cleanup_errors=True, delete=delete - ) + + # ideally we would pass `delete=delete` to the constructor, but it's in >=3.12 + dump_dir = TemporaryDirectory(prefix=prefix, ignore_cleanup_errors=True) + + # If user wants to keep the files, prevent cleanup on exit + if not delete: + dump_dir._finalizer.detach() if args.verbose >= 1 or args.dump_smt_queries: print(f"Generating SMT queries in {dump_dir.name}") From 61834c08fac9dae85911bfeb2413f5f6292a9734 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 17 Jan 2025 16:47:17 -0800 Subject: [PATCH 30/46] trying a thing on windows --- src/halmos/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/halmos/config.py b/src/halmos/config.py index dcf567ca..e7f10381 100644 --- a/src/halmos/config.py +++ b/src/halmos/config.py @@ -441,7 +441,7 @@ class Config: solver_command: str = arg( help="use the given command when invoking the solver", - global_default="z3", + global_default="z3.exe" if os.name == "nt" else "z3", metavar="COMMAND", group=solver, ) From d3a63a3fa20b5851e0d79ed2bc88e9621c5dea6f Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 17 Jan 2025 16:54:12 -0800 Subject: [PATCH 31/46] try specifying full path to z3 --- src/halmos/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/halmos/config.py b/src/halmos/config.py index e7f10381..c1f89396 100644 --- a/src/halmos/config.py +++ b/src/halmos/config.py @@ -1,6 +1,7 @@ import argparse import os import re +import shutil import sys from collections import OrderedDict from collections.abc import Callable, Generator @@ -441,7 +442,7 @@ class Config: solver_command: str = arg( help="use the given command when invoking the solver", - global_default="z3.exe" if os.name == "nt" else "z3", + global_default=shutil.which("z3"), metavar="COMMAND", group=solver, ) From 955f4106cdc64ddf17b3942aa87c7fc601a94556 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 17 Jan 2025 16:59:34 -0800 Subject: [PATCH 32/46] warn if z3 is not found --- src/halmos/__main__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 0bcd9466..f833f802 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -161,6 +161,11 @@ def with_natspec( def load_config(_args) -> HalmosConfig: config = default_config() + if not config.solver_command: + warn( + "could not find z3 on the PATH -- check your PATH/venv or pass --solver-command explicitly" + ) + # parse CLI args first, so that can get `--help` out of the way and resolve `--debug` # but don't apply the CLI overrides yet cli_overrides = arg_parser().parse_args(_args) From 9355d09d721a4416efc775984265c6a08316a1d1 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 17 Jan 2025 17:14:53 -0800 Subject: [PATCH 33/46] update uv version --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a21a1780..a517f7ab 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -47,7 +47,7 @@ jobs: uses: astral-sh/setup-uv@v4 with: # Install a specific version of uv. - version: "0.5.6" + version: "0.5.21" - name: Set up python ${{ matrix.python-version }} run: uv python install ${{ matrix.python-version }} From 985957985466cf2b92eba7b6f1c823360626c72b Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 22 Jan 2025 14:36:08 -0800 Subject: [PATCH 34/46] fix bad reference --- src/halmos/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index f833f802..17c5b516 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -507,7 +507,7 @@ def solve_end_to_end_callback(future: Future): # we have a valid counterexample, so we are eligible for early exit if args.early_exit: debug(f"Shutting down {ctx.info.name}'s solver executor") - ctx.solver_executor.shutdown(wait=False) + ctx.solving_ctx.executor.shutdown(wait=False) else: warn_str = f"Counterexample (potentially invalid): {model}" warn_code(COUNTEREXAMPLE_INVALID, warn_str) From b9f298594a54cac9668ff6770724263a1c56f375 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 22 Jan 2025 14:38:40 -0800 Subject: [PATCH 35/46] XXX print path to see what's up on windows ci --- src/halmos/__main__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 17c5b516..4956dd1c 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -166,6 +166,9 @@ def load_config(_args) -> HalmosConfig: "could not find z3 on the PATH -- check your PATH/venv or pass --solver-command explicitly" ) + # XXX undo this + print(f"PATH: {os.environ['PATH']}") + # parse CLI args first, so that can get `--help` out of the way and resolve `--debug` # but don't apply the CLI overrides yet cli_overrides = arg_parser().parse_args(_args) From 5068e5901ee39466755244953da9ade51000ba12 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 22 Jan 2025 15:00:00 -0800 Subject: [PATCH 36/46] finish merge --- src/halmos/__main__.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 4956dd1c..720b2c52 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -568,12 +568,20 @@ def solve_end_to_end_callback(future: Future): submitted_futures.append(solve_future) elif ex.context.is_stuck(): - debug(f"Potential error path (id: {idx+1})") - res, _, _ = solve(ex.path.to_smt2(args), args) + debug(f"Potential error path (id: {path_id})") + path_ctx = PathContext( + args=args, + path_id=path_id, + query=ex.path.to_smt2(args), + solving_ctx=ctx.solving_ctx, + ) + res, _, _ = solve_low_level(path_ctx) if res != unsat: - stuck.append((idx, ex, ex.context.get_stuck_reason())) + stuck.append((path_id, ex, ex.context.get_stuck_reason())) if args.print_blocked_states: - traces[idx] = f"{hexify(ex.path)}\n{rendered_trace(ex.context)}" + ctx.traces[path_id] = ( + f"{hexify(ex.path)}\n{rendered_trace(ex.context)}" + ) elif not error_output: if args.print_success_states: From 60ef79c3438db2db0636815b4bd20802bc576756 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 22 Jan 2025 15:18:57 -0800 Subject: [PATCH 37/46] print venv contents --- src/halmos/__main__.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 720b2c52..572c5a31 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -167,7 +167,13 @@ def load_config(_args) -> HalmosConfig: ) # XXX undo this - print(f"PATH: {os.environ['PATH']}") + path = os.environ["PATH"] + print(f"PATH: {path}") + for p in path.split(";"): + if ".venv" in p: + print(f"venv: {p}") + for f in os.listdir(p): + print(f" {f}") # parse CLI args first, so that can get `--help` out of the way and resolve `--debug` # but don't apply the CLI overrides yet @@ -575,8 +581,8 @@ def solve_end_to_end_callback(future: Future): query=ex.path.to_smt2(args), solving_ctx=ctx.solving_ctx, ) - res, _, _ = solve_low_level(path_ctx) - if res != unsat: + solver_output = solve_low_level(path_ctx) + if solver_output.result != unsat: stuck.append((path_id, ex, ex.context.get_stuck_reason())) if args.print_blocked_states: ctx.traces[path_id] = ( From 58030515f7b76cd2c4e18622a43d33ab96179f5a Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 22 Jan 2025 17:08:38 -0800 Subject: [PATCH 38/46] dynamically locate z3 in the venv --- src/halmos/__main__.py | 9 --------- src/halmos/config.py | 27 +++++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 572c5a31..a3ada1d9 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -166,15 +166,6 @@ def load_config(_args) -> HalmosConfig: "could not find z3 on the PATH -- check your PATH/venv or pass --solver-command explicitly" ) - # XXX undo this - path = os.environ["PATH"] - print(f"PATH: {path}") - for p in path.split(";"): - if ".venv" in p: - print(f"venv: {p}") - for f in os.listdir(p): - print(f" {f}") - # parse CLI args first, so that can get `--help` out of the way and resolve `--debug` # but don't apply the CLI overrides yet cli_overrides = arg_parser().parse_args(_args) diff --git a/src/halmos/config.py b/src/halmos/config.py index c1f89396..eaa78270 100644 --- a/src/halmos/config.py +++ b/src/halmos/config.py @@ -1,12 +1,12 @@ import argparse import os import re -import shutil import sys from collections import OrderedDict from collections.abc import Callable, Generator from dataclasses import MISSING, dataclass, fields from dataclasses import field as dataclass_field +from pathlib import Path from typing import Any import toml @@ -26,6 +26,29 @@ ) +def find_venv_root() -> Path | None: + # If the environment variable is set, use that + if "VIRTUAL_ENV" in os.environ: + return Path(os.environ["VIRTUAL_ENV"]) + + # Otherwise, if we're in a venv, sys.prefix != sys.base_prefix + if sys.prefix != sys.base_prefix: + return Path(sys.prefix) + + # Not in a virtual environment + return None + + +def find_z3_path() -> Path | None: + venv_path = find_venv_root() + if venv_path: + z3_bin = "z3.exe" if sys.platform == "win32" else "z3" + z3_path = venv_path / "bin" / z3_bin + if z3_path.exists(): + return z3_path + return None + + # helper to define config fields def arg( help: str, @@ -442,7 +465,7 @@ class Config: solver_command: str = arg( help="use the given command when invoking the solver", - global_default=shutil.which("z3"), + global_default=str(find_z3_path()), metavar="COMMAND", group=solver, ) From 1fc8649235cfdf62e451f1a57d99ba2ec3e9c4b0 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 23 Jan 2025 10:35:56 -0800 Subject: [PATCH 39/46] fix: avoid raising when path exploration > first cex --- src/halmos/__main__.py | 18 +++++++++++++++--- src/halmos/processes.py | 15 +++++++++++---- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index a3ada1d9..5554bb71 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -56,6 +56,7 @@ warn_code, ) from .mapper import BuildOut, DeployAddressMapper +from .processes import ShutdownError from .sevm import ( EMPTY_BALANCE, EVM, @@ -522,6 +523,12 @@ def solve_end_to_end_callback(future: Future): path_id = 0 # default value in case we don't enter the loop body submitted_futures = [] for path_id, ex in enumerate(exs): + # check if early exit is triggered + if ctx.solving_ctx.executor.is_shutdown(): + if args.debug: + print("aborting path exploration, executor has been shutdown") + break + # cache exec in case we need to print it later if args.print_failed_states: ctx.exec_cache[path_id] = ex @@ -560,9 +567,14 @@ def solve_end_to_end_callback(future: Future): solving_ctx=ctx.solving_ctx, ) - solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx) - solve_future.add_done_callback(solve_end_to_end_callback) - submitted_futures.append(solve_future) + try: + solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx) + solve_future.add_done_callback(solve_end_to_end_callback) + submitted_futures.append(solve_future) + except ShutdownError: + if args.debug: + print("aborting path exploration, executor has been shutdown") + break elif ex.context.is_stuck(): debug(f"Potential error path (id: {path_id})") diff --git a/src/halmos/processes.py b/src/halmos/processes.py index 83009942..05109a75 100644 --- a/src/halmos/processes.py +++ b/src/halmos/processes.py @@ -23,7 +23,8 @@ def register(self, executor): self._executors.add(executor) def shutdown_all(self): - print("Shutting down all executors") + """Shuts down all registered executors.""" + for ex in list(self._executors): ex.shutdown(wait=False) @@ -134,6 +135,10 @@ def is_running(self): return self.process and self.process.poll() is None +class ShutdownError(RuntimeError): + """Raised when submitting a future to an executor that has been shutdown.""" + + class PopenExecutor(concurrent.futures.Executor): """ An executor that runs commands in subprocesses. @@ -154,11 +159,13 @@ def __init__(self, max_workers: int = 1): def futures(self): return self._futures - def submit(self, future: PopenFuture): - """Accepts an unstarted PopenFuture and schedules it for execution.""" + def submit(self, future: PopenFuture) -> PopenFuture: + """Accepts an unstarted PopenFuture and schedules it for execution. + + Raises ShutdownError if the executor has been shutdown.""" if self._shutdown.is_set(): - raise RuntimeError("Cannot submit to a shutdown executor.") + raise ShutdownError() with self._lock: self._futures.append(future) From f7059546b86b2bc5503dd114fcda5074acd4d74a Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 24 Jan 2025 17:08:20 -0800 Subject: [PATCH 40/46] cleanup: print trace counterexample in the right place and fix off by one error in num_execs --- src/halmos/__main__.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 5554bb71..2808d117 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -473,16 +473,6 @@ def solve_end_to_end_callback(future: Future): solver_output = future.result() result, model = solver_output.result, solver_output.model - if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: - id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else "" - print(f"Trace{id_str}:") - print(ctx.traces[path_id], end="") - - # unsafe, can't print an Exec from a different thread - # if args.print_failed_states: - # print(f"# {path_id}") - # print(exec) - if ctx.solving_ctx.executor.is_shutdown(): # if the thread pool is in the process of shutting down, # we want to stop processing remaining models/timeouts/errors, etc. @@ -501,6 +491,13 @@ def solve_end_to_end_callback(future: Future): warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}") return + # print counterexample trace + if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE: + path_id = solver_output.path_id + id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else "" + print(f"Trace{id_str}:") + print(ctx.traces[path_id], end="") + if model.is_valid: print(red(f"Counterexample: {model}")) ctx.valid_counterexamples.append(model) @@ -609,7 +606,7 @@ def solve_end_to_end_callback(future: Future): warn(f"{funsig}: {msg}: --width {args.width}") break - num_execs = path_id + num_execs = path_id + 1 # the name is a bit misleading: this timer only starts after the exploration phase is complete # but it's possible that solvers have already been running for a while From 3a501aebefb1e9ec08540bb8b38b7c08980a7453 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 24 Jan 2025 17:18:41 -0800 Subject: [PATCH 41/46] bring back the executor shutdown in on_exit --- src/halmos/__main__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 2808d117..6bef4247 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -56,7 +56,7 @@ warn_code, ) from .mapper import BuildOut, DeployAddressMapper -from .processes import ShutdownError +from .processes import ExecutorRegistry, ShutdownError from .sevm import ( EMPTY_BALANCE, EVM, @@ -889,8 +889,7 @@ def _main(_args=None) -> MainResult: # def on_exit(exitcode: int) -> MainResult: - # disabling this for now because it causes an assertion violation in z3 - # ExecutorRegistry().shutdown_all() + ExecutorRegistry().shutdown_all() result = MainResult(exitcode, test_results_map) From d7dddf820911199ea2038eaaeca590f955007aa4 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Fri, 24 Jan 2025 17:22:04 -0800 Subject: [PATCH 42/46] fix status display condition Co-authored-by: Daejun Park --- src/halmos/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 6bef4247..3cca1ebb 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -622,7 +622,7 @@ def solve_end_to_end_callback(future: Future): # display assertion solving progress # - if not args.no_status or args.early_exit: + if not args.no_status: while True: done = sum(fm.done() for fm in submitted_futures) total = potential From 9a9d3cd719dd8ba48a46335928f51f706545df02 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Mon, 27 Jan 2025 14:47:15 -0800 Subject: [PATCH 43/46] fix unsafe setup_ex.path.to_smt2() access --- src/halmos/__main__.py | 13 +++++++------ src/halmos/solve.py | 6 ++++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 3cca1ebb..16d33c10 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -318,7 +318,7 @@ def setup(ctx: FunctionContext) -> Exec: ) setup_exs_all = sevm.run(setup_ex) - setup_exs_no_error: list[Exec] = [] + setup_exs_no_error: list[tuple[Exec, SMTQuery]] = [] for path_id, setup_ex in enumerate(setup_exs_all): if args.verbose >= VERBOSITY_TRACE_SETUP: @@ -339,24 +339,25 @@ def setup(ctx: FunctionContext) -> Exec: render_trace(setup_ex.context) else: - setup_exs_no_error.append(setup_ex) + # note: ex.path.to_smt2() needs to be called at this point. The solver object is shared across paths, + # and solver.to_smt2() will return a different query if it is called after a different path is explored. + setup_exs_no_error.append((setup_ex, setup_ex.path.to_smt2(args))) setup_exs: list[Exec] = [] match setup_exs_no_error: case []: pass - case [ex]: + case [(ex, _)]: setup_exs.append(ex) case _: - for path_id, ex in enumerate(setup_exs_no_error): + for path_id, (ex, query) in enumerate(setup_exs_no_error): path_ctx = PathContext( args=args, path_id=path_id, - query=ex.path.to_smt2(args), + query=query, solving_ctx=ctx.solving_ctx, ) - solver_output = solve_low_level(path_ctx) if solver_output.result != unsat: setup_exs.append(ex) diff --git a/src/halmos/solve.py b/src/halmos/solve.py index 7518c238..6374e25c 100644 --- a/src/halmos/solve.py +++ b/src/halmos/solve.py @@ -445,8 +445,10 @@ def solve_end_to_end(ctx: PathContext) -> SolverOutput: refined_ctx = ctx.refine() if refined_ctx.query.smtlib != query.smtlib: - # recurse with the refined query - return solve_end_to_end(refined_ctx) + # note that check_unsat_cores cannot return true for the refined query, because it relies on only + # constraint ids, which don't change after refinement + # therefore we can skip the unsat core check in solve_end_to_end and go directly to solve_low_level + return solve_low_level(refined_ctx) else: verbose(" Refinement did not change the query, no need to solve again") From 7d5d3502e5bbe581e0cdef9983699672a55cc92d Mon Sep 17 00:00:00 2001 From: karmacoma Date: Mon, 27 Jan 2025 14:55:48 -0800 Subject: [PATCH 44/46] simplify PotentialModel --- src/halmos/solve.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/halmos/solve.py b/src/halmos/solve.py index 6374e25c..3f4eabeb 100644 --- a/src/halmos/solve.py +++ b/src/halmos/solve.py @@ -54,14 +54,10 @@ class ModelVariable: @dataclass(frozen=True) class PotentialModel: - model: ModelVariables | str | None + model: ModelVariables is_valid: bool def __str__(self) -> str: - # expected to be a filename - if isinstance(self.model, str): - return f"see {self.model}" - formatted = [] for v in self.model.values(): # TODO: parse type and render accordingly @@ -88,7 +84,8 @@ class ContractContext: contract_json: dict libs: dict - # TODO: check if this is really a contract-level variable + # note: build_out_map is shared across all contracts compiled using the same compiler version + # so in principle, we could consider having another context, say CompileUnitContext, and put build_out_map there build_out_map: dict From e9184084a9e46da745faa01041b2377de135307e Mon Sep 17 00:00:00 2001 From: karmacoma Date: Mon, 27 Jan 2025 17:01:21 -0800 Subject: [PATCH 45/46] only render paths to string when debug is active 10x faster morpho-data-structures testProveNextBucket [PASS] testProveNextBucket(uint256) (paths: 100, time: 123.20s (paths: 123.20s, models: 0.00s), bounds: []) [PASS] testProveNextBucket(uint256) (paths: 100, time: 12.11s (paths: 11.28s, models: 0.83s), bounds: []) --- src/halmos/sevm.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py index efd9961c..d319588c 100644 --- a/src/halmos/sevm.py +++ b/src/halmos/sevm.py @@ -2609,12 +2609,14 @@ def jumpi( follow_false = visited[False] < self.options.loop if not (follow_true and follow_false): self.logs.bounded_loops.append(jid) - debug( - f"\nloop id: {jid}\n" - f"loop condition: {cond}\n" - f"calldata: {ex.calldata()}\n" - f"path condition:\n{ex.path}\n" - ) + if self.options.debug: + # rendering ex.path to string can be expensive, so only do it if debug is enabled + debug( + f"\nloop id: {jid}\n" + f"loop condition: {cond}\n" + f"calldata: {ex.calldata()}\n" + f"path condition:\n{ex.path}\n" + ) else: # for constant-bounded loops follow_true = potential_true From 0b7dc6e89f5a4b223b617d612707c51f8bfa9bd7 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Mon, 27 Jan 2025 17:48:16 -0800 Subject: [PATCH 46/46] switch halmos-solady to bitwuzla --- .github/workflows/test-external.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml index bfdf7ff3..0b7c217e 100644 --- a/.github/workflows/test-external.yml +++ b/.github/workflows/test-external.yml @@ -47,7 +47,7 @@ jobs: profile: "" - repo: "zobront/halmos-solady" dir: "halmos-solady" - cmd: "--function testCheck --solver-command yices-smt2 --solver-threads 3" + cmd: "--function testCheck --solver-command 'bitwuzla --produce-models' --solver-threads 3" branch: "" profile: "" - repo: "pcaversaccio/snekmate"