From 93c32f65bfc325766d029ae288692b8bb2e8f37e Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Wed, 18 Dec 2024 15:58:19 -0800
Subject: [PATCH 01/46] add build directory to .gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 2f368a47..de66ab4d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,6 @@ out/
 # - adds friction to CI
 # (at the cost of reproducible builds)
 uv.lock
+
+# https://docs.astral.sh/uv/concepts/projects/layout/#the-build-directory
+build/

From d419691b546a1fcd6a8fd972ec5accaed4d2e718 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Wed, 18 Dec 2024 15:58:43 -0800
Subject: [PATCH 02/46] add psutil to dependencies

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 796b5820..c2566bff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,8 @@ dependencies = [
     "z3-solver==4.12.6.0",
     "eth_hash[pysha3]>=0.7.0",
     "rich>=13.9.4",
-    "xxhash>=3.5.0"
+    "xxhash>=3.5.0",
+    "psutil>=6.1.0",
 ]
 dynamic = ["version"]
 

From 57151cff76b6f333e92aff0f5014e896e24e366f Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Wed, 18 Dec 2024 15:59:08 -0800
Subject: [PATCH 03/46] add PopenFuture and PopenExecutor

---
 src/halmos/processes.py | 196 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 196 insertions(+)
 create mode 100644 src/halmos/processes.py

diff --git a/src/halmos/processes.py b/src/halmos/processes.py
new file mode 100644
index 00000000..7f0fe920
--- /dev/null
+++ b/src/halmos/processes.py
@@ -0,0 +1,196 @@
+import concurrent.futures
+import contextlib
+import subprocess
+import threading
+import time
+from subprocess import PIPE, Popen
+
+import psutil
+
+
+class PopenFuture(concurrent.futures.Future):
+    cmd: list[str]
+    process: subprocess.Popen | None
+    stdout: str | None
+    stderr: str | None
+    returncode: int | None
+    start_time: float | None
+    end_time: float | None
+    _exception: Exception | None
+
+    def __init__(self, cmd: list[str]):
+        super().__init__()
+        self.cmd = cmd
+        self.process = None
+        self.stdout = None
+        self.stderr = None
+        self.returncode = None
+        self.start_time = None
+        self.end_time = None
+        self._exception = None
+
+    def start(self):
+        """Starts the subprocess and immediately returns."""
+
+        def run():
+            try:
+                self.start_time = time.time()
+                self.process = Popen(self.cmd, stdout=PIPE, stderr=PIPE, text=True)
+
+                # blocks until the process terminates
+                self.stdout, self.stderr = self.process.communicate()
+                self.end_time = time.time()
+                self.returncode = self.process.returncode
+            except Exception as e:
+                self._exception = e
+            finally:
+                self.set_result((self.stdout, self.stderr, self.returncode))
+
+        # avoid daemon threads because they can cause issues during shutdown
+        # we don't expect them to actually prevent halmos from terminating,
+        # as long as the underlying processes are terminated (either by natural
+        # causes or by forceful termination)
+        threading.Thread(target=run, daemon=False).start()
+
+        return self
+
+    def cancel(self):
+        """Attempts to terminate and then kill the process and its children."""
+        if self.is_running():
+            self.process.terminate()
+
+            # use psutil to kill the entire process tree (including children)
+            try:
+                parent_process = psutil.Process(self.process.pid)
+                processes = parent_process.children(recursive=True)
+                processes.append(parent_process)
+
+                # ask politely to terminate first
+                for process in processes:
+                    process.terminate()
+
+                # give them some time to terminate
+                time.sleep(0.1)
+
+                # after grace period, force kill
+                for process in processes:
+                    if process.is_running():
+                        process.kill()
+
+            except psutil.NoSuchProcess:
+                # process already terminated, nothing to do
+                pass
+
+    def exception(self) -> Exception | None:
+        """Returns any exception raised during the process."""
+
+        return self._exception
+
+    def result(self, timeout=None) -> tuple[str | None, str | None, int]:
+        """Blocks until the process is finished and returns the result (stdout, stderr, returncode).
+
+        Can raise TimeoutError or some Exception raised during execution"""
+
+        return super().result(timeout=timeout)
+
+    def done(self):
+        """Returns True if the process has finished."""
+
+        return super().done()
+
+    def is_running(self):
+        """Returns True if the process is still running.
+
+        Returns False before start() and after termination."""
+
+        return self.process and self.process.poll() is None
+
+
+class PopenExecutor(concurrent.futures.Executor):
+    """
+    An executor that runs commands in subprocesses.
+
+    Simple implementation that has no concept of max workers or pending futures.
+
+    The explicit goal is to support killing running subprocesses.
+    """
+
+    def __init__(self):
+        self._futures: list[PopenFuture] = list()
+        self._shutdown = threading.Event()
+        self._lock = threading.Lock()
+
+    def submit(self, command: list[str]):
+        if self._shutdown.is_set():
+            raise RuntimeError("Cannot submit to a shutdown executor.")
+
+        future = PopenFuture(command)
+        with self._lock:
+            self._futures.append(future)
+            future.start()
+            return future
+
+    def shutdown(self, wait=True):
+        self._shutdown.set()
+
+        # we have no concept of pending futures,
+        # therefore no cancellation of pending futures
+        if wait:
+            self._join()
+
+        # if asked for immediate shutdown we cancel everything
+        else:
+            with self._lock:
+                for future in self._futures:
+                    future.cancel()
+
+    def map(self, fn, *iterables, timeout=None, chunksize=1):
+        raise NotImplementedError()
+
+    def _join(self):
+        """Wait until all futures are finished or cancelled."""
+
+        # submitting new futures after join() would be bad,
+        # so we make this internal and only call it from shutdown()
+        with contextlib.suppress(concurrent.futures.CancelledError):
+            for future in list(self._futures):
+                future.result()
+
+
+def main():
+    # example usage
+
+    with PopenExecutor() as executor:
+        # Submit multiple commands
+        commands = [
+            "sleep 1",
+            "sleep 10",
+            "echo hello",
+        ]
+
+        futures = [executor.submit(command.split()) for command in commands]
+
+        def done_callback(future: PopenFuture):
+            stdout, stderr, exitcode = future.result()
+            cmd = " ".join(future.cmd)
+            elapsed = future.end_time - future.start_time
+            print(
+                f"{cmd}\n"
+                f"  exitcode={exitcode}\n"
+                f"  stdout={stdout.strip()}\n"
+                f"  stderr={stderr.strip()}\n"
+                f"  elapsed={elapsed:.2f}s"
+            )
+            executor.shutdown(wait=False)
+
+        for future in futures:
+            future.add_done_callback(done_callback)
+
+        # exiting the context manager will shutdown the executor with wait=True
+        # so no new futures can be submitted
+        # the first call to done_callback will cause the remaining futures to be cancelled
+        # (and the underlying processes to be terminated)
+
+
+if __name__ == "__main__":
+    main()

From 771dea6621d378085f429cf6225c51d1caf535d2 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 16 Jan 2025 15:48:22 -0800
Subject: [PATCH 04/46] WIP: add traces.py

---
 src/halmos/__main__.py  | 421 +++++++++++++---------------------------
 src/halmos/processes.py |   8 +-
 src/halmos/traces.py    | 162 ++++++++++++++++
 3 files changed, 303 insertions(+), 288 deletions(-)
 create mode 100644 src/halmos/traces.py

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index b6daa409..bfba44ed 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 import gc
-import io
 import json
 import logging
 import os
@@ -13,7 +12,6 @@
 import traceback
 import uuid
 from collections import Counter
-from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from dataclasses import asdict, dataclass
 from datetime import timedelta
@@ -21,17 +19,10 @@
 from importlib import metadata
 
 from z3 import (
-    Z3_OP_CONCAT,
     BitVec,
-    BitVecNumRef,
-    BitVecRef,
-    Bool,
     CheckSatResult,
-    Context,
     ModelRef,
     Solver,
-    is_app,
-    is_bv,
     sat,
     set_option,
     unknown,
@@ -58,6 +49,7 @@
     warn_code,
 )
 from .mapper import BuildOut, DeployAddressMapper, Mapper
+from .processes import PopenExecutor, PopenFuture
 from .sevm import (
     EMPTY_BALANCE,
     EVM,
@@ -72,7 +64,6 @@
     CallContext,
     CallOutput,
     Contract,
-    EventLog,
     Exec,
     FailCheatcode,
     Message,
@@ -83,14 +74,13 @@
     jumpid_str,
     mnemonic,
 )
+from .traces import render_trace, rendered_trace
 from .utils import (
     NamedTimer,
     address,
-    byte_length,
     color_error,
     con,
     create_solver,
-    cyan,
     green,
     hexify,
     indent_text,
@@ -198,147 +188,6 @@ def mk_solver(args: HalmosConfig, logic="QF_AUFBV", ctx=None, assertion=False):
     return create_solver(logic, ctx, timeout, args.solver_max_memory)
 
 
-def rendered_initcode(context: CallContext) -> str:
-    message = context.message
-    data = message.data
-
-    initcode_str = ""
-    args_str = ""
-
-    if (
-        isinstance(data, BitVecRef)
-        and is_app(data)
-        and data.decl().kind() == Z3_OP_CONCAT
-    ):
-        children = [arg for arg in data.children()]
-        if isinstance(children[0], BitVecNumRef):
-            initcode_str = hex(children[0].as_long())
-            args_str = ", ".join(map(str, children[1:]))
-    else:
-        initcode_str = hexify(data)
-
-    return f"{initcode_str}({cyan(args_str)})"
-
-
-def render_output(context: CallContext, file=sys.stdout) -> None:
-    output = context.output
-    returndata_str = "0x"
-    failed = output.error is not None
-
-    if not failed and context.is_stuck():
-        return
-
-    data = output.data
-    if data is not None:
-        is_create = context.message.is_create()
-        if hasattr(data, "unwrap"):
-            data = data.unwrap()
-
-        returndata_str = (
-            f"<{byte_length(data)} bytes of code>"
-            if (is_create and not failed)
-            else hexify(data)
-        )
-
-    ret_scheme = context.output.return_scheme
-    ret_scheme_str = f"{cyan(mnemonic(ret_scheme))} " if ret_scheme is not None else ""
-    error_str = f" (error: {repr(output.error)})" if failed else ""
-
-    color = red if failed else green
-    indent = context.depth * "    "
-    print(
-        f"{indent}{color('↩ ')}{ret_scheme_str}{color(returndata_str)}{color(error_str)}",
-        file=file,
-    )
-
-
-def rendered_log(log: EventLog) -> str:
-    opcode_str = f"LOG{len(log.topics)}"
-    topics = [
-        f"{cyan(f'topic{i}')}={hexify(topic)}" for i, topic in enumerate(log.topics)
-    ]
-    data_str = f"{cyan('data')}={hexify(log.data)}"
-    args_str = ", ".join(topics + [data_str])
-
-    return f"{opcode_str}({args_str})"
-
-
-def rendered_trace(context: CallContext) -> str:
-    with io.StringIO() as output:
-        render_trace(context, file=output)
-        return output.getvalue()
-
-
-def rendered_calldata(calldata: ByteVec, contract_name: str | None = None) -> str:
-    if not calldata:
-        return "0x"
-
-    if len(calldata) < 4:
-        return hexify(calldata)
-
-    if len(calldata) == 4:
-        return f"{hexify(calldata.unwrap(), contract_name)}()"
-
-    selector = calldata[:4].unwrap()
-    args = calldata[4:].unwrap()
-    return f"{hexify(selector, contract_name)}({hexify(args)})"
-
-
-def render_trace(context: CallContext, file=sys.stdout) -> None:
-    message = context.message
-    addr = unbox_int(message.target)
-    addr_str = str(addr) if is_bv(addr) else hex(addr)
-    # check if we have a contract name for this address in our deployment mapper
-    addr_str = DeployAddressMapper().get_deployed_contract(addr_str)
-
-    value = unbox_int(message.value)
-    value_str = f" (value: {value})" if is_bv(value) or value > 0 else ""
-
-    call_scheme_str = f"{cyan(mnemonic(message.call_scheme))} "
-    indent = context.depth * "    "
-
-    if message.is_create():
-        # TODO: select verbosity level to render full initcode
-        # initcode_str = rendered_initcode(context)
-
-        try:
-            if context.output.error is None:
-                target = hex(int(str(message.target)))
-                bytecode = context.output.data.unwrap().hex()
-                contract_name = Mapper().get_by_bytecode(bytecode).contract_name
-
-                DeployAddressMapper().add_deployed_contract(target, contract_name)
-                addr_str = contract_name
-        except Exception:
-            # TODO: print in debug mode
-            ...
-
-        initcode_str = f"<{byte_length(message.data)} bytes of initcode>"
-        print(
-            f"{indent}{call_scheme_str}{addr_str}::{initcode_str}{value_str}", file=file
-        )
-
-    else:
-        calldata = rendered_calldata(message.data, addr_str)
-        call_str = f"{addr_str}::{calldata}"
-        static_str = yellow(" [static]") if message.is_static else ""
-        print(f"{indent}{call_scheme_str}{call_str}{static_str}{value_str}", file=file)
-
-    log_indent = (context.depth + 1) * "    "
-    for trace_element in context.trace:
-        if isinstance(trace_element, CallContext):
-            render_trace(trace_element, file=file)
-        elif isinstance(trace_element, EventLog):
-            print(f"{log_indent}{rendered_log(trace_element)}", file=file)
-        else:
-            raise HalmosException(f"unexpected trace element: {trace_element}")
-
-    render_output(context, file=file)
-
-    if context.depth == 1:
-        print(file=file)
-
-
 def deploy_test(
     creation_hexcode: str,
     deployed_hexcode: str,
@@ -567,7 +416,7 @@ def is_global_fail_set(context: CallContext) -> bool:
     return hevm_fail or any(is_global_fail_set(x) for x in context.subcalls())
 
 
-def run(
+def run_test(
     setup_ex: Exec,
     abi: dict,
     fun_info: FunctionInfo,
@@ -578,10 +427,18 @@ def run(
     if args.verbose >= 1:
         print(f"Executing {funname}")
 
+    #
+    # prepare test dump directory if needed
+    #
+
     dump_dirname = f"/tmp/{funname}-{uuid.uuid4().hex}"
+    should_dump = args.dump_smt_queries or args.solver_command
+    if should_dump and not os.path.isdir(dump_dirname):
+        os.makedirs(dump_dirname)
+        print(f"Generating SMT queries in {dump_dirname}")
 
     #
-    # calldata
+    # prepare calldata
     #
 
     sevm = SEVM(args, fun_info)
@@ -636,20 +493,21 @@ def run(
 
     (logs, steps) = (sevm.logs, sevm.steps)
 
-    # check assertion violations
     normal = 0
+    potential = 0
     models: list[ModelWithContext] = []
     stuck = []
 
-    thread_pool = ThreadPoolExecutor(max_workers=args.solver_threads)
-    future_models = []
+    thread_pool = PopenExecutor(max_workers=args.solver_threads)
+    futures = []
     counterexamples = []
-    unsat_cores = []
+    unsat_cores: list[list] = []
     traces: dict[int, str] = {}
     exec_cache: dict[int, Exec] = {}
 
-    def future_callback(future_model):
-        m = future_model.result()
+    def future_callback(future: PopenFuture):
+        # XXX stdout, stderr, exitcode
+        m = future.result()
         models.append(m)
 
         model, index, result = m.model, m.index, m.result
@@ -688,16 +546,19 @@ def future_callback(future_model):
             print(f"# {index + 1}")
             print(exec)
 
-    # initialize with default value in case we don't enter the loop body
-    idx = -1
+    #
+    # consume the sevm.run() generator
+    # (actually triggers path exploration)
+    #
 
-    for idx, ex in enumerate(exs):
+    path_id = 0  # default value in case we don't enter the loop body
+    for path_id, ex in enumerate(exs):
         # cache exec in case we need to print it later
         if args.print_failed_states:
-            exec_cache[idx] = ex
+            exec_cache[path_id] = ex
 
         if args.verbose >= VERBOSITY_TRACE_PATHS:
-            print(f"Path #{idx+1}:")
+            print(f"Path #{path_id}:")
             print(indent_text(hexify(ex.path)))
 
             print("\nTrace:")
@@ -706,22 +567,28 @@ def future_callback(future_model):
         output = ex.context.output
         error_output = output.error
         if ex.is_panic_of(args.panic_error_codes) or is_global_fail_set(ex.context):
+            potential += 1
+
             if args.verbose >= 1:
-                print(f"Found potential path (id: {idx+1})")
+                print(f"Found potential path (id: {path_id})")
                 panic_code = unbox_int(output.data[4:36].unwrap())
                 print(f"Panic(0x{panic_code:02x}) {error_output}")
 
+            # we don't know yet if this will lead to a counterexample
+            # so we save the rendered trace here and potentially print it later
+            # if a valid counterexample is found
             if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
-                traces[idx] = rendered_trace(ex.context)
+                traces[path_id] = rendered_trace(ex.context)
 
-            query = ex.path.to_smt2(args)
+            query: SMTQuery = ex.path.to_smt2(args)
 
-            future_model = thread_pool.submit(
-                gen_model_from_sexpr,
-                GenModelArgs(args, idx, query, unsat_cores, dump_dirname),
-            )
-            future_model.add_done_callback(future_callback)
-            future_models.append(future_model)
+            dump(query, args, dump_filename=f"{dump_dirname}/{path_id}.smt2")
+
+            # if the query contains an unsat-core, it is unsat; no need to run the solver
+            if check_unsat_cores(query, unsat_cores):
+                if args.verbose >= 1:
+                    print("  Already proven unsat")
+                continue
 
         elif ex.context.is_stuck():
             debug(f"Potential error path (id: {idx+1})")
@@ -733,28 +600,27 @@ def future_callback(future_model):
 
         elif not error_output:
             if args.print_success_states:
-                print(f"# {idx+1}")
+                print(f"# {path_id}")
                 print(ex)
             normal += 1
 
         # print post-states
         if args.print_states:
-            print(f"# {idx+1}")
+            print(f"# {path_id}")
             print(ex)
 
         # 0 width is unlimited
-        if args.width and idx >= args.width:
-            warn(
-                f"{funsig}: incomplete execution due to the specified limit: --width {args.width}"
-            )
+        if args.width and path_id >= args.width:
+            msg = "incomplete execution due to the specified limit"
+            warn(f"{funsig}: {msg}: --width {args.width}")
             break
 
-    num_execs = idx + 1
+    num_execs = path_id
     timer.create_subtimer("models")
 
-    if future_models and args.verbose >= 1:
+    if potential > 0 and args.verbose >= 1:
         print(
-            f"# of potential paths involving assertion violations: {len(future_models)} / {num_execs}  (--solver-threads {args.solver_threads})"
+            f"# of potential paths involving assertion violations: {potential} / {num_execs}  (--solver-threads {args.solver_threads})"
         )
 
     # display assertion solving progress
@@ -762,8 +628,8 @@ def future_callback(future_model):
         while True:
             if args.early_exit and len(counterexamples) > 0:
                 break
-            done = sum(fm.done() for fm in future_models)
-            total = len(future_models)
+            done = sum(fm.done() for fm in futures)
+            total = potential
             if done == total:
                 break
             elapsed = timedelta(seconds=int(timer.elapsed()))
@@ -805,11 +671,11 @@ def future_callback(future_model):
         f"{passfail} {funsig} (paths: {num_execs}, {time_info}, bounds: [{', '.join([str(x) for x in dyn_params])}])"
     )
 
-    for idx, _, err in stuck:
+    for path_id, _, err in stuck:
         warn_code(INTERNAL_ERROR, f"Encountered {err}")
         if args.print_blocked_states:
-            print(f"\nPath #{idx+1}")
-            print(traces[idx], end="")
+            print(f"\nPath #{path_id}")
+            print(traces[path_id], end="")
 
     if logs.bounded_loops:
         warn_code(
@@ -874,7 +740,7 @@ class RunArgs:
     build_out_map: dict
 
 
-def run_sequential(run_args: RunArgs) -> list[TestResult]:
+def run_contract(run_args: RunArgs) -> list[TestResult]:
     BuildOut().set_build_out(run_args.build_out_map)
 
     args = run_args.args
@@ -909,7 +775,9 @@ def run_sequential(run_args: RunArgs) -> list[TestResult]:
             test_config = with_devdoc(args, funsig, run_args.contract_json)
             solver = mk_solver(test_config)
             debug(f"{test_config.formatted_layers()}")
-            test_result = run(setup_ex, run_args.abi, fun_info, test_config, solver)
+            test_result = run_test(
+                setup_ex, run_args.abi, fun_info, test_config, solver
+            )
         except Exception as err:
             print(f"{color_error('[ERROR]')} {funsig}")
             error(f"{type(err).__name__}: {err}")
@@ -932,13 +800,13 @@ def run_sequential(run_args: RunArgs) -> list[TestResult]:
 @dataclass(frozen=True)
 class GenModelArgs:
     args: HalmosConfig
-    idx: int
+    path_id: int
     sexpr: SMTQuery
     known_unsat_cores: list[list]
     dump_dirname: str | None = None
 
 
-def parse_unsat_core(output) -> list | None:
+def parse_unsat_core(output) -> list[str] | None:
     # parsing example:
     #   unsat
     #   (error "the context is unsatisfiable")  # <-- this line is optional
@@ -955,93 +823,88 @@ def parse_unsat_core(output) -> list | None:
         return None
 
 
-def solve(
+def dump(
     query: SMTQuery, args: HalmosConfig, dump_filename: str | None = None
 ) -> tuple[CheckSatResult, PotentialModel | None, list | None]:
-    if args.dump_smt_queries or args.solver_command:
-        if not dump_filename:
-            dump_filename = f"/tmp/{uuid.uuid4().hex}.smt2"
-
-        # for each implication assertion, `(assert (=> |id| c))`, in query.smtlib,
-        # generate a corresponding named assertion, `(assert (! |id| :named <id>))`.
-        # see `svem.Path.to_smt2()` for more details.
-        if args.cache_solver:
-            named_assertions = "".join(
-                [
-                    f"(assert (! |{assert_id}| :named <{assert_id}>))\n"
-                    for assert_id in query.assertions
-                ]
-            )
+    if not dump_filename:
+        dump_filename = f"/tmp/{uuid.uuid4().hex}.smt2"
+
+    if args.verbose >= 1:
+        debug(f"Writing SMT query to {dump_filename}")
+
+    # for each implication assertion, `(assert (=> |id| c))`, in query.smtlib,
+    # generate a corresponding named assertion, `(assert (! |id| :named <id>))`.
+    # see `svem.Path.to_smt2()` for more details.
+    if args.cache_solver:
+        named_assertions = "".join(
+            [
+                f"(assert (! |{assert_id}| :named <{assert_id}>))\n"
+                for assert_id in query.assertions
+            ]
+        )
 
         with open(dump_filename, "w") as f:
-            if args.verbose >= 1:
-                debug(f"Writing SMT query to {dump_filename}")
-            if args.cache_solver:
-                f.write("(set-option :produce-unsat-cores true)\n")
+            f.write("(set-option :produce-unsat-cores true)\n")
             f.write("(set-logic QF_AUFBV)\n")
             f.write(query.smtlib)
-            if args.cache_solver:
-                f.write(named_assertions)
+            f.write(named_assertions)
             f.write("(check-sat)\n")
             f.write("(get-model)\n")
-            if args.cache_solver:
-                f.write("(get-unsat-core)\n")
+            f.write("(get-unsat-core)\n")
 
-    if args.solver_command:
-        if args.verbose >= 1:
-            debug("  Checking with external solver process")
-            debug(f"    {args.solver_command} {dump_filename} >{dump_filename}.out")
+    else:
+        with open(dump_filename, "w") as f:
+            f.write("(set-logic QF_AUFBV)\n")
+            f.write(query.smtlib)
+            f.write("(check-sat)\n")
+            f.write("(get-model)\n")
 
-        # solver_timeout_assertion == 0 means no timeout,
-        # which translates to timeout_seconds=None for subprocess.run
-        timeout_seconds = None
-        if timeout_millis := args.solver_timeout_assertion:
-            timeout_seconds = timeout_millis / 1000
 
-        cmd = args.solver_command.split() + [dump_filename]
-        try:
-            res_str = subprocess.run(
-                cmd, capture_output=True, text=True, timeout=timeout_seconds
-            ).stdout.strip()
-            res_str_head = res_str.split("\n", 1)[0]
+@dataclass(frozen=True)
+class SolverOutput:
+    result: CheckSatResult
+    model: PotentialModel | None = None
+    unsat_core: list[str] | None = None
 
-            with open(f"{dump_filename}.out", "w") as f:
-                f.write(res_str)
 
-            if args.verbose >= 1:
-                debug(f"    {res_str_head}")
+def solve(smt2_filename: str, args: HalmosConfig) -> SolverOutput:
+    if args.verbose >= 1:
+        debug("  Checking with external solver process")
+        debug(f"    {args.solver_command} {smt2_filename} >{smt2_filename}.out")
 
-            if res_str_head == "unsat":
-                unsat_core = parse_unsat_core(res_str) if args.cache_solver else None
-                return unsat, None, unsat_core
-            elif res_str_head == "sat":
-                return sat, PotentialModel(f"{dump_filename}.out", args), None
-            else:
-                return unknown, None, None
-        except subprocess.TimeoutExpired:
-            return unknown, None, None
+    # solver_timeout_assertion == 0 means no timeout,
+    # which translates to timeout_seconds=None for subprocess.run
+    timeout_seconds = None
+    if timeout_millis := args.solver_timeout_assertion:
+        timeout_seconds = timeout_millis / 1000
 
-    else:
-        ctx = Context()
-        solver = mk_solver(args, ctx=ctx, assertion=True)
-        solver.from_string(query.smtlib)
-        if args.cache_solver:
-            solver.set(unsat_core=True)
-            ids = [Bool(f"{x}", ctx) for x in query.assertions]
-            result = solver.check(*ids)
+    cmd = args.solver_command.split() + [smt2_filename]
+    try:
+        # XXX submit to PopenExecutor, process output in callback
+        res_str = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=timeout_seconds
+        ).stdout.strip()
+        res_str_head = res_str.split("\n", 1)[0]
+
+        with open(f"{smt2_filename}.out", "w") as f:
+            f.write(res_str)
+
+        if args.verbose >= 1:
+            debug(f"    {res_str_head}")
+
+        if res_str_head == "unsat":
+            unsat_core = parse_unsat_core(res_str) if args.cache_solver else None
+            return SolverOutput(result=unsat, unsat_core=unsat_core)
+        elif res_str_head == "sat":
+            model = PotentialModel(f"{smt2_filename}.out", args)
+            return SolverOutput(result=sat, model=model)
         else:
-            result = solver.check()
-        model = PotentialModel(solver.model(), args) if result == sat else None
-        unsat_core = (
-            [str(core) for core in solver.unsat_core()]
-            if args.cache_solver and result == unsat
-            else None
-        )
-        solver.reset()
-        return result, model, unsat_core
+            return SolverOutput(result=unknown)
+    except subprocess.TimeoutExpired:
+        return SolverOutput(result=unknown)
 
 
-def check_unsat_cores(query, unsat_cores) -> bool:
+def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool:
     # return true if the given query contains any given unsat core
     for unsat_core in unsat_cores:
         if all(core in query.assertions for core in unsat_core):
@@ -1050,34 +913,20 @@ def check_unsat_cores(query, unsat_cores) -> bool:
 
 
 def gen_model_from_sexpr(fn_args: GenModelArgs) -> ModelWithContext:
-    args, idx, sexpr = fn_args.args, fn_args.idx, fn_args.sexpr
+    args, path_id, sexpr = fn_args.args, fn_args.path_id, fn_args.sexpr
 
-    dump_dirname = fn_args.dump_dirname
-    dump_filename = f"{dump_dirname}/{idx+1}.smt2"
-    should_dump = args.dump_smt_queries or args.solver_command
-    if should_dump and not os.path.isdir(dump_dirname):
-        os.makedirs(dump_dirname)
-        print(f"Generating SMT queries in {dump_dirname}")
+    res, model, unsat_core = solve(sexpr, args)
 
-    if args.verbose >= 1:
-        print(f"Checking path condition (path id: {idx+1})")
-
-    if check_unsat_cores(sexpr, fn_args.known_unsat_cores):
-        # if the given query contains an unsat-core, it is unsat; no need to run the solver.
-        if args.verbose >= 1:
-            print("  Already proven unsat")
-        return package_result(None, idx, unsat, None, args)
+    # XXX fix refinement
 
-    res, model, unsat_core = solve(sexpr, args, dump_filename)
-
-    if res == sat and not model.is_valid:
-        if args.verbose >= 1:
-            print("  Checking again with refinement")
+    # if res == sat and not model.is_valid:
+    #     if args.verbose >= 1:
+    #         print("  Checking again with refinement")
 
-        refined_filename = dump_filename.replace(".smt2", ".refined.smt2")
-        res, model, unsat_core = solve(refine(sexpr), args, refined_filename)
+    # refined_filename = dump_filename.replace(".smt2", ".refined.smt2")
+    # res, model, unsat_core = solve(refine(sexpr), args, refined_filename)
 
-    return package_result(model, idx, res, unsat_core, args)
+    return package_result(model, path_id, res, unsat_core, args)
 
 
 def refine(query: SMTQuery) -> SMTQuery:
@@ -1482,7 +1331,7 @@ def on_signal(signum, frame):
             build_out_map,
         )
 
-        test_results = run_sequential(run_args)
+        test_results = run_contract(run_args)
 
         num_passed = sum(r.exitcode == 0 for r in test_results)
         num_failed = num_found - num_passed
diff --git a/src/halmos/processes.py b/src/halmos/processes.py
index 7f0fe920..1396c799 100644
--- a/src/halmos/processes.py
+++ b/src/halmos/processes.py
@@ -115,11 +115,13 @@ class PopenExecutor(concurrent.futures.Executor):
     The explicit goal is to support killing running subprocesses.
     """
 
-    def __init__(self):
+    def __init__(self, max_workers: int = 1):
         self._futures: list[PopenFuture] = list()
         self._shutdown = threading.Event()
         self._lock = threading.Lock()
 
+        # TODO: support max_workers
+
     def submit(self, command: list[str]):
         if self._shutdown.is_set():
             raise RuntimeError("Cannot submit to a shutdown executor.")
@@ -130,7 +132,9 @@ def submit(self, command: list[str]):
             future.start()
             return future
 
-    def shutdown(self, wait=True):
+    def shutdown(self, wait=True, cancel_futures=False):
+        # TODO: support max_workers
+
         self._shutdown.set()
 
         # we have no concept of pending futures,
diff --git a/src/halmos/traces.py b/src/halmos/traces.py
new file mode 100644
index 00000000..53dc1bd2
--- /dev/null
+++ b/src/halmos/traces.py
@@ -0,0 +1,162 @@
+import io
+import sys
+
+from z3 import Z3_OP_CONCAT, BitVecNumRef, BitVecRef, is_app
+
+from halmos.bytevec import ByteVec
+from halmos.exceptions import HalmosException
+from halmos.sevm import CallContext, EventLog
+from halmos.utils import (
+    DeployAddressMapper,
+    Mapper,
+    byte_length,
+    cyan,
+    green,
+    hexify,
+    is_bv,
+    mnemonic,
+    red,
+    unbox_int,
+    yellow,
+)
+
+
+def rendered_initcode(context: CallContext) -> str:
+    message = context.message
+    data = message.data
+
+    initcode_str = ""
+    args_str = ""
+
+    if (
+        isinstance(data, BitVecRef)
+        and is_app(data)
+        and data.decl().kind() == Z3_OP_CONCAT
+    ):
+        children = [arg for arg in data.children()]
+        if isinstance(children[0], BitVecNumRef):
+            initcode_str = hex(children[0].as_long())
+            args_str = ", ".join(map(str, children[1:]))
+    else:
+        initcode_str = hexify(data)
+
+    return f"{initcode_str}({cyan(args_str)})"
+
+
+def render_output(context: CallContext, file=sys.stdout) -> None:
+    output = context.output
+    returndata_str = "0x"
+    failed = output.error is not None
+
+    if not failed and context.is_stuck():
+        return
+
+    data = output.data
+    if data is not None:
+        is_create = context.message.is_create()
+        if hasattr(data, "unwrap"):
+            data = data.unwrap()
+
+        returndata_str = (
+            f"<{byte_length(data)} bytes of code>"
+            if (is_create and not failed)
+            else hexify(data)
+        )
+
+    ret_scheme = context.output.return_scheme
+    ret_scheme_str = f"{cyan(mnemonic(ret_scheme))} " if ret_scheme is not None else ""
+    error_str = f" (error: {repr(output.error)})" if failed else ""
+
+    color = red if failed else green
+    indent = context.depth * "    "
+    print(
+        f"{indent}{color('↩ ')}{ret_scheme_str}{color(returndata_str)}{color(error_str)}",
+        file=file,
+    )
+
+
+def rendered_log(log: EventLog) -> str:
+    opcode_str = f"LOG{len(log.topics)}"
+    topics = [
+        f"{cyan(f'topic{i}')}={hexify(topic)}" for i, topic in enumerate(log.topics)
+    ]
+    data_str = f"{cyan('data')}={hexify(log.data)}"
+    args_str = ", ".join(topics + [data_str])
+
+    return f"{opcode_str}({args_str})"
+
+
+def rendered_trace(context: CallContext) -> str:
+    with io.StringIO() as output:
+        render_trace(context, file=output)
+        return output.getvalue()
+
+
+def rendered_calldata(calldata: ByteVec, contract_name: str | None = None) -> str:
+    if not calldata:
+        return "0x"
+
+    if len(calldata) < 4:
+        return hexify(calldata)
+
+    if len(calldata) == 4:
+        return f"{hexify(calldata.unwrap(), contract_name)}()"
+
+    selector = calldata[:4].unwrap()
+    args = calldata[4:].unwrap()
+    return f"{hexify(selector, contract_name)}({hexify(args)})"
+
+
+def render_trace(context: CallContext, file=sys.stdout) -> None:
+    message = context.message
+    addr = unbox_int(message.target)
+    addr_str = str(addr) if is_bv(addr) else hex(addr)
+    # check if we have a contract name for this address in our deployment mapper
+    addr_str = DeployAddressMapper().get_deployed_contract(addr_str)
+
+    value = unbox_int(message.value)
+    value_str = f" (value: {value})" if is_bv(value) or value > 0 else ""
+
+    call_scheme_str = f"{cyan(mnemonic(message.call_scheme))} "
+    indent = context.depth * "    "
+
+    if message.is_create():
+        # TODO: select verbosity level to render full initcode
+        # initcode_str = rendered_initcode(context)
+
+        try:
+            if context.output.error is None:
+                target = hex(int(str(message.target)))
+                bytecode = context.output.data.unwrap().hex()
+                contract_name = Mapper().get_by_bytecode(bytecode).contract_name
+
+                DeployAddressMapper().add_deployed_contract(target, contract_name)
+                addr_str = contract_name
+        except Exception:
+            # TODO: print in debug mode
+            ...
+
+        initcode_str = f"<{byte_length(message.data)} bytes of initcode>"
+        print(
+            f"{indent}{call_scheme_str}{addr_str}::{initcode_str}{value_str}", file=file
+        )
+
+    else:
+        calldata = rendered_calldata(message.data, addr_str)
+        call_str = f"{addr_str}::{calldata}"
+        static_str = yellow(" [static]") if message.is_static else ""
+        print(f"{indent}{call_scheme_str}{call_str}{static_str}{value_str}", file=file)
+
+    log_indent = (context.depth + 1) * "    "
+    for trace_element in context.trace:
+        if isinstance(trace_element, CallContext):
+            render_trace(trace_element, file=file)
+        elif isinstance(trace_element, EventLog):
+            print(f"{log_indent}{rendered_log(trace_element)}", file=file)
+        else:
+            raise HalmosException(f"unexpected trace element: {trace_element}")
+
+    render_output(context, file=file)
+
+    if context.depth == 1:
+        print(file=file)

From 076caae23dd1f8a2c30ee2b9c014b2dbcc2d1cc8 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 16 Jan 2025 15:53:48 -0800
Subject: [PATCH 05/46] XXX sort data into contract, function and path contexts

---
 src/halmos/__main__.py | 508 +++++++++++++++++++++++------------------
 1 file changed, 288 insertions(+), 220 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index bfba44ed..d9dd1ab0 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -13,7 +13,7 @@
 import uuid
 from collections import Counter
 from copy import deepcopy
-from dataclasses import asdict, dataclass
+from dataclasses import asdict, dataclass, field
 from datetime import timedelta
 from enum import Enum
 from importlib import metadata
@@ -110,6 +110,138 @@
 VERBOSITY_TRACE_CONSTRUCTOR = 5
 
 
+@dataclass
+class PotentialModel:
+    model: AnyModel
+    is_valid: bool
+
+    def __init__(self, model: ModelRef | str, args: HalmosConfig) -> None:
+        # convert model into string to avoid pickling errors for z3 (ctypes) objects containing pointers
+        self.model = (
+            to_str_model(model, args.print_full_model)
+            if isinstance(model, ModelRef)
+            else model
+        )
+        self.is_valid = is_model_valid(model)
+
+    def __str__(self) -> str:
+        # expected to be a filename
+        if isinstance(self.model, str):
+            return f"see {self.model}"
+
+        formatted = [f"\n    {decl} = {val}" for decl, val in self.model.items()]
+        return "".join(sorted(formatted)) if formatted else "∅"
+
+
+@dataclass(frozen=True)
+class ContractContext:
+    # config with contract-specific overrides
+    args: HalmosConfig
+
+    # name of this contract
+    name: str
+
+    # signatures of test functions to run
+    funsigs: list[str]
+
+    # data parsed from the build output for this contract
+    creation_hexcode: str
+    deployed_hexcode: str
+    abi: dict
+    method_identifiers: dict[str, str]
+    contract_json: dict
+    libs: dict
+
+    # TODO: check if this is really a contract-level variable
+    build_out_map: dict
+
+
+@dataclass(frozen=True)
+class FunctionContext:
+    # config with function-specific overrides
+    args: HalmosConfig
+
+    # function name, signature, and selector
+    info: FunctionInfo
+
+    # solver using the function-specific config
+    solver: Solver
+
+    # backlink to the parent contract context
+    contract_ctx: ContractContext
+
+    # optional starting state
+    setup_ex: Exec | None = None
+
+    # dump directory for this function (generated in __post_init__)
+    dump_dirname: str = field(init=False)
+
+    def __post_init__(self):
+        object.__setattr__(
+            self, "dump_dirname", f"/tmp/{self.info.name}-{uuid.uuid4().hex}"
+        )
+
+
+@dataclass
+class PathContext:
+    # id of this path
+    path_id: int
+
+    # path execution object
+    ex: Exec
+
+    # SMT query
+    query: SMTQuery
+
+    # backlink to the parent function context
+    fun_ctx: FunctionContext
+
+    # filename for this path (generated in __post_init__)
+    dump_filename: str = field(init=False)
+
+    def __post_init__(self):
+        object.__setattr__(
+            self, "dump_filename", f"{self.fun_ctx.dump_dirname}/{self.path_id}.smt2"
+        )
+
+
+# XXX remove ModelWithContext
+@dataclass(frozen=True)
+class ModelWithContext:
+    # can be a filename containing the model or a dict with variable assignments
+    model: PotentialModel | None
+    index: int
+    result: CheckSatResult
+    unsat_core: list | None
+
+
+@dataclass(frozen=True)
+class TestResult:
+    name: str  # test function name
+    exitcode: int
+    num_models: int = None
+    models: list[ModelWithContext] = None
+    num_paths: tuple[int, int, int] = None  # number of paths: [total, success, blocked]
+    time: tuple[int, int, int] = None  # time: [total, paths, models]
+    num_bounded_loops: int = None  # number of incomplete loops
+
+
+@dataclass(frozen=True)
+class SolverOutput:
+    result: CheckSatResult
+    model: PotentialModel | None = None
+    unsat_core: list[str] | None = None
+
+
+class Exitcode(Enum):
+    PASS = 0
+    COUNTEREXAMPLE = 1
+    TIMEOUT = 2
+    STUCK = 3
+    REVERT_ALL = 4
+    EXCEPTION = 5
+
+
 def with_devdoc(args: HalmosConfig, fn_sig: str, contract_json: dict) -> HalmosConfig:
     devdoc = parse_devdoc(fn_sig, contract_json)
     if not devdoc:
@@ -181,21 +313,16 @@ def mk_this() -> Address:
     return con_addr(FOUNDRY_TEST)
 
 
-def mk_solver(args: HalmosConfig, logic="QF_AUFBV", ctx=None, assertion=False):
-    timeout = (
-        args.solver_timeout_assertion if assertion else args.solver_timeout_branching
+def mk_solver(args: HalmosConfig, logic="QF_AUFBV", ctx=None):
+    return create_solver(
+        logic=logic,
+        ctx=ctx,
+        timeout=args.solver_timeout_branching,
+        max_memory=args.solver_max_memory,
     )
-    return create_solver(logic, ctx, timeout, args.solver_max_memory)
 
 
-def deploy_test(
-    creation_hexcode: str,
-    deployed_hexcode: str,
-    sevm: SEVM,
-    args: HalmosConfig,
-    libs: dict,
-    solver: Solver,
-) -> Exec:
+def deploy_test(ctx: ContractContext, sevm: SEVM) -> Exec:
     this = mk_this()
     message = Message(
         target=this,
@@ -213,12 +340,12 @@ def deploy_test(
         block=mk_block(),
         context=CallContext(message=message),
         pgm=None,  # to be added
-        path=Path(solver),
+        path=Path(ctx.solver),
     )
 
     # deploy libraries and resolve library placeholders in hexcode
-    (creation_hexcode, deployed_hexcode) = ex.resolve_libs(
-        creation_hexcode, deployed_hexcode, libs
+    (creation_hexcode, _) = ex.resolve_libs(
+        ctx.creation_hexcode, ctx.deployed_hexcode, ctx.libs
     )
 
     # test contract creation bytecode
@@ -232,9 +359,9 @@ def deploy_test(
     if len(exs) != 1:
         raise ValueError(f"constructor: # of paths: {len(exs)}")
 
-    ex = exs[0]
+    [ex] = exs
 
-    if args.verbose >= VERBOSITY_TRACE_CONSTRUCTOR:
+    if ctx.args.verbose >= VERBOSITY_TRACE_CONSTRUCTOR:
         print("Constructor trace:")
         render_trace(ex.context)
 
@@ -258,71 +385,72 @@ def deploy_test(
     return ex
 
 
-def setup(
-    creation_hexcode: str,
-    deployed_hexcode: str,
-    abi: dict,
-    setup_info: FunctionInfo,
-    args: HalmosConfig,
-    libs: dict,
-    solver: Solver,
-) -> Exec:
+def setup(ctx: FunctionContext) -> Exec:
     setup_timer = NamedTimer("setup")
     setup_timer.create_subtimer("decode")
 
+    args, setup_info = ctx.args, ctx.info
     sevm = SEVM(args, setup_info)
-    setup_ex = deploy_test(creation_hexcode, deployed_hexcode, sevm, args, libs, solver)
+    setup_ex = deploy_test(ctx, sevm)
 
     setup_timer.create_subtimer("run")
 
     setup_sig = setup_info.sig
-    if setup_sig:
-        # TODO: dyn_params may need to be passed to mk_calldata in run()
-        calldata, dyn_params = mk_calldata(abi, setup_info, args)
-        setup_ex.path.process_dyn_params(dyn_params)
-
-        parent_message = setup_ex.message()
-        setup_ex.context = CallContext(
-            message=Message(
-                target=parent_message.target,
-                caller=parent_message.caller,
-                origin=parent_message.origin,
-                value=0,
-                data=calldata,
-                call_scheme=EVM.CALL,
-            ),
-        )
+    if not setup_sig:
+        if args.statistics:
+            print(setup_timer.report())
+        return setup_ex
 
-        setup_exs_all = sevm.run(setup_ex)
-        setup_exs_no_error = []
+    setup_timer.create_subtimer("run")
 
-        for idx, setup_ex in enumerate(setup_exs_all):
-            if args.verbose >= VERBOSITY_TRACE_SETUP:
-                print(f"{setup_sig} trace #{idx+1}:")
-                render_trace(setup_ex.context)
+    # TODO: dyn_params may need to be passed to mk_calldata in run()
+    calldata, dyn_params = mk_calldata(ctx.abi, setup_info, args)
+    setup_ex.path.process_dyn_params(dyn_params)
+
+    parent_message = setup_ex.message()
+    setup_ex.context = CallContext(
+        message=Message(
+            target=parent_message.target,
+            caller=parent_message.caller,
+            origin=parent_message.origin,
+            value=0,
+            data=calldata,
+            call_scheme=EVM.CALL,
+        ),
+    )
 
-            if not (err := setup_ex.context.output.error):
-                setup_exs_no_error.append((setup_ex, setup_ex.path.to_smt2(args)))
+    setup_exs_all = sevm.run(setup_ex)
+    setup_exs_no_error = []
 
-            else:
-                opcode = setup_ex.current_opcode()
-                if opcode not in [EVM.REVERT, EVM.INVALID]:
-                    warn_code(
-                        INTERNAL_ERROR,
-                        f"in {setup_sig}, executing {mnemonic(opcode)} failed with: {err}",
-                    )
+    for idx, setup_ex in enumerate(setup_exs_all):
+        if args.verbose >= VERBOSITY_TRACE_SETUP:
+            print(f"{setup_sig} trace #{idx+1}:")
+            render_trace(setup_ex.context)
 
-                # only render the trace if we didn't already do it
-                if (
-                    args.verbose < VERBOSITY_TRACE_SETUP
-                    and args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE
-                ):
-                    print(f"{setup_sig} trace:")
-                    render_trace(setup_ex.context)
+        if not (err := setup_ex.context.output.error):
+            setup_exs_no_error.append((setup_ex, setup_ex.path.to_smt2(args)))
 
-        setup_exs = []
+        else:
+            opcode = setup_ex.current_opcode()
+            if opcode not in [EVM.REVERT, EVM.INVALID]:
+                warn_code(
+                    INTERNAL_ERROR,
+                    f"in {setup_sig}, executing {mnemonic(opcode)} failed with: {err}",
+                )
 
-        if len(setup_exs_no_error) > 1:
+            # only render the trace if we didn't already do it
+            if VERBOSITY_TRACE_COUNTEREXAMPLE <= args.verbose < VERBOSITY_TRACE_SETUP:
+                print(f"{setup_sig} trace:")
+                render_trace(setup_ex.context)
+
+    setup_exs = []
+
+    match len(setup_exs_no_error):
+        case 0:
+            pass
+        case 1:
+            setup_exs.append(setup_exs_no_error[0][0])
+        case _:
             for setup_ex, query in setup_exs_no_error:
                 res, _, _ = solve(query, args)
                 if res != unsat:
@@ -330,28 +458,24 @@ def setup(
                     if len(setup_exs) > 1:
                         break
 
-        elif len(setup_exs_no_error) == 1:
-            setup_exs.append(setup_exs_no_error[0][0])
-
-        if len(setup_exs) == 0:
+    match len(setup_exs):
+        case 0:
             raise HalmosException(f"No successful path found in {setup_sig}")
-
-        if len(setup_exs) > 1:
+        case n if n > 1:
             debug("\n".join(map(str, setup_exs)))
-
             raise HalmosException(f"Multiple paths were found in {setup_sig}")
 
-        setup_ex = setup_exs[0]
+    [setup_ex] = setup_exs
 
-        if args.print_setup_states:
-            print(setup_ex)
+    if args.print_setup_states:
+        print(setup_ex)
 
-        if sevm.logs.bounded_loops:
-            warn_code(
-                LOOP_BOUND,
-                f"{setup_sig}: paths have not been fully explored due to the loop unrolling bound: {args.loop}",
-            )
-            debug("\n".join(jumpid_str(x) for x in sevm.logs.bounded_loops))
+    if sevm.logs.bounded_loops:
+        warn_code(
+            LOOP_BOUND,
+            f"{setup_sig}: paths have not been fully explored due to the loop unrolling bound: {args.loop}",
+        )
+        debug("\n".join(jumpid_str(x) for x in sevm.logs.bounded_loops))
 
     if args.statistics:
         print(setup_timer.report())
@@ -359,70 +483,14 @@ def setup(
     return setup_ex
 
 
-@dataclass
-class PotentialModel:
-    model: AnyModel
-    is_valid: bool
-
-    def __init__(self, model: ModelRef | str, args: HalmosConfig) -> None:
-        # convert model into string to avoid pickling errors for z3 (ctypes) objects containing pointers
-        self.model = (
-            to_str_model(model, args.print_full_model)
-            if isinstance(model, ModelRef)
-            else model
-        )
-        self.is_valid = is_model_valid(model)
-
-    def __str__(self) -> str:
-        # expected to be a filename
-        if isinstance(self.model, str):
-            return f"see {self.model}"
-
-        formatted = [f"\n    {decl} = {val}" for decl, val in self.model.items()]
-        return "".join(sorted(formatted)) if formatted else "∅"
-
-
-@dataclass(frozen=True)
-class ModelWithContext:
-    # can be a filename containing the model or a dict with variable assignments
-    model: PotentialModel | None
-    index: int
-    result: CheckSatResult
-    unsat_core: list | None
-
-
-@dataclass(frozen=True)
-class TestResult:
-    name: str  # test function name
-    exitcode: int
-    num_models: int = None
-    models: list[ModelWithContext] = None
-    num_paths: tuple[int, int, int] = None  # number of paths: [total, success, blocked]
-    time: tuple[int, int, int] = None  # time: [total, paths, models]
-    num_bounded_loops: int = None  # number of incomplete loops
-
-
-class Exitcode(Enum):
-    PASS = 0
-    COUNTEREXAMPLE = 1
-    TIMEOUT = 2
-    STUCK = 3
-    REVERT_ALL = 4
-    EXCEPTION = 5
-
-
 def is_global_fail_set(context: CallContext) -> bool:
     hevm_fail = isinstance(context.output.error, FailCheatcode)
     return hevm_fail or any(is_global_fail_set(x) for x in context.subcalls())
 
 
-def run_test(
-    setup_ex: Exec,
-    abi: dict,
-    fun_info: FunctionInfo,
-    args: HalmosConfig,
-    solver: Solver,
-) -> TestResult:
+def run_test(ctx: FunctionContext) -> TestResult:
+    args = ctx.args
+    fun_info = ctx.info
     funname, funsig = fun_info.name, fun_info.sig
     if args.verbose >= 1:
         print(f"Executing {funname}")
@@ -431,7 +499,7 @@ def run_test(
     # prepare test dump directory if needed
     #
 
-    dump_dirname = f"/tmp/{funname}-{uuid.uuid4().hex}"
+    dump_dirname = ctx.dump_dirname
     should_dump = args.dump_smt_queries or args.solver_command
     if should_dump and not os.path.isdir(dump_dirname):
         os.makedirs(dump_dirname)
@@ -441,11 +509,12 @@ def run_test(
     # prepare calldata
     #
 
+    setup_ex = ctx.setup_ex
     sevm = SEVM(args, fun_info)
-    path = Path(solver)
+    path = Path(ctx.solver)
     path.extend_path(setup_ex.path)
 
-    cd, dyn_params = mk_calldata(abi, fun_info, args)
+    cd, dyn_params = mk_calldata(ctx.abi, fun_info, args)
     path.process_dyn_params(dyn_params)
 
     message = Message(
@@ -582,7 +651,14 @@ def future_callback(future: PopenFuture):
 
             query: SMTQuery = ex.path.to_smt2(args)
 
-            dump(query, args, dump_filename=f"{dump_dirname}/{path_id}.smt2")
+            path_ctx = PathContext(
+                path_id=path_id,
+                ex=ex,
+                query=query,
+                fun_info=ctx,
+            )
+
+            dump(path_ctx)
 
             # if the query contains an unsat-core, it is unsat; no need to run the solver
             if check_unsat_cores(query, unsat_cores):
@@ -616,11 +692,15 @@ def future_callback(future: PopenFuture):
             break
 
     num_execs = path_id
+
+    # the name is a bit misleading: this timer only starts after the exploration phase is complete
+    # but it's possible that solvers have already been running for a while
     timer.create_subtimer("models")
 
     if potential > 0 and args.verbose >= 1:
         print(
-            f"# of potential paths involving assertion violations: {potential} / {num_execs}  (--solver-threads {args.solver_threads})"
+            f"# of potential paths involving assertion violations: {potential} / {num_execs}"
+            f" (--solver-threads {args.solver_threads})"
         )
 
     # display assertion solving progress
@@ -721,63 +801,51 @@ def extract_setup(methodIdentifiers: dict[str, str]) -> FunctionInfo:
     return FunctionInfo(setup_name, setup_sig, setup_selector)
 
 
-@dataclass(frozen=True)
-class RunArgs:
-    # signatures of test functions to run
-    funsigs: list[str]
-
-    # code of the current contract
-    creation_hexcode: str
-    deployed_hexcode: str
+def run_contract(ctx: ContractContext) -> list[TestResult]:
+    BuildOut().set_build_out(ctx.build_out_map)
 
-    abi: dict
-    methodIdentifiers: dict[str, str]
-
-    args: HalmosConfig
-    contract_json: dict
-    libs: dict
-
-    build_out_map: dict
-
-
-def run_contract(run_args: RunArgs) -> list[TestResult]:
-    BuildOut().set_build_out(run_args.build_out_map)
-
-    args = run_args.args
-    setup_info = extract_setup(run_args.methodIdentifiers)
+    args = ctx.args
+    setup_info = extract_setup(ctx.method_identifiers)
 
     try:
-        setup_config = with_devdoc(args, setup_info.sig, run_args.contract_json)
+        setup_config = with_devdoc(args, setup_info.sig, ctx.contract_json)
         setup_solver = mk_solver(setup_config)
-        setup_ex = setup(
-            run_args.creation_hexcode,
-            run_args.deployed_hexcode,
-            run_args.abi,
-            setup_info,
-            setup_config,
-            run_args.libs,
-            setup_solver,
+        setup_ctx = FunctionContext(
+            args=setup_config,
+            info=setup_info,
+            solver=setup_solver,
+            contract_ctx=ctx,
         )
+
+        setup_ex = setup(setup_ctx)
     except Exception as err:
         error(f"{setup_info.sig} failed: {type(err).__name__}: {err}")
         if args.debug:
             traceback.print_exc()
+
         # reset any remaining solver states from the default context
         setup_solver.reset()
+
         return []
 
     test_results = []
-    for funsig in run_args.funsigs:
-        fun_info = FunctionInfo(
-            funsig.split("(")[0], funsig, run_args.methodIdentifiers[funsig]
-        )
+    for funsig in ctx.funsigs:
+        selector = ctx.method_identifiers[funsig]
+        fun_info = FunctionInfo(funsig.split("(")[0], funsig, selector)
         try:
-            test_config = with_devdoc(args, funsig, run_args.contract_json)
+            test_config = with_devdoc(args, funsig, ctx.contract_json)
             solver = mk_solver(test_config)
             debug(f"{test_config.formatted_layers()}")
-            test_result = run_test(
-                setup_ex, run_args.abi, fun_info, test_config, solver
+
+            test_ctx = FunctionContext(
+                args=test_config,
+                info=fun_info,
+                solver=solver,
+                contract_ctx=ctx,
+                setup_ex=setup_ex,
             )
+
+            test_result = run_test(test_ctx)
         except Exception as err:
             print(f"{color_error('[ERROR]')} {funsig}")
             error(f"{type(err).__name__}: {err}")
@@ -824,10 +892,11 @@ def parse_unsat_core(output) -> list[str] | None:
 
 
 def dump(
-    query: SMTQuery, args: HalmosConfig, dump_filename: str | None = None
+    path_ctx: PathContext,
 ) -> tuple[CheckSatResult, PotentialModel | None, list | None]:
-    if not dump_filename:
-        dump_filename = f"/tmp/{uuid.uuid4().hex}.smt2"
+    args = path_ctx.fun_ctx.args
+    query = path_ctx.query
+    dump_filename = path_ctx.dump_filename
 
     if args.verbose >= 1:
         debug(f"Writing SMT query to {dump_filename}")
@@ -860,13 +929,6 @@ def dump(
             f.write("(get-model)\n")
 
 
-@dataclass(frozen=True)
-class SolverOutput:
-    result: CheckSatResult
-    model: PotentialModel | None = None
-    unsat_core: list[str] | None = None
-
-
 def solve(smt2_filename: str, args: HalmosConfig) -> SolverOutput:
     if args.verbose >= 1:
         debug("  Checking with external solver process")
@@ -884,22 +946,27 @@ def solve(smt2_filename: str, args: HalmosConfig) -> SolverOutput:
         res_str = subprocess.run(
             cmd, capture_output=True, text=True, timeout=timeout_seconds
         ).stdout.strip()
-        res_str_head = res_str.split("\n", 1)[0]
 
+        # save solver output to file
         with open(f"{smt2_filename}.out", "w") as f:
             f.write(res_str)
 
+        # extract the first line (we expect sat/unsat/unknown)
+        newline_idx = res_str.find("\n")
+        res_str_head = res_str[:newline_idx] if newline_idx != -1 else res_str
         if args.verbose >= 1:
             debug(f"    {res_str_head}")
 
-        if res_str_head == "unsat":
-            unsat_core = parse_unsat_core(res_str) if args.cache_solver else None
-            return SolverOutput(result=unsat, unsat_core=unsat_core)
-        elif res_str_head == "sat":
-            model = PotentialModel(f"{smt2_filename}.out", args)
-            return SolverOutput(result=sat, model=model)
-        else:
-            return SolverOutput(result=unknown)
+        match res_str_head:
+            case "unsat":
+                unsat_core = parse_unsat_core(res_str) if args.cache_solver else None
+                return SolverOutput(result=unsat, unsat_core=unsat_core)
+            case "sat":
+                model = PotentialModel(f"{smt2_filename}.out", args)
+                return SolverOutput(result=sat, model=model)
+            case _:
+                return SolverOutput(result=unknown)
+
     except subprocess.TimeoutExpired:
         return SolverOutput(result=unknown)
 
@@ -1319,26 +1386,27 @@ def on_signal(signum, frame):
 
         # support for `/// @custom:halmos` annotations
         contract_args = with_natspec(args, contract_name, natspec)
-        run_args = RunArgs(
-            funsigs,
-            creation_hexcode,
-            deployed_hexcode,
-            abi,
-            methodIdentifiers,
-            contract_args,
-            contract_json,
-            libs,
-            build_out_map,
+        contract_ctx = ContractContext(
+            args=contract_args,
+            name=contract_name,
+            creation_hexcode=creation_hexcode,
+            deployed_hexcode=deployed_hexcode,
+            abi=abi,
+            method_identifiers=methodIdentifiers,
+            contract_json=contract_json,
+            libs=libs,
+            build_out_map=build_out_map,
         )
 
-        test_results = run_contract(run_args)
-
+        test_results = run_contract(contract_ctx)
         num_passed = sum(r.exitcode == 0 for r in test_results)
         num_failed = num_found - num_passed
 
         print(
-            f"Symbolic test result: {num_passed} passed; "
-            f"{num_failed} failed; {contract_timer.report()}"
+            "Symbolic test result: "
+            f"{num_passed} passed; "
+            f"{num_failed} failed; "
+            f"{contract_timer.report()}"
         )
 
         total_found += num_found

From 926e733affe7c5445a8181f36f28563376fbc683 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 16 Jan 2025 15:57:47 -0800
Subject: [PATCH 06/46] XXX early exit wired actually wired

---
 src/halmos/__main__.py  | 323 +++++++++++++++++++++-------------------
 src/halmos/processes.py |  37 +++--
 src/halmos/traces.py    |   6 +-
 3 files changed, 193 insertions(+), 173 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index d9dd1ab0..07bf3e66 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -176,13 +176,38 @@ class FunctionContext:
     # dump directory for this function (generated in __post_init__)
     dump_dirname: str = field(init=False)
 
+    # path-specific queries are submitted to this function-specific executor
+    thread_pool: PopenExecutor = field(default_factory=PopenExecutor)
+
+    # list of solver outputs for this function
+    _solver_outputs: list["SolverOutput"] = field(default_factory=list)
+
+    # list of counterexamples for this function
+    counterexamples: list[PotentialModel] = field(default_factory=list)
+
+    # list of unsat cores for this function
+    unsat_cores: list[list] = field(default_factory=list)
+
+    # map from path id to trace
+    traces: dict[int, str] = field(default_factory=dict)
+
+    # map from path id to execution
+    exec_cache: dict[int, Exec] = field(default_factory=dict)
+
     def __post_init__(self):
         object.__setattr__(
             self, "dump_dirname", f"/tmp/{self.info.name}-{uuid.uuid4().hex}"
         )
 
+    def add_solver_output(self, solver_output: "SolverOutput"):
+        self._solver_outputs.append(solver_output)
 
-@dataclass
+    @property
+    def solver_outputs(self):
+        return self._solver_outputs
+
+
+@dataclass(frozen=True)
 class PathContext:
     # id of this path
     path_id: int
@@ -205,14 +230,20 @@ def __post_init__(self):
         )
 
 
-# XXX remove ModelWithContext
 @dataclass(frozen=True)
-class ModelWithContext:
-    # can be a filename containing the model or a dict with variable assignments
-    model: PotentialModel | None
-    index: int
+class SolverOutput:
+    # solver result
     result: CheckSatResult
-    unsat_core: list | None
+
+    # we don't backlink to the parent path context to avoid extra
+    # references to Exec objects past the lifetime of the path
+    path_id: int
+
+    # solver model
+    model: PotentialModel | None = None
+
+    # optional unsat core
+    unsat_core: list[str] | None = None
 
 
 @dataclass(frozen=True)
@@ -220,19 +251,12 @@ class TestResult:
     name: str  # test function name
     exitcode: int
     num_models: int = None
-    models: list[ModelWithContext] = None
+    models: list[SolverOutput] = None
     num_paths: tuple[int, int, int] = None  # number of paths: [total, success, blocked]
     time: tuple[int, int, int] = None  # time: [total, paths, models]
     num_bounded_loops: int = None  # number of incomplete loops
 
 
-@dataclass(frozen=True)
-class SolverOutput:
-    result: CheckSatResult
-    model: PotentialModel | None = None
-    unsat_core: list[str] | None = None
-
-
 class Exitcode(Enum):
     PASS = 0
     COUNTEREXAMPLE = 1
@@ -322,7 +346,7 @@ def mk_solver(args: HalmosConfig, logic="QF_AUFBV", ctx=None):
     )
 
 
-def deploy_test(ctx: ContractContext, sevm: SEVM) -> Exec:
+def deploy_test(ctx: FunctionContext, sevm: SEVM) -> Exec:
     this = mk_this()
     message = Message(
         target=this,
@@ -344,8 +368,9 @@ def deploy_test(ctx: ContractContext, sevm: SEVM) -> Exec:
     )
 
     # deploy libraries and resolve library placeholders in hexcode
+    contract_ctx = ctx.contract_ctx
     (creation_hexcode, _) = ex.resolve_libs(
-        ctx.creation_hexcode, ctx.deployed_hexcode, ctx.libs
+        contract_ctx.creation_hexcode, contract_ctx.deployed_hexcode, contract_ctx.libs
     )
 
     # test contract creation bytecode
@@ -404,7 +429,7 @@ def setup(ctx: FunctionContext) -> Exec:
     setup_timer.create_subtimer("run")
 
     # TODO: dyn_params may need to be passed to mk_calldata in run()
-    calldata, dyn_params = mk_calldata(ctx.abi, setup_info, args)
+    calldata, dyn_params = mk_calldata(ctx.contract_ctx.abi, setup_info, args)
     setup_ex.path.process_dyn_params(dyn_params)
 
     parent_message = setup_ex.message()
@@ -514,7 +539,7 @@ def run_test(ctx: FunctionContext) -> TestResult:
     path = Path(ctx.solver)
     path.extend_path(setup_ex.path)
 
-    cd, dyn_params = mk_calldata(ctx.abi, fun_info, args)
+    cd, dyn_params = mk_calldata(ctx.contract_ctx.abi, fun_info, args)
     path.process_dyn_params(dyn_params)
 
     message = Message(
@@ -560,61 +585,10 @@ def run_test(ctx: FunctionContext) -> TestResult:
         )
     )
 
-    (logs, steps) = (sevm.logs, sevm.steps)
-
     normal = 0
     potential = 0
-    models: list[ModelWithContext] = []
     stuck = []
 
-    thread_pool = PopenExecutor(max_workers=args.solver_threads)
-    futures = []
-    counterexamples = []
-    unsat_cores: list[list] = []
-    traces: dict[int, str] = {}
-    exec_cache: dict[int, Exec] = {}
-
-    def future_callback(future: PopenFuture):
-        # XXX stdout, stderr, exitcode
-        m = future.result()
-        models.append(m)
-
-        model, index, result = m.model, m.index, m.result
-
-        # retrieve cached exec and clear the cache entry
-        exec = exec_cache.pop(index, None)
-
-        if result == unsat:
-            if m.unsat_core:
-                unsat_cores.append(m.unsat_core)
-            return
-
-        # model could be an empty dict here
-        if model is not None:
-            if model.is_valid:
-                print(red(f"Counterexample: {model}"))
-                counterexamples.append(model)
-            else:
-                warn_code(
-                    COUNTEREXAMPLE_INVALID,
-                    f"Counterexample (potentially invalid): {model}",
-                )
-                counterexamples.append(model)
-        else:
-            warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}")
-
-        if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
-            print(
-                f"Trace #{index + 1}:"
-                if args.verbose == VERBOSITY_TRACE_PATHS
-                else "Trace:"
-            )
-            print(traces[index], end="")
-
-        if args.print_failed_states:
-            print(f"# {index + 1}")
-            print(exec)
-
     #
     # consume the sevm.run() generator
     # (actually triggers path exploration)
@@ -624,7 +598,7 @@ def future_callback(future: PopenFuture):
     for path_id, ex in enumerate(exs):
         # cache exec in case we need to print it later
         if args.print_failed_states:
-            exec_cache[path_id] = ex
+            ctx.exec_cache[path_id] = ex
 
         if args.verbose >= VERBOSITY_TRACE_PATHS:
             print(f"Path #{path_id}:")
@@ -647,7 +621,7 @@ def future_callback(future: PopenFuture):
             # so we save the rendered trace here and potentially print it later
             # if a valid counterexample is found
             if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
-                traces[path_id] = rendered_trace(ex.context)
+                ctx.traces[path_id] = rendered_trace(ex.context)
 
             query: SMTQuery = ex.path.to_smt2(args)
 
@@ -655,17 +629,22 @@ def future_callback(future: PopenFuture):
                 path_id=path_id,
                 ex=ex,
                 query=query,
-                fun_info=ctx,
+                fun_ctx=ctx,
             )
 
             dump(path_ctx)
 
             # if the query contains an unsat-core, it is unsat; no need to run the solver
-            if check_unsat_cores(query, unsat_cores):
+            if check_unsat_cores(query, ctx.unsat_cores):
                 if args.verbose >= 1:
                     print("  Already proven unsat")
                 continue
 
+            solve(path_ctx)
+
+            # XXX handle refinement
+            # XXX handle timeout
+
         elif ex.context.is_stuck():
             debug(f"Potential error path (id: {idx+1})")
             res, _, _ = solve(ex.path.to_smt2(args), args)
@@ -706,9 +685,9 @@ def future_callback(future: PopenFuture):
     # display assertion solving progress
     if not args.no_status or args.early_exit:
         while True:
-            if args.early_exit and len(counterexamples) > 0:
+            if args.early_exit and len(ctx.counterexamples) > 0:
                 break
-            done = sum(fm.done() for fm in futures)
+            done = sum(fm.done() for fm in ctx.thread_pool.futures)
             total = potential
             if done == total:
                 break
@@ -717,11 +696,11 @@ def future_callback(future: PopenFuture):
             time.sleep(0.1)
 
     if args.early_exit:
-        thread_pool.shutdown(wait=False, cancel_futures=True)
+        ctx.thread_pool.shutdown(wait=False, cancel_futures=True)
     else:
-        thread_pool.shutdown(wait=True)
+        ctx.thread_pool.shutdown(wait=True)
 
-    counter = Counter(str(m.result) for m in models)
+    counter = Counter(str(m.result) for m in ctx.solver_outputs)
     if counter["sat"] > 0:
         passfail = red("[FAIL]")
         exitcode = Exitcode.COUNTEREXAMPLE.value
@@ -746,16 +725,19 @@ def future_callback(future: PopenFuture):
     timer.stop()
     time_info = timer.report(include_subtimers=args.statistics)
 
-    # print result
+    # print test result
     print(
-        f"{passfail} {funsig} (paths: {num_execs}, {time_info}, bounds: [{', '.join([str(x) for x in dyn_params])}])"
+        f"{passfail} {funsig} (paths: {num_execs}, {time_info}, "
+        f"bounds: [{', '.join([str(x) for x in dyn_params])}])"
     )
 
     for path_id, _, err in stuck:
         warn_code(INTERNAL_ERROR, f"Encountered {err}")
         if args.print_blocked_states:
             print(f"\nPath #{path_id}")
-            print(traces[path_id], end="")
+            print(ctx.traces[path_id], end="")
+
+    (logs, steps) = (sevm.logs, sevm.steps)
 
     if logs.bounded_loops:
         warn_code(
@@ -771,13 +753,13 @@ def future_callback(future: PopenFuture):
 
     # return test result
     if args.minimal_json_output:
-        return TestResult(funsig, exitcode, len(counterexamples))
+        return TestResult(funsig, exitcode, len(ctx.counterexamples))
     else:
         return TestResult(
             funsig,
             exitcode,
-            len(counterexamples),
-            counterexamples,
+            len(ctx.counterexamples),
+            ctx.counterexamples,
             (num_execs, normal, len(stuck)),
             (timer.elapsed(), timer["paths"].elapsed(), timer["models"].elapsed()),
             len(logs.bounded_loops),
@@ -929,46 +911,95 @@ def dump(
             f.write("(get-model)\n")
 
 
-def solve(smt2_filename: str, args: HalmosConfig) -> SolverOutput:
+def solver_callback(future: PopenFuture):
+    """Invoked by a PopenFuture when it is finished."""
+
+    path_ctx: PathContext = future.metadata["path_ctx"]
+    path_id = path_ctx.path_id
+    args = path_ctx.fun_ctx.args
+    smt2_filename = path_ctx.dump_filename
+    stdout, stderr, returncode = future.result()
+
+    # save solver output to file
+    with open(f"{smt2_filename}.out", "w") as f:
+        f.write(stdout)
+
+    # extract the first line (we expect sat/unsat/unknown)
+    newline_idx = stdout.find("\n")
+    first_line = stdout[:newline_idx] if newline_idx != -1 else stdout
+    if args.verbose >= 1:
+        debug(f"    {first_line}")
+
+    solver_output = None
+    match first_line:
+        case "unsat":
+            unsat_core = parse_unsat_core(stdout) if args.cache_solver else None
+            solver_output = SolverOutput(unsat, path_id, unsat_core=unsat_core)
+        case "sat":
+            model = PotentialModel(f"{smt2_filename}.out", args)
+            solver_output = SolverOutput(sat, path_id, model=model)
+        case _:
+            solver_output = SolverOutput(unknown, path_id)
+
+    fun_ctx = path_ctx.fun_ctx
+    fun_ctx.add_solver_output(solver_output)
+
+    model, result = solver_output.model, solver_output.result
+
+    # retrieve cached exec and clear the cache entry
+    exec = fun_ctx.exec_cache.pop(path_id, None)
+
+    if result == unsat:
+        if solver_output.unsat_core:
+            fun_ctx.unsat_cores.append(solver_output.unsat_core)
+        return
+
+    # model could be an empty dict here
+    if model is not None:
+        if model.is_valid:
+            print(red(f"Counterexample: {model}"))
+            fun_ctx.counterexamples.append(model)
+        else:
+            warn_code(
+                COUNTEREXAMPLE_INVALID,
+                f"Counterexample (potentially invalid): {model}",
+            )
+            fun_ctx.counterexamples.append(model)
+    else:
+        warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}")
+
+    if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
+        print(
+            f"Trace #{path_id}:" if args.verbose == VERBOSITY_TRACE_PATHS else "Trace:"
+        )
+        print(fun_ctx.traces[path_id], end="")
+
+    if args.print_failed_states:
+        print(f"# {path_id}")
+        print(exec)
+
+
+def solve(ctx: PathContext):
+    args = ctx.fun_ctx.args
+    smt2_filename = ctx.dump_filename
+
     if args.verbose >= 1:
         debug("  Checking with external solver process")
-        debug(f"    {args.solver_command} {smt2_filename} >{smt2_filename}.out")
+        debug(f"    {args.solver_command} {smt2_filename} > {smt2_filename}.out")
+
+    # XXX fix timeout
 
     # solver_timeout_assertion == 0 means no timeout,
     # which translates to timeout_seconds=None for subprocess.run
-    timeout_seconds = None
-    if timeout_millis := args.solver_timeout_assertion:
-        timeout_seconds = timeout_millis / 1000
+    # timeout_seconds = None
+    # if timeout_millis := args.solver_timeout_assertion:
+    #     timeout_seconds = timeout_millis / 1000
 
     cmd = args.solver_command.split() + [smt2_filename]
-    try:
-        # XXX submit to PopenExecutor, process output in callback
-        res_str = subprocess.run(
-            cmd, capture_output=True, text=True, timeout=timeout_seconds
-        ).stdout.strip()
-
-        # save solver output to file
-        with open(f"{smt2_filename}.out", "w") as f:
-            f.write(res_str)
-
-        # extract the first line (we expect sat/unsat/unknown)
-        newline_idx = res_str.find("\n")
-        res_str_head = res_str[:newline_idx] if newline_idx != -1 else res_str
-        if args.verbose >= 1:
-            debug(f"    {res_str_head}")
-
-        match res_str_head:
-            case "unsat":
-                unsat_core = parse_unsat_core(res_str) if args.cache_solver else None
-                return SolverOutput(result=unsat, unsat_core=unsat_core)
-            case "sat":
-                model = PotentialModel(f"{smt2_filename}.out", args)
-                return SolverOutput(result=sat, model=model)
-            case _:
-                return SolverOutput(result=unknown)
-
-    except subprocess.TimeoutExpired:
-        return SolverOutput(result=unknown)
+    thread_pool: PopenExecutor = ctx.fun_ctx.thread_pool
+    future = PopenFuture(cmd, metadata={"path_ctx": ctx})
+    future.add_done_callback(solver_callback)
+    thread_pool.submit(future)
 
 
 def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool:
@@ -979,23 +1010,6 @@ def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool:
     return False
 
 
-def gen_model_from_sexpr(fn_args: GenModelArgs) -> ModelWithContext:
-    args, path_id, sexpr = fn_args.args, fn_args.path_id, fn_args.sexpr
-
-    res, model, unsat_core = solve(sexpr, args)
-
-    # XXX fix refinement
-
-    # if res == sat and not model.is_valid:
-    #     if args.verbose >= 1:
-    #         print("  Checking again with refinement")
-
-    # refined_filename = dump_filename.replace(".smt2", ".refined.smt2")
-    # res, model, unsat_core = solve(refine(sexpr), args, refined_filename)
-
-    return package_result(model, path_id, res, unsat_core, args)
-
-
 def refine(query: SMTQuery) -> SMTQuery:
     smtlib = query.smtlib
 
@@ -1018,27 +1032,27 @@ def refine(query: SMTQuery) -> SMTQuery:
     return SMTQuery(smtlib, query.assertions)
 
 
-def package_result(
-    model: PotentialModel | None,
-    idx: int,
-    result: CheckSatResult,
-    unsat_core: list | None,
-    args: HalmosConfig,
-) -> ModelWithContext:
-    if result == unsat:
-        if args.verbose >= 1:
-            print(f"  Invalid path; ignored (path id: {idx+1})")
-        return ModelWithContext(None, idx, result, unsat_core)
+# def package_result(
+#     model: PotentialModel | None,
+#     idx: int,
+#     result: CheckSatResult,
+#     unsat_core: list | None,
+#     args: HalmosConfig,
+# ) -> ModelWithContext:
+#     if result == unsat:
+#         if args.verbose >= 1:
+#             print(f"  Invalid path; ignored (path id: {idx+1})")
+#         return ModelWithContext(None, idx, result, unsat_core)
 
-    if result == sat:
-        if args.verbose >= 1:
-            print(f"  Valid path; counterexample generated (path id: {idx+1})")
-        return ModelWithContext(model, idx, result, None)
+#     if result == sat:
+#         if args.verbose >= 1:
+#             print(f"  Valid path; counterexample generated (path id: {idx+1})")
+#         return ModelWithContext(model, idx, result, None)
 
-    else:
-        if args.verbose >= 1:
-            print(f"  Timeout (path id: {idx+1})")
-        return ModelWithContext(None, idx, result, None)
+#     else:
+#         if args.verbose >= 1:
+#             print(f"  Timeout (path id: {idx+1})")
+#         return ModelWithContext(None, idx, result, None)
 
 
 def is_model_valid(model: ModelRef | str) -> bool:
@@ -1389,6 +1403,7 @@ def on_signal(signum, frame):
         contract_ctx = ContractContext(
             args=contract_args,
             name=contract_name,
+            funsigs=funsigs,
             creation_hexcode=creation_hexcode,
             deployed_hexcode=deployed_hexcode,
             abi=abi,
diff --git a/src/halmos/processes.py b/src/halmos/processes.py
index 1396c799..ca117860 100644
--- a/src/halmos/processes.py
+++ b/src/halmos/processes.py
@@ -16,9 +16,10 @@ class PopenFuture(concurrent.futures.Future):
     returncode: int | None
     start_time: float | None
     end_time: float | None
+    metadata: dict | None
     _exception: Exception | None
 
-    def __init__(self, cmd: list[str]):
+    def __init__(self, cmd: list[str], metadata: dict | None = None):
         super().__init__()
         self.cmd = cmd
         self.process = None
@@ -27,6 +28,7 @@ def __init__(self, cmd: list[str]):
         self.returncode = None
         self.start_time = None
         self.end_time = None
+        self.metadata = metadata
         self._exception = None
 
     def start(self):
@@ -122,18 +124,23 @@ def __init__(self, max_workers: int = 1):
 
         # TODO: support max_workers
 
-    def submit(self, command: list[str]):
+    @property
+    def futures(self):
+        return self._futures
+
+    def submit(self, future: PopenFuture):
+        """Accepts an unstarted PopenFuture and schedules it for execution."""
+
         if self._shutdown.is_set():
             raise RuntimeError("Cannot submit to a shutdown executor.")
 
-        future = PopenFuture(command)
         with self._lock:
             self._futures.append(future)
             future.start()
             return future
 
     def shutdown(self, wait=True, cancel_futures=False):
-        # TODO: support max_workers
+        # TODO: support max_workers / pending futures
 
         self._shutdown.set()
 
@@ -162,18 +169,8 @@ def _join(self):
 
 
 def main():
-    # example usage
-
     with PopenExecutor() as executor:
-        # Submit multiple commands
-        commands = [
-            "sleep 1",
-            "sleep 10",
-            "echo hello",
-        ]
-
-        futures = [executor.submit(command.split()) for command in commands]
-
+        # example usage
         def done_callback(future: PopenFuture):
             stdout, stderr, exitcode = future.result()
             cmd = " ".join(future.cmd)
@@ -187,8 +184,18 @@ def done_callback(future: PopenFuture):
             )
             executor.shutdown(wait=False)
 
+        # Submit multiple commands
+        commands = [
+            "sleep 1",
+            "sleep 10",
+            "echo hello",
+        ]
+
+        futures = [PopenFuture(command.split()) for command in commands]
+
         for future in futures:
             future.add_done_callback(done_callback)
+            executor.submit(future)
 
         # exiting the context manager will shutdown the executor with wait=True
         # so no new futures can be submitted
diff --git a/src/halmos/traces.py b/src/halmos/traces.py
index 53dc1bd2..c5d58bc8 100644
--- a/src/halmos/traces.py
+++ b/src/halmos/traces.py
@@ -5,16 +5,14 @@
 
 from halmos.bytevec import ByteVec
 from halmos.exceptions import HalmosException
-from halmos.sevm import CallContext, EventLog
+from halmos.mapper import DeployAddressMapper, Mapper
+from halmos.sevm import CallContext, EventLog, mnemonic
 from halmos.utils import (
-    DeployAddressMapper,
-    Mapper,
     byte_length,
     cyan,
     green,
     hexify,
     is_bv,
-    mnemonic,
     red,
     unbox_int,
     yellow,

From e72b36d81fef163c1b44aecf643b7fc3e5d9354d Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 16 Jan 2025 15:59:31 -0800
Subject: [PATCH 07/46] add triage for solver models

---
 src/halmos/__main__.py  | 112 ++++++++++++++++++++++++----------------
 src/halmos/processes.py |   3 ++
 2 files changed, 71 insertions(+), 44 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 07bf3e66..75037d1c 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -182,8 +182,11 @@ class FunctionContext:
     # list of solver outputs for this function
     _solver_outputs: list["SolverOutput"] = field(default_factory=list)
 
-    # list of counterexamples for this function
-    counterexamples: list[PotentialModel] = field(default_factory=list)
+    # list of valid counterexamples for this function
+    valid_counterexamples: list[PotentialModel] = field(default_factory=list)
+
+    # list of potentially invalid counterexamples for this function
+    invalid_counterexamples: list[PotentialModel] = field(default_factory=list)
 
     # list of unsat cores for this function
     unsat_cores: list[list] = field(default_factory=list)
@@ -201,6 +204,53 @@ def __post_init__(self):
 
     def add_solver_output(self, solver_output: "SolverOutput"):
         self._solver_outputs.append(solver_output)
+        args = self.args
+        path_id = solver_output.path_id
+        model, result = solver_output.model, solver_output.result
+
+        # retrieve cached exec and clear the cache entry
+        exec = self.exec_cache.pop(path_id, None)
+
+        if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
+            id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else ""
+            print(f"Trace{id_str}:")
+            print(self.traces[path_id], end="")
+
+        if args.print_failed_states:
+            print(f"# {path_id}")
+            print(exec)
+
+        if self.thread_pool.is_shutdown():
+            # if the thread pool is in the process of shutting down,
+            # we want to stop processing remaining models/timeouts/errors, etc.
+            return
+
+        if result == unsat:
+            if solver_output.unsat_core:
+                self.unsat_cores.append(solver_output.unsat_core)
+            return
+
+        self.add_model(model, result)
+
+    def add_model(self, model: PotentialModel | None, result: CheckSatResult):
+        """Triage a new model, potentially adding it to the counterexamples list"""
+
+        # model could be an empty dict here, so compare to None explicitly
+        if model is None:
+            warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}")
+            return
+
+        if model.is_valid:
+            print(red(f"Counterexample: {model}"))
+            self.valid_counterexamples.append(model)
+
+            # we have a valid counterexample, so we are eligible for early exit
+            if self.args.early_exit:
+                self.thread_pool.shutdown(wait=False)
+        else:
+            warn_str = f"Counterexample (potentially invalid): {model}"
+            warn_code(COUNTEREXAMPLE_INVALID, warn_str)
+            self.invalid_counterexamples.append(model)
 
     @property
     def solver_outputs(self):
@@ -682,10 +732,13 @@ def run_test(ctx: FunctionContext) -> TestResult:
             f" (--solver-threads {args.solver_threads})"
         )
 
+    #
     # display assertion solving progress
+    #
+
     if not args.no_status or args.early_exit:
         while True:
-            if args.early_exit and len(ctx.counterexamples) > 0:
+            if args.early_exit and ctx.valid_counterexamples:
                 break
             done = sum(fm.done() for fm in ctx.thread_pool.futures)
             total = potential
@@ -695,10 +748,14 @@ def run_test(ctx: FunctionContext) -> TestResult:
             sevm.status.update(f"[{elapsed}] solving queries: {done} / {total}")
             time.sleep(0.1)
 
-    if args.early_exit:
-        ctx.thread_pool.shutdown(wait=False, cancel_futures=True)
-    else:
-        ctx.thread_pool.shutdown(wait=True)
+    ctx.thread_pool.shutdown(wait=True)
+
+    timer.stop()
+    time_info = timer.report(include_subtimers=args.statistics)
+
+    #
+    # print test result
+    #
 
     counter = Counter(str(m.result) for m in ctx.solver_outputs)
     if counter["sat"] > 0:
@@ -752,14 +809,15 @@ def run_test(ctx: FunctionContext) -> TestResult:
             json.dump(steps, json_file)
 
     # return test result
+    num_cexes = len(ctx.valid_counterexamples) + len(ctx.invalid_counterexamples)
     if args.minimal_json_output:
-        return TestResult(funsig, exitcode, len(ctx.counterexamples))
+        return TestResult(funsig, exitcode, num_cexes)
     else:
         return TestResult(
             funsig,
             exitcode,
-            len(ctx.counterexamples),
-            ctx.counterexamples,
+            num_cexes,
+            ctx.valid_counterexamples + ctx.invalid_counterexamples,
             (num_execs, normal, len(stuck)),
             (timer.elapsed(), timer["paths"].elapsed(), timer["models"].elapsed()),
             len(logs.bounded_loops),
@@ -944,40 +1002,6 @@ def solver_callback(future: PopenFuture):
     fun_ctx = path_ctx.fun_ctx
     fun_ctx.add_solver_output(solver_output)
 
-    model, result = solver_output.model, solver_output.result
-
-    # retrieve cached exec and clear the cache entry
-    exec = fun_ctx.exec_cache.pop(path_id, None)
-
-    if result == unsat:
-        if solver_output.unsat_core:
-            fun_ctx.unsat_cores.append(solver_output.unsat_core)
-        return
-
-    # model could be an empty dict here
-    if model is not None:
-        if model.is_valid:
-            print(red(f"Counterexample: {model}"))
-            fun_ctx.counterexamples.append(model)
-        else:
-            warn_code(
-                COUNTEREXAMPLE_INVALID,
-                f"Counterexample (potentially invalid): {model}",
-            )
-            fun_ctx.counterexamples.append(model)
-    else:
-        warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}")
-
-    if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
-        print(
-            f"Trace #{path_id}:" if args.verbose == VERBOSITY_TRACE_PATHS else "Trace:"
-        )
-        print(fun_ctx.traces[path_id], end="")
-
-    if args.print_failed_states:
-        print(f"# {path_id}")
-        print(exec)
-
 
 def solve(ctx: PathContext):
     args = ctx.fun_ctx.args
diff --git a/src/halmos/processes.py b/src/halmos/processes.py
index ca117860..04c95c47 100644
--- a/src/halmos/processes.py
+++ b/src/halmos/processes.py
@@ -139,6 +139,9 @@ def submit(self, future: PopenFuture):
             future.start()
             return future
 
+    def is_shutdown(self) -> bool:
+        return self._shutdown.is_set()
+
     def shutdown(self, wait=True, cancel_futures=False):
         # TODO: support max_workers / pending futures
 

From 45d9e67168d9d2f71c8a1d155086bd6a7cb6960e Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 20 Dec 2024 14:54:21 -0800
Subject: [PATCH 08/46] add basic smtlib parsing

---
 src/halmos/smtlib.py | 92 ++++++++++++++++++++++++++++++++++++++++++++
 tests/test_halmos.py |  3 +-
 tests/test_smtlib.py | 87 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 181 insertions(+), 1 deletion(-)
 create mode 100644 src/halmos/smtlib.py
 create mode 100644 tests/test_smtlib.py

diff --git a/src/halmos/smtlib.py b/src/halmos/smtlib.py
new file mode 100644
index 00000000..f20b2ca8
--- /dev/null
+++ b/src/halmos/smtlib.py
@@ -0,0 +1,92 @@
+import re
+from dataclasses import dataclass
+
+from halmos.logs import logger
+
+# Regular expression for capturing halmos variables
+halmos_pattern = re.compile(
+    r"""
+    \(\s*define-fun\s+           # Match "(define-fun"
+    \|?(halmos_[^ |]+)\|?\s+     # Capture the full variable name, optionally wrapped in "|"
+    \(\)\s+\(_\s+([^ ]+)\s+      # Capture the SMTLIB type (e.g., "BitVec 256")
+    (\d+)\)\s+                   # Capture the bit-width or type argument
+    (                            # Group for the value
+        \#b[01]+                 # Binary value (e.g., "#b1010")
+        |\#x[0-9a-fA-F]+         # Hexadecimal value (e.g., "#xFF")
+        |\(_\s+bv\d+\s+\d+\)     # Decimal value (e.g., "(_ bv42 256)")
+    )
+    """,
+    re.VERBOSE,
+)
+
+
+@dataclass
+class ModelVariable:
+    full_name: str
+    variable_name: str
+    solidity_type: str
+    smt_type: str
+    size_bits: int
+    value: int
+
+
+def parse_file(file_path: str) -> dict:
+    with open(file_path) as file:
+        return parse_string(file.read())
+
+
+def parse_const_value(value: str) -> int:
+    if value.startswith("#b"):
+        return int(value[2:], 2)
+
+    if value.startswith("#x"):
+        return int(value[2:], 16)
+
+    # we may have a group like (_ bv123 256)
+    tokens = value.split()
+    for token in tokens:
+        if token.startswith("bv"):
+            return int(token[2:])
+
+    raise ValueError(f"unknown value format: {value}")
+
+
+def parse_match(match: re.Match) -> ModelVariable:
+    full_name = match.group(1).strip()
+    smt_type = f"{match.group(2)} {match.group(3)}"
+    size_bits = int(match.group(3))
+    value = parse_const_value(match.group(4))
+
+    # Extract name and typename from the variable name
+    parts = full_name.split("_")
+    variable_name = parts[1]
+    solidity_type = parts[2]
+
+    return ModelVariable(
+        full_name=full_name,
+        variable_name=variable_name,
+        solidity_type=solidity_type,
+        smt_type=smt_type,
+        size_bits=size_bits,
+        value=value,
+    )
+
+
+def parse_string(smtlib_str: str) -> dict[str, ModelVariable]:
+    model_variables: dict[str, ModelVariable] = {}
+
+    # use a regex to find all the variables
+    # for now we explicitly don't try to properly parse the smtlib output
+    # because of idiosyncrasies of different solvers:
+    # - ignores the initial sat/unsat on the first line
+    # - ignores the occasional `(model)` command used by yices, stp, cvc4, etc.
+
+    for match in halmos_pattern.finditer(smtlib_str):
+        try:
+            variable = parse_match(match)
+            model_variables[variable.full_name] = variable
+        except Exception as e:
+            logger.error(f"error parsing smtlib string '{match.string.strip()}': {e}")
+            raise e
+
+    return model_variables
diff --git a/tests/test_halmos.py b/tests/test_halmos.py
index 3cd40d41..a8aba028 100644
--- a/tests/test_halmos.py
+++ b/tests/test_halmos.py
@@ -3,9 +3,10 @@
 
 import pytest
 
-from halmos.__main__ import _main, rendered_calldata
+from halmos.__main__ import _main
 from halmos.bytevec import ByteVec
 from halmos.sevm import con
+from halmos.traces import rendered_calldata
 
 
 @pytest.mark.parametrize(
diff --git a/tests/test_smtlib.py b/tests/test_smtlib.py
new file mode 100644
index 00000000..0e3d2aaa
--- /dev/null
+++ b/tests/test_smtlib.py
@@ -0,0 +1,87 @@
+from halmos.smtlib import ModelVariable, parse_string
+
+
+def test_smtlib_z3_bv_output():
+    full_name = "halmos_y_uint256_043cfd7_01"
+    smtlib_str = f"""
+        (define-fun {full_name} () (_ BitVec 256)
+        #x0000000000000000000000000000000000000000000000000000000000000000)
+    """
+    model = parse_string(smtlib_str)
+
+    assert model[full_name] == ModelVariable(
+        full_name=full_name,
+        variable_name="y",
+        solidity_type="uint256",
+        smt_type="BitVec 256",
+        size_bits=256,
+        value=0,
+    )
+
+
+# note that yices only produces output like this with --smt2-model-format
+# otherwise we get something like (= x #b00000100)
+def test_smtlib_yices_binary_output():
+    full_name = "halmos_z_uint256_cabf047_02"
+    smtlib_str = f"""
+    (define-fun
+        {full_name}
+        ()
+        (_ BitVec 256)
+        #b1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000)
+    """
+    model = parse_string(smtlib_str)
+    assert model[full_name] == ModelVariable(
+        full_name=full_name,
+        variable_name="z",
+        solidity_type="uint256",
+        smt_type="BitVec 256",
+        size_bits=256,
+        value=1 << 255,
+    )
+
+
+def test_smtlib_yices_decimal_output():
+    full_name = "halmos_z_uint256_11ce021_08"
+    val = 57896044618658097711785492504343953926634992332820282019728792003956564819968
+    smtlib_str = f"""
+        (define-fun {full_name} () (_ BitVec 256) (_ bv{val} 256))
+    """
+    model = parse_string(smtlib_str)
+    assert model[full_name] == ModelVariable(
+        full_name=full_name,
+        variable_name="z",
+        solidity_type="uint256",
+        smt_type="BitVec 256",
+        size_bits=256,
+        value=val,
+    )
+
+
+def test_smtlib_stp_output():
+    full_name = "halmos_x_uint8_043cfd7_01"
+
+    # we should tolerate:
+    # - the extra (model) command
+    # - duplicate variable names
+    # - the initial `sat` result
+    # - the `|` around the variable name
+    # - the space in `( define-fun ...)`
+    smtlib_str = f"""
+        sat
+        (model
+        ( define-fun |{full_name}| () (_ BitVec 8) #x04 )
+        )
+        (model
+        ( define-fun |{full_name}| () (_ BitVec 8) #x04 )
+        )
+    """
+    model = parse_string(smtlib_str)
+    assert model[full_name] == ModelVariable(
+        full_name=full_name,
+        variable_name="x",
+        solidity_type="uint8",
+        smt_type="BitVec 8",
+        size_bits=8,
+        value=4,
+    )

From 1bdafec2e201d3fb6f895d6e8f9f7db0482ab8e7 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 20 Dec 2024 17:53:09 -0800
Subject: [PATCH 09/46] actually parse external solver output using smtlib

---
 src/halmos/__main__.py | 92 ++++++++++++------------------------------
 src/halmos/smtlib.py   | 44 +++++++++++---------
 2 files changed, 51 insertions(+), 85 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 75037d1c..981b6dda 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -21,7 +21,6 @@
 from z3 import (
     BitVec,
     CheckSatResult,
-    ModelRef,
     Solver,
     sat,
     set_option,
@@ -74,6 +73,7 @@
     jumpid_str,
     mnemonic,
 )
+from .smtlib import ModelVariables, parse_string
 from .traces import render_trace, rendered_trace
 from .utils import (
     NamedTimer,
@@ -90,9 +90,6 @@
     yellow,
 )
 
-StrModel = dict[str, str]
-AnyModel = StrModel | str
-
 # Python version >=3.8.14, >=3.9.14, >=3.10.7, or >=3.11
 if hasattr(sys, "set_int_max_str_digits"):
     sys.set_int_max_str_digits(0)
@@ -112,26 +109,27 @@
 
 @dataclass
 class PotentialModel:
-    model: AnyModel
-    is_valid: bool
-
-    def __init__(self, model: ModelRef | str, args: HalmosConfig) -> None:
-        # convert model into string to avoid pickling errors for z3 (ctypes) objects containing pointers
-        self.model = (
-            to_str_model(model, args.print_full_model)
-            if isinstance(model, ModelRef)
-            else model
-        )
-        self.is_valid = is_model_valid(model)
+    model: ModelVariables | str | None
 
     def __str__(self) -> str:
         # expected to be a filename
         if isinstance(self.model, str):
             return f"see {self.model}"
 
-        formatted = [f"\n    {decl} = {val}" for decl, val in self.model.items()]
+        formatted = []
+        for v in self.model.values():
+            stringified = stringify(v.full_name, v.value)
+            formatted.append(f"\n    {v.full_name} = {stringified}")
         return "".join(sorted(formatted)) if formatted else "∅"
 
+    @property
+    def is_valid(self) -> bool:
+        if self.model is None:
+            return False
+
+        needs_refinement = any(name.startswith("f_evm_") for name in self.model)
+        return not needs_refinement
+
 
 @dataclass(frozen=True)
 class ContractContext:
@@ -527,6 +525,7 @@ def setup(ctx: FunctionContext) -> Exec:
             setup_exs.append(setup_exs_no_error[0][0])
         case _:
             for setup_ex, query in setup_exs_no_error:
+                # XXX fix with new interface
                 res, _, _ = solve(query, args)
                 if res != unsat:
                     setup_exs.append(setup_ex)
@@ -690,7 +689,13 @@ def run_test(ctx: FunctionContext) -> TestResult:
                     print("  Already proven unsat")
                 continue
 
-            solve(path_ctx)
+            try:
+                solve(path_ctx)
+            except RuntimeError:
+                # XXX use more specific exception
+                # XXX notify sevm that we're done
+                # if the thread pool is shut down, here
+                break
 
             # XXX handle refinement
             # XXX handle timeout
@@ -736,6 +741,8 @@ def run_test(ctx: FunctionContext) -> TestResult:
     # display assertion solving progress
     #
 
+    # XXX clashes with the exploration progress status
+
     if not args.no_status or args.early_exit:
         while True:
             if args.early_exit and ctx.valid_counterexamples:
@@ -994,7 +1001,7 @@ def solver_callback(future: PopenFuture):
             unsat_core = parse_unsat_core(stdout) if args.cache_solver else None
             solver_output = SolverOutput(unsat, path_id, unsat_core=unsat_core)
         case "sat":
-            model = PotentialModel(f"{smt2_filename}.out", args)
+            model = PotentialModel(parse_string(stdout))
             solver_output = SolverOutput(sat, path_id, model=model)
         case _:
             solver_output = SolverOutput(unknown, path_id)
@@ -1056,55 +1063,6 @@ def refine(query: SMTQuery) -> SMTQuery:
     return SMTQuery(smtlib, query.assertions)
 
 
-# def package_result(
-#     model: PotentialModel | None,
-#     idx: int,
-#     result: CheckSatResult,
-#     unsat_core: list | None,
-#     args: HalmosConfig,
-# ) -> ModelWithContext:
-#     if result == unsat:
-#         if args.verbose >= 1:
-#             print(f"  Invalid path; ignored (path id: {idx+1})")
-#         return ModelWithContext(None, idx, result, unsat_core)
-
-#     if result == sat:
-#         if args.verbose >= 1:
-#             print(f"  Valid path; counterexample generated (path id: {idx+1})")
-#         return ModelWithContext(model, idx, result, None)
-
-#     else:
-#         if args.verbose >= 1:
-#             print(f"  Timeout (path id: {idx+1})")
-#         return ModelWithContext(None, idx, result, None)
-
-
-def is_model_valid(model: ModelRef | str) -> bool:
-    # TODO: evaluate the path condition against the given model after excluding f_evm_* symbols,
-    #       since the f_evm_* symbols may still appear in valid models.
-
-    # model is a filename, containing solver output
-    if isinstance(model, str):
-        with open(model) as f:
-            for line in f:
-                if "f_evm_" in line:
-                    return False
-        return True
-
-    # z3 model object
-    else:
-        return all(not str(decl).startswith("f_evm_") for decl in model)
-
-
-def to_str_model(model: ModelRef, print_full_model: bool) -> StrModel:
-    def select(var):
-        name = str(var)
-        return name.startswith("p_") or name.startswith("halmos_")
-
-    select_model = filter(select, model) if not print_full_model else model
-    return {str(decl): stringify(str(decl), model[decl]) for decl in select_model}
-
-
 def get_contract_type(
     ast_nodes: list, contract_name: str
 ) -> tuple[str | None, str | None]:
diff --git a/src/halmos/smtlib.py b/src/halmos/smtlib.py
index f20b2ca8..61eb23fe 100644
--- a/src/halmos/smtlib.py
+++ b/src/halmos/smtlib.py
@@ -3,6 +3,19 @@
 
 from halmos.logs import logger
 
+
+@dataclass
+class ModelVariable:
+    full_name: str
+    variable_name: str
+    solidity_type: str
+    smt_type: str
+    size_bits: int
+    value: int
+
+
+ModelVariables = dict[str, ModelVariable]
+
 # Regular expression for capturing halmos variables
 halmos_pattern = re.compile(
     r"""
@@ -20,21 +33,6 @@
 )
 
 
-@dataclass
-class ModelVariable:
-    full_name: str
-    variable_name: str
-    solidity_type: str
-    smt_type: str
-    size_bits: int
-    value: int
-
-
-def parse_file(file_path: str) -> dict:
-    with open(file_path) as file:
-        return parse_string(file.read())
-
-
 def parse_const_value(value: str) -> int:
     if value.startswith("#b"):
         return int(value[2:], 2)
@@ -51,7 +49,7 @@ def parse_const_value(value: str) -> int:
     raise ValueError(f"unknown value format: {value}")
 
 
-def parse_match(match: re.Match) -> ModelVariable:
+def _parse_match(match: re.Match) -> ModelVariable:
     full_name = match.group(1).strip()
     smt_type = f"{match.group(2)} {match.group(3)}"
     size_bits = int(match.group(3))
@@ -72,7 +70,12 @@ def parse_match(match: re.Match) -> ModelVariable:
     )
 
 
-def parse_string(smtlib_str: str) -> dict[str, ModelVariable]:
+def parse_string(smtlib_str: str) -> ModelVariables:
+    """Expects a whole smtlib model output file, as produced by a solver
+    in response to a `(check-sat)` + `(get-model)` command.
+
+    Extracts halmos variables and returns them grouped by their full name"""
+
     model_variables: dict[str, ModelVariable] = {}
 
     # use a regex to find all the variables
@@ -83,10 +86,15 @@ def parse_string(smtlib_str: str) -> dict[str, ModelVariable]:
 
     for match in halmos_pattern.finditer(smtlib_str):
         try:
-            variable = parse_match(match)
+            variable = _parse_match(match)
             model_variables[variable.full_name] = variable
         except Exception as e:
             logger.error(f"error parsing smtlib string '{match.string.strip()}': {e}")
             raise e
 
     return model_variables
+
+
+def parse_file(file_path: str) -> ModelVariables:
+    with open(file_path) as file:
+        return parse_string(file.read())

From db346c094ec2006c17c564d0335f588c4e231037 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 16 Jan 2025 16:00:23 -0800
Subject: [PATCH 10/46] fix counterexample output

---
 src/halmos/__main__.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 981b6dda..075a00ca 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -118,7 +118,8 @@ def __str__(self) -> str:
 
         formatted = []
         for v in self.model.values():
-            stringified = stringify(v.full_name, v.value)
+            # TODO: ideally we would avoid wrapping with con() here
+            stringified = stringify(v.full_name, con(v.value))
             formatted.append(f"\n    {v.full_name} = {stringified}")
         return "".join(sorted(formatted)) if formatted else "∅"
 
@@ -239,7 +240,7 @@ def add_model(self, model: PotentialModel | None, result: CheckSatResult):
             return
 
         if model.is_valid:
-            print(red(f"Counterexample: {model}"))
+            # we don't print the model here because this may be called from multiple threads
             self.valid_counterexamples.append(model)
 
             # we have a valid counterexample, so we are eligible for early exit
@@ -764,6 +765,12 @@ def run_test(ctx: FunctionContext) -> TestResult:
     # print test result
     #
 
+    for model in ctx.valid_counterexamples:
+        print(red(f"Counterexample: {model}"))
+
+        if args.early_exit:
+            break
+
     counter = Counter(str(m.result) for m in ctx.solver_outputs)
     if counter["sat"] > 0:
         passfail = red("[FAIL]")
@@ -977,7 +984,10 @@ def dump(
 
 
 def solver_callback(future: PopenFuture):
-    """Invoked by a PopenFuture when it is finished."""
+    """Invoked by a PopenFuture when it is finished.
+
+    Note that this may be invoked by different threads
+    (in particular, not the thread that scheduled the future)"""
 
     path_ctx: PathContext = future.metadata["path_ctx"]
     path_id = path_ctx.path_id
@@ -1027,10 +1037,9 @@ def solve(ctx: PathContext):
     #     timeout_seconds = timeout_millis / 1000
 
     cmd = args.solver_command.split() + [smt2_filename]
-    thread_pool: PopenExecutor = ctx.fun_ctx.thread_pool
     future = PopenFuture(cmd, metadata={"path_ctx": ctx})
     future.add_done_callback(solver_callback)
-    thread_pool.submit(future)
+    ctx.fun_ctx.thread_pool.submit(future)
 
 
 def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool:

From bfe126dd412bfcd5f7e8b6d09204025f1868101b Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Tue, 7 Jan 2025 17:10:55 -0800
Subject: [PATCH 11/46] make z3 the default --solver-command (so we always dump
 queries)

---
 src/halmos/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/halmos/config.py b/src/halmos/config.py
index 2d28e70f..dcf567ca 100644
--- a/src/halmos/config.py
+++ b/src/halmos/config.py
@@ -441,7 +441,7 @@ class Config:
 
     solver_command: str = arg(
         help="use the given command when invoking the solver",
-        global_default=None,
+        global_default="z3",
         metavar="COMMAND",
         group=solver,
     )

From cdf4e16f8190ceb3d9e7badbae0b5f27bc7e97ef Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 16 Jan 2025 16:02:59 -0800
Subject: [PATCH 12/46] bring back refinement

---
 src/halmos/__main__.py | 310 +++++++++++++++++++++++------------------
 1 file changed, 177 insertions(+), 133 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 075a00ca..a45f285c 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -12,6 +12,7 @@
 import traceback
 import uuid
 from collections import Counter
+from concurrent.futures import Future, ThreadPoolExecutor
 from copy import deepcopy
 from dataclasses import asdict, dataclass, field
 from datetime import timedelta
@@ -110,6 +111,7 @@
 @dataclass
 class PotentialModel:
     model: ModelVariables | str | None
+    is_valid: bool
 
     def __str__(self) -> str:
         # expected to be a filename
@@ -123,14 +125,6 @@ def __str__(self) -> str:
             formatted.append(f"\n    {v.full_name} = {stringified}")
         return "".join(sorted(formatted)) if formatted else "∅"
 
-    @property
-    def is_valid(self) -> bool:
-        if self.model is None:
-            return False
-
-        needs_refinement = any(name.startswith("f_evm_") for name in self.model)
-        return not needs_refinement
-
 
 @dataclass(frozen=True)
 class ContractContext:
@@ -175,8 +169,11 @@ class FunctionContext:
     # dump directory for this function (generated in __post_init__)
     dump_dirname: str = field(init=False)
 
+    # function-level thread pool that drives assertion solving
+    thread_pool: ThreadPoolExecutor = field(init=False)
+
     # path-specific queries are submitted to this function-specific executor
-    thread_pool: PopenExecutor = field(default_factory=PopenExecutor)
+    solver_executor: PopenExecutor = field(default_factory=PopenExecutor)
 
     # list of solver outputs for this function
     _solver_outputs: list["SolverOutput"] = field(default_factory=list)
@@ -197,59 +194,14 @@ class FunctionContext:
     exec_cache: dict[int, Exec] = field(default_factory=dict)
 
     def __post_init__(self):
-        object.__setattr__(
-            self, "dump_dirname", f"/tmp/{self.info.name}-{uuid.uuid4().hex}"
-        )
-
-    def add_solver_output(self, solver_output: "SolverOutput"):
-        self._solver_outputs.append(solver_output)
-        args = self.args
-        path_id = solver_output.path_id
-        model, result = solver_output.model, solver_output.result
-
-        # retrieve cached exec and clear the cache entry
-        exec = self.exec_cache.pop(path_id, None)
-
-        if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
-            id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else ""
-            print(f"Trace{id_str}:")
-            print(self.traces[path_id], end="")
-
-        if args.print_failed_states:
-            print(f"# {path_id}")
-            print(exec)
-
-        if self.thread_pool.is_shutdown():
-            # if the thread pool is in the process of shutting down,
-            # we want to stop processing remaining models/timeouts/errors, etc.
-            return
-
-        if result == unsat:
-            if solver_output.unsat_core:
-                self.unsat_cores.append(solver_output.unsat_core)
-            return
-
-        self.add_model(model, result)
+        dirname = f"/tmp/{self.info.name}-{uuid.uuid4().hex}"
+        object.__setattr__(self, "dump_dirname", dirname)
 
-    def add_model(self, model: PotentialModel | None, result: CheckSatResult):
-        """Triage a new model, potentially adding it to the counterexamples list"""
-
-        # model could be an empty dict here, so compare to None explicitly
-        if model is None:
-            warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}")
-            return
-
-        if model.is_valid:
-            # we don't print the model here because this may be called from multiple threads
-            self.valid_counterexamples.append(model)
-
-            # we have a valid counterexample, so we are eligible for early exit
-            if self.args.early_exit:
-                self.thread_pool.shutdown(wait=False)
-        else:
-            warn_str = f"Counterexample (potentially invalid): {model}"
-            warn_code(COUNTEREXAMPLE_INVALID, warn_str)
-            self.invalid_counterexamples.append(model)
+        thread_pool = ThreadPoolExecutor(
+            max_workers=self.args.solver_threads,
+            thread_name_prefix=f"{self.info.name}-",
+        )
+        object.__setattr__(self, "thread_pool", thread_pool)
 
     @property
     def solver_outputs(self):
@@ -273,9 +225,23 @@ class PathContext:
     # filename for this path (generated in __post_init__)
     dump_filename: str = field(init=False)
 
+    is_refined: bool = False
+
     def __post_init__(self):
-        object.__setattr__(
-            self, "dump_filename", f"{self.fun_ctx.dump_dirname}/{self.path_id}.smt2"
+        refined_str = ".refined" if self.is_refined else ""
+        dirname = self.fun_ctx.dump_dirname
+        filename = os.path.join(dirname, f"{self.path_id}{refined_str}.smt2")
+
+        # use __setattr__ because this is a frozen dataclass
+        object.__setattr__(self, "dump_filename", filename)
+
+    def refine(self) -> "PathContext":
+        return PathContext(
+            path_id=self.path_id,
+            ex=self.ex,
+            query=refine(self.query),
+            fun_ctx=self.fun_ctx,
+            is_refined=True,
         )
 
 
@@ -294,6 +260,30 @@ class SolverOutput:
     # optional unsat core
     unsat_core: list[str] | None = None
 
+    @staticmethod
+    def from_result(
+        stdout: str, stderr: str, returncode: int, path_ctx: PathContext
+    ) -> "SolverOutput":
+        # extract the first line (we expect sat/unsat/unknown)
+        newline_idx = stdout.find("\n")
+        first_line = stdout[:newline_idx] if newline_idx != -1 else stdout
+
+        args = path_ctx.fun_ctx.args
+        path_id = path_ctx.path_id
+        if args.verbose >= 1:
+            debug(f"    {first_line}")
+
+        match first_line:
+            case "unsat":
+                unsat_core = parse_unsat_core(stdout) if args.cache_solver else None
+                return SolverOutput(unsat, path_id, unsat_core=unsat_core)
+            case "sat":
+                is_valid = is_model_valid(stdout)
+                model = PotentialModel(model=parse_string(stdout), is_valid=is_valid)
+                return SolverOutput(sat, path_id, model=model)
+            case _:
+                return SolverOutput(unknown, path_id)
+
 
 @dataclass(frozen=True)
 class TestResult:
@@ -526,9 +516,8 @@ def setup(ctx: FunctionContext) -> Exec:
             setup_exs.append(setup_exs_no_error[0][0])
         case _:
             for setup_ex, query in setup_exs_no_error:
-                # XXX fix with new interface
-                res, _, _ = solve(query, args)
-                if res != unsat:
+                solver_output = solve_low_level(query, args)
+                if solver_output.result != unsat:
                     setup_exs.append(setup_ex)
                     if len(setup_exs) > 1:
                         break
@@ -645,6 +634,7 @@ def run_test(ctx: FunctionContext) -> TestResult:
     #
 
     path_id = 0  # default value in case we don't enter the loop body
+    submitted_futures = []
     for path_id, ex in enumerate(exs):
         # cache exec in case we need to print it later
         if args.print_failed_states:
@@ -682,21 +672,13 @@ def run_test(ctx: FunctionContext) -> TestResult:
                 fun_ctx=ctx,
             )
 
-            dump(path_ctx)
+            def log_future_result(future: Future):
+                if e := future.exception():
+                    error(f"encountered exception during assertion solving: {e}")
 
-            # if the query contains an unsat-core, it is unsat; no need to run the solver
-            if check_unsat_cores(query, ctx.unsat_cores):
-                if args.verbose >= 1:
-                    print("  Already proven unsat")
-                continue
-
-            try:
-                solve(path_ctx)
-            except RuntimeError:
-                # XXX use more specific exception
-                # XXX notify sevm that we're done
-                # if the thread pool is shut down, here
-                break
+            solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx)
+            solve_future.add_done_callback(log_future_result)
+            submitted_futures.append(solve_future)
 
             # XXX handle refinement
             # XXX handle timeout
@@ -742,13 +724,9 @@ def run_test(ctx: FunctionContext) -> TestResult:
     # display assertion solving progress
     #
 
-    # XXX clashes with the exploration progress status
-
     if not args.no_status or args.early_exit:
         while True:
-            if args.early_exit and ctx.valid_counterexamples:
-                break
-            done = sum(fm.done() for fm in ctx.thread_pool.futures)
+            done = sum(fm.done() for fm in submitted_futures)
             total = potential
             if done == total:
                 break
@@ -919,15 +897,6 @@ def run_contract(ctx: ContractContext) -> list[TestResult]:
     return test_results
 
 
-@dataclass(frozen=True)
-class GenModelArgs:
-    args: HalmosConfig
-    path_id: int
-    sexpr: SMTQuery
-    known_unsat_cores: list[list]
-    dump_dirname: str | None = None
-
-
 def parse_unsat_core(output) -> list[str] | None:
     # parsing example:
     #   unsat
@@ -983,50 +952,27 @@ def dump(
             f.write("(get-model)\n")
 
 
-def solver_callback(future: PopenFuture):
-    """Invoked by a PopenFuture when it is finished.
+def is_model_valid(solver_stdout: str) -> bool:
+    # TODO: evaluate the path condition against the given model after excluding f_evm_* symbols,
+    #       since the f_evm_* symbols may still appear in valid models.
 
-    Note that this may be invoked by different threads
-    (in particular, not the thread that scheduled the future)"""
+    return "f_evm_" not in solver_stdout
 
-    path_ctx: PathContext = future.metadata["path_ctx"]
-    path_id = path_ctx.path_id
-    args = path_ctx.fun_ctx.args
-    smt2_filename = path_ctx.dump_filename
-    stdout, stderr, returncode = future.result()
-
-    # save solver output to file
-    with open(f"{smt2_filename}.out", "w") as f:
-        f.write(stdout)
 
-    # extract the first line (we expect sat/unsat/unknown)
-    newline_idx = stdout.find("\n")
-    first_line = stdout[:newline_idx] if newline_idx != -1 else stdout
-    if args.verbose >= 1:
-        debug(f"    {first_line}")
-
-    solver_output = None
-    match first_line:
-        case "unsat":
-            unsat_core = parse_unsat_core(stdout) if args.cache_solver else None
-            solver_output = SolverOutput(unsat, path_id, unsat_core=unsat_core)
-        case "sat":
-            model = PotentialModel(parse_string(stdout))
-            solver_output = SolverOutput(sat, path_id, model=model)
-        case _:
-            solver_output = SolverOutput(unknown, path_id)
+def solve_low_level(path_ctx: PathContext) -> SolverOutput:
+    """Invokes an external solver process to solve the given query.
 
-    fun_ctx = path_ctx.fun_ctx
-    fun_ctx.add_solver_output(solver_output)
+    Can raise TimeoutError or some Exception raised during execution"""
 
+    fun_ctx, smt2_filename = path_ctx.fun_ctx, path_ctx.dump_filename
+    args = fun_ctx.args
 
-def solve(ctx: PathContext):
-    args = ctx.fun_ctx.args
-    smt2_filename = ctx.dump_filename
+    # make sure the smt2 file has been written
+    dump(path_ctx)
 
     if args.verbose >= 1:
-        debug("  Checking with external solver process")
-        debug(f"    {args.solver_command} {smt2_filename} > {smt2_filename}.out")
+        print("  Checking with external solver process")
+        print(f"    {args.solver_command} {smt2_filename} > {smt2_filename}.out")
 
     # XXX fix timeout
 
@@ -1037,9 +983,107 @@ def solve(ctx: PathContext):
     #     timeout_seconds = timeout_millis / 1000
 
     cmd = args.solver_command.split() + [smt2_filename]
-    future = PopenFuture(cmd, metadata={"path_ctx": ctx})
-    future.add_done_callback(solver_callback)
-    ctx.fun_ctx.thread_pool.submit(future)
+    future = PopenFuture(cmd, metadata={"path_ctx": path_ctx})
+
+    # XXX avoiding callbacks now
+    # future.add_done_callback(solver_callback)
+
+    fun_ctx.solver_executor.submit(future)
+
+    # block until the external solver returns, times out, is interrupted, fails, etc.
+    stdout, stderr, returncode = future.result()
+
+    # save solver stdout to file
+    with open(f"{smt2_filename}.out", "w") as f:
+        f.write(stdout)
+
+    # save solver stderr to file (only if there is an error)
+    if stderr:
+        with open(f"{smt2_filename}.err", "w") as f:
+            f.write(stderr)
+
+    return SolverOutput.from_result(stdout, stderr, returncode, path_ctx)
+
+
+# XXX used to be gen_model_from_sexpr
+def solve_end_to_end(ctx: PathContext) -> None:
+    """Synchronously resolves a query in a given context, which may result in 0, 1 or multiple solver invocations.
+
+    - may result in 0 invocations if the query contains a known unsat core (hence the need for the context)
+    - may result in exactly 1 invocation if the query is unsat, or sat with a valid model
+    - may result in multiple invocations if the query is sat and the model is invalid (needs refinement)
+
+    If this produces a model, it _should_ be valid.
+    """
+    fun_ctx, path_id, query = ctx.fun_ctx, ctx.path_id, ctx.query
+    args, unsat_cores = fun_ctx.args, fun_ctx.unsat_cores
+
+    verbose = print if args.verbose >= 1 else lambda *args, **kwargs: None
+    verbose(f"Checking path condition {path_id=}")
+
+    # if the query contains an unsat-core, it is unsat; no need to run the solver
+    if check_unsat_cores(query, unsat_cores):
+        verbose("  Already proven unsat")
+        return SolverOutput(unsat, path_id)
+
+    solver_output = solve_low_level(ctx)
+    result, model = solver_output.result, solver_output.model
+
+    # if the ctx is already refined, we don't need to solve again
+    if result == sat and not model.is_valid and not ctx.is_refined:
+        verbose("  Checking again with refinement")
+
+        refined_ctx = ctx.refine()
+
+        if refined_ctx.query.smtlib != query.smtlib:
+            # recurse with the refined query
+            return solve_end_to_end(refined_ctx)
+        else:
+            verbose("    Refinement did not change the query, no need to solve again")
+
+    #
+    # we are done solving, process and triage the result
+    #
+
+    # retrieve cached exec and clear the cache entry
+    exec = fun_ctx.exec_cache.pop(path_id, None)
+
+    if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
+        id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else ""
+        print(f"Trace{id_str}:")
+        print(fun_ctx.traces[path_id], end="")
+
+    if args.print_failed_states:
+        print(f"# {path_id}")
+        print(exec)
+
+    if fun_ctx.solver_executor.is_shutdown():
+        # if the thread pool is in the process of shutting down,
+        # we want to stop processing remaining models/timeouts/errors, etc.
+        return
+
+    if result == unsat:
+        if solver_output.unsat_core:
+            fun_ctx.unsat_cores.append(solver_output.unsat_core)
+        return
+
+    # model could be an empty dict here, so compare to None explicitly
+    if model is None:
+        warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}")
+        return
+
+    if model.is_valid:
+        # we don't print the model here because this may be called from multiple threads
+        fun_ctx.valid_counterexamples.append(model)
+
+        # we have a valid counterexample, so we are eligible for early exit
+        if args.early_exit:
+            fun_ctx.solver_executor.shutdown(wait=False)
+    else:
+        # XXX avoid printing here
+        warn_str = f"Counterexample (potentially invalid): {model}"
+        warn_code(COUNTEREXAMPLE_INVALID, warn_str)
+        fun_ctx.invalid_counterexamples.append(model)
 
 
 def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool:

From 175ede373537a5c758933036c56cf08b5ceb227f Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Wed, 8 Jan 2025 16:22:28 -0800
Subject: [PATCH 13/46] fix solve invocation in setup

---
 src/halmos/__main__.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index a45f285c..f46d8abb 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -484,17 +484,14 @@ def setup(ctx: FunctionContext) -> Exec:
     )
 
     setup_exs_all = sevm.run(setup_ex)
-    setup_exs_no_error = []
+    setup_exs_no_error: list[PathContext] = []
 
-    for idx, setup_ex in enumerate(setup_exs_all):
+    for path_id, setup_ex in enumerate(setup_exs_all):
         if args.verbose >= VERBOSITY_TRACE_SETUP:
-            print(f"{setup_sig} trace #{idx+1}:")
+            print(f"{setup_sig} trace #{path_id}:")
             render_trace(setup_ex.context)
 
-        if not (err := setup_ex.context.output.error):
-            setup_exs_no_error.append((setup_ex, setup_ex.path.to_smt2(args)))
-
-        else:
+        if err := setup_ex.context.output.error:
             opcode = setup_ex.current_opcode()
             if opcode not in [EVM.REVERT, EVM.INVALID]:
                 warn_code(
@@ -507,7 +504,16 @@ def setup(ctx: FunctionContext) -> Exec:
                 print(f"{setup_sig} trace:")
                 render_trace(setup_ex.context)
 
-    setup_exs = []
+        else:
+            path_ctx = PathContext(
+                path_id=path_id,
+                ex=setup_ex,
+                query=setup_ex.path.to_smt2(args),
+                fun_ctx=ctx,
+            )
+            setup_exs_no_error.append(path_ctx)
+
+    setup_exs: list[Exec] = []
 
     match len(setup_exs_no_error):
         case 0:
@@ -515,10 +521,10 @@ def setup(ctx: FunctionContext) -> Exec:
         case 1:
             setup_exs.append(setup_exs_no_error[0][0])
         case _:
-            for setup_ex, query in setup_exs_no_error:
-                solver_output = solve_low_level(query, args)
+            for path_ctx in setup_exs_no_error:
+                solver_output = solve_low_level(path_ctx)
                 if solver_output.result != unsat:
-                    setup_exs.append(setup_ex)
+                    setup_exs.append(path_ctx.ex)
                     if len(setup_exs) > 1:
                         break
 

From cf85376d7f7f28c62451d5e4119f5206a956f4b1 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 16 Jan 2025 16:03:39 -0800
Subject: [PATCH 14/46] bring back timeouts, clean children after timeout

---
 src/halmos/__main__.py  | 43 +++++++++++---------------------
 src/halmos/processes.py | 55 +++++++++++++++++++++++------------------
 2 files changed, 46 insertions(+), 52 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index f46d8abb..0ad73710 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -49,7 +49,7 @@
     warn_code,
 )
 from .mapper import BuildOut, DeployAddressMapper, Mapper
-from .processes import PopenExecutor, PopenFuture
+from .processes import PopenExecutor, PopenFuture, TimeoutExpired
 from .sevm import (
     EMPTY_BALANCE,
     EVM,
@@ -176,7 +176,7 @@ class FunctionContext:
     solver_executor: PopenExecutor = field(default_factory=PopenExecutor)
 
     # list of solver outputs for this function
-    _solver_outputs: list["SolverOutput"] = field(default_factory=list)
+    solver_outputs: list["SolverOutput"] = field(default_factory=list)
 
     # list of valid counterexamples for this function
     valid_counterexamples: list[PotentialModel] = field(default_factory=list)
@@ -203,10 +203,6 @@ def __post_init__(self):
         )
         object.__setattr__(self, "thread_pool", thread_pool)
 
-    @property
-    def solver_outputs(self):
-        return self._solver_outputs
-
 
 @dataclass(frozen=True)
 class PathContext:
@@ -680,15 +676,12 @@ def run_test(ctx: FunctionContext) -> TestResult:
 
             def log_future_result(future: Future):
                 if e := future.exception():
-                    error(f"encountered exception during assertion solving: {e}")
+                    error(f"encountered exception during assertion solving: {e!r}")
 
             solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx)
             solve_future.add_done_callback(log_future_result)
             submitted_futures.append(solve_future)
 
-            # XXX handle refinement
-            # XXX handle timeout
-
         elif ex.context.is_stuck():
             debug(f"Potential error path (id: {idx+1})")
             res, _, _ = solve(ex.path.to_smt2(args), args)
@@ -749,12 +742,6 @@ def log_future_result(future: Future):
     # print test result
     #
 
-    for model in ctx.valid_counterexamples:
-        print(red(f"Counterexample: {model}"))
-
-        if args.early_exit:
-            break
-
     counter = Counter(str(m.result) for m in ctx.solver_outputs)
     if counter["sat"] > 0:
         passfail = red("[FAIL]")
@@ -980,24 +967,21 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput:
         print("  Checking with external solver process")
         print(f"    {args.solver_command} {smt2_filename} > {smt2_filename}.out")
 
-    # XXX fix timeout
-
     # solver_timeout_assertion == 0 means no timeout,
     # which translates to timeout_seconds=None for subprocess.run
-    # timeout_seconds = None
-    # if timeout_millis := args.solver_timeout_assertion:
-    #     timeout_seconds = timeout_millis / 1000
+    timeout_seconds = t / 1000 if (t := args.solver_timeout_assertion) else None
 
     cmd = args.solver_command.split() + [smt2_filename]
-    future = PopenFuture(cmd, metadata={"path_ctx": path_ctx})
-
-    # XXX avoiding callbacks now
-    # future.add_done_callback(solver_callback)
+    future = PopenFuture(cmd, timeout=timeout_seconds)
 
+    # starts the subprocess asynchronously
     fun_ctx.solver_executor.submit(future)
 
     # block until the external solver returns, times out, is interrupted, fails, etc.
-    stdout, stderr, returncode = future.result()
+    try:
+        stdout, stderr, returncode = future.result()
+    except TimeoutExpired:
+        return SolverOutput(result=unknown, path_id=path_ctx.path_id)
 
     # save solver stdout to file
     with open(f"{smt2_filename}.out", "w") as f:
@@ -1068,6 +1052,9 @@ def solve_end_to_end(ctx: PathContext) -> None:
         # we want to stop processing remaining models/timeouts/errors, etc.
         return
 
+    # keep track of the solver outputs, so that we can display PASS/FAIL/TIMEOUT/ERROR later
+    fun_ctx.solver_outputs.append(solver_output)
+
     if result == unsat:
         if solver_output.unsat_core:
             fun_ctx.unsat_cores.append(solver_output.unsat_core)
@@ -1079,14 +1066,14 @@ def solve_end_to_end(ctx: PathContext) -> None:
         return
 
     if model.is_valid:
-        # we don't print the model here because this may be called from multiple threads
+        print(red(f"Counterexample: {model}"))
+
         fun_ctx.valid_counterexamples.append(model)
 
         # we have a valid counterexample, so we are eligible for early exit
         if args.early_exit:
             fun_ctx.solver_executor.shutdown(wait=False)
     else:
-        # XXX avoid printing here
         warn_str = f"Counterexample (potentially invalid): {model}"
         warn_code(COUNTEREXAMPLE_INVALID, warn_str)
         fun_ctx.invalid_counterexamples.append(model)
diff --git a/src/halmos/processes.py b/src/halmos/processes.py
index 04c95c47..1eff7389 100644
--- a/src/halmos/processes.py
+++ b/src/halmos/processes.py
@@ -3,32 +3,32 @@
 import subprocess
 import threading
 import time
-from subprocess import PIPE, Popen
+from subprocess import PIPE, Popen, TimeoutExpired
 
 import psutil
 
 
 class PopenFuture(concurrent.futures.Future):
     cmd: list[str]
+    timeout: float | None  # in seconds, None means no timeout
     process: subprocess.Popen | None
     stdout: str | None
     stderr: str | None
     returncode: int | None
     start_time: float | None
     end_time: float | None
-    metadata: dict | None
     _exception: Exception | None
 
-    def __init__(self, cmd: list[str], metadata: dict | None = None):
+    def __init__(self, cmd: list[str], timeout: float | None = None):
         super().__init__()
         self.cmd = cmd
+        self.timeout = timeout
         self.process = None
         self.stdout = None
         self.stderr = None
         self.returncode = None
         self.start_time = None
         self.end_time = None
-        self.metadata = metadata
         self._exception = None
 
     def start(self):
@@ -40,9 +40,14 @@ def run():
                 self.process = Popen(self.cmd, stdout=PIPE, stderr=PIPE, text=True)
 
                 # blocks until the process terminates
-                self.stdout, self.stderr = self.process.communicate()
+                self.stdout, self.stderr = self.process.communicate(
+                    timeout=self.timeout
+                )
                 self.end_time = time.time()
                 self.returncode = self.process.returncode
+            except TimeoutExpired as e:
+                self._exception = e
+                self.cancel()
             except Exception as e:
                 self._exception = e
             finally:
@@ -58,30 +63,32 @@ def run():
 
     def cancel(self):
         """Attempts to terminate and then kill the process and its children."""
-        if self.is_running():
-            self.process.terminate()
+        if not self.is_running():
+            return
 
-            # use psutil to kill the entire process tree (including children)
-            try:
-                parent_process = psutil.Process(self.process.pid)
-                processes = parent_process.children(recursive=True)
-                processes.append(parent_process)
+        self.process.terminate()
+
+        # use psutil to kill the entire process tree (including children)
+        try:
+            parent_process = psutil.Process(self.process.pid)
+            processes = parent_process.children(recursive=True)
+            processes.append(parent_process)
 
-                # ask politely to terminate first
-                for process in processes:
-                    process.terminate()
+            # ask politely to terminate first
+            for process in processes:
+                process.terminate()
 
-                # give them some time to terminate
-                time.sleep(0.1)
+            # give them some time to terminate
+            time.sleep(0.5)
 
-                # after grace period, force kill
-                for process in processes:
-                    if process.is_running():
-                        process.kill()
+            # after grace period, force kill
+            for process in processes:
+                if process.is_running():
+                    process.kill()
 
-            except psutil.NoSuchProcess:
-                # process already terminated, nothing to do
-                pass
+        except psutil.NoSuchProcess:
+            # process already terminated, nothing to do
+            pass
 
     def exception(self) -> Exception | None:
         """Returns any exception raised during the process."""

From ea3d493e847e4b5809e0135eedbc260618905bcc Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Wed, 8 Jan 2025 17:21:47 -0800
Subject: [PATCH 15/46] migrate build output parsing function in build.py

---
 src/halmos/__main__.py | 176 ++---------------------------------------
 src/halmos/build.py    | 174 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 182 insertions(+), 168 deletions(-)
 create mode 100644 src/halmos/build.py

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 0ad73710..f5a7f309 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -29,6 +29,13 @@
     unsat,
 )
 
+from .build import (
+    build_output_iterator,
+    import_libs,
+    parse_build_out,
+    parse_devdoc,
+    parse_natspec,
+)
 from .bytevec import ByteVec
 from .calldata import FunctionInfo, get_abi, mk_calldata
 from .config import Config as HalmosConfig
@@ -39,7 +46,6 @@
     COUNTEREXAMPLE_UNKNOWN,
     INTERNAL_ERROR,
     LOOP_BOUND,
-    PARSING_ERROR,
     REVERT_ALL,
     debug,
     error,
@@ -48,7 +54,7 @@
     warn,
     warn_code,
 )
-from .mapper import BuildOut, DeployAddressMapper, Mapper
+from .mapper import BuildOut, DeployAddressMapper
 from .processes import PopenExecutor, PopenFuture, TimeoutExpired
 from .sevm import (
     EMPTY_BALANCE,
@@ -1109,172 +1115,6 @@ def refine(query: SMTQuery) -> SMTQuery:
     return SMTQuery(smtlib, query.assertions)
 
 
-def get_contract_type(
-    ast_nodes: list, contract_name: str
-) -> tuple[str | None, str | None]:
-    for node in ast_nodes:
-        if node["nodeType"] == "ContractDefinition" and node["name"] == contract_name:
-            abstract = "abstract " if node.get("abstract") else ""
-            contract_type = abstract + node["contractKind"]
-            natspec = node.get("documentation")
-            return contract_type, natspec
-
-    return None, None
-
-
-def parse_build_out(args: HalmosConfig) -> dict:
-    result = {}  # compiler version -> source filename -> contract name -> (json, type)
-
-    out_path = os.path.join(args.root, args.forge_build_out)
-    if not os.path.exists(out_path):
-        raise FileNotFoundError(
-            f"The build output directory `{out_path}` does not exist"
-        )
-
-    for sol_dirname in os.listdir(out_path):  # for each source filename
-        if not sol_dirname.endswith(".sol"):
-            continue
-
-        sol_path = os.path.join(out_path, sol_dirname)
-        if not os.path.isdir(sol_path):
-            continue
-
-        for json_filename in os.listdir(sol_path):  # for each contract name
-            try:
-                if not json_filename.endswith(".json"):
-                    continue
-                if json_filename.startswith("."):
-                    continue
-
-                json_path = os.path.join(sol_path, json_filename)
-                with open(json_path, encoding="utf8") as f:
-                    json_out = json.load(f)
-
-                # cut off compiler version number as well
-                contract_name = json_filename.split(".")[0]
-                ast_nodes = json_out["ast"]["nodes"]
-                contract_type, natspec = get_contract_type(ast_nodes, contract_name)
-
-                # can happen to solidity files for multiple reasons:
-                # - import only (like console2.log)
-                # - defines only structs or enums
-                # - defines only free functions
-                # - ...
-                if contract_type is None:
-                    debug(f"Skipped {json_filename}, no contract definition found")
-                    continue
-
-                compiler_version = json_out["metadata"]["compiler"]["version"]
-                result.setdefault(compiler_version, {})
-                result[compiler_version].setdefault(sol_dirname, {})
-                contract_map = result[compiler_version][sol_dirname]
-
-                if contract_name in contract_map:
-                    raise ValueError(
-                        "duplicate contract names in the same file",
-                        contract_name,
-                        sol_dirname,
-                    )
-
-                contract_map[contract_name] = (json_out, contract_type, natspec)
-                parse_symbols(args, contract_map, contract_name)
-
-            except Exception as err:
-                warn_code(
-                    PARSING_ERROR,
-                    f"Skipped {json_filename} due to parsing failure: {type(err).__name__}: {err}",
-                )
-                if args.debug:
-                    traceback.print_exc()
-                continue
-
-    return result
-
-
-def parse_symbols(args: HalmosConfig, contract_map: dict, contract_name: str) -> None:
-    try:
-        json_out = contract_map[contract_name][0]
-        bytecode = json_out["bytecode"]["object"]
-        contract_mapping_info = Mapper().get_or_create(contract_name)
-        contract_mapping_info.bytecode = bytecode
-
-        Mapper().parse_ast(json_out["ast"])
-
-    except Exception:
-        debug(f"error parsing symbols for contract {contract_name}")
-        debug(traceback.format_exc())
-
-        # we parse symbols as best effort, don't propagate exceptions
-        pass
-
-
-def parse_devdoc(funsig: str, contract_json: dict) -> str | None:
-    try:
-        return contract_json["metadata"]["output"]["devdoc"]["methods"][funsig][
-            "custom:halmos"
-        ]
-    except KeyError:
-        return None
-
-
-def parse_natspec(natspec: dict) -> str:
-    # This parsing scheme is designed to handle:
-    #
-    # - multiline tags:
-    #   /// @custom:halmos --x
-    #   ///                --y
-    #
-    # - multiple tags:
-    #   /// @custom:halmos --x
-    #   /// @custom:halmos --y
-    #
-    # - tags that start in the middle of line:
-    #   /// blah blah @custom:halmos --x
-    #   /// --y
-    #
-    # In all the above examples, this scheme returns "--x (whitespaces) --y"
-    isHalmosTag = False
-    result = ""
-    for item in re.split(r"(@\S+)", natspec.get("text", "")):
-        if item == "@custom:halmos":
-            isHalmosTag = True
-        elif re.match(r"^@\S", item):
-            isHalmosTag = False
-        elif isHalmosTag:
-            result += item
-    return result.strip()
-
-
-def import_libs(build_out_map: dict, hexcode: str, linkReferences: dict) -> dict:
-    libs = {}
-
-    for filepath in linkReferences:
-        file_name = filepath.split("/")[-1]
-
-        for lib_name in linkReferences[filepath]:
-            (lib_json, _, _) = build_out_map[file_name][lib_name]
-            lib_hexcode = lib_json["deployedBytecode"]["object"]
-
-            # in bytes, multiply indices by 2 and offset 0x
-            placeholder_index = linkReferences[filepath][lib_name][0]["start"] * 2 + 2
-            placeholder = hexcode[placeholder_index : placeholder_index + 40]
-
-            libs[f"{filepath}:{lib_name}"] = {
-                "placeholder": placeholder,
-                "hexcode": lib_hexcode,
-            }
-
-    return libs
-
-
-def build_output_iterator(build_out: dict):
-    for compiler_version in sorted(build_out):
-        build_out_map = build_out[compiler_version]
-        for filename in sorted(build_out_map):
-            for contract_name in sorted(build_out_map[filename]):
-                yield (build_out_map, filename, contract_name)
-
-
 def contract_regex(args):
     if args.contract:
         return f"^{args.contract}$"
diff --git a/src/halmos/build.py b/src/halmos/build.py
new file mode 100644
index 00000000..b432826d
--- /dev/null
+++ b/src/halmos/build.py
@@ -0,0 +1,174 @@
+import json
+import os
+import re
+import traceback
+
+from halmos.config import Config as HalmosConfig
+from halmos.logs import PARSING_ERROR, debug, warn_code
+from halmos.mapper import Mapper
+
+
+def get_contract_type(
+    ast_nodes: list, contract_name: str
+) -> tuple[str | None, str | None]:
+    for node in ast_nodes:
+        if node["nodeType"] == "ContractDefinition" and node["name"] == contract_name:
+            abstract = "abstract " if node.get("abstract") else ""
+            contract_type = abstract + node["contractKind"]
+            natspec = node.get("documentation")
+            return contract_type, natspec
+
+    return None, None
+
+
+def parse_build_out(args: HalmosConfig) -> dict:
+    result = {}  # compiler version -> source filename -> contract name -> (json, type)
+
+    out_path = os.path.join(args.root, args.forge_build_out)
+    if not os.path.exists(out_path):
+        raise FileNotFoundError(
+            f"The build output directory `{out_path}` does not exist"
+        )
+
+    for sol_dirname in os.listdir(out_path):  # for each source filename
+        if not sol_dirname.endswith(".sol"):
+            continue
+
+        sol_path = os.path.join(out_path, sol_dirname)
+        if not os.path.isdir(sol_path):
+            continue
+
+        for json_filename in os.listdir(sol_path):  # for each contract name
+            try:
+                if not json_filename.endswith(".json"):
+                    continue
+                if json_filename.startswith("."):
+                    continue
+
+                json_path = os.path.join(sol_path, json_filename)
+                with open(json_path, encoding="utf8") as f:
+                    json_out = json.load(f)
+
+                # cut off compiler version number as well
+                contract_name = json_filename.split(".")[0]
+                ast_nodes = json_out["ast"]["nodes"]
+                contract_type, natspec = get_contract_type(ast_nodes, contract_name)
+
+                # can happen to solidity files for multiple reasons:
+                # - import only (like console2.log)
+                # - defines only structs or enums
+                # - defines only free functions
+                # - ...
+                if contract_type is None:
+                    debug(f"Skipped {json_filename}, no contract definition found")
+                    continue
+
+                compiler_version = json_out["metadata"]["compiler"]["version"]
+                result.setdefault(compiler_version, {})
+                result[compiler_version].setdefault(sol_dirname, {})
+                contract_map = result[compiler_version][sol_dirname]
+
+                if contract_name in contract_map:
+                    raise ValueError(
+                        "duplicate contract names in the same file",
+                        contract_name,
+                        sol_dirname,
+                    )
+
+                contract_map[contract_name] = (json_out, contract_type, natspec)
+                parse_symbols(args, contract_map, contract_name)
+
+            except Exception as err:
+                warn_code(
+                    PARSING_ERROR,
+                    f"Skipped {json_filename} due to parsing failure: {type(err).__name__}: {err}",
+                )
+                if args.debug:
+                    traceback.print_exc()
+                continue
+
+    return result
+
+
+def parse_symbols(args: HalmosConfig, contract_map: dict, contract_name: str) -> None:
+    try:
+        json_out = contract_map[contract_name][0]
+        bytecode = json_out["bytecode"]["object"]
+        contract_mapping_info = Mapper().get_or_create(contract_name)
+        contract_mapping_info.bytecode = bytecode
+
+        Mapper().parse_ast(json_out["ast"])
+
+    except Exception:
+        debug(f"error parsing symbols for contract {contract_name}")
+        debug(traceback.format_exc())
+
+        # we parse symbols as best effort, don't propagate exceptions
+        pass
+
+
+def parse_devdoc(funsig: str, contract_json: dict) -> str | None:
+    try:
+        return contract_json["metadata"]["output"]["devdoc"]["methods"][funsig][
+            "custom:halmos"
+        ]
+    except KeyError:
+        return None
+
+
+def parse_natspec(natspec: dict) -> str:
+    # This parsing scheme is designed to handle:
+    #
+    # - multiline tags:
+    #   /// @custom:halmos --x
+    #   ///                --y
+    #
+    # - multiple tags:
+    #   /// @custom:halmos --x
+    #   /// @custom:halmos --y
+    #
+    # - tags that start in the middle of line:
+    #   /// blah blah @custom:halmos --x
+    #   /// --y
+    #
+    # In all the above examples, this scheme returns "--x (whitespaces) --y"
+    isHalmosTag = False
+    result = ""
+    for item in re.split(r"(@\S+)", natspec.get("text", "")):
+        if item == "@custom:halmos":
+            isHalmosTag = True
+        elif re.match(r"^@\S", item):
+            isHalmosTag = False
+        elif isHalmosTag:
+            result += item
+    return result.strip()
+
+
+def import_libs(build_out_map: dict, hexcode: str, linkReferences: dict) -> dict:
+    libs = {}
+
+    for filepath in linkReferences:
+        file_name = filepath.split("/")[-1]
+
+        for lib_name in linkReferences[filepath]:
+            (lib_json, _, _) = build_out_map[file_name][lib_name]
+            lib_hexcode = lib_json["deployedBytecode"]["object"]
+
+            # in bytes, multiply indices by 2 and offset 0x
+            placeholder_index = linkReferences[filepath][lib_name][0]["start"] * 2 + 2
+            placeholder = hexcode[placeholder_index : placeholder_index + 40]
+
+            libs[f"{filepath}:{lib_name}"] = {
+                "placeholder": placeholder,
+                "hexcode": lib_hexcode,
+            }
+
+    return libs
+
+
+def build_output_iterator(build_out: dict):
+    for compiler_version in sorted(build_out):
+        build_out_map = build_out[compiler_version]
+        for filename in sorted(build_out_map):
+            for contract_name in sorted(build_out_map[filename]):
+                yield (build_out_map, filename, contract_name)

From 223923153713d2b2549d3f2fed94d70cf5db6819 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Wed, 8 Jan 2025 17:39:11 -0800
Subject: [PATCH 16/46] move solving-related functions to solve.py

---
 src/halmos/__main__.py  | 426 ++--------------------------------------
 src/halmos/constants.py |   4 +
 src/halmos/solve.py     | 413 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 433 insertions(+), 410 deletions(-)
 create mode 100644 src/halmos/constants.py
 create mode 100644 src/halmos/solve.py

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index f5a7f309..5625fe93 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -10,22 +10,17 @@
 import sys
 import time
 import traceback
-import uuid
 from collections import Counter
-from concurrent.futures import Future, ThreadPoolExecutor
+from concurrent.futures import Future
 from copy import deepcopy
-from dataclasses import asdict, dataclass, field
+from dataclasses import asdict, dataclass
 from datetime import timedelta
 from enum import Enum
 from importlib import metadata
 
 from z3 import (
     BitVec,
-    CheckSatResult,
-    Solver,
-    sat,
     set_option,
-    unknown,
     unsat,
 )
 
@@ -40,10 +35,14 @@
 from .calldata import FunctionInfo, get_abi, mk_calldata
 from .config import Config as HalmosConfig
 from .config import arg_parser, default_config, resolve_config_files, toml_parser
+from .constants import (
+    VERBOSITY_TRACE_CONSTRUCTOR,
+    VERBOSITY_TRACE_COUNTEREXAMPLE,
+    VERBOSITY_TRACE_PATHS,
+    VERBOSITY_TRACE_SETUP,
+)
 from .exceptions import HalmosException
 from .logs import (
-    COUNTEREXAMPLE_INVALID,
-    COUNTEREXAMPLE_UNKNOWN,
     INTERNAL_ERROR,
     LOOP_BOUND,
     REVERT_ALL,
@@ -51,11 +50,9 @@
     error,
     logger,
     logger_unique,
-    warn,
     warn_code,
 )
 from .mapper import BuildOut, DeployAddressMapper
-from .processes import PopenExecutor, PopenFuture, TimeoutExpired
 from .sevm import (
     EMPTY_BALANCE,
     EVM,
@@ -80,7 +77,14 @@
     jumpid_str,
     mnemonic,
 )
-from .smtlib import ModelVariables, parse_string
+from .solve import (
+    ContractContext,
+    FunctionContext,
+    PathContext,
+    SolverOutput,
+    solve_end_to_end,
+    solve_low_level,
+)
 from .traces import render_trace, rendered_trace
 from .utils import (
     NamedTimer,
@@ -92,7 +96,6 @@
     hexify,
     indent_text,
     red,
-    stringify,
     unbox_int,
     yellow,
 )
@@ -108,184 +111,6 @@
 if sys.stdout.encoding != "utf-8":
     sys.stdout.reconfigure(encoding="utf-8")
 
-VERBOSITY_TRACE_COUNTEREXAMPLE = 2
-VERBOSITY_TRACE_SETUP = 3
-VERBOSITY_TRACE_PATHS = 4
-VERBOSITY_TRACE_CONSTRUCTOR = 5
-
-
-@dataclass
-class PotentialModel:
-    model: ModelVariables | str | None
-    is_valid: bool
-
-    def __str__(self) -> str:
-        # expected to be a filename
-        if isinstance(self.model, str):
-            return f"see {self.model}"
-
-        formatted = []
-        for v in self.model.values():
-            # TODO: ideally we would avoid wrapping with con() here
-            stringified = stringify(v.full_name, con(v.value))
-            formatted.append(f"\n    {v.full_name} = {stringified}")
-        return "".join(sorted(formatted)) if formatted else "∅"
-
-
-@dataclass(frozen=True)
-class ContractContext:
-    # config with contract-specific overrides
-    args: HalmosConfig
-
-    # name of this contract
-    name: str
-
-    # signatures of test functions to run
-    funsigs: list[str]
-
-    # data parsed from the build output for this contract
-    creation_hexcode: str
-    deployed_hexcode: str
-    abi: dict
-    method_identifiers: dict[str, str]
-    contract_json: dict
-    libs: dict
-
-    # TODO: check if this is really a contract-level variable
-    build_out_map: dict
-
-
-@dataclass(frozen=True)
-class FunctionContext:
-    # config with function-specific overrides
-    args: HalmosConfig
-
-    # function name, signature, and selector
-    info: FunctionInfo
-
-    # solver using the function-specific config
-    solver: Solver
-
-    # backlink to the parent contract context
-    contract_ctx: ContractContext
-
-    # optional starting state
-    setup_ex: Exec | None = None
-
-    # dump directory for this function (generated in __post_init__)
-    dump_dirname: str = field(init=False)
-
-    # function-level thread pool that drives assertion solving
-    thread_pool: ThreadPoolExecutor = field(init=False)
-
-    # path-specific queries are submitted to this function-specific executor
-    solver_executor: PopenExecutor = field(default_factory=PopenExecutor)
-
-    # list of solver outputs for this function
-    solver_outputs: list["SolverOutput"] = field(default_factory=list)
-
-    # list of valid counterexamples for this function
-    valid_counterexamples: list[PotentialModel] = field(default_factory=list)
-
-    # list of potentially invalid counterexamples for this function
-    invalid_counterexamples: list[PotentialModel] = field(default_factory=list)
-
-    # list of unsat cores for this function
-    unsat_cores: list[list] = field(default_factory=list)
-
-    # map from path id to trace
-    traces: dict[int, str] = field(default_factory=dict)
-
-    # map from path id to execution
-    exec_cache: dict[int, Exec] = field(default_factory=dict)
-
-    def __post_init__(self):
-        dirname = f"/tmp/{self.info.name}-{uuid.uuid4().hex}"
-        object.__setattr__(self, "dump_dirname", dirname)
-
-        thread_pool = ThreadPoolExecutor(
-            max_workers=self.args.solver_threads,
-            thread_name_prefix=f"{self.info.name}-",
-        )
-        object.__setattr__(self, "thread_pool", thread_pool)
-
-
-@dataclass(frozen=True)
-class PathContext:
-    # id of this path
-    path_id: int
-
-    # path execution object
-    ex: Exec
-
-    # SMT query
-    query: SMTQuery
-
-    # backlink to the parent function context
-    fun_ctx: FunctionContext
-
-    # filename for this path (generated in __post_init__)
-    dump_filename: str = field(init=False)
-
-    is_refined: bool = False
-
-    def __post_init__(self):
-        refined_str = ".refined" if self.is_refined else ""
-        dirname = self.fun_ctx.dump_dirname
-        filename = os.path.join(dirname, f"{self.path_id}{refined_str}.smt2")
-
-        # use __setattr__ because this is a frozen dataclass
-        object.__setattr__(self, "dump_filename", filename)
-
-    def refine(self) -> "PathContext":
-        return PathContext(
-            path_id=self.path_id,
-            ex=self.ex,
-            query=refine(self.query),
-            fun_ctx=self.fun_ctx,
-            is_refined=True,
-        )
-
-
-@dataclass(frozen=True)
-class SolverOutput:
-    # solver result
-    result: CheckSatResult
-
-    # we don't backlink to the parent path context to avoid extra
-    # references to Exec objects past the lifetime of the path
-    path_id: int
-
-    # solver model
-    model: PotentialModel | None = None
-
-    # optional unsat core
-    unsat_core: list[str] | None = None
-
-    @staticmethod
-    def from_result(
-        stdout: str, stderr: str, returncode: int, path_ctx: PathContext
-    ) -> "SolverOutput":
-        # extract the first line (we expect sat/unsat/unknown)
-        newline_idx = stdout.find("\n")
-        first_line = stdout[:newline_idx] if newline_idx != -1 else stdout
-
-        args = path_ctx.fun_ctx.args
-        path_id = path_ctx.path_id
-        if args.verbose >= 1:
-            debug(f"    {first_line}")
-
-        match first_line:
-            case "unsat":
-                unsat_core = parse_unsat_core(stdout) if args.cache_solver else None
-                return SolverOutput(unsat, path_id, unsat_core=unsat_core)
-            case "sat":
-                is_valid = is_model_valid(stdout)
-                model = PotentialModel(model=parse_string(stdout), is_valid=is_valid)
-                return SolverOutput(sat, path_id, model=model)
-            case _:
-                return SolverOutput(unknown, path_id)
-
 
 @dataclass(frozen=True)
 class TestResult:
@@ -896,225 +721,6 @@ def run_contract(ctx: ContractContext) -> list[TestResult]:
     return test_results
 
 
-def parse_unsat_core(output) -> list[str] | None:
-    # parsing example:
-    #   unsat
-    #   (error "the context is unsatisfiable")  # <-- this line is optional
-    #   (<41702> <37030> <36248> <47880>)
-    # result:
-    #   [41702, 37030, 36248, 47880]
-    pattern = r"unsat\s*(\(\s*error\s+[^)]*\)\s*)?\(\s*((<[0-9]+>\s*)*)\)"
-    match = re.search(pattern, output)
-    if match:
-        result = [re.sub(r"<([0-9]+)>", r"\1", name) for name in match.group(2).split()]
-        return result
-    else:
-        warn(f"error in parsing unsat core: {output}")
-        return None
-
-
-def dump(
-    path_ctx: PathContext,
-) -> tuple[CheckSatResult, PotentialModel | None, list | None]:
-    args = path_ctx.fun_ctx.args
-    query = path_ctx.query
-    dump_filename = path_ctx.dump_filename
-
-    if args.verbose >= 1:
-        debug(f"Writing SMT query to {dump_filename}")
-
-    # for each implication assertion, `(assert (=> |id| c))`, in query.smtlib,
-    # generate a corresponding named assertion, `(assert (! |id| :named <id>))`.
-    # see `svem.Path.to_smt2()` for more details.
-    if args.cache_solver:
-        named_assertions = "".join(
-            [
-                f"(assert (! |{assert_id}| :named <{assert_id}>))\n"
-                for assert_id in query.assertions
-            ]
-        )
-
-        with open(dump_filename, "w") as f:
-            f.write("(set-option :produce-unsat-cores true)\n")
-            f.write("(set-logic QF_AUFBV)\n")
-            f.write(query.smtlib)
-            f.write(named_assertions)
-            f.write("(check-sat)\n")
-            f.write("(get-model)\n")
-            f.write("(get-unsat-core)\n")
-
-    else:
-        with open(dump_filename, "w") as f:
-            f.write("(set-logic QF_AUFBV)\n")
-            f.write(query.smtlib)
-            f.write("(check-sat)\n")
-            f.write("(get-model)\n")
-
-
-def is_model_valid(solver_stdout: str) -> bool:
-    # TODO: evaluate the path condition against the given model after excluding f_evm_* symbols,
-    #       since the f_evm_* symbols may still appear in valid models.
-
-    return "f_evm_" not in solver_stdout
-
-
-def solve_low_level(path_ctx: PathContext) -> SolverOutput:
-    """Invokes an external solver process to solve the given query.
-
-    Can raise TimeoutError or some Exception raised during execution"""
-
-    fun_ctx, smt2_filename = path_ctx.fun_ctx, path_ctx.dump_filename
-    args = fun_ctx.args
-
-    # make sure the smt2 file has been written
-    dump(path_ctx)
-
-    if args.verbose >= 1:
-        print("  Checking with external solver process")
-        print(f"    {args.solver_command} {smt2_filename} > {smt2_filename}.out")
-
-    # solver_timeout_assertion == 0 means no timeout,
-    # which translates to timeout_seconds=None for subprocess.run
-    timeout_seconds = t / 1000 if (t := args.solver_timeout_assertion) else None
-
-    cmd = args.solver_command.split() + [smt2_filename]
-    future = PopenFuture(cmd, timeout=timeout_seconds)
-
-    # starts the subprocess asynchronously
-    fun_ctx.solver_executor.submit(future)
-
-    # block until the external solver returns, times out, is interrupted, fails, etc.
-    try:
-        stdout, stderr, returncode = future.result()
-    except TimeoutExpired:
-        return SolverOutput(result=unknown, path_id=path_ctx.path_id)
-
-    # save solver stdout to file
-    with open(f"{smt2_filename}.out", "w") as f:
-        f.write(stdout)
-
-    # save solver stderr to file (only if there is an error)
-    if stderr:
-        with open(f"{smt2_filename}.err", "w") as f:
-            f.write(stderr)
-
-    return SolverOutput.from_result(stdout, stderr, returncode, path_ctx)
-
-
-# XXX used to be gen_model_from_sexpr
-def solve_end_to_end(ctx: PathContext) -> None:
-    """Synchronously resolves a query in a given context, which may result in 0, 1 or multiple solver invocations.
-
-    - may result in 0 invocations if the query contains a known unsat core (hence the need for the context)
-    - may result in exactly 1 invocation if the query is unsat, or sat with a valid model
-    - may result in multiple invocations if the query is sat and the model is invalid (needs refinement)
-
-    If this produces a model, it _should_ be valid.
-    """
-    fun_ctx, path_id, query = ctx.fun_ctx, ctx.path_id, ctx.query
-    args, unsat_cores = fun_ctx.args, fun_ctx.unsat_cores
-
-    verbose = print if args.verbose >= 1 else lambda *args, **kwargs: None
-    verbose(f"Checking path condition {path_id=}")
-
-    # if the query contains an unsat-core, it is unsat; no need to run the solver
-    if check_unsat_cores(query, unsat_cores):
-        verbose("  Already proven unsat")
-        return SolverOutput(unsat, path_id)
-
-    solver_output = solve_low_level(ctx)
-    result, model = solver_output.result, solver_output.model
-
-    # if the ctx is already refined, we don't need to solve again
-    if result == sat and not model.is_valid and not ctx.is_refined:
-        verbose("  Checking again with refinement")
-
-        refined_ctx = ctx.refine()
-
-        if refined_ctx.query.smtlib != query.smtlib:
-            # recurse with the refined query
-            return solve_end_to_end(refined_ctx)
-        else:
-            verbose("    Refinement did not change the query, no need to solve again")
-
-    #
-    # we are done solving, process and triage the result
-    #
-
-    # retrieve cached exec and clear the cache entry
-    exec = fun_ctx.exec_cache.pop(path_id, None)
-
-    if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
-        id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else ""
-        print(f"Trace{id_str}:")
-        print(fun_ctx.traces[path_id], end="")
-
-    if args.print_failed_states:
-        print(f"# {path_id}")
-        print(exec)
-
-    if fun_ctx.solver_executor.is_shutdown():
-        # if the thread pool is in the process of shutting down,
-        # we want to stop processing remaining models/timeouts/errors, etc.
-        return
-
-    # keep track of the solver outputs, so that we can display PASS/FAIL/TIMEOUT/ERROR later
-    fun_ctx.solver_outputs.append(solver_output)
-
-    if result == unsat:
-        if solver_output.unsat_core:
-            fun_ctx.unsat_cores.append(solver_output.unsat_core)
-        return
-
-    # model could be an empty dict here, so compare to None explicitly
-    if model is None:
-        warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}")
-        return
-
-    if model.is_valid:
-        print(red(f"Counterexample: {model}"))
-
-        fun_ctx.valid_counterexamples.append(model)
-
-        # we have a valid counterexample, so we are eligible for early exit
-        if args.early_exit:
-            fun_ctx.solver_executor.shutdown(wait=False)
-    else:
-        warn_str = f"Counterexample (potentially invalid): {model}"
-        warn_code(COUNTEREXAMPLE_INVALID, warn_str)
-        fun_ctx.invalid_counterexamples.append(model)
-
-
-def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool:
-    # return true if the given query contains any given unsat core
-    for unsat_core in unsat_cores:
-        if all(core in query.assertions for core in unsat_core):
-            return True
-    return False
-
-
-def refine(query: SMTQuery) -> SMTQuery:
-    smtlib = query.smtlib
-
-    # replace uninterpreted abstraction with actual symbols for assertion solving
-    smtlib = re.sub(
-        r"\(declare-fun f_evm_(bvmul)_([0-9]+) \(\(_ BitVec \2\) \(_ BitVec \2\)\) \(_ BitVec \2\)\)",
-        r"(define-fun f_evm_\1_\2 ((x (_ BitVec \2)) (y (_ BitVec \2))) (_ BitVec \2) (\1 x y))",
-        smtlib,
-    )
-
-    # replace `(f_evm_bvudiv_N x y)` with `(ite (= y (_ bv0 N)) (_ bv0 N) (bvudiv x y))`
-    # similarly for bvurem, bvsdiv, and bvsrem
-    # NOTE: (bvudiv x (_ bv0 N)) is *defined* to (bvneg (_ bv1 N)); while (div x 0) is undefined
-    smtlib = re.sub(
-        r"\(declare-fun f_evm_(bvudiv|bvurem|bvsdiv|bvsrem)_([0-9]+) \(\(_ BitVec \2\) \(_ BitVec \2\)\) \(_ BitVec \2\)\)",
-        r"(define-fun f_evm_\1_\2 ((x (_ BitVec \2)) (y (_ BitVec \2))) (_ BitVec \2) (ite (= y (_ bv0 \2)) (_ bv0 \2) (\1 x y)))",
-        smtlib,
-    )
-
-    return SMTQuery(smtlib, query.assertions)
-
-
 def contract_regex(args):
     if args.contract:
         return f"^{args.contract}$"
diff --git a/src/halmos/constants.py b/src/halmos/constants.py
new file mode 100644
index 00000000..f4583ae9
--- /dev/null
+++ b/src/halmos/constants.py
@@ -0,0 +1,4 @@
+VERBOSITY_TRACE_COUNTEREXAMPLE = 2
+VERBOSITY_TRACE_SETUP = 3
+VERBOSITY_TRACE_PATHS = 4
+VERBOSITY_TRACE_CONSTRUCTOR = 5
diff --git a/src/halmos/solve.py b/src/halmos/solve.py
new file mode 100644
index 00000000..e234b455
--- /dev/null
+++ b/src/halmos/solve.py
@@ -0,0 +1,413 @@
+import os
+import re
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass, field
+
+from z3 import CheckSatResult, Solver, sat, unknown, unsat
+
+from halmos.calldata import FunctionInfo
+from halmos.config import Config as HalmosConfig
+from halmos.constants import VERBOSITY_TRACE_COUNTEREXAMPLE, VERBOSITY_TRACE_PATHS
+from halmos.logs import (
+    COUNTEREXAMPLE_INVALID,
+    COUNTEREXAMPLE_UNKNOWN,
+    debug,
+    warn,
+    warn_code,
+)
+from halmos.processes import PopenExecutor, PopenFuture, TimeoutExpired
+from halmos.sevm import Exec, SMTQuery
+from halmos.smtlib import ModelVariables, parse_string
+from halmos.utils import con, red, stringify
+
+
+@dataclass(frozen=True)
+class PotentialModel:
+    model: ModelVariables | str | None
+    is_valid: bool
+
+    def __str__(self) -> str:
+        # expected to be a filename
+        if isinstance(self.model, str):
+            return f"see {self.model}"
+
+        formatted = []
+        for v in self.model.values():
+            # TODO: ideally we would avoid wrapping with con() here
+            stringified = stringify(v.full_name, con(v.value))
+            formatted.append(f"\n    {v.full_name} = {stringified}")
+        return "".join(sorted(formatted)) if formatted else "∅"
+
+
+@dataclass(frozen=True)
+class ContractContext:
+    # config with contract-specific overrides
+    args: HalmosConfig
+
+    # name of this contract
+    name: str
+
+    # signatures of test functions to run
+    funsigs: list[str]
+
+    # data parsed from the build output for this contract
+    creation_hexcode: str
+    deployed_hexcode: str
+    abi: dict
+    method_identifiers: dict[str, str]
+    contract_json: dict
+    libs: dict
+
+    # TODO: check if this is really a contract-level variable
+    build_out_map: dict
+
+
+@dataclass(frozen=True)
+class FunctionContext:
+    # config with function-specific overrides
+    args: HalmosConfig
+
+    # function name, signature, and selector
+    info: FunctionInfo
+
+    # solver using the function-specific config
+    solver: Solver
+
+    # backlink to the parent contract context
+    contract_ctx: ContractContext
+
+    # optional starting state
+    setup_ex: Exec | None = None
+
+    # dump directory for this function (generated in __post_init__)
+    dump_dirname: str = field(init=False)
+
+    # function-level thread pool that drives assertion solving
+    thread_pool: ThreadPoolExecutor = field(init=False)
+
+    # path-specific queries are submitted to this function-specific executor
+    solver_executor: PopenExecutor = field(default_factory=PopenExecutor)
+
+    # list of solver outputs for this function
+    solver_outputs: list["SolverOutput"] = field(default_factory=list)
+
+    # list of valid counterexamples for this function
+    valid_counterexamples: list[PotentialModel] = field(default_factory=list)
+
+    # list of potentially invalid counterexamples for this function
+    invalid_counterexamples: list[PotentialModel] = field(default_factory=list)
+
+    # list of unsat cores for this function
+    unsat_cores: list[list] = field(default_factory=list)
+
+    # map from path id to trace
+    traces: dict[int, str] = field(default_factory=dict)
+
+    # map from path id to execution
+    exec_cache: dict[int, Exec] = field(default_factory=dict)
+
+    def __post_init__(self):
+        dirname = f"/tmp/{self.info.name}-{uuid.uuid4().hex}"
+        object.__setattr__(self, "dump_dirname", dirname)
+
+        thread_pool = ThreadPoolExecutor(
+            max_workers=self.args.solver_threads,
+            thread_name_prefix=f"{self.info.name}-",
+        )
+        object.__setattr__(self, "thread_pool", thread_pool)
+
+
+@dataclass(frozen=True)
+class PathContext:
+    # id of this path
+    path_id: int
+
+    # path execution object
+    ex: Exec
+
+    # SMT query
+    query: SMTQuery
+
+    # backlink to the parent function context
+    fun_ctx: FunctionContext
+
+    # filename for this path (generated in __post_init__)
+    dump_filename: str = field(init=False)
+
+    is_refined: bool = False
+
+    def __post_init__(self):
+        refined_str = ".refined" if self.is_refined else ""
+        dirname = self.fun_ctx.dump_dirname
+        filename = os.path.join(dirname, f"{self.path_id}{refined_str}.smt2")
+
+        # use __setattr__ because this is a frozen dataclass
+        object.__setattr__(self, "dump_filename", filename)
+
+    def refine(self) -> "PathContext":
+        return PathContext(
+            path_id=self.path_id,
+            ex=self.ex,
+            query=refine(self.query),
+            fun_ctx=self.fun_ctx,
+            is_refined=True,
+        )
+
+
+@dataclass(frozen=True)
+class SolverOutput:
+    # solver result
+    result: CheckSatResult
+
+    # we don't backlink to the parent path context to avoid extra
+    # references to Exec objects past the lifetime of the path
+    path_id: int
+
+    # solver model
+    model: PotentialModel | None = None
+
+    # optional unsat core
+    unsat_core: list[str] | None = None
+
+    @staticmethod
+    def from_result(
+        stdout: str, stderr: str, returncode: int, path_ctx: PathContext
+    ) -> "SolverOutput":
+        # extract the first line (we expect sat/unsat/unknown)
+        newline_idx = stdout.find("\n")
+        first_line = stdout[:newline_idx] if newline_idx != -1 else stdout
+
+        args = path_ctx.fun_ctx.args
+        path_id = path_ctx.path_id
+        if args.verbose >= 1:
+            debug(f"    {first_line}")
+
+        match first_line:
+            case "unsat":
+                unsat_core = parse_unsat_core(stdout) if args.cache_solver else None
+                return SolverOutput(unsat, path_id, unsat_core=unsat_core)
+            case "sat":
+                is_valid = is_model_valid(stdout)
+                model = PotentialModel(model=parse_string(stdout), is_valid=is_valid)
+                return SolverOutput(sat, path_id, model=model)
+            case _:
+                return SolverOutput(unknown, path_id)
+
+
+def parse_unsat_core(output) -> list[str] | None:
+    # parsing example:
+    #   unsat
+    #   (error "the context is unsatisfiable")  # <-- this line is optional
+    #   (<41702> <37030> <36248> <47880>)
+    # result:
+    #   [41702, 37030, 36248, 47880]
+    pattern = r"unsat\s*(\(\s*error\s+[^)]*\)\s*)?\(\s*((<[0-9]+>\s*)*)\)"
+    match = re.search(pattern, output)
+    if match:
+        result = [re.sub(r"<([0-9]+)>", r"\1", name) for name in match.group(2).split()]
+        return result
+    else:
+        warn(f"error in parsing unsat core: {output}")
+        return None
+
+
+def dump(
+    path_ctx: PathContext,
+) -> tuple[CheckSatResult, PotentialModel | None, list | None]:
+    args = path_ctx.fun_ctx.args
+    query = path_ctx.query
+    dump_filename = path_ctx.dump_filename
+
+    if args.verbose >= 1:
+        debug(f"Writing SMT query to {dump_filename}")
+
+    # for each implication assertion, `(assert (=> |id| c))`, in query.smtlib,
+    # generate a corresponding named assertion, `(assert (! |id| :named <id>))`.
+    # see `svem.Path.to_smt2()` for more details.
+    if args.cache_solver:
+        named_assertions = "".join(
+            [
+                f"(assert (! |{assert_id}| :named <{assert_id}>))\n"
+                for assert_id in query.assertions
+            ]
+        )
+
+        with open(dump_filename, "w") as f:
+            f.write("(set-option :produce-unsat-cores true)\n")
+            f.write("(set-logic QF_AUFBV)\n")
+            f.write(query.smtlib)
+            f.write(named_assertions)
+            f.write("(check-sat)\n")
+            f.write("(get-model)\n")
+            f.write("(get-unsat-core)\n")
+
+    else:
+        with open(dump_filename, "w") as f:
+            f.write("(set-logic QF_AUFBV)\n")
+            f.write(query.smtlib)
+            f.write("(check-sat)\n")
+            f.write("(get-model)\n")
+
+
+def is_model_valid(solver_stdout: str) -> bool:
+    # TODO: evaluate the path condition against the given model after excluding f_evm_* symbols,
+    #       since the f_evm_* symbols may still appear in valid models.
+
+    return "f_evm_" not in solver_stdout
+
+
+def solve_low_level(path_ctx: PathContext) -> SolverOutput:
+    """Invokes an external solver process to solve the given query.
+
+    Can raise TimeoutError or some Exception raised during execution"""
+
+    fun_ctx, smt2_filename = path_ctx.fun_ctx, path_ctx.dump_filename
+    args = fun_ctx.args
+
+    # make sure the smt2 file has been written
+    dump(path_ctx)
+
+    if args.verbose >= 1:
+        print("  Checking with external solver process")
+        print(f"    {args.solver_command} {smt2_filename} > {smt2_filename}.out")
+
+    # solver_timeout_assertion == 0 means no timeout,
+    # which translates to timeout_seconds=None for subprocess.run
+    timeout_seconds = t / 1000 if (t := args.solver_timeout_assertion) else None
+
+    cmd = args.solver_command.split() + [smt2_filename]
+    future = PopenFuture(cmd, timeout=timeout_seconds)
+
+    # starts the subprocess asynchronously
+    fun_ctx.solver_executor.submit(future)
+
+    # block until the external solver returns, times out, is interrupted, fails, etc.
+    try:
+        stdout, stderr, returncode = future.result()
+    except TimeoutExpired:
+        return SolverOutput(result=unknown, path_id=path_ctx.path_id)
+
+    # save solver stdout to file
+    with open(f"{smt2_filename}.out", "w") as f:
+        f.write(stdout)
+
+    # save solver stderr to file (only if there is an error)
+    if stderr:
+        with open(f"{smt2_filename}.err", "w") as f:
+            f.write(stderr)
+
+    return SolverOutput.from_result(stdout, stderr, returncode, path_ctx)
+
+
+def solve_end_to_end(ctx: PathContext) -> None:
+    """Synchronously resolves a query in a given context, which may result in 0, 1 or multiple solver invocations.
+
+    - may result in 0 invocations if the query contains a known unsat core (hence the need for the context)
+    - may result in exactly 1 invocation if the query is unsat, or sat with a valid model
+    - may result in multiple invocations if the query is sat and the model is invalid (needs refinement)
+
+    If this produces a model, it _should_ be valid.
+    """
+    fun_ctx, path_id, query = ctx.fun_ctx, ctx.path_id, ctx.query
+    args, unsat_cores = fun_ctx.args, fun_ctx.unsat_cores
+
+    verbose = print if args.verbose >= 1 else lambda *args, **kwargs: None
+    verbose(f"Checking path condition {path_id=}")
+
+    # if the query contains an unsat-core, it is unsat; no need to run the solver
+    if check_unsat_cores(query, unsat_cores):
+        verbose("  Already proven unsat")
+        return SolverOutput(unsat, path_id)
+
+    solver_output = solve_low_level(ctx)
+    result, model = solver_output.result, solver_output.model
+
+    # if the ctx is already refined, we don't need to solve again
+    if result == sat and not model.is_valid and not ctx.is_refined:
+        verbose("  Checking again with refinement")
+
+        refined_ctx = ctx.refine()
+
+        if refined_ctx.query.smtlib != query.smtlib:
+            # recurse with the refined query
+            return solve_end_to_end(refined_ctx)
+        else:
+            verbose("    Refinement did not change the query, no need to solve again")
+
+    #
+    # we are done solving, process and triage the result
+    #
+
+    # retrieve cached exec and clear the cache entry
+    exec = fun_ctx.exec_cache.pop(path_id, None)
+
+    if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
+        id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else ""
+        print(f"Trace{id_str}:")
+        print(fun_ctx.traces[path_id], end="")
+
+    if args.print_failed_states:
+        print(f"# {path_id}")
+        print(exec)
+
+    if fun_ctx.solver_executor.is_shutdown():
+        # if the thread pool is in the process of shutting down,
+        # we want to stop processing remaining models/timeouts/errors, etc.
+        return
+
+    # keep track of the solver outputs, so that we can display PASS/FAIL/TIMEOUT/ERROR later
+    fun_ctx.solver_outputs.append(solver_output)
+
+    if result == unsat:
+        if solver_output.unsat_core:
+            fun_ctx.unsat_cores.append(solver_output.unsat_core)
+        return
+
+    # model could be an empty dict here, so compare to None explicitly
+    if model is None:
+        warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}")
+        return
+
+    if model.is_valid:
+        print(red(f"Counterexample: {model}"))
+
+        fun_ctx.valid_counterexamples.append(model)
+
+        # we have a valid counterexample, so we are eligible for early exit
+        if args.early_exit:
+            fun_ctx.solver_executor.shutdown(wait=False)
+    else:
+        warn_str = f"Counterexample (potentially invalid): {model}"
+        warn_code(COUNTEREXAMPLE_INVALID, warn_str)
+        fun_ctx.invalid_counterexamples.append(model)
+
+
+def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool:
+    # return true if the given query contains any given unsat core
+    for unsat_core in unsat_cores:
+        if all(core in query.assertions for core in unsat_core):
+            return True
+    return False
+
+
+def refine(query: SMTQuery) -> SMTQuery:
+    smtlib = query.smtlib
+
+    # replace uninterpreted abstraction with actual symbols for assertion solving
+    smtlib = re.sub(
+        r"\(declare-fun f_evm_(bvmul)_([0-9]+) \(\(_ BitVec \2\) \(_ BitVec \2\)\) \(_ BitVec \2\)\)",
+        r"(define-fun f_evm_\1_\2 ((x (_ BitVec \2)) (y (_ BitVec \2))) (_ BitVec \2) (\1 x y))",
+        smtlib,
+    )
+
+    # replace `(f_evm_bvudiv_N x y)` with `(ite (= y (_ bv0 N)) (_ bv0 N) (bvudiv x y))`
+    # similarly for bvurem, bvsdiv, and bvsrem
+    # NOTE: (bvudiv x (_ bv0 N)) is *defined* to (bvneg (_ bv1 N)); while (div x 0) is undefined
+    smtlib = re.sub(
+        r"\(declare-fun f_evm_(bvudiv|bvurem|bvsdiv|bvsrem)_([0-9]+) \(\(_ BitVec \2\) \(_ BitVec \2\)\) \(_ BitVec \2\)\)",
+        r"(define-fun f_evm_\1_\2 ((x (_ BitVec \2)) (y (_ BitVec \2))) (_ BitVec \2) (ite (= y (_ bv0 \2)) (_ bv0 \2) (\1 x y)))",
+        smtlib,
+    )
+
+    return SMTQuery(smtlib, query.assertions)

From 19c772fed1742e1947e4e6785396dfdc0e1a21da Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 9 Jan 2025 10:28:31 -0800
Subject: [PATCH 17/46] fix setup_exs_no_error access pattern

---
 src/halmos/__main__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 5625fe93..733b7faf 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -342,11 +342,11 @@ def setup(ctx: FunctionContext) -> Exec:
 
     setup_exs: list[Exec] = []
 
-    match len(setup_exs_no_error):
-        case 0:
+    match setup_exs_no_error:
+        case []:
             pass
-        case 1:
-            setup_exs.append(setup_exs_no_error[0][0])
+        case [path_ctx]:
+            setup_exs.append(path_ctx.ex)
         case _:
             for path_ctx in setup_exs_no_error:
                 solver_output = solve_low_level(path_ctx)

From 5d1a24fbe7db0d5b06384eda739197ad3be33944 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 9 Jan 2025 10:29:00 -0800
Subject: [PATCH 18/46] merge smtlib.py into solve.py

---
 src/halmos/smtlib.py                    | 100 -----------------------
 src/halmos/solve.py                     | 104 +++++++++++++++++++++++-
 tests/{test_smtlib.py => test_solve.py} |  10 +--
 3 files changed, 106 insertions(+), 108 deletions(-)
 delete mode 100644 src/halmos/smtlib.py
 rename tests/{test_smtlib.py => test_solve.py} (91%)

diff --git a/src/halmos/smtlib.py b/src/halmos/smtlib.py
deleted file mode 100644
index 61eb23fe..00000000
--- a/src/halmos/smtlib.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import re
-from dataclasses import dataclass
-
-from halmos.logs import logger
-
-
-@dataclass
-class ModelVariable:
-    full_name: str
-    variable_name: str
-    solidity_type: str
-    smt_type: str
-    size_bits: int
-    value: int
-
-
-ModelVariables = dict[str, ModelVariable]
-
-# Regular expression for capturing halmos variables
-halmos_pattern = re.compile(
-    r"""
-    \(\s*define-fun\s+           # Match "(define-fun"
-    \|?(halmos_[^ |]+)\|?\s+     # Capture the full variable name, optionally wrapped in "|"
-    \(\)\s+\(_\s+([^ ]+)\s+      # Capture the SMTLIB type (e.g., "BitVec 256")
-    (\d+)\)\s+                   # Capture the bit-width or type argument
-    (                            # Group for the value
-        \#b[01]+                 # Binary value (e.g., "#b1010")
-        |\#x[0-9a-fA-F]+         # Hexadecimal value (e.g., "#xFF")
-        |\(_\s+bv\d+\s+\d+\)     # Decimal value (e.g., "(_ bv42 256)")
-    )
-    """,
-    re.VERBOSE,
-)
-
-
-def parse_const_value(value: str) -> int:
-    if value.startswith("#b"):
-        return int(value[2:], 2)
-
-    if value.startswith("#x"):
-        return int(value[2:], 16)
-
-    # we may have a group like (_ bv123 256)
-    tokens = value.split()
-    for token in tokens:
-        if token.startswith("bv"):
-            return int(token[2:])
-
-    raise ValueError(f"unknown value format: {value}")
-
-
-def _parse_match(match: re.Match) -> ModelVariable:
-    full_name = match.group(1).strip()
-    smt_type = f"{match.group(2)} {match.group(3)}"
-    size_bits = int(match.group(3))
-    value = parse_const_value(match.group(4))
-
-    # Extract name and typename from the variable name
-    parts = full_name.split("_")
-    variable_name = parts[1]
-    solidity_type = parts[2]
-
-    return ModelVariable(
-        full_name=full_name,
-        variable_name=variable_name,
-        solidity_type=solidity_type,
-        smt_type=smt_type,
-        size_bits=size_bits,
-        value=value,
-    )
-
-
-def parse_string(smtlib_str: str) -> ModelVariables:
-    """Expects a whole smtlib model output file, as produced by a solver
-    in response to a `(check-sat)` + `(get-model)` command.
-
-    Extracts halmos variables and returns them grouped by their full name"""
-
-    model_variables: dict[str, ModelVariable] = {}
-
-    # use a regex to find all the variables
-    # for now we explicitly don't try to properly parse the smtlib output
-    # because of idiosyncrasies of different solvers:
-    # - ignores the initial sat/unsat on the first line
-    # - ignores the occasional `(model)` command used by yices, stp, cvc4, etc.
-
-    for match in halmos_pattern.finditer(smtlib_str):
-        try:
-            variable = _parse_match(match)
-            model_variables[variable.full_name] = variable
-        except Exception as e:
-            logger.error(f"error parsing smtlib string '{match.string.strip()}': {e}")
-            raise e
-
-    return model_variables
-
-
-def parse_file(file_path: str) -> ModelVariables:
-    with open(file_path) as file:
-        return parse_string(file.read())
diff --git a/src/halmos/solve.py b/src/halmos/solve.py
index e234b455..27dba415 100644
--- a/src/halmos/solve.py
+++ b/src/halmos/solve.py
@@ -13,15 +13,44 @@
     COUNTEREXAMPLE_INVALID,
     COUNTEREXAMPLE_UNKNOWN,
     debug,
+    error,
     warn,
     warn_code,
 )
 from halmos.processes import PopenExecutor, PopenFuture, TimeoutExpired
 from halmos.sevm import Exec, SMTQuery
-from halmos.smtlib import ModelVariables, parse_string
 from halmos.utils import con, red, stringify
 
 
+@dataclass
+class ModelVariable:
+    full_name: str
+    variable_name: str
+    solidity_type: str
+    smt_type: str
+    size_bits: int
+    value: int
+
+
+ModelVariables = dict[str, ModelVariable]
+
+# Regular expression for capturing halmos variables
+halmos_var_pattern = re.compile(
+    r"""
+    \(\s*define-fun\s+           # Match "(define-fun"
+    \|?(halmos_[^ |]+)\|?\s+     # Capture the full variable name, optionally wrapped in "|"
+    \(\)\s+\(_\s+([^ ]+)\s+      # Capture the SMTLIB type (e.g., "BitVec 256")
+    (\d+)\)\s+                   # Capture the bit-width or type argument
+    (                            # Group for the value
+        \#b[01]+                 # Binary value (e.g., "#b1010")
+        |\#x[0-9a-fA-F]+         # Hexadecimal value (e.g., "#xFF")
+        |\(_\s+bv\d+\s+\d+\)     # Decimal value (e.g., "(_ bv42 256)")
+    )
+    """,
+    re.VERBOSE,
+)
+
+
 @dataclass(frozen=True)
 class PotentialModel:
     model: ModelVariables | str | None
@@ -189,13 +218,82 @@ def from_result(
                 return SolverOutput(unsat, path_id, unsat_core=unsat_core)
             case "sat":
                 is_valid = is_model_valid(stdout)
-                model = PotentialModel(model=parse_string(stdout), is_valid=is_valid)
+                model = PotentialModel(model=parse_model_str(stdout), is_valid=is_valid)
                 return SolverOutput(sat, path_id, model=model)
             case _:
                 return SolverOutput(unknown, path_id)
 
 
-def parse_unsat_core(output) -> list[str] | None:
+def parse_const_value(value: str) -> int:
+    match value[:2]:
+        case "#b":
+            return int(value[2:], 2)
+        case "#x":
+            return int(value[2:], 16)
+        case "bv":
+            return int(value[2:])
+        case _:
+            # we may have a group like (_ bv123 256)
+            tokens = value.split()
+            for token in tokens:
+                if token.startswith("bv"):
+                    return int(token[2:])
+
+    raise ValueError(f"unknown value format: {value}")
+
+
+def _parse_halmos_var_match(match: re.Match) -> ModelVariable:
+    full_name = match.group(1).strip()
+    smt_type = f"{match.group(2)} {match.group(3)}"
+    size_bits = int(match.group(3))
+    value = parse_const_value(match.group(4))
+
+    # Extract name and typename from the variable name
+    parts = full_name.split("_")
+    variable_name = parts[1]
+    solidity_type = parts[2]
+
+    return ModelVariable(
+        full_name=full_name,
+        variable_name=variable_name,
+        solidity_type=solidity_type,
+        smt_type=smt_type,
+        size_bits=size_bits,
+        value=value,
+    )
+
+
+def parse_model_str(smtlib_str: str) -> ModelVariables:
+    """Expects a whole smtlib model output file, as produced by a solver
+    in response to a `(check-sat)` + `(get-model)` command.
+
+    Extracts halmos variables and returns them grouped by their full name"""
+
+    model_variables: dict[str, ModelVariable] = {}
+
+    # use a regex to find all the variables
+    # for now we explicitly don't try to properly parse the smtlib output
+    # because of idiosyncrasies of different solvers:
+    # - ignores the initial sat/unsat on the first line
+    # - ignores the occasional `(model)` command used by yices, stp, cvc4, etc.
+
+    for match in halmos_var_pattern.finditer(smtlib_str):
+        try:
+            variable = _parse_halmos_var_match(match)
+            model_variables[variable.full_name] = variable
+        except Exception as e:
+            error(f"error parsing smtlib string '{match.string.strip()}': {e}")
+            raise e
+
+    return model_variables
+
+
+def parse_model_file(file_path: str) -> ModelVariables:
+    with open(file_path) as file:
+        return parse_model_str(file.read())
+
+
+def parse_unsat_core(output: str) -> list[str] | None:
     # parsing example:
     #   unsat
     #   (error "the context is unsatisfiable")  # <-- this line is optional
diff --git a/tests/test_smtlib.py b/tests/test_solve.py
similarity index 91%
rename from tests/test_smtlib.py
rename to tests/test_solve.py
index 0e3d2aaa..601b9f56 100644
--- a/tests/test_smtlib.py
+++ b/tests/test_solve.py
@@ -1,4 +1,4 @@
-from halmos.smtlib import ModelVariable, parse_string
+from halmos.solve import ModelVariable, parse_model_str
 
 
 def test_smtlib_z3_bv_output():
@@ -7,7 +7,7 @@ def test_smtlib_z3_bv_output():
         (define-fun {full_name} () (_ BitVec 256)
         #x0000000000000000000000000000000000000000000000000000000000000000)
     """
-    model = parse_string(smtlib_str)
+    model = parse_model_str(smtlib_str)
 
     assert model[full_name] == ModelVariable(
         full_name=full_name,
@@ -30,7 +30,7 @@ def test_smtlib_yices_binary_output():
         (_ BitVec 256)
         #b1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000)
     """
-    model = parse_string(smtlib_str)
+    model = parse_model_str(smtlib_str)
     assert model[full_name] == ModelVariable(
         full_name=full_name,
         variable_name="z",
@@ -47,7 +47,7 @@ def test_smtlib_yices_decimal_output():
     smtlib_str = f"""
         (define-fun {full_name} () (_ BitVec 256) (_ bv{val} 256))
     """
-    model = parse_string(smtlib_str)
+    model = parse_model_str(smtlib_str)
     assert model[full_name] == ModelVariable(
         full_name=full_name,
         variable_name="z",
@@ -76,7 +76,7 @@ def test_smtlib_stp_output():
         ( define-fun |{full_name}| () (_ BitVec 8) #x04 )
         )
     """
-    model = parse_string(smtlib_str)
+    model = parse_model_str(smtlib_str)
     assert model[full_name] == ModelVariable(
         full_name=full_name,
         variable_name="x",

From b1ebdc686d7c45a7012ad231ff8c2007b2444a41 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 9 Jan 2025 11:20:25 -0800
Subject: [PATCH 19/46] adjust halmos_var_pattern to support p_ prefix

---
 src/halmos/solve.py | 16 ++++++++--------
 tests/test_solve.py | 43 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/src/halmos/solve.py b/src/halmos/solve.py
index 27dba415..e5d321a8 100644
--- a/src/halmos/solve.py
+++ b/src/halmos/solve.py
@@ -37,14 +37,14 @@ class ModelVariable:
 # Regular expression for capturing halmos variables
 halmos_var_pattern = re.compile(
     r"""
-    \(\s*define-fun\s+           # Match "(define-fun"
-    \|?(halmos_[^ |]+)\|?\s+     # Capture the full variable name, optionally wrapped in "|"
-    \(\)\s+\(_\s+([^ ]+)\s+      # Capture the SMTLIB type (e.g., "BitVec 256")
-    (\d+)\)\s+                   # Capture the bit-width or type argument
-    (                            # Group for the value
-        \#b[01]+                 # Binary value (e.g., "#b1010")
-        |\#x[0-9a-fA-F]+         # Hexadecimal value (e.g., "#xFF")
-        |\(_\s+bv\d+\s+\d+\)     # Decimal value (e.g., "(_ bv42 256)")
+    \(\s*define-fun\s+               # Match "(define-fun"
+    \|?((?:halmos_|p_)[^ |]+)\|?\s+  # Capture either halmos_* or p_*, optionally wrapped in "|"
+    \(\)\s+\(_\s+([^ ]+)\s+          # Capture the SMTLIB type (e.g., "BitVec 256")
+    (\d+)\)\s+                       # Capture the bit-width or type argument
+    (                                # Group for the value
+        \#b[01]+                     # Binary value (e.g., "#b1010")
+        |\#x[0-9a-fA-F]+             # Hexadecimal value (e.g., "#xFF")
+        |\(_\s+bv\d+\s+\d+\)         # Decimal value (e.g., "(_ bv42 256)")
     )
     """,
     re.VERBOSE,
diff --git a/tests/test_solve.py b/tests/test_solve.py
index 601b9f56..7f9d76cb 100644
--- a/tests/test_solve.py
+++ b/tests/test_solve.py
@@ -1,8 +1,16 @@
+import pytest
+
 from halmos.solve import ModelVariable, parse_model_str
 
 
-def test_smtlib_z3_bv_output():
-    full_name = "halmos_y_uint256_043cfd7_01"
+@pytest.mark.parametrize(
+    "full_name",
+    [
+        "halmos_y_uint256_043cfd7_01",
+        "p_y_uint256_043cfd7_01",
+    ],
+)
+def test_smtlib_z3_bv_output(full_name):
     smtlib_str = f"""
         (define-fun {full_name} () (_ BitVec 256)
         #x0000000000000000000000000000000000000000000000000000000000000000)
@@ -21,8 +29,14 @@ def test_smtlib_z3_bv_output():
 
 # note that yices only produces output like this with --smt2-model-format
 # otherwise we get something like (= x #b00000100)
-def test_smtlib_yices_binary_output():
-    full_name = "halmos_z_uint256_cabf047_02"
+@pytest.mark.parametrize(
+    "full_name",
+    [
+        "halmos_z_uint256_cabf047_02",
+        "p_z_uint256_cabf047_02",
+    ],
+)
+def test_smtlib_yices_binary_output(full_name):
     smtlib_str = f"""
     (define-fun
         {full_name}
@@ -41,8 +55,14 @@ def test_smtlib_yices_binary_output():
     )
 
 
-def test_smtlib_yices_decimal_output():
-    full_name = "halmos_z_uint256_11ce021_08"
+@pytest.mark.parametrize(
+    "full_name",
+    [
+        "halmos_z_uint256_11ce021_08",
+        "p_z_uint256_11ce021_08",
+    ],
+)
+def test_smtlib_yices_decimal_output(full_name):
     val = 57896044618658097711785492504343953926634992332820282019728792003956564819968
     smtlib_str = f"""
         (define-fun {full_name} () (_ BitVec 256) (_ bv{val} 256))
@@ -58,9 +78,14 @@ def test_smtlib_yices_decimal_output():
     )
 
 
-def test_smtlib_stp_output():
-    full_name = "halmos_x_uint8_043cfd7_01"
-
+@pytest.mark.parametrize(
+    "full_name",
+    [
+        "halmos_x_uint8_043cfd7_01",
+        "p_x_uint8_043cfd7_01",
+    ],
+)
+def test_smtlib_stp_output(full_name):
     # we should tolerate:
     # - the extra (model) command
     # - duplicate variable names

From d847464e6febbd19bcbf000b45d1fb6308534ecb Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 9 Jan 2025 15:47:11 -0800
Subject: [PATCH 20/46] speed up early exit

---
 src/halmos/processes.py | 16 +++++++++-------
 src/halmos/solve.py     |  8 ++------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/halmos/processes.py b/src/halmos/processes.py
index 1eff7389..15a614b6 100644
--- a/src/halmos/processes.py
+++ b/src/halmos/processes.py
@@ -66,8 +66,6 @@ def cancel(self):
         if not self.is_running():
             return
 
-        self.process.terminate()
-
         # use psutil to kill the entire process tree (including children)
         try:
             parent_process = psutil.Process(self.process.pid)
@@ -78,8 +76,9 @@ def cancel(self):
             for process in processes:
                 process.terminate()
 
-            # give them some time to terminate
-            time.sleep(0.5)
+            # termination grace period
+            with contextlib.suppress(TimeoutExpired):
+                parent_process.wait(timeout=0.5)
 
             # after grace period, force kill
             for process in processes:
@@ -161,9 +160,12 @@ def shutdown(self, wait=True, cancel_futures=False):
 
         # if asked for immediate shutdown we cancel everything
         else:
-            with self._lock:
-                for future in self._futures:
-                    future.cancel()
+            with self._lock, concurrent.futures.ThreadPoolExecutor() as executor:
+                # kick off all cancellations in parallel
+                cancel_tasks = [executor.submit(f.cancel) for f in self._futures]
+
+                # wait for them to finish
+                concurrent.futures.wait(cancel_tasks)
 
     def map(self, fn, *iterables, timeout=None, chunksize=1):
         raise NotImplementedError()
diff --git a/src/halmos/solve.py b/src/halmos/solve.py
index e5d321a8..d46fad74 100644
--- a/src/halmos/solve.py
+++ b/src/halmos/solve.py
@@ -10,7 +10,6 @@
 from halmos.config import Config as HalmosConfig
 from halmos.constants import VERBOSITY_TRACE_COUNTEREXAMPLE, VERBOSITY_TRACE_PATHS
 from halmos.logs import (
-    COUNTEREXAMPLE_INVALID,
     COUNTEREXAMPLE_UNKNOWN,
     debug,
     error,
@@ -19,7 +18,7 @@
 )
 from halmos.processes import PopenExecutor, PopenFuture, TimeoutExpired
 from halmos.sevm import Exec, SMTQuery
-from halmos.utils import con, red, stringify
+from halmos.utils import con, stringify
 
 
 @dataclass
@@ -468,16 +467,13 @@ def solve_end_to_end(ctx: PathContext) -> None:
         return
 
     if model.is_valid:
-        print(red(f"Counterexample: {model}"))
-
         fun_ctx.valid_counterexamples.append(model)
 
         # we have a valid counterexample, so we are eligible for early exit
         if args.early_exit:
+            debug(f"Shutting down {fun_ctx.info.name}'s solver executor")
             fun_ctx.solver_executor.shutdown(wait=False)
     else:
-        warn_str = f"Counterexample (potentially invalid): {model}"
-        warn_code(COUNTEREXAMPLE_INVALID, warn_str)
         fun_ctx.invalid_counterexamples.append(model)
 
 

From 03ed9489a8f40fc003d16cd3d97e256c57aece1d Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 9 Jan 2025 15:55:28 -0800
Subject: [PATCH 21/46] bring back model output in solve_end_to_end

---
 src/halmos/solve.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/halmos/solve.py b/src/halmos/solve.py
index d46fad74..b93485b4 100644
--- a/src/halmos/solve.py
+++ b/src/halmos/solve.py
@@ -10,6 +10,7 @@
 from halmos.config import Config as HalmosConfig
 from halmos.constants import VERBOSITY_TRACE_COUNTEREXAMPLE, VERBOSITY_TRACE_PATHS
 from halmos.logs import (
+    COUNTEREXAMPLE_INVALID,
     COUNTEREXAMPLE_UNKNOWN,
     debug,
     error,
@@ -18,7 +19,7 @@
 )
 from halmos.processes import PopenExecutor, PopenFuture, TimeoutExpired
 from halmos.sevm import Exec, SMTQuery
-from halmos.utils import con, stringify
+from halmos.utils import con, red, stringify
 
 
 @dataclass
@@ -467,6 +468,7 @@ def solve_end_to_end(ctx: PathContext) -> None:
         return
 
     if model.is_valid:
+        print(red(f"Counterexample: {model}"))
         fun_ctx.valid_counterexamples.append(model)
 
         # we have a valid counterexample, so we are eligible for early exit
@@ -474,6 +476,9 @@ def solve_end_to_end(ctx: PathContext) -> None:
             debug(f"Shutting down {fun_ctx.info.name}'s solver executor")
             fun_ctx.solver_executor.shutdown(wait=False)
     else:
+        warn_str = f"Counterexample (potentially invalid): {model}"
+        warn_code(COUNTEREXAMPLE_INVALID, warn_str)
+
         fun_ctx.invalid_counterexamples.append(model)
 
 

From a6fd0671c5aa4fb5602c764a32b86ff6930a7538 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 9 Jan 2025 16:50:12 -0800
Subject: [PATCH 22/46] shutdown executors on signal

---
 src/halmos/__main__.py  |  3 +++
 src/halmos/processes.py | 20 ++++++++++++++++++++
 src/halmos/solve.py     | 10 +++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 733b7faf..31cf18fb 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -53,6 +53,7 @@
     warn_code,
 )
 from .mapper import BuildOut, DeployAddressMapper
+from .processes import ExecutorRegistry
 from .sevm import (
     EMPTY_BALANCE,
     EVM,
@@ -821,6 +822,8 @@ def _main(_args=None) -> MainResult:
     #
 
     def on_exit(exitcode: int) -> MainResult:
+        ExecutorRegistry().shutdown_all()
+
         result = MainResult(exitcode, test_results_map)
 
         if args.json_output:
diff --git a/src/halmos/processes.py b/src/halmos/processes.py
index 15a614b6..83009942 100644
--- a/src/halmos/processes.py
+++ b/src/halmos/processes.py
@@ -3,11 +3,31 @@
 import subprocess
 import threading
 import time
+import weakref
 from subprocess import PIPE, Popen, TimeoutExpired
 
 import psutil
 
 
+class ExecutorRegistry:
+    _instance = None
+
+    # Singleton pattern
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._executors = weakref.WeakSet()
+        return cls._instance
+
+    def register(self, executor):
+        self._executors.add(executor)
+
+    def shutdown_all(self):
+        print("Shutting down all executors")
+        for ex in list(self._executors):
+            ex.shutdown(wait=False)
+
+
 class PopenFuture(concurrent.futures.Future):
     cmd: list[str]
     timeout: float | None  # in seconds, None means no timeout
diff --git a/src/halmos/solve.py b/src/halmos/solve.py
index b93485b4..6532a00e 100644
--- a/src/halmos/solve.py
+++ b/src/halmos/solve.py
@@ -17,7 +17,12 @@
     warn,
     warn_code,
 )
-from halmos.processes import PopenExecutor, PopenFuture, TimeoutExpired
+from halmos.processes import (
+    ExecutorRegistry,
+    PopenExecutor,
+    PopenFuture,
+    TimeoutExpired,
+)
 from halmos.sevm import Exec, SMTQuery
 from halmos.utils import con, red, stringify
 
@@ -146,6 +151,9 @@ def __post_init__(self):
         )
         object.__setattr__(self, "thread_pool", thread_pool)
 
+        # register the solver executor to be shutdown on exit
+        ExecutorRegistry().register(self.solver_executor)
+
 
 @dataclass(frozen=True)
 class PathContext:

From f41a3cbe35be432d9ec50fd2024d844f73d9a696 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 16 Jan 2025 15:41:13 -0800
Subject: [PATCH 23/46] disable ExecutorRegistry().shutdown_all() for now

---
 src/halmos/__main__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 31cf18fb..0016a1c3 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -53,7 +53,6 @@
     warn_code,
 )
 from .mapper import BuildOut, DeployAddressMapper
-from .processes import ExecutorRegistry
 from .sevm import (
     EMPTY_BALANCE,
     EVM,
@@ -822,7 +821,8 @@ def _main(_args=None) -> MainResult:
     #
 
     def on_exit(exitcode: int) -> MainResult:
-        ExecutorRegistry().shutdown_all()
+        # disabling this for now because it causes an assertion violation in z3
+        # ExecutorRegistry().shutdown_all()
 
         result = MainResult(exitcode, test_results_map)
 

From 7b38cdce649543e66ea591cff373164316acd2d3 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 16 Jan 2025 16:16:57 -0800
Subject: [PATCH 24/46] fix bad merge

---
 src/halmos/__main__.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 0016a1c3..8769b14c 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -50,6 +50,7 @@
     error,
     logger,
     logger_unique,
+    warn,
     warn_code,
 )
 from .mapper import BuildOut, DeployAddressMapper
@@ -292,8 +293,6 @@ def setup(ctx: FunctionContext) -> Exec:
             print(setup_timer.report())
         return setup_ex
 
-    setup_timer.create_subtimer("run")
-
     # TODO: dyn_params may need to be passed to mk_calldata in run()
     calldata, dyn_params = mk_calldata(ctx.contract_ctx.abi, setup_info, args)
     setup_ex.path.process_dyn_params(dyn_params)

From 0125a3d5ad155a4c4e05da5af1cbf5605d2eb708 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 17 Jan 2025 11:00:55 -0800
Subject: [PATCH 25/46] fix z3 thread safety issue (--print-failed-states
 disabled for now)

---
 src/halmos/__main__.py |  72 +++++++++++++++++++++++++---
 src/halmos/sevm.py     |   1 +
 src/halmos/solve.py    | 106 +++++++++++------------------------------
 3 files changed, 95 insertions(+), 84 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 8769b14c..9ff83a98 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -43,6 +43,8 @@
 )
 from .exceptions import HalmosException
 from .logs import (
+    COUNTEREXAMPLE_INVALID,
+    COUNTEREXAMPLE_UNKNOWN,
     INTERNAL_ERROR,
     LOOP_BOUND,
     REVERT_ALL,
@@ -460,6 +462,62 @@ def run_test(ctx: FunctionContext) -> TestResult:
     potential = 0
     stuck = []
 
+    def solve_end_to_end_callback(future: Future):
+        # beware: this function may be called from threads other than the main thread,
+        # so we must be careful to avoid referencing any z3 objects / contexts
+
+        if e := future.exception():
+            error(f"encountered exception during assertion solving: {e!r}")
+
+        #
+        # we are done solving, process and triage the result
+        #
+
+        solver_output = future.result()
+        result, model = solver_output.result, solver_output.model
+
+        if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
+            id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else ""
+            print(f"Trace{id_str}:")
+            print(ctx.traces[path_id], end="")
+
+        # unsafe, can't print an Exec from a different thread
+        # if args.print_failed_states:
+        #     print(f"# {path_id}")
+        #     print(exec)
+
+        if ctx.solver_executor.is_shutdown():
+            # if the thread pool is in the process of shutting down,
+            # we want to stop processing remaining models/timeouts/errors, etc.
+            return
+
+        # keep track of the solver outputs, so that we can display PASS/FAIL/TIMEOUT/ERROR later
+        ctx.solver_outputs.append(solver_output)
+
+        if result == unsat:
+            if solver_output.unsat_core:
+                ctx.unsat_cores.append(solver_output.unsat_core)
+            return
+
+        # model could be an empty dict here, so compare to None explicitly
+        if model is None:
+            warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}")
+            return
+
+        if model.is_valid:
+            print(red(f"Counterexample: {model}"))
+            ctx.valid_counterexamples.append(model)
+
+            # we have a valid counterexample, so we are eligible for early exit
+            if args.early_exit:
+                debug(f"Shutting down {ctx.info.name}'s solver executor")
+                ctx.solver_executor.shutdown(wait=False)
+        else:
+            warn_str = f"Counterexample (potentially invalid): {model}"
+            warn_code(COUNTEREXAMPLE_INVALID, warn_str)
+
+            ctx.invalid_counterexamples.append(model)
+
     #
     # consume the sevm.run() generator
     # (actually triggers path exploration)
@@ -497,19 +555,19 @@ def run_test(ctx: FunctionContext) -> TestResult:
 
             query: SMTQuery = ex.path.to_smt2(args)
 
+            # beware: because this object crosses thread boundaries, we must be careful to
+            # avoid any reference to z3 objects
             path_ctx = PathContext(
+                args=args,
                 path_id=path_id,
-                ex=ex,
                 query=query,
-                fun_ctx=ctx,
+                dump_dirname=dump_dirname,
+                unsat_cores=ctx.unsat_cores,
+                solver_executor=ctx.solver_executor,
             )
 
-            def log_future_result(future: Future):
-                if e := future.exception():
-                    error(f"encountered exception during assertion solving: {e!r}")
-
             solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx)
-            solve_future.add_done_callback(log_future_result)
+            solve_future.add_done_callback(solve_end_to_end_callback)
             submitted_futures.append(solve_future)
 
         elif ex.context.is_stuck():
diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py
index fbfe35a8..efd9961c 100644
--- a/src/halmos/sevm.py
+++ b/src/halmos/sevm.py
@@ -484,6 +484,7 @@ def push(self, v: Word) -> None:
         else:
             if not (eq(v.sort(), BitVecSort256) or is_bool(v)):
                 raise ValueError(v)
+
             self.stack.append(simplify(v))
 
     def pop(self) -> Word:
diff --git a/src/halmos/solve.py b/src/halmos/solve.py
index 6532a00e..9554bc7a 100644
--- a/src/halmos/solve.py
+++ b/src/halmos/solve.py
@@ -8,14 +8,10 @@
 
 from halmos.calldata import FunctionInfo
 from halmos.config import Config as HalmosConfig
-from halmos.constants import VERBOSITY_TRACE_COUNTEREXAMPLE, VERBOSITY_TRACE_PATHS
 from halmos.logs import (
-    COUNTEREXAMPLE_INVALID,
-    COUNTEREXAMPLE_UNKNOWN,
     debug,
     error,
     warn,
-    warn_code,
 )
 from halmos.processes import (
     ExecutorRegistry,
@@ -24,7 +20,7 @@
     TimeoutExpired,
 )
 from halmos.sevm import Exec, SMTQuery
-from halmos.utils import con, red, stringify
+from halmos.utils import hexify
 
 
 @dataclass
@@ -68,9 +64,8 @@ def __str__(self) -> str:
 
         formatted = []
         for v in self.model.values():
-            # TODO: ideally we would avoid wrapping with con() here
-            stringified = stringify(v.full_name, con(v.value))
-            formatted.append(f"\n    {v.full_name} = {stringified}")
+            # TODO: parse type and render accordingly
+            formatted.append(f"\n    {v.full_name} = {hexify(v.value)}")
         return "".join(sorted(formatted)) if formatted else "∅"
 
 
@@ -157,17 +152,22 @@ def __post_init__(self):
 
 @dataclass(frozen=True)
 class PathContext:
+    args: HalmosConfig
+
     # id of this path
     path_id: int
 
-    # path execution object
-    ex: Exec
-
     # SMT query
     query: SMTQuery
 
-    # backlink to the parent function context
-    fun_ctx: FunctionContext
+    # the solver executor is shared across all paths in the same function
+    # we just hold a reference to it
+    solver_executor: PopenExecutor
+
+    # list of unsat cores (inherited from parent function context)
+    unsat_cores: list[list]
+
+    dump_dirname: str
 
     # filename for this path (generated in __post_init__)
     dump_filename: str = field(init=False)
@@ -176,18 +176,21 @@ class PathContext:
 
     def __post_init__(self):
         refined_str = ".refined" if self.is_refined else ""
-        dirname = self.fun_ctx.dump_dirname
-        filename = os.path.join(dirname, f"{self.path_id}{refined_str}.smt2")
+        filename = os.path.join(self.dump_dirname, f"{self.path_id}{refined_str}.smt2")
 
         # use __setattr__ because this is a frozen dataclass
         object.__setattr__(self, "dump_filename", filename)
 
     def refine(self) -> "PathContext":
         return PathContext(
+            # inherited
+            args=self.args,
             path_id=self.path_id,
-            ex=self.ex,
+            solver_executor=self.solver_executor,
+            unsat_cores=self.unsat_cores,
+            dump_dirname=self.dump_dirname,
+            # refined
             query=refine(self.query),
-            fun_ctx=self.fun_ctx,
             is_refined=True,
         )
 
@@ -215,8 +218,7 @@ def from_result(
         newline_idx = stdout.find("\n")
         first_line = stdout[:newline_idx] if newline_idx != -1 else stdout
 
-        args = path_ctx.fun_ctx.args
-        path_id = path_ctx.path_id
+        args, path_id = path_ctx.args, path_ctx.path_id
         if args.verbose >= 1:
             debug(f"    {first_line}")
 
@@ -321,9 +323,7 @@ def parse_unsat_core(output: str) -> list[str] | None:
 def dump(
     path_ctx: PathContext,
 ) -> tuple[CheckSatResult, PotentialModel | None, list | None]:
-    args = path_ctx.fun_ctx.args
-    query = path_ctx.query
-    dump_filename = path_ctx.dump_filename
+    args, query, dump_filename = path_ctx.args, path_ctx.query, path_ctx.dump_filename
 
     if args.verbose >= 1:
         debug(f"Writing SMT query to {dump_filename}")
@@ -368,8 +368,7 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput:
 
     Can raise TimeoutError or some Exception raised during execution"""
 
-    fun_ctx, smt2_filename = path_ctx.fun_ctx, path_ctx.dump_filename
-    args = fun_ctx.args
+    args, smt2_filename = path_ctx.args, path_ctx.dump_filename
 
     # make sure the smt2 file has been written
     dump(path_ctx)
@@ -386,7 +385,7 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput:
     future = PopenFuture(cmd, timeout=timeout_seconds)
 
     # starts the subprocess asynchronously
-    fun_ctx.solver_executor.submit(future)
+    path_ctx.solver_executor.submit(future)
 
     # block until the external solver returns, times out, is interrupted, fails, etc.
     try:
@@ -406,7 +405,7 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput:
     return SolverOutput.from_result(stdout, stderr, returncode, path_ctx)
 
 
-def solve_end_to_end(ctx: PathContext) -> None:
+def solve_end_to_end(ctx: PathContext) -> SolverOutput:
     """Synchronously resolves a query in a given context, which may result in 0, 1 or multiple solver invocations.
 
     - may result in 0 invocations if the query contains a known unsat core (hence the need for the context)
@@ -415,14 +414,13 @@ def solve_end_to_end(ctx: PathContext) -> None:
 
     If this produces a model, it _should_ be valid.
     """
-    fun_ctx, path_id, query = ctx.fun_ctx, ctx.path_id, ctx.query
-    args, unsat_cores = fun_ctx.args, fun_ctx.unsat_cores
+    path_id, query = ctx.path_id, ctx.query
 
-    verbose = print if args.verbose >= 1 else lambda *args, **kwargs: None
+    verbose = print if ctx.args.verbose >= 1 else lambda *args, **kwargs: None
     verbose(f"Checking path condition {path_id=}")
 
     # if the query contains an unsat-core, it is unsat; no need to run the solver
-    if check_unsat_cores(query, unsat_cores):
+    if check_unsat_cores(query, ctx.unsat_cores):
         verbose("  Already proven unsat")
         return SolverOutput(unsat, path_id)
 
@@ -441,53 +439,7 @@ def solve_end_to_end(ctx: PathContext) -> None:
         else:
             verbose("    Refinement did not change the query, no need to solve again")
 
-    #
-    # we are done solving, process and triage the result
-    #
-
-    # retrieve cached exec and clear the cache entry
-    exec = fun_ctx.exec_cache.pop(path_id, None)
-
-    if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
-        id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else ""
-        print(f"Trace{id_str}:")
-        print(fun_ctx.traces[path_id], end="")
-
-    if args.print_failed_states:
-        print(f"# {path_id}")
-        print(exec)
-
-    if fun_ctx.solver_executor.is_shutdown():
-        # if the thread pool is in the process of shutting down,
-        # we want to stop processing remaining models/timeouts/errors, etc.
-        return
-
-    # keep track of the solver outputs, so that we can display PASS/FAIL/TIMEOUT/ERROR later
-    fun_ctx.solver_outputs.append(solver_output)
-
-    if result == unsat:
-        if solver_output.unsat_core:
-            fun_ctx.unsat_cores.append(solver_output.unsat_core)
-        return
-
-    # model could be an empty dict here, so compare to None explicitly
-    if model is None:
-        warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}")
-        return
-
-    if model.is_valid:
-        print(red(f"Counterexample: {model}"))
-        fun_ctx.valid_counterexamples.append(model)
-
-        # we have a valid counterexample, so we are eligible for early exit
-        if args.early_exit:
-            debug(f"Shutting down {fun_ctx.info.name}'s solver executor")
-            fun_ctx.solver_executor.shutdown(wait=False)
-    else:
-        warn_str = f"Counterexample (potentially invalid): {model}"
-        warn_code(COUNTEREXAMPLE_INVALID, warn_str)
-
-        fun_ctx.invalid_counterexamples.append(model)
+    return solver_output
 
 
 def check_unsat_cores(query: SMTQuery, unsat_cores: list[list]) -> bool:

From 51657bace279cb6fa0f9cb88f545f752eda30081 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 17 Jan 2025 11:16:48 -0800
Subject: [PATCH 26/46] fix setup() with multiple paths

---
 src/halmos/__main__.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 9ff83a98..6ea3db08 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -312,7 +312,7 @@ def setup(ctx: FunctionContext) -> Exec:
     )
 
     setup_exs_all = sevm.run(setup_ex)
-    setup_exs_no_error: list[PathContext] = []
+    setup_exs_no_error: list[Exec] = []
 
     for path_id, setup_ex in enumerate(setup_exs_all):
         if args.verbose >= VERBOSITY_TRACE_SETUP:
@@ -333,26 +333,29 @@ def setup(ctx: FunctionContext) -> Exec:
                 render_trace(setup_ex.context)
 
         else:
-            path_ctx = PathContext(
-                path_id=path_id,
-                ex=setup_ex,
-                query=setup_ex.path.to_smt2(args),
-                fun_ctx=ctx,
-            )
-            setup_exs_no_error.append(path_ctx)
+            setup_exs_no_error.append(setup_ex)
 
     setup_exs: list[Exec] = []
 
     match setup_exs_no_error:
         case []:
             pass
-        case [path_ctx]:
-            setup_exs.append(path_ctx.ex)
+        case [ex]:
+            setup_exs.append(ex)
         case _:
-            for path_ctx in setup_exs_no_error:
+            for path_id, ex in enumerate(setup_exs_no_error):
+                path_ctx = PathContext(
+                    args=args,
+                    path_id=path_id,
+                    query=ex.path.to_smt2(args),
+                    solver_executor=ctx.solver_executor,
+                    unsat_cores=ctx.unsat_cores,
+                    dump_dirname=ctx.dump_dirname,
+                )
+
                 solver_output = solve_low_level(path_ctx)
                 if solver_output.result != unsat:
-                    setup_exs.append(path_ctx.ex)
+                    setup_exs.append(ex)
                     if len(setup_exs) > 1:
                         break
 

From 01dcb3b18cec242d6dc3127cee43af80dd65fdd8 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 17 Jan 2025 14:46:37 -0800
Subject: [PATCH 27/46] share a SolvingContext between FunctionContext and
 PathContext

---
 src/halmos/__main__.py | 17 +++++------
 src/halmos/solve.py    | 69 ++++++++++++++++++------------------------
 2 files changed, 37 insertions(+), 49 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 6ea3db08..d1cf22ee 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -348,9 +348,7 @@ def setup(ctx: FunctionContext) -> Exec:
                     args=args,
                     path_id=path_id,
                     query=ex.path.to_smt2(args),
-                    solver_executor=ctx.solver_executor,
-                    unsat_cores=ctx.unsat_cores,
-                    dump_dirname=ctx.dump_dirname,
+                    solving_ctx=ctx.solving_ctx,
                 )
 
                 solver_output = solve_low_level(path_ctx)
@@ -400,11 +398,12 @@ def run_test(ctx: FunctionContext) -> TestResult:
     # prepare test dump directory if needed
     #
 
-    dump_dirname = ctx.dump_dirname
+    dump_dirname = ctx.solving_ctx.dump_dirname
     should_dump = args.dump_smt_queries or args.solver_command
     if should_dump and not os.path.isdir(dump_dirname):
+        if args.verbose >= 1:
+            print(f"Generating SMT queries in {dump_dirname}")
         os.makedirs(dump_dirname)
-        print(f"Generating SMT queries in {dump_dirname}")
 
     #
     # prepare calldata
@@ -489,7 +488,7 @@ def solve_end_to_end_callback(future: Future):
         #     print(f"# {path_id}")
         #     print(exec)
 
-        if ctx.solver_executor.is_shutdown():
+        if ctx.solving_ctx.executor.is_shutdown():
             # if the thread pool is in the process of shutting down,
             # we want to stop processing remaining models/timeouts/errors, etc.
             return
@@ -499,7 +498,7 @@ def solve_end_to_end_callback(future: Future):
 
         if result == unsat:
             if solver_output.unsat_core:
-                ctx.unsat_cores.append(solver_output.unsat_core)
+                ctx.append_unsat_core(solver_output.unsat_core)
             return
 
         # model could be an empty dict here, so compare to None explicitly
@@ -564,9 +563,7 @@ def solve_end_to_end_callback(future: Future):
                 args=args,
                 path_id=path_id,
                 query=query,
-                dump_dirname=dump_dirname,
-                unsat_cores=ctx.unsat_cores,
-                solver_executor=ctx.solver_executor,
+                solving_ctx=ctx.solving_ctx,
             )
 
             solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx)
diff --git a/src/halmos/solve.py b/src/halmos/solve.py
index 9554bc7a..87382882 100644
--- a/src/halmos/solve.py
+++ b/src/halmos/solve.py
@@ -92,6 +92,18 @@ class ContractContext:
     build_out_map: dict
 
 
+@dataclass(frozen=True)
+class SolvingContext:
+    # directory for dumping solver files
+    dump_dirname: str
+
+    # shared solver executor for all paths in the same function
+    executor: PopenExecutor = field(default_factory=PopenExecutor)
+
+    # list of unsat cores
+    unsat_cores: list[list] = field(default_factory=list)
+
+
 @dataclass(frozen=True)
 class FunctionContext:
     # config with function-specific overrides
@@ -109,15 +121,12 @@ class FunctionContext:
     # optional starting state
     setup_ex: Exec | None = None
 
-    # dump directory for this function (generated in __post_init__)
-    dump_dirname: str = field(init=False)
+    # function-level solving context
+    solving_ctx: SolvingContext = field(init=False)
 
     # function-level thread pool that drives assertion solving
     thread_pool: ThreadPoolExecutor = field(init=False)
 
-    # path-specific queries are submitted to this function-specific executor
-    solver_executor: PopenExecutor = field(default_factory=PopenExecutor)
-
     # list of solver outputs for this function
     solver_outputs: list["SolverOutput"] = field(default_factory=list)
 
@@ -127,9 +136,6 @@ class FunctionContext:
     # list of potentially invalid counterexamples for this function
     invalid_counterexamples: list[PotentialModel] = field(default_factory=list)
 
-    # list of unsat cores for this function
-    unsat_cores: list[list] = field(default_factory=list)
-
     # map from path id to trace
     traces: dict[int, str] = field(default_factory=dict)
 
@@ -138,7 +144,8 @@ class FunctionContext:
 
     def __post_init__(self):
         dirname = f"/tmp/{self.info.name}-{uuid.uuid4().hex}"
-        object.__setattr__(self, "dump_dirname", dirname)
+        solving_ctx = SolvingContext(dump_dirname=dirname)
+        object.__setattr__(self, "solving_ctx", solving_ctx)
 
         thread_pool = ThreadPoolExecutor(
             max_workers=self.args.solver_threads,
@@ -147,49 +154,33 @@ def __post_init__(self):
         object.__setattr__(self, "thread_pool", thread_pool)
 
         # register the solver executor to be shutdown on exit
-        ExecutorRegistry().register(self.solver_executor)
+        ExecutorRegistry().register(solving_ctx.executor)
+
+    def append_unsat_core(self, unsat_core: list[str]) -> None:
+        self.solving_ctx.unsat_cores.append(unsat_core)
 
 
 @dataclass(frozen=True)
 class PathContext:
     args: HalmosConfig
-
-    # id of this path
     path_id: int
-
-    # SMT query
+    solving_ctx: SolvingContext
     query: SMTQuery
-
-    # the solver executor is shared across all paths in the same function
-    # we just hold a reference to it
-    solver_executor: PopenExecutor
-
-    # list of unsat cores (inherited from parent function context)
-    unsat_cores: list[list]
-
-    dump_dirname: str
-
-    # filename for this path (generated in __post_init__)
-    dump_filename: str = field(init=False)
-
     is_refined: bool = False
 
-    def __post_init__(self):
+    @property
+    def dump_filename(self) -> str:
         refined_str = ".refined" if self.is_refined else ""
-        filename = os.path.join(self.dump_dirname, f"{self.path_id}{refined_str}.smt2")
-
-        # use __setattr__ because this is a frozen dataclass
-        object.__setattr__(self, "dump_filename", filename)
+        filename = os.path.join(
+            self.solving_ctx.dump_dirname, f"{self.path_id}{refined_str}.smt2"
+        )
+        return filename
 
     def refine(self) -> "PathContext":
         return PathContext(
-            # inherited
             args=self.args,
             path_id=self.path_id,
-            solver_executor=self.solver_executor,
-            unsat_cores=self.unsat_cores,
-            dump_dirname=self.dump_dirname,
-            # refined
+            solving_ctx=self.solving_ctx,
             query=refine(self.query),
             is_refined=True,
         )
@@ -385,7 +376,7 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput:
     future = PopenFuture(cmd, timeout=timeout_seconds)
 
     # starts the subprocess asynchronously
-    path_ctx.solver_executor.submit(future)
+    path_ctx.solving_ctx.executor.submit(future)
 
     # block until the external solver returns, times out, is interrupted, fails, etc.
     try:
@@ -420,7 +411,7 @@ def solve_end_to_end(ctx: PathContext) -> SolverOutput:
     verbose(f"Checking path condition {path_id=}")
 
     # if the query contains an unsat-core, it is unsat; no need to run the solver
-    if check_unsat_cores(query, ctx.unsat_cores):
+    if check_unsat_cores(query, ctx.solving_ctx.unsat_cores):
         verbose("  Already proven unsat")
         return SolverOutput(unsat, path_id)
 

From 85ca04c4b5018cde16ac3e1e084830541650e0cb Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 17 Jan 2025 16:19:46 -0800
Subject: [PATCH 28/46] use tempfile and path to manage smt2 files

(hopefully works on windows too)
---
 src/halmos/__main__.py | 11 -------
 src/halmos/solve.py    | 68 ++++++++++++++++++++++++++----------------
 2 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index d1cf22ee..0bcd9466 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -394,17 +394,6 @@ def run_test(ctx: FunctionContext) -> TestResult:
     if args.verbose >= 1:
         print(f"Executing {funname}")
 
-    #
-    # prepare test dump directory if needed
-    #
-
-    dump_dirname = ctx.solving_ctx.dump_dirname
-    should_dump = args.dump_smt_queries or args.solver_command
-    if should_dump and not os.path.isdir(dump_dirname):
-        if args.verbose >= 1:
-            print(f"Generating SMT queries in {dump_dirname}")
-        os.makedirs(dump_dirname)
-
     #
     # prepare calldata
     #
diff --git a/src/halmos/solve.py b/src/halmos/solve.py
index 87382882..b9e91768 100644
--- a/src/halmos/solve.py
+++ b/src/halmos/solve.py
@@ -1,8 +1,8 @@
-import os
 import re
-import uuid
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass, field
+from pathlib import Path
+from tempfile import TemporaryDirectory
 
 from z3 import CheckSatResult, Solver, sat, unknown, unsat
 
@@ -95,7 +95,7 @@ class ContractContext:
 @dataclass(frozen=True)
 class SolvingContext:
     # directory for dumping solver files
-    dump_dirname: str
+    dump_dir: TemporaryDirectory
 
     # shared solver executor for all paths in the same function
     executor: PopenExecutor = field(default_factory=PopenExecutor)
@@ -122,6 +122,7 @@ class FunctionContext:
     setup_ex: Exec | None = None
 
     # function-level solving context
+    # the FunctionContext initializes and owns the SolvingContext
     solving_ctx: SolvingContext = field(init=False)
 
     # function-level thread pool that drives assertion solving
@@ -143,8 +144,25 @@ class FunctionContext:
     exec_cache: dict[int, Exec] = field(default_factory=dict)
 
     def __post_init__(self):
-        dirname = f"/tmp/{self.info.name}-{uuid.uuid4().hex}"
-        solving_ctx = SolvingContext(dump_dirname=dirname)
+        args = self.args
+
+        # create a temporary directory for dumping solver files
+        prefix = (
+            f"{self.info.name}-"
+            if self.info.name
+            else f"{self.contract_ctx.name}-deploy-"
+        )
+
+        # if the user explicitly enabled dumping, we don't want to delete the directory on exit
+        delete = not self.args.dump_smt_queries
+        dump_dir = TemporaryDirectory(
+            prefix=prefix, ignore_cleanup_errors=True, delete=delete
+        )
+
+        if args.verbose >= 1 or args.dump_smt_queries:
+            print(f"Generating SMT queries in {dump_dir.name}")
+
+        solving_ctx = SolvingContext(dump_dir=dump_dir)
         object.__setattr__(self, "solving_ctx", solving_ctx)
 
         thread_pool = ThreadPoolExecutor(
@@ -169,12 +187,11 @@ class PathContext:
     is_refined: bool = False
 
     @property
-    def dump_filename(self) -> str:
+    def dump_file(self) -> Path:
         refined_str = ".refined" if self.is_refined else ""
-        filename = os.path.join(
-            self.solving_ctx.dump_dirname, f"{self.path_id}{refined_str}.smt2"
-        )
-        return filename
+        filename = f"{self.path_id}{refined_str}.smt2"
+
+        return Path(self.solving_ctx.dump_dir.name) / filename
 
     def refine(self) -> "PathContext":
         return PathContext(
@@ -314,10 +331,10 @@ def parse_unsat_core(output: str) -> list[str] | None:
 def dump(
     path_ctx: PathContext,
 ) -> tuple[CheckSatResult, PotentialModel | None, list | None]:
-    args, query, dump_filename = path_ctx.args, path_ctx.query, path_ctx.dump_filename
+    args, query, dump_file = path_ctx.args, path_ctx.query, path_ctx.dump_file
 
     if args.verbose >= 1:
-        debug(f"Writing SMT query to {dump_filename}")
+        debug(f"Writing SMT query to {dump_file}")
 
     # for each implication assertion, `(assert (=> |id| c))`, in query.smtlib,
     # generate a corresponding named assertion, `(assert (! |id| :named <id>))`.
@@ -330,21 +347,20 @@ def dump(
             ]
         )
 
-        with open(dump_filename, "w") as f:
-            f.write("(set-option :produce-unsat-cores true)\n")
-            f.write("(set-logic QF_AUFBV)\n")
-            f.write(query.smtlib)
-            f.write(named_assertions)
-            f.write("(check-sat)\n")
-            f.write("(get-model)\n")
-            f.write("(get-unsat-core)\n")
+        dump_file.write_text(
+            "(set-option :produce-unsat-cores true)\n"
+            "(set-logic QF_AUFBV)\n"
+            f"{query.smtlib}\n"
+            f"{named_assertions}"
+            "(check-sat)\n"
+            "(get-model)\n"
+            "(get-unsat-core)\n"
+        )
 
     else:
-        with open(dump_filename, "w") as f:
-            f.write("(set-logic QF_AUFBV)\n")
-            f.write(query.smtlib)
-            f.write("(check-sat)\n")
-            f.write("(get-model)\n")
+        dump_file.write_text(
+            f"(set-logic QF_AUFBV)\n{query.smtlib}\n(check-sat)\n(get-model)\n"
+        )
 
 
 def is_model_valid(solver_stdout: str) -> bool:
@@ -359,7 +375,7 @@ def solve_low_level(path_ctx: PathContext) -> SolverOutput:
 
     Can raise TimeoutError or some Exception raised during execution"""
 
-    args, smt2_filename = path_ctx.args, path_ctx.dump_filename
+    args, smt2_filename = path_ctx.args, str(path_ctx.dump_file)
 
     # make sure the smt2 file has been written
     dump(path_ctx)

From 5831899c90c758d2b2ac0df0a08457db3626d52b Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 17 Jan 2025 16:41:01 -0800
Subject: [PATCH 29/46] TemporaryDirectory 3.11 compatibility support

---
 src/halmos/solve.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/halmos/solve.py b/src/halmos/solve.py
index b9e91768..7518c238 100644
--- a/src/halmos/solve.py
+++ b/src/halmos/solve.py
@@ -150,14 +150,18 @@ def __post_init__(self):
         prefix = (
             f"{self.info.name}-"
             if self.info.name
-            else f"{self.contract_ctx.name}-deploy-"
+            else f"{self.contract_ctx.name}-constructor-"
         )
 
         # if the user explicitly enabled dumping, we don't want to delete the directory on exit
         delete = not self.args.dump_smt_queries
-        dump_dir = TemporaryDirectory(
-            prefix=prefix, ignore_cleanup_errors=True, delete=delete
-        )
+
+        # ideally we would pass `delete=delete` to the constructor, but it's in >=3.12
+        dump_dir = TemporaryDirectory(prefix=prefix, ignore_cleanup_errors=True)
+
+        # If user wants to keep the files, prevent cleanup on exit
+        if not delete:
+            dump_dir._finalizer.detach()
 
         if args.verbose >= 1 or args.dump_smt_queries:
             print(f"Generating SMT queries in {dump_dir.name}")

From 61834c08fac9dae85911bfeb2413f5f6292a9734 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 17 Jan 2025 16:47:17 -0800
Subject: [PATCH 30/46] trying a thing on windows

---
 src/halmos/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/halmos/config.py b/src/halmos/config.py
index dcf567ca..e7f10381 100644
--- a/src/halmos/config.py
+++ b/src/halmos/config.py
@@ -441,7 +441,7 @@ class Config:
 
     solver_command: str = arg(
         help="use the given command when invoking the solver",
-        global_default="z3",
+        global_default="z3.exe" if os.name == "nt" else "z3",
         metavar="COMMAND",
         group=solver,
     )

From d3a63a3fa20b5851e0d79ed2bc88e9621c5dea6f Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 17 Jan 2025 16:54:12 -0800
Subject: [PATCH 31/46] try specifying full path to z3

---
 src/halmos/config.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/halmos/config.py b/src/halmos/config.py
index e7f10381..c1f89396 100644
--- a/src/halmos/config.py
+++ b/src/halmos/config.py
@@ -1,6 +1,7 @@
 import argparse
 import os
 import re
+import shutil
 import sys
 from collections import OrderedDict
 from collections.abc import Callable, Generator
@@ -441,7 +442,7 @@ class Config:
 
     solver_command: str = arg(
         help="use the given command when invoking the solver",
-        global_default="z3.exe" if os.name == "nt" else "z3",
+        global_default=shutil.which("z3"),
         metavar="COMMAND",
         group=solver,
     )

From 955f4106cdc64ddf17b3942aa87c7fc601a94556 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 17 Jan 2025 16:59:34 -0800
Subject: [PATCH 32/46] warn if z3 is not found

---
 src/halmos/__main__.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 0bcd9466..f833f802 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -161,6 +161,11 @@ def with_natspec(
 def load_config(_args) -> HalmosConfig:
     config = default_config()
 
+    if not config.solver_command:
+        warn(
+            "could not find z3 on the PATH -- check your PATH/venv or pass --solver-command explicitly"
+        )
+
     # parse CLI args first, so that can get `--help` out of the way and resolve `--debug`
     # but don't apply the CLI overrides yet
     cli_overrides = arg_parser().parse_args(_args)

From 9355d09d721a4416efc775984265c6a08316a1d1 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 17 Jan 2025 17:14:53 -0800
Subject: [PATCH 33/46] update uv version

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a21a1780..a517f7ab 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -47,7 +47,7 @@ jobs:
         uses: astral-sh/setup-uv@v4
         with:
           # Install a specific version of uv.
-          version: "0.5.6"
+          version: "0.5.21"
 
       - name: Set up python ${{ matrix.python-version }}
         run: uv python install ${{ matrix.python-version }}

From 985957985466cf2b92eba7b6f1c823360626c72b Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Wed, 22 Jan 2025 14:36:08 -0800
Subject: [PATCH 34/46] fix bad reference

---
 src/halmos/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index f833f802..17c5b516 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -507,7 +507,7 @@ def solve_end_to_end_callback(future: Future):
             # we have a valid counterexample, so we are eligible for early exit
             if args.early_exit:
                 debug(f"Shutting down {ctx.info.name}'s solver executor")
-                ctx.solver_executor.shutdown(wait=False)
+                ctx.solving_ctx.executor.shutdown(wait=False)
         else:
             warn_str = f"Counterexample (potentially invalid): {model}"
             warn_code(COUNTEREXAMPLE_INVALID, warn_str)

From b9f298594a54cac9668ff6770724263a1c56f375 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Wed, 22 Jan 2025 14:38:40 -0800
Subject: [PATCH 35/46] XXX print path to see what's up on windows ci

---
 src/halmos/__main__.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 17c5b516..4956dd1c 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -166,6 +166,9 @@ def load_config(_args) -> HalmosConfig:
             "could not find z3 on the PATH -- check your PATH/venv or pass --solver-command explicitly"
         )
 
+        # XXX undo this
+        print(f"PATH: {os.environ['PATH']}")
+
     # parse CLI args first, so that can get `--help` out of the way and resolve `--debug`
     # but don't apply the CLI overrides yet
     cli_overrides = arg_parser().parse_args(_args)

From 5068e5901ee39466755244953da9ade51000ba12 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Wed, 22 Jan 2025 15:00:00 -0800
Subject: [PATCH 36/46] finish merge

---
 src/halmos/__main__.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 4956dd1c..720b2c52 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -568,12 +568,20 @@ def solve_end_to_end_callback(future: Future):
             submitted_futures.append(solve_future)
 
         elif ex.context.is_stuck():
-            debug(f"Potential error path (id: {idx+1})")
-            res, _, _ = solve(ex.path.to_smt2(args), args)
+            debug(f"Potential error path (id: {path_id})")
+            path_ctx = PathContext(
+                args=args,
+                path_id=path_id,
+                query=ex.path.to_smt2(args),
+                solving_ctx=ctx.solving_ctx,
+            )
+            res, _, _ = solve_low_level(path_ctx)
             if res != unsat:
-                stuck.append((idx, ex, ex.context.get_stuck_reason()))
+                stuck.append((path_id, ex, ex.context.get_stuck_reason()))
                 if args.print_blocked_states:
-                    traces[idx] = f"{hexify(ex.path)}\n{rendered_trace(ex.context)}"
+                    ctx.traces[path_id] = (
+                        f"{hexify(ex.path)}\n{rendered_trace(ex.context)}"
+                    )
 
         elif not error_output:
             if args.print_success_states:

From 60ef79c3438db2db0636815b4bd20802bc576756 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Wed, 22 Jan 2025 15:18:57 -0800
Subject: [PATCH 37/46] print venv contents

---
 src/halmos/__main__.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 720b2c52..572c5a31 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -167,7 +167,13 @@ def load_config(_args) -> HalmosConfig:
         )
 
         # XXX undo this
-        print(f"PATH: {os.environ['PATH']}")
+        path = os.environ["PATH"]
+        print(f"PATH: {path}")
+        for p in path.split(";"):
+            if ".venv" in p:
+                print(f"venv: {p}")
+                for f in os.listdir(p):
+                    print(f"  {f}")
 
     # parse CLI args first, so that can get `--help` out of the way and resolve `--debug`
     # but don't apply the CLI overrides yet
@@ -575,8 +581,8 @@ def solve_end_to_end_callback(future: Future):
                 query=ex.path.to_smt2(args),
                 solving_ctx=ctx.solving_ctx,
             )
-            res, _, _ = solve_low_level(path_ctx)
-            if res != unsat:
+            solver_output = solve_low_level(path_ctx)
+            if solver_output.result != unsat:
                 stuck.append((path_id, ex, ex.context.get_stuck_reason()))
                 if args.print_blocked_states:
                     ctx.traces[path_id] = (

From 58030515f7b76cd2c4e18622a43d33ab96179f5a Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Wed, 22 Jan 2025 17:08:38 -0800
Subject: [PATCH 38/46] dynamically locate z3 in the venv

---
 src/halmos/__main__.py |  9 ---------
 src/halmos/config.py   | 27 +++++++++++++++++++++++++--
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 572c5a31..a3ada1d9 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -166,15 +166,6 @@ def load_config(_args) -> HalmosConfig:
             "could not find z3 on the PATH -- check your PATH/venv or pass --solver-command explicitly"
         )
 
-        # XXX undo this
-        path = os.environ["PATH"]
-        print(f"PATH: {path}")
-        for p in path.split(";"):
-            if ".venv" in p:
-                print(f"venv: {p}")
-                for f in os.listdir(p):
-                    print(f"  {f}")
-
     # parse CLI args first, so that can get `--help` out of the way and resolve `--debug`
     # but don't apply the CLI overrides yet
     cli_overrides = arg_parser().parse_args(_args)
diff --git a/src/halmos/config.py b/src/halmos/config.py
index c1f89396..eaa78270 100644
--- a/src/halmos/config.py
+++ b/src/halmos/config.py
@@ -1,12 +1,12 @@
 import argparse
 import os
 import re
-import shutil
 import sys
 from collections import OrderedDict
 from collections.abc import Callable, Generator
 from dataclasses import MISSING, dataclass, fields
 from dataclasses import field as dataclass_field
+from pathlib import Path
 from typing import Any
 
 import toml
@@ -26,6 +26,29 @@
 )
 
 
+def find_venv_root() -> Path | None:
+    # If the environment variable is set, use that
+    if "VIRTUAL_ENV" in os.environ:
+        return Path(os.environ["VIRTUAL_ENV"])
+
+    # Otherwise, if we're in a venv, sys.prefix != sys.base_prefix
+    if sys.prefix != sys.base_prefix:
+        return Path(sys.prefix)
+
+    # Not in a virtual environment
+    return None
+
+
+def find_z3_path() -> Path | None:
+    venv_path = find_venv_root()
+    if venv_path:
+        z3_bin = "z3.exe" if sys.platform == "win32" else "z3"
+        z3_path = venv_path / "bin" / z3_bin
+        if z3_path.exists():
+            return z3_path
+    return None
+
+
 # helper to define config fields
 def arg(
     help: str,
@@ -442,7 +465,7 @@ class Config:
 
     solver_command: str = arg(
         help="use the given command when invoking the solver",
-        global_default=shutil.which("z3"),
+        global_default=str(find_z3_path()),
         metavar="COMMAND",
         group=solver,
     )

From 1fc8649235cfdf62e451f1a57d99ba2ec3e9c4b0 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Thu, 23 Jan 2025 10:35:56 -0800
Subject: [PATCH 39/46] fix: avoid raising when path exploration > first cex

---
 src/halmos/__main__.py  | 18 +++++++++++++++---
 src/halmos/processes.py | 15 +++++++++++----
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index a3ada1d9..5554bb71 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -56,6 +56,7 @@
     warn_code,
 )
 from .mapper import BuildOut, DeployAddressMapper
+from .processes import ShutdownError
 from .sevm import (
     EMPTY_BALANCE,
     EVM,
@@ -522,6 +523,12 @@ def solve_end_to_end_callback(future: Future):
     path_id = 0  # default value in case we don't enter the loop body
     submitted_futures = []
     for path_id, ex in enumerate(exs):
+        # check if early exit is triggered
+        if ctx.solving_ctx.executor.is_shutdown():
+            if args.debug:
+                print("aborting path exploration, executor has been shutdown")
+            break
+
         # cache exec in case we need to print it later
         if args.print_failed_states:
             ctx.exec_cache[path_id] = ex
@@ -560,9 +567,14 @@ def solve_end_to_end_callback(future: Future):
                 solving_ctx=ctx.solving_ctx,
             )
 
-            solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx)
-            solve_future.add_done_callback(solve_end_to_end_callback)
-            submitted_futures.append(solve_future)
+            try:
+                solve_future = ctx.thread_pool.submit(solve_end_to_end, path_ctx)
+                solve_future.add_done_callback(solve_end_to_end_callback)
+                submitted_futures.append(solve_future)
+            except ShutdownError:
+                if args.debug:
+                    print("aborting path exploration, executor has been shutdown")
+                break
 
         elif ex.context.is_stuck():
             debug(f"Potential error path (id: {path_id})")
diff --git a/src/halmos/processes.py b/src/halmos/processes.py
index 83009942..05109a75 100644
--- a/src/halmos/processes.py
+++ b/src/halmos/processes.py
@@ -23,7 +23,8 @@ def register(self, executor):
         self._executors.add(executor)
 
     def shutdown_all(self):
-        print("Shutting down all executors")
+        """Shuts down all registered executors."""
+
         for ex in list(self._executors):
             ex.shutdown(wait=False)
 
@@ -134,6 +135,10 @@ def is_running(self):
         return self.process and self.process.poll() is None
 
 
+class ShutdownError(RuntimeError):
+    """Raised when submitting a future to an executor that has been shutdown."""
+
+
 class PopenExecutor(concurrent.futures.Executor):
     """
     An executor that runs commands in subprocesses.
@@ -154,11 +159,13 @@ def __init__(self, max_workers: int = 1):
     def futures(self):
         return self._futures
 
-    def submit(self, future: PopenFuture):
-        """Accepts an unstarted PopenFuture and schedules it for execution."""
+    def submit(self, future: PopenFuture) -> PopenFuture:
+        """Accepts an unstarted PopenFuture and schedules it for execution.
+
+        Raises ShutdownError if the executor has been shutdown."""
 
         if self._shutdown.is_set():
-            raise RuntimeError("Cannot submit to a shutdown executor.")
+            raise ShutdownError()
 
         with self._lock:
             self._futures.append(future)

From f7059546b86b2bc5503dd114fcda5074acd4d74a Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 24 Jan 2025 17:08:20 -0800
Subject: [PATCH 40/46] cleanup: print trace counterexample in the right place

and fix off by one error in num_execs
---
 src/halmos/__main__.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 5554bb71..2808d117 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -473,16 +473,6 @@ def solve_end_to_end_callback(future: Future):
         solver_output = future.result()
         result, model = solver_output.result, solver_output.model
 
-        if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
-            id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else ""
-            print(f"Trace{id_str}:")
-            print(ctx.traces[path_id], end="")
-
-        # unsafe, can't print an Exec from a different thread
-        # if args.print_failed_states:
-        #     print(f"# {path_id}")
-        #     print(exec)
-
         if ctx.solving_ctx.executor.is_shutdown():
             # if the thread pool is in the process of shutting down,
             # we want to stop processing remaining models/timeouts/errors, etc.
@@ -501,6 +491,13 @@ def solve_end_to_end_callback(future: Future):
             warn_code(COUNTEREXAMPLE_UNKNOWN, f"Counterexample: {result}")
             return
 
+        # print counterexample trace
+        if args.verbose >= VERBOSITY_TRACE_COUNTEREXAMPLE:
+            path_id = solver_output.path_id
+            id_str = f" #{path_id}" if args.verbose >= VERBOSITY_TRACE_PATHS else ""
+            print(f"Trace{id_str}:")
+            print(ctx.traces[path_id], end="")
+
         if model.is_valid:
             print(red(f"Counterexample: {model}"))
             ctx.valid_counterexamples.append(model)
@@ -609,7 +606,7 @@ def solve_end_to_end_callback(future: Future):
             warn(f"{funsig}: {msg}: --width {args.width}")
             break
 
-    num_execs = path_id
+    num_execs = path_id + 1
 
     # the name is a bit misleading: this timer only starts after the exploration phase is complete
     # but it's possible that solvers have already been running for a while

From 3a501aebefb1e9ec08540bb8b38b7c08980a7453 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 24 Jan 2025 17:18:41 -0800
Subject: [PATCH 41/46] bring back the executor shutdown in on_exit

---
 src/halmos/__main__.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 2808d117..6bef4247 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -56,7 +56,7 @@
     warn_code,
 )
 from .mapper import BuildOut, DeployAddressMapper
-from .processes import ShutdownError
+from .processes import ExecutorRegistry, ShutdownError
 from .sevm import (
     EMPTY_BALANCE,
     EVM,
@@ -889,8 +889,7 @@ def _main(_args=None) -> MainResult:
     #
 
     def on_exit(exitcode: int) -> MainResult:
-        # disabling this for now because it causes an assertion violation in z3
-        # ExecutorRegistry().shutdown_all()
+        ExecutorRegistry().shutdown_all()
 
         result = MainResult(exitcode, test_results_map)
 

From d7dddf820911199ea2038eaaeca590f955007aa4 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Fri, 24 Jan 2025 17:22:04 -0800
Subject: [PATCH 42/46] fix status display condition

Co-authored-by: Daejun Park <daejunpark@gmail.com>
---
 src/halmos/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 6bef4247..3cca1ebb 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -622,7 +622,7 @@ def solve_end_to_end_callback(future: Future):
     # display assertion solving progress
     #
 
-    if not args.no_status or args.early_exit:
+    if not args.no_status:
         while True:
             done = sum(fm.done() for fm in submitted_futures)
             total = potential

From 9a9d3cd719dd8ba48a46335928f51f706545df02 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Mon, 27 Jan 2025 14:47:15 -0800
Subject: [PATCH 43/46] fix unsafe setup_ex.path.to_smt2() access

---
 src/halmos/__main__.py | 13 +++++++------
 src/halmos/solve.py    |  6 ++++--
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py
index 3cca1ebb..16d33c10 100644
--- a/src/halmos/__main__.py
+++ b/src/halmos/__main__.py
@@ -318,7 +318,7 @@ def setup(ctx: FunctionContext) -> Exec:
     )
 
     setup_exs_all = sevm.run(setup_ex)
-    setup_exs_no_error: list[Exec] = []
+    setup_exs_no_error: list[tuple[Exec, SMTQuery]] = []
 
     for path_id, setup_ex in enumerate(setup_exs_all):
         if args.verbose >= VERBOSITY_TRACE_SETUP:
@@ -339,24 +339,25 @@ def setup(ctx: FunctionContext) -> Exec:
                 render_trace(setup_ex.context)
 
         else:
-            setup_exs_no_error.append(setup_ex)
+            # note: ex.path.to_smt2() needs to be called at this point. The solver object is shared across paths,
+            # and solver.to_smt2() will return a different query if it is called after a different path is explored.
+            setup_exs_no_error.append((setup_ex, setup_ex.path.to_smt2(args)))
 
     setup_exs: list[Exec] = []
 
     match setup_exs_no_error:
         case []:
             pass
-        case [ex]:
+        case [(ex, _)]:
             setup_exs.append(ex)
         case _:
-            for path_id, ex in enumerate(setup_exs_no_error):
+            for path_id, (ex, query) in enumerate(setup_exs_no_error):
                 path_ctx = PathContext(
                     args=args,
                     path_id=path_id,
-                    query=ex.path.to_smt2(args),
+                    query=query,
                     solving_ctx=ctx.solving_ctx,
                 )
-
                 solver_output = solve_low_level(path_ctx)
                 if solver_output.result != unsat:
                     setup_exs.append(ex)
diff --git a/src/halmos/solve.py b/src/halmos/solve.py
index 7518c238..6374e25c 100644
--- a/src/halmos/solve.py
+++ b/src/halmos/solve.py
@@ -445,8 +445,10 @@ def solve_end_to_end(ctx: PathContext) -> SolverOutput:
         refined_ctx = ctx.refine()
 
         if refined_ctx.query.smtlib != query.smtlib:
-            # recurse with the refined query
-            return solve_end_to_end(refined_ctx)
+            # note that check_unsat_cores cannot return true for the refined query, because it relies on only
+            # constraint ids, which don't change after refinement
+            # therefore we can skip the unsat core check in solve_end_to_end and go directly to solve_low_level
+            return solve_low_level(refined_ctx)
         else:
             verbose("    Refinement did not change the query, no need to solve again")
 

From 7d5d3502e5bbe581e0cdef9983699672a55cc92d Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Mon, 27 Jan 2025 14:55:48 -0800
Subject: [PATCH 44/46] simplify PotentialModel

---
 src/halmos/solve.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/halmos/solve.py b/src/halmos/solve.py
index 6374e25c..3f4eabeb 100644
--- a/src/halmos/solve.py
+++ b/src/halmos/solve.py
@@ -54,14 +54,10 @@ class ModelVariable:
 
 @dataclass(frozen=True)
 class PotentialModel:
-    model: ModelVariables | str | None
+    model: ModelVariables
     is_valid: bool
 
     def __str__(self) -> str:
-        # expected to be a filename
-        if isinstance(self.model, str):
-            return f"see {self.model}"
-
         formatted = []
         for v in self.model.values():
             # TODO: parse type and render accordingly
@@ -88,7 +84,8 @@ class ContractContext:
     contract_json: dict
     libs: dict
 
-    # TODO: check if this is really a contract-level variable
+    # note: build_out_map is shared across all contracts compiled using the same compiler version
+    # so in principle, we could consider having another context, say CompileUnitContext, and put build_out_map there
     build_out_map: dict
 
 

From e9184084a9e46da745faa01041b2377de135307e Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Mon, 27 Jan 2025 17:01:21 -0800
Subject: [PATCH 45/46] only render paths to string when debug is active

10x faster morpho-data-structures testProveNextBucket

[PASS] testProveNextBucket(uint256) (paths: 100, time: 123.20s (paths: 123.20s, models: 0.00s), bounds: [])

[PASS] testProveNextBucket(uint256) (paths: 100, time: 12.11s (paths: 11.28s, models: 0.83s), bounds: [])
---
 src/halmos/sevm.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py
index efd9961c..d319588c 100644
--- a/src/halmos/sevm.py
+++ b/src/halmos/sevm.py
@@ -2609,12 +2609,14 @@ def jumpi(
             follow_false = visited[False] < self.options.loop
             if not (follow_true and follow_false):
                 self.logs.bounded_loops.append(jid)
-                debug(
-                    f"\nloop id: {jid}\n"
-                    f"loop condition: {cond}\n"
-                    f"calldata: {ex.calldata()}\n"
-                    f"path condition:\n{ex.path}\n"
-                )
+                if self.options.debug:
+                    # rendering ex.path to string can be expensive, so only do it if debug is enabled
+                    debug(
+                        f"\nloop id: {jid}\n"
+                        f"loop condition: {cond}\n"
+                        f"calldata: {ex.calldata()}\n"
+                        f"path condition:\n{ex.path}\n"
+                    )
         else:
             # for constant-bounded loops
             follow_true = potential_true

From 0b7dc6e89f5a4b223b617d612707c51f8bfa9bd7 Mon Sep 17 00:00:00 2001
From: karmacoma <karma@coma.lol>
Date: Mon, 27 Jan 2025 17:48:16 -0800
Subject: [PATCH 46/46] switch halmos-solady to bitwuzla

---
 .github/workflows/test-external.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml
index bfdf7ff3..0b7c217e 100644
--- a/.github/workflows/test-external.yml
+++ b/.github/workflows/test-external.yml
@@ -47,7 +47,7 @@ jobs:
             profile: ""
           - repo: "zobront/halmos-solady"
             dir: "halmos-solady"
-            cmd: "--function testCheck --solver-command yices-smt2 --solver-threads 3"
+            cmd: "--function testCheck --solver-command 'bitwuzla --produce-models' --solver-threads 3"
             branch: ""
             profile: ""
           - repo: "pcaversaccio/snekmate"