diff --git a/mdagent/mainagent/agent.py b/mdagent/mainagent/agent.py index 71d66c8b..e2e0d664 100644 --- a/mdagent/mainagent/agent.py +++ b/mdagent/mainagent/agent.py @@ -1,4 +1,5 @@ import json +import os import time from dotenv import load_dotenv @@ -135,6 +136,7 @@ def run_and_eval(self, user_input, callbacks=None): self.agent = self._initialize_tools_and_agent(user_input) num_steps = 0 tools_used = {} + tools_details = {} step_start_time = start_time = time.time() for step in self.agent.iter({"input": user_input}, include_run_info=True): output = step.get("intermediate_step") @@ -144,8 +146,8 @@ def run_and_eval(self, user_input, callbacks=None): current_time = time.time() step_elapsed_time = current_time - step_start_time step_start_time = current_time - - tools_used[f"Step {num_steps}"] = { + tools_used[action.tool] = tools_used.get(action.tool, 0) + 1 + tools_details[f"Step {num_steps}"] = { "tool": action.tool, "tool_input": action.tool_input, "observation": observation, @@ -164,21 +166,24 @@ def run_and_eval(self, user_input, callbacks=None): "learn": not self.skip_subagents, "curriculum": self.subagents_settings.curriculum, } - print("Evaluation Summary:") + print("\n----- Evaluation Summary -----") print(f"Total Steps: {num_steps+1}") print(f"Total Time: {total_seconds:.2f} seconds ({total_mins:.2f} minutes)") - # TODO: calculate total num of distinct tools used summary = { "agent_settings": agent_settings, "total_steps": num_steps, - "total_time_seconds": total_seconds, - "total_time_minutes": total_mins, - "tools_used": tools_used, + "total_time_seconds": f"{total_seconds:.3f}", + "total_time_minutes": f"{total_mins:.3f}", "final_answer": final_output, - "run_id": run_id, + "tools_used": tools_used, + "tools_details": tools_details, + "run_id": str(run_id), } timestamp = time.strftime("%Y%m%d-%H%M%S") - with open(f"{self.ckpt_dir}/evaluation_{timestamp}.json", "w") as f: + os.makedirs(f"{self.ckpt_dir}/eval", exist_ok=True) + filename = f"{self.ckpt_dir}/eval/evaluation_{timestamp}.json" + with open(filename, "w") as f: json.dump(summary, f, indent=4) + print(f"Summary saved to {filename}") return final_output diff --git a/notebooks/ckpt_eval/evaluation_20240306-075417.json b/notebooks/ckpt_eval/evaluation_20240306-075417.json new file mode 100644 index 00000000..b7fe451e --- /dev/null +++ b/notebooks/ckpt_eval/evaluation_20240306-075417.json @@ -0,0 +1,38 @@ +{ + "agent_settings": { + "llm": "gpt-4-1106-preview", + "agent_type": "Structured", + "resume": false, + "learn": true, + "curriculum": true + }, + "total_steps": 2, + "total_time_seconds": 11.976773023605347, + "total_time_minutes": 0.19961288372675579, + "final_answer": "The PDB file for fibronectin has been downloaded and visualized. You can view the visualization in the provided notebook.", + "tools_used": { + "PDBFileDownloader": 1, + "PDBVisualization": 1 + }, + "tools_details": { + "Step 1": { + "tool": "PDBFileDownloader", + "tool_input": { + "query": "fibronectin" + }, + "observation": "Name2PDB tool successful. downloaded the PDB file:5TFY_075409", + "step_elapsed_time (sec)": 3.9843811988830566, + "timestamp_from_start (sec)": 3.9843811988830566 + }, + "Step 2": { + "tool": "PDBVisualization", + "tool_input": { + "cif_file_name": "5TFY_075409.cif" + }, + "observation": "Visualization created as notebook", + "step_elapsed_time (sec)": 3.132978916168213, + "timestamp_from_start (sec)": 7.1173601150512695 + } + }, + "run_id": "0e1767cf-1ccf-4035-8e1f-7d416202ad94" +} diff --git a/notebooks/eval.ipynb b/notebooks/eval.ipynb new file mode 100644 index 00000000..e69de29b