add example and fix eval format

ur-whitelab · Mar 6, 2024 · 61c0802 · 61c0802
1 parent 5a14d07
commit 61c0802
Show file tree

Hide file tree

Showing 3 changed files with 52 additions and 9 deletions.
diff --git a/mdagent/mainagent/agent.py b/mdagent/mainagent/agent.py
@@ -1,4 +1,5 @@
 import json
+import os
 import time
 
 from dotenv import load_dotenv
@@ -135,6 +136,7 @@ def run_and_eval(self, user_input, callbacks=None):
         self.agent = self._initialize_tools_and_agent(user_input)
         num_steps = 0
         tools_used = {}
+        tools_details = {}
         step_start_time = start_time = time.time()
         for step in self.agent.iter({"input": user_input}, include_run_info=True):
             output = step.get("intermediate_step")
@@ -144,8 +146,8 @@ def run_and_eval(self, user_input, callbacks=None):
                 current_time = time.time()
                 step_elapsed_time = current_time - step_start_time
                 step_start_time = current_time
-
-                tools_used[f"Step {num_steps}"] = {
+                tools_used[action.tool] = tools_used.get(action.tool, 0) + 1
+                tools_details[f"Step {num_steps}"] = {
                     "tool": action.tool,
                     "tool_input": action.tool_input,
                     "observation": observation,
@@ -164,21 +166,24 @@ def run_and_eval(self, user_input, callbacks=None):
             "learn": not self.skip_subagents,
             "curriculum": self.subagents_settings.curriculum,
         }
-        print("Evaluation Summary:")
+        print("\n----- Evaluation Summary -----")
         print(f"Total Steps: {num_steps+1}")
         print(f"Total Time: {total_seconds:.2f} seconds ({total_mins:.2f} minutes)")
-        # TODO: calculate total num of distinct tools used
 
         summary = {
             "agent_settings": agent_settings,
             "total_steps": num_steps,
-            "total_time_seconds": total_seconds,
-            "total_time_minutes": total_mins,
-            "tools_used": tools_used,
+            "total_time_seconds": f"{total_seconds:.3f}",
+            "total_time_minutes": f"{total_mins:.3f}",
             "final_answer": final_output,
-            "run_id": run_id,
+            "tools_used": tools_used,
+            "tools_details": tools_details,
+            "run_id": str(run_id),
         }
         timestamp = time.strftime("%Y%m%d-%H%M%S")
-        with open(f"{self.ckpt_dir}/evaluation_{timestamp}.json", "w") as f:
+        os.makedirs(f"{self.ckpt_dir}/eval", exist_ok=True)
+        filename = f"{self.ckpt_dir}/eval/evaluation_{timestamp}.json"
+        with open(filename, "w") as f:
             json.dump(summary, f, indent=4)
+        print(f"Summary saved to {filename}")
         return final_output
diff --git a/notebooks/ckpt_eval/evaluation_20240306-075417.json b/notebooks/ckpt_eval/evaluation_20240306-075417.json
@@ -0,0 +1,38 @@
+{
+    "agent_settings": {
+        "llm": "gpt-4-1106-preview",
+        "agent_type": "Structured",
+        "resume": false,
+        "learn": true,
+        "curriculum": true
+    },
+    "total_steps": 2,
+    "total_time_seconds": 11.976773023605347,
+    "total_time_minutes": 0.19961288372675579,
+    "final_answer": "The PDB file for fibronectin has been downloaded and visualized. You can view the visualization in the provided notebook.",
+    "tools_used": {
+        "PDBFileDownloader": 1,
+        "PDBVisualization": 1
+    },
+    "tools_details": {
+        "Step 1": {
+            "tool": "PDBFileDownloader",
+            "tool_input": {
+                "query": "fibronectin"
+            },
+            "observation": "Name2PDB tool successful. downloaded the PDB file:5TFY_075409",
+            "step_elapsed_time (sec)": 3.9843811988830566,
+            "timestamp_from_start (sec)": 3.9843811988830566
+        },
+        "Step 2": {
+            "tool": "PDBVisualization",
+            "tool_input": {
+                "cif_file_name": "5TFY_075409.cif"
+            },
+            "observation": "Visualization created as notebook",
+            "step_elapsed_time (sec)": 3.132978916168213,
+            "timestamp_from_start (sec)": 7.1173601150512695
+        }
+    },
+    "run_id": "0e1767cf-1ccf-4035-8e1f-7d416202ad94"
+}
diff --git a/notebooks/eval.ipynb b/notebooks/eval.ipynb