diff --git a/mdagent/tools/base_tools/analysis_tools/plot_tools.py b/mdagent/tools/base_tools/analysis_tools/plot_tools.py index 7c20784d..bf004fc0 100644 --- a/mdagent/tools/base_tools/analysis_tools/plot_tools.py +++ b/mdagent/tools/base_tools/analysis_tools/plot_tools.py @@ -1,9 +1,12 @@ import csv import re +from typing import Optional import matplotlib.pyplot as plt from langchain.tools import BaseTool +from mdagent.utils import PathRegistry + def process_csv(file_name): with open(file_name, "r") as f: @@ -64,13 +67,15 @@ def plot_data(data, headers, matched_headers): class SimulationOutputFigures(BaseTool): name = "PostSimulationFigures" description = """This tool will take - a csv file output from an openmm + a csv file id output from an openmm simulation and create figures for all physical parameters versus timestep of the simulation. Give this tool the path to the csv file output from the simulation.""" + path_registry: Optional[PathRegistry] + def _run(self, file_path: str) -> str: """use the tool.""" try: diff --git a/mdagent/tools/base_tools/preprocess_tools/clean_tools.py b/mdagent/tools/base_tools/preprocess_tools/clean_tools.py index e416568d..e6d408f1 100644 --- a/mdagent/tools/base_tools/preprocess_tools/clean_tools.py +++ b/mdagent/tools/base_tools/preprocess_tools/clean_tools.py @@ -7,7 +7,7 @@ from pdbfixer import PDBFixer from pydantic import BaseModel, Field, root_validator -from mdagent.utils import PathRegistry +from mdagent.utils import FileType, PathRegistry class CleaningTools: @@ -226,7 +226,7 @@ async def _arun(self, query: str) -> str: class CleaningToolFunctionInput(BaseModel): """Input model for CleaningToolFunction""" - pdb_path: str = Field(..., description="Path to PDB or CIF file") + pdb_id: str = Field(..., description="ID of the pdb/cif file in the path registry") output_path: Optional[str] = Field(..., description="Path to the output file") replace_nonstandard_residues: bool = Field( True, description="Whether to replace 
nonstandard residues with standard ones. " @@ -277,10 +277,10 @@ def _run(self, **input_args) -> str: input_args = input_args["input_args"] else: input_args = input_args - pdbfile_path = input_args.get("pdb_path", None) - if pdbfile_path is None: - return """No file path provided. - The input has to be a dictionary with the key 'pdb_path'""" + pdbfile_id = input_args.get("pdb_id", None) + if pdbfile_id is None: + return """No file was provided. + The input has to be a dictionary with the key 'pdb_id'""" remove_heterogens = input_args.get("remove_heterogens", True) remove_water = input_args.get("remove_water", True) add_hydrogens = input_args.get("add_hydrogens", True) @@ -289,17 +289,23 @@ def _run(self, **input_args) -> str: "replace_nonstandard_residues", True ) add_missing_atoms = input_args.get("add_missing_atoms", True) - output_path = input_args.get("output_path", None) + input_args.get("output_path", None) if self.path_registry is None: return "Path registry not initialized" file_description = "Cleaned File: " - clean_tools = CleaningTools() - pdbfile = clean_tools._extract_path(pdbfile_path, self.path_registry) - name = pdbfile.split(".")[0] - end = pdbfile.split(".")[1] + CleaningTools() + try: + pdbfile = self.path_registry.get_mapped_path(pdbfile_id) + if "/" in pdbfile: + pdbfile_name = pdbfile.split("/")[-1] + name = pdbfile_name.split("_")[0] + end = pdbfile_name.split(".")[1] + print(f"pdbfile: {pdbfile}", f"name: {name}", f"end: {end}") + except Exception as e: + print(f"error retrieving from path_registry, trying to read file {e}") + return "File not found in path registry. " fixer = PDBFixer(filename=pdbfile) - try: fixer.findMissingResidues() except Exception: @@ -321,6 +327,7 @@ def _run(self, **input_args) -> str: try: if replace_nonstandard_residues: fixer.replaceNonstandardResidues() + file_description += " Replaced Nonstandard Residues. 
" except Exception: print("error at replaceNonstandardResidues") try: @@ -343,26 +350,41 @@ def _run(self, **input_args) -> str: "Missing Atoms Added and replaces nonstandard residues. " ) file_mode = "w" if add_hydrogens else "a" - if output_path: - file_name = output_path - else: - version = 1 - while os.path.exists(f"tidy_{name}v{version}.{end}"): - version += 1 - - file_name = f"tidy_{name}v{version}.{end}" - + file_name = self.path_registry.write_file_name( + type=FileType.PROTEIN, + protein_name=name, + description="Clean", + file_format=end, + ) + file_id = self.path_registry.get_fileid(file_name, FileType.PROTEIN) + # if output_path: + # file_name = output_path + # else: + # version = 1 + # while os.path.exists(f"tidy_{name}v{version}.{end}"): + # version += 1 + # + # file_name = f"tidy_{name}v{version}.{end}" + directory = "files/pdb" + if not os.path.exists(directory): + os.makedirs(directory) if end == "pdb": PDBFile.writeFile( - fixer.topology, fixer.positions, open(file_name, file_mode) + fixer.topology, + fixer.positions, + open(f"{directory}/{file_name}", file_mode), ) elif end == "cif": PDBxFile.writeFile( - fixer.topology, fixer.positions, open(file_name, file_mode) + fixer.topology, + fixer.positions, + open(f"{directory}/{file_name}", file_mode), ) - self.path_registry.map_path(file_name, file_name, file_description) - return f"{file_description} written to {file_name}" + self.path_registry.map_path( + file_id, f"{directory}/{file_name}", file_description + ) + return f"{file_id} written to {directory}/{file_name}" except FileNotFoundError: return "Check your file path. File not found." 
except Exception as e: diff --git a/mdagent/tools/base_tools/preprocess_tools/pdb_tools.py b/mdagent/tools/base_tools/preprocess_tools/pdb_tools.py index fbce8e15..fbad3a6f 100644 --- a/mdagent/tools/base_tools/preprocess_tools/pdb_tools.py +++ b/mdagent/tools/base_tools/preprocess_tools/pdb_tools.py @@ -10,7 +10,7 @@ from pdbfixer import PDBFixer from pydantic import BaseModel, Field, ValidationError, root_validator -from mdagent.utils import PathRegistry +from mdagent.utils import FileType, PathRegistry def get_pdb(query_string, path_registry=None): @@ -41,13 +41,22 @@ def get_pdb(query_string, path_registry=None): print(f"PDB file found with this ID: {pdbid}") url = f"https://files.rcsb.org/download/{pdbid}.{filetype}" pdb = requests.get(url) - filename = f"{pdbid}.{filetype}" - with open(filename, "w") as file: + filename = path_registry.write_file_name( + FileType.PROTEIN, + protein_name=pdbid, + description="raw", + file_format=filetype, + ) + file_id = path_registry.get_fileid(filename, FileType.PROTEIN) + directory = "files/pdb" + # Create the directory if it does not exist + if not os.path.exists(directory): + os.makedirs(directory) + + with open(f"{directory}/{filename}", "w") as file: file.write(pdb.text) - print(f"{filename} is created.") - file_description = f"PDB file downloaded from RSCB, PDB ID: {pdbid}" - path_registry.map_path(filename, filename, file_description) - return filename + + return filename, file_id return None @@ -73,11 +82,16 @@ def _run(self, query: str) -> str: try: if self.path_registry is None: # this should not happen return "Path registry not initialized" - pdb = get_pdb(query, self.path_registry) - if pdb is None: + filename, pdbfile_id = get_pdb(query, self.path_registry) + if pdbfile_id is None: return "Name2PDB tool failed to find and download PDB file." 
else: - return f"Name2PDB tool successfully downloaded the PDB file: {pdb}" + self.path_registry.map_path( + pdbfile_id, + f"files/pdb/{filename}", + f"PDB file downloaded from RSCB, PDBFile ID: {pdbfile_id}", + ) + return f"Name2PDB tool successful. downloaded the PDB file:{pdbfile_id}" except Exception as e: return f"Something went wrong. {e}" diff --git a/mdagent/tools/base_tools/simulation_tools/create_simulation.py b/mdagent/tools/base_tools/simulation_tools/create_simulation.py index 4e811c30..7c5e3563 100644 --- a/mdagent/tools/base_tools/simulation_tools/create_simulation.py +++ b/mdagent/tools/base_tools/simulation_tools/create_simulation.py @@ -1,3 +1,4 @@ +import os import textwrap from typing import Optional @@ -131,8 +132,13 @@ def remove_leading_spaces(self, text): class ModifyScriptInput(BaseModel): - query: str = Field(..., description="Simmulation required by the user") - script: str = Field(..., description=" path to the base script file") + query: str = Field( + ..., + description="""Simmulation required by the user.You MUST + specify the objective, requirements of the simulation as well + as on what protein you are working.""", + ) + script: str = Field(..., description=" simulation ID of the base script file") class ModifyBaseSimulationScriptTool(BaseTool): @@ -150,10 +156,17 @@ def __init__(self, path_registry: Optional[PathRegistry], llm: BaseLanguageModel self.llm = llm def _run(self, **input): - base_script_path = input.get("script") - if not base_script_path: - return """No script provided. The keys for the input are: + base_script_id = input.get("script") + if not base_script_id: + return """No id provided. 
The keys for the input are: 'query' and 'script'""" + try: + base_script_path = self.path_registry.get_mapped_path(base_script_id) + parts = base_script_path.split("/") + if len(parts) > 1: + parts[-1] + except Exception as e: + return f"Error getting path from file id: {e}" with open(base_script_path, "r") as file: base_script = file.read() base_script = "".join(base_script) @@ -172,11 +185,17 @@ def _run(self, **input): script_content = script_content.replace("```", "#") script_content = textwrap.dedent(script_content).strip() # Write to file - filename = "modified_simul.py" - with open(filename, "w") as file: + filename = self.path_registry.write_file_name( + type="SIMULATION", Sim_id=base_script_id, modified=True + ) + file_id = self.path_registry.get_fileid(filename, type="SIMULATION") + directory = "files/simulations" + if not os.path.exists(directory): + os.makedirs(directory) + with open(f"{directory}/{filename}", "w") as file: file.write(script_content) - self.path_registry.map_path(filename, filename, description) + self.path_registry.map_path(file_id, filename, description) return "Script modified successfully" async def _arun(self, query) -> str: diff --git a/mdagent/tools/base_tools/simulation_tools/setup_and_run.py b/mdagent/tools/base_tools/simulation_tools/setup_and_run.py index a59dd367..338e5dcf 100644 --- a/mdagent/tools/base_tools/simulation_tools/setup_and_run.py +++ b/mdagent/tools/base_tools/simulation_tools/setup_and_run.py @@ -19,6 +19,7 @@ LangevinIntegrator, LangevinMiddleIntegrator, MonteCarloBarostat, + OpenMMException, Platform, VerletIntegrator, app, @@ -48,7 +49,9 @@ from mdagent.tools.base_tools.preprocess_tools import CleaningTools # Local Library/Application Imports -from mdagent.utils import PathRegistry +from mdagent.utils import FileType, PathRegistry + +# TODO delete files created from the simulation if not needed. 
FORCEFIELD_LIST = [ "amber14/DNA.OL15.xml", @@ -545,7 +548,7 @@ async def _arun(self, query: str) -> str: #######==================System Configuration==================######## # System Configuration class SetUpandRunFunctionInput(BaseModel): - pdb_path: str + pdb_id: str forcefield_files: List[str] system_params: Dict[str, Any] = Field( { @@ -614,7 +617,9 @@ class SetUpandRunFunctionInput(BaseModel): class OpenMMSimulation: - def __init__(self, input_params: SetUpandRunFunctionInput): + def __init__( + self, input_params: SetUpandRunFunctionInput, path_registry: PathRegistry + ): self.params = input_params self.int_params = self.params.get("integrator_params", None) if self.int_params is None: @@ -644,13 +649,16 @@ def __init__(self, input_params: SetUpandRunFunctionInput): "record_interval_steps": 100, "record_params": ["step", "potentialEnergy", "temperature"], } + self.path_registry = path_registry self.setup_system() self.setup_integrator() self.create_simulation() def setup_system(self): print("Building system...") - self.pdb = PDBFile(self.params["pdb_path"]) + self.pdb_id = self.params["pdb_id"] + self.pdb_path = self.path_registry.get_mapped_path(name=self.pdb_id) + self.pdb = PDBFile(self.pdb_path) self.forcefield = ForceField(*self.params["forcefield_files"]) self.system = self._create_system(self.pdb, self.forcefield, **self.sys_params) @@ -775,7 +783,7 @@ def unit_to_string(unit): not runnable""" return f"{unit.value_in_unit(unit.unit)}*{unit.unit.get_name()}" - pdb_path = self.params["pdb_path"] + pdb_path = self.pdb_path forcefield_files = ", ".join( f"'{file}'" for file in self.params["forcefield_files"] ) @@ -956,10 +964,14 @@ def remove_leading_spaces(text): script_content = textwrap.dedent(script_content).strip() # Write to file - with open(filename, "w") as file: + directory = "files/simulations" + if not os.path.exists(directory): + os.makedirs(directory) + + with open(f"{directory}/{filename}", "w") as file: file.write(script_content) - 
print(f"Standalone simulation script written to {filename}") + print(f"Standalone simulation script written to {directory}/{filename}") def run(self): # Minimize and Equilibrate @@ -990,28 +1002,56 @@ class SetUpandRunFunction(BaseTool): args_schema: Type[BaseModel] = SetUpandRunFunctionInput - PathRegistry: Optional[PathRegistry] + path_registry: Optional[PathRegistry] def _run(self, **input_args): + if self.path_registry is None: + print("Path registry not initialized") + return "Path registry not initialized" input = self.check_system_params(input_args) + error = input.get("error", None) if error: return error try: - Simulation = OpenMMSimulation(input) + pdb_id = input["pdb_id"] + except KeyError: + print("whoops no pdb_id found in input,", input) + return "No pdb_id found in input" + try: + Simulation = OpenMMSimulation(input, self.path_registry) print("simulation set!") except ValueError as e: return str(e) + f"This were the inputs {input_args}" + except FileNotFoundError: + return f"File not found, check File id. This were the inputs {input_args}" + except OpenMMException as e: + return f"OpenMM Exception: {str(e)}. This were the inputs {input_args}" try: Simulation.run() - Simulation.write_standalone_script() - return "Simulation done!" except Exception as e: return f"""An exception was found: {str(e)}. Not a problem, thats one purpose of this tool: to run a short simulation to check for correct initialization. 
\n\n Try a) with different parameters like nonbondedMethod, constraints, etc or b) clean file inputs depending on error """ + try: + file_name = self.path_registry.write_file_name( + type=FileType.SIMULATION, + type_of_sim=input["simmulation_params"]["Ensemble"], + protein_file_id=pdb_id, + ) + file_name += ".py" + file_id = self.path_registry.get_fileid(file_name, FileType.SIMULATION) + Simulation.write_standalone_script(filename=file_name) + self.path_registry.map_path( + file_id, file_name, f"Basic Simulation of Protein {pdb_id}" + ) + return "Simulation done!" + except Exception as e: + print(f"An exception was found: {str(e)}.") + return f"""An exception was found trying to write the filenames: {str(e)}. + """ def _parse_cutoff(self, cutoff): # Check if cutoff is already an OpenMM Quantity (has a unit) @@ -1394,9 +1434,9 @@ def check_system_params(cls, values): error_msg = "constraintTolerance must be specified if rigidWater is True" """Checking if the file is in the path""" - pdb_path = values.get("pdb_path") - if not os.path.exists(pdb_path): - error_msg += "The pdb file is not present in the file" + pdb_id = values.get("pdb_id") + if not pdb_id: + error_msg += "The pdb id is not present in the inputs" """Validating the forcefield files and Integrator""" @@ -1448,7 +1488,7 @@ def check_system_params(cls, values): + "\n Correct this and try again. 
\n Everthing else is fine" } values = { - "pdb_path": pdb_path, + "pdb_id": pdb_id, "forcefield_files": forcefield_files, "system_params": system_params, "integrator_params": integrator_params, diff --git a/mdagent/tools/base_tools/util_tools/registry_tools.py b/mdagent/tools/base_tools/util_tools/registry_tools.py index 9ea8dff8..8a9f8c21 100644 --- a/mdagent/tools/base_tools/util_tools/registry_tools.py +++ b/mdagent/tools/base_tools/util_tools/registry_tools.py @@ -57,7 +57,7 @@ def _run(self, paths: str) -> str: try: if self.path_registry is None: return "Path registry not initialized" - return self.path_registry.list_path_names() + return self.path_registry.list_path_names_and_descriptions() except Exception: return "Error listing paths" diff --git a/mdagent/tools/maketools.py b/mdagent/tools/maketools.py index c9f2891d..17184215 100644 --- a/mdagent/tools/maketools.py +++ b/mdagent/tools/maketools.py @@ -18,7 +18,6 @@ CheckDirectoryFiles, CleaningToolFunction, ListRegistryPaths, - MapPath2Name, ModifyBaseSimulationScriptTool, Name2PDBTool, PackMolTool, @@ -80,7 +79,7 @@ def make_all_tools( CheckDirectoryFiles(), # InstructionSummary(path_registry=path_instance), ListRegistryPaths(path_registry=path_instance), - MapPath2Name(path_registry=path_instance), + # MapPath2Name(path_registry=path_instance), Name2PDBTool(path_registry=path_instance), PackMolTool(path_registry=path_instance), VisualizeProtein(path_registry=path_instance), diff --git a/mdagent/utils/__init__.py b/mdagent/utils/__init__.py index fc078e19..ef0fa47b 100644 --- a/mdagent/utils/__init__.py +++ b/mdagent/utils/__init__.py @@ -1,4 +1,4 @@ from .makellm import _make_llm -from .path_registry import PathRegistry +from .path_registry import FileType, PathRegistry -__all__ = ["_make_llm", "PathRegistry"] +__all__ = ["_make_llm", "PathRegistry", "FileType"] diff --git a/mdagent/utils/path_registry.py b/mdagent/utils/path_registry.py index 02049114..8fc27a47 100644 --- 
a/mdagent/utils/path_registry.py +++ b/mdagent/utils/path_registry.py @@ -1,5 +1,15 @@ import json import os +from datetime import datetime +from enum import Enum + + +##TODO: add method to get description from simulation inputs +##TODO: add method to get conditions from simulation outputs +class FileType(Enum): + PROTEIN = 1 + SIMULATION = 2 + RECORD = 3 class PathRegistry: @@ -35,6 +45,7 @@ def _save_mapping_to_json(self, path_dict): existing_data = json.load(json_file) existing_data.update(path_dict) with open(self.json_file_path, "w") as json_file: + existing_data.update(path_dict) json.dump(existing_data, json_file, indent=4) def _check_json_content(self, name): @@ -92,3 +103,73 @@ def list_path_names(self): else "No names found. The JSON file is empty or does not" "contain name mappings." ) + + def list_path_names_and_descriptions(self): + if not self._check_for_json(): + return "JSON file does not exist" + with open(self.json_file_path, "r") as json_file: + data = json.load(json_file) + names = [key for key in data.keys()] + descriptions = [data[key]["description"] for key in data.keys()] + names_w_descriptions = [ + f"{name}: {description}" for name, description in zip(names, descriptions) + ] + return ( + "Files found in registry: " + ", ".join(names_w_descriptions) + if names + else "No names found. The JSON file is empty or does not" + "contain name mappings." 
+ ) + + def get_timestamp(self): + # Get the current date and time + now = datetime.now() + # Format the date and time as "YYYYMMDD_HHMMSS" + timestamp = now.strftime("%Y%m%d_%H%M%S") + + return timestamp + + # File Name/ID in Path Registry JSON + def get_fileid(self, file_name: str, type: FileType): + # Split the filename on underscores + parts, ending = file_name.split(".") + parts_list = parts.split("_") + + # Extract the timestamp (assuming it's always in the second to last part) + timestamp_part = parts_list[-1] + # Get the last 6 digits of the timestamp + timestamp_digits = timestamp_part[-6:] + + if type == FileType.PROTEIN: + # Extract the PDB ID (assuming it's always the first part) + pdb_id = parts_list[0] + return pdb_id + "_" + timestamp_digits + if type == FileType.SIMULATION: + return "sim" + "_" + timestamp_digits + if type == FileType.RECORD: + return "rec" + "_" + timestamp_digits + + def write_file_name(self, type: FileType, **kwargs): + time_stamp = self.get_timestamp() + protein_name = kwargs.get("protein_name", None) + description = kwargs.get("description", "No description provided") + file_format = kwargs.get("file_format", "No file format provided") + protein_file_id = kwargs.get("protein_file_id", None) + type_of_sim = kwargs.get("type_of_sim", None) + conditions = kwargs.get("conditions", None) + Sim_id = kwargs.get("Sim_id", None) + modified = kwargs.get("modified", False) + + if type == FileType.PROTEIN: + file_name = f"{protein_name}_{description}_{time_stamp}.{file_format}" + if type == FileType.SIMULATION: + if conditions: + file_name = f"{type_of_sim}_{protein_file_id}_{conditions}_{time_stamp}" + elif modified: + file_name = f"{Sim_id}_MOD_{time_stamp}" + else: + file_name = f"{type_of_sim}_{protein_file_id}_{time_stamp}" + if type == FileType.RECORD: + file_name = f"{protein_file_id}_{Sim_id}_{time_stamp}" + + return file_name diff --git a/notebooks/testing_changes.ipynb b/notebooks/testing_changes.ipynb new file mode 100644 index 
00000000..c945d47e --- /dev/null +++ b/notebooks/testing_changes.ipynb @@ -0,0 +1,1891 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import ast\n", + "import json\n", + "import os\n", + "from typing import Any, Dict, List, Optional\n", + "\n", + "import langchain\n", + "from langchain import LLMChain, PromptTemplate\n", + "from langchain.base_language import BaseLanguageModel\n", + "from langchain.tools import BaseTool\n", + "from openmm import (\n", + " AndersenThermostat,\n", + " BrownianIntegrator,\n", + " LangevinIntegrator,\n", + " LangevinMiddleIntegrator,\n", + " MonteCarloBarostat,\n", + " VerletIntegrator,\n", + " app,\n", + " Platform,\n", + " OpenMMException,\n", + ")\n", + "from openmm.app import (\n", + " PME,\n", + " CutoffPeriodic,\n", + " DCDReporter,\n", + " ForceField,\n", + " Modeller,\n", + " NoCutoff,\n", + " PDBFile,\n", + " PDBReporter,\n", + " PDBxFile,\n", + " Simulation,\n", + " StateDataReporter,\n", + ")\n", + "from openmm.unit import bar, femtoseconds, kelvin, nanometers, picosecond, picoseconds\n", + "from pydantic import BaseModel, Field, root_validator\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dotenv import load_dotenv\n", + "import os\n", + "load_dotenv()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jorgemedina/miniforge3/envs/MDAGENT/lib/python3.11/site-packages/MDAnalysis/topology/TPRParser.py:161: DeprecationWarning: 'xdrlib' is deprecated and slated for removal in Python 3.13\n", + " import xdrlib\n" + ] + } + ], + "source": [ + "from mdagent import MDAgent" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mrkl = MDAgent(type=\"mrkl\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mrkl.run(\"Estimate the SASA of fibronectin. Try using mdtraj to load the trajectory and calculate the SASA.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('Thought: Thought: To calculate the SASA (Solvent Accessible Surface Area) of fibronectin, we first need to have the trajectory file of fibronectin. We can use the PDBFileDownloader tool to download the PDB file of fibronectin. After that, we can use the Python_REPL tool to load the trajectory and calculate the SASA using mdtraj.\\n\\nAction:\\n```\\n{\\n \"action\": \"PDBFileDownloader\",\\n \"action_input\": {\\n \"query\": \"fibronectin\"\\n }\\n}\\n```\\nObservation: Name2PDB tool successfully downloaded the PDB file: 1X5Y.pdb\\nThought: Now that we have the PDB file of fibronectin, we can use the Python_REPL tool to load the trajectory and calculate the SASA using mdtraj. \\n\\nAction:\\n```\\n{\\n \"action\": \"Python_REPL\",\\n \"action_input\": {\\n \"query\": \"import mdtraj as md\\\\ntraj = md.load(\\'1X5Y.pdb\\')\\\\nsasa = md.shrake_rupley(traj)\\\\nprint(sasa)\"\\n }\\n}\\n```\\n\\nObservation: [[0.29048446 0.01886919 0.03899633 ... 0.16989732 0.15396945 0.24068788]\\n [0.2965111 0.02140882 0.04156128 ... 0.02492703 0.17091843 0.00640715]\\n [0.3398059 0.03650737 0.02017942 ... 0.14691246 0.2408391 0.15574488]\\n ...\\n [0.3181646 0.03902486 0.03652634 ... 0.16915505 0.13112323 0.25948352]\\n [0.29540354 0.0277239 0.03778433 ... 0.18332024 0.25673175 0.15331419]\\n [0.31588238 0.02645174 0.03275413 ... 
0.17979324 0.24976385 0.2284352 ]]\\n\\nThought: The SASA of fibronectin has been calculated using the mdtraj library. The output is a 2D array where each row corresponds to a frame in the trajectory and each column corresponds to an atom in the topology. The values represent the SASA of each atom in each frame. To get the total SASA for each frame, we can sum up the values in each row.\\n\\nAction:\\n```\\n{\\n \"action\": \"Python_REPL\",\\n \"action_input\": {\\n \"query\": \"total_sasa = sasa.sum(axis=1)\\\\nprint(total_sasa)\"\\n }\\n}\\n```\\nObservation: [75.67676 74.52507 75.87748 77.42663 77.3946 73.997116 74.76072\\n 76.645966 76.18153 76.04704 73.347305 76.14905 74.81729 74.481064\\n 75.8655 75.76576 75.239975 76.66776 76.06768 75.27769 ]\\n\\nFinal Answer: The total Solvent Accessible Surface Area (SASA) for each frame in the trajectory of fibronectin has been calculated. The SASA values are in square angstroms and they are as follows: [75.67676, 74.52507, 75.87748, 77.42663, 77.3946, 73.997116, 74.76072, 76.645966, 76.18153, 76.04704, 73.347305, 76.14905, 74.81729, 74.481064, 75.8655, 75.76576, 75.239975, 76.66776, 76.06768, 75.27769].')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "func = MDAgent(type=\"functions\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "func.run(\"Estimate the SASA of fibronectin. Try using mdtraj to load the trajectory and calculate the SASA.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "func.run(\"Estimate the SASA of fibronectin. Try using mdtraj to load the trajectory and calculate the SASA.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = func.run(\"Estimate the SASA of fibronectin. 
Try using mdtraj to load the trajectory and calculate the SASA.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = func.run(\"Estimate the SASA of fibronectin. Try using mdtraj to load the trajectory and calculate the SASA.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result[\"intermediate_steps\"][1][1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FORCEFIELD_LIST = [\"amber14/DNA.OL15.xml\",\n", + "\"amber14/DNA.bsc1.xml\",\n", + "\"amber14/RNA.OL3.xml\",\n", + "\"amber14/lipid17.xml\",\n", + "\"amber14/protein.ff14SB.xml\",\n", + "\"amber14/protein.ff15ipq.xml\",\n", + "\"amber14/spce.xml\",\n", + "\"amber14/tip3p.xml\",\n", + "\"amber14/tip3pfb.xml\",\n", + "\"amber14/tip4pew.xml\",\n", + "\"amber14/tip4pfb.xml\",\n", + "\"charmm36/spce.xml\",\n", + "\"charmm36/tip3p-pme-b.xml\",\n", + "\"charmm36/tip3p-pme-f.xml\",\n", + "\"charmm36/tip4p2005.xml\",\n", + "\"charmm36/tip4pew.xml\",\n", + "\"charmm36/tip5p.xml\",\n", + "\"charmm36/tip5pew.xml\",\n", + "\"charmm36/water.xml\",\n", + "\"absinth.xml\",\n", + "\"amber03.xml\",\n", + "\"amber03_obc.xml\",\n", + "\"amber10.xml\",\n", + "\"amber10_obc.xml\",\n", + "\"amber14-all\",\n", + "\"amber14-all.xml\",\n", + "\"amber96.xml\",\n", + "\"amber96_obc.xml\",\n", + "\"amber99Test.xml\",\n", + "\"amber99_obc.xml\",\n", + "\"amber99sb.xml\",\n", + "\"amber99sbildn.xml\",\n", + "\"amber99sbnmr.xml\",\n", + 
"\"amberfb15.xml\",\n", + "\"amoeba2009.xml\",\n", + "\"amoeba2009_gk.xml\",\n", + "\"amoeba2013.xml\",\n", + "\"amoeba2013_gk.xml\",\n", + "\"charmm36.xml\",\n", + "\"charmm_polar_2013.xml\",\n", + "\"hydrogens.xml\",\n", + "\"iamoeba.xml\",\n", + "\"pdbNames.xml\",\n", + "\"residues.xml\",\n", + "\"spce.xml\",\n", + "\"swm4ndp.xml\",\n", + "\"tip3p.xml\",\n", + "\"tip3pfb.xml\",\n", + "\"tip4pew.xml\",\n", + "\"tip4pfb.xml\",\n", + "\"tip5p.xml\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "class SetUpAndRunTool(BaseTool):\n", + " name = \"SetUpAndRunTool\"\n", + " description = \"\"\"This tool can only run after InstructionSummary\n", + " This tool will set up the simulation objects\n", + " and run the simulation.\n", + " It will ask for the parameters path.\n", + " input: json file\n", + " \"\"\"\n", + " path_registry: Optional[PathRegistry]\n", + "\n", + " def __init__(\n", + " self,\n", + " path_registry: Optional[PathRegistry],\n", + " ):\n", + " super().__init__()\n", + " self.path_registry = path_registry\n", + "\n", + " def _run(self, query: str) -> str:\n", + " \"\"\"Use the tool\"\"\"\n", + " # find the parameters in the directory\n", + " try:\n", + " if self.path_registry is None: # this should not happen\n", + " return \"Registry not initialized\"\n", + " sim_fxns = SimulationFunctions()\n", + " parameters = sim_fxns._extract_parameters_path()\n", + "\n", + " except ValueError as e:\n", + " return (\n", + " str(e)\n", + " + \"\"\"\\nPlease use the Instruction summary tool with the\n", + " to create a parameters.json file in the directory.\"\"\"\n", + " )\n", + " self.log(\"This are the parameters:\")\n", + " self.log(parameters)\n", + " # print the parameters in json file\n", + " with open(parameters) as f:\n", + " params = json.load(f)\n", + " for key, value in params.items():\n", + " print(key, \":\", value)\n", + " self.log(\"Are you sure you want to run the 
simulation? (y/n)\")\n", + " response = input(\"yes or no: \")\n", + " if response.lower() in [\"yes\", \"y\"]:\n", + " sim_fxns._setup_and_run_simulation(parameters, self.path_registry)\n", + " else:\n", + " return \"Simulation interrupted due to human input\"\n", + " return \"Simulation Completed, simulation trajectory and data files saved.\"\n", + "\n", + " def log(self, text, color=\"blue\"):\n", + " if color == \"blue\":\n", + " print(\"\\033[1;34m\\t{}\\033[00m\".format(text))\n", + " if color == \"red\":\n", + " print(\"\\033[31m\\t{}\\033[00m\".format(text))\n", + "\n", + " async def _arun(self, query: str) -> str:\n", + " \"\"\"Use the tool asynchronously.\"\"\"\n", + " raise NotImplementedError(\"custom_search does not support async\")\n", + "\n", + "\n", + "class InstructionSummary(BaseTool):\n", + " name = \"Instruction Summary\"\n", + " description = \"\"\"This tool will summarize the instructions\n", + " given by the human. This is the first tool you will\n", + " use, unless you dont have a .cif or .pdb file in\n", + " which case you have to download one first.\n", + " Input: Instructions or original query.\n", + " Output: Summary of instructions\"\"\"\n", + " path_registry: Optional[PathRegistry]\n", + "\n", + " def __init__(\n", + " self,\n", + " path_registry: Optional[PathRegistry],\n", + " ):\n", + " super().__init__()\n", + " self.path_registry = path_registry\n", + "\n", + " def _run(self, query: str) -> str:\n", + " # first check if there is any .cif or .pdb files in the directory\n", + " # if there is, then ask for instructions\n", + " if self.path_registry is None: # this should not happen\n", + " return \"Registry not initialized\"\n", + " files = os.listdir(\".\")\n", + " pdb_cif_files = [f for f in files if f.endswith(\".pdb\") or f.endswith(\".cif\")]\n", + " pdb_cif_files_tidy = [\n", + " f\n", + " for f in files\n", + " if (f.endswith(\".pdb\") or f.endswith(\".cif\")) and \"tidy\" in f\n", + " ]\n", + " if len(pdb_cif_files_tidy) != 
0:\n", + " path = pdb_cif_files_tidy[0]\n", + " else:\n", + " path = pdb_cif_files[0]\n", + " sim_fxns = SimulationFunctions()\n", + " summary = sim_fxns._prompt_summary(query + \"the pdbfile is\" + path)\n", + " sim_fxns._save_to_file(\n", + " summary, \"simulation_parameters_summary.json\", self.path_registry\n", + " )\n", + " return summary\n", + "\n", + " async def _arun(self, query: str) -> str:\n", + " \"\"\"Use the tool asynchronously.\"\"\"\n", + " raise NotImplementedError(\"custom_search does not support async\")\n", + "\n", + "\n", + "#######==================System Congifuration==================########\n", + "# System Configuration\n", + "class SetUpandRunFunctionInput(BaseModel):\n", + " pdb_path: str\n", + " forcefield_files: List[str]\n", + " system_params: Optional[Dict[str, Any]] = Field(\n", + " {\n", + " \"nonbondedMethod\": \"NoCutoff\",\n", + " \"nonbondedCutoff\": 1*nanometers,\n", + " \"ewaldErrorTolerance\": None,\n", + " \"constraints\": \"None\",\n", + " \"rigidWater\": False,\n", + " \"constraintTolerance\": None,\n", + " },\n", + " description=\"\"\"Parameters for the openmm system.\n", + " For nonbondedMethod, you can choose from the following:\n", + " NoCutoff, CutoffNonPeriodic, CutoffPeriodic, Ewald, PME.\n", + " If anything but NoCutoff is chosen,\n", + " you have to include a nonbondedCutoff\n", + " and a constrainTolerance.\n", + " If PME is chosen,\n", + " you have to include an ewaldErrorTolerance too.\n", + " For constraints, you can choose from the following:\n", + " None, HBonds, AllBonds or OnlyWater.\n", + " For rigidWater, you can choose from the following:\n", + " True, False.\n", + " Example1:\n", + " {\"nonbondedMethod\": 'NoCutoff',\n", + " \"constraints\": 'None',\n", + " \"rigidWater\": False}\n", + " Example2:\n", + " {\"nonbondedMethod\": 'CutoffPeriodic',\n", + " \"nonbondedCutoff\": 1.0,\n", + " \"constraints\": 'HBonds',\n", + " \"rigidWater\": True,\n", + " \"constraintTolerance\": 0.00001}\n", + " 
\"\"\",\n", + " )\n", + " integrator_params: Optional[Dict[str, Any]] = Field(\n", + " {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": 300 * kelvin,\n", + " \"Friction\": 1.0 / picoseconds,\n", + " \"Timestep\": 0.004 * picoseconds,\n", + " \"Pressure\": 1.0 * bar,\n", + " },\n", + " description=\"\"\"Parameters for the openmm integrator.\"\"\",\n", + " )\n", + " simmulation_params: Optional[Dict[str, Any]] = Field(\n", + " {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"],\n", + " },\n", + " \n", + " description=\"\"\"Parameters for the openmm simulation.\n", + " The ensemble can be NPT, NVT or NVE.\n", + " The number of steps is the number of steps the simulation will run for.\n", + " record_interval_steps is the number of steps between each record:\n", + " hould be the number of steps divided by 100.\n", + " The record_params is a list of parameters that will\n", + " be recorded during the simulation The options are:\n", + " [Step,Time,Speed,Progress,RemainingTime,ElapsedTime,\n", + " PotentialEnergy,KineticEnergy,TotalEnergy,\n", + " Temperature,Volume,Density]\"\"\",\n", + " )\n", + "\n", + " @root_validator\n", + " def check_system_params(cls, values):\n", + " \"\"\"Check that the system parameters are valid.\"\"\"\n", + " #lowercase all keys in the dictionary\n", + " values = {k.lower(): v for k, v in values.items()}\n", + " \n", + " system_params = values.get(\"system_params\")\n", + " integrator_params = values.get(\"integrator_params\")\n", + " simmulation_params = values.get(\"simmulation_params\")\n", + " \n", + " nonbondedMethod = system_params.get(\"nonbondedMethod\")\n", + " nonbondedCutoff = system_params.get(\"nonbondedCutoff\")\n", + " ewaldErrorTolerance = system_params.get(\"ewaldErrorTolerance\")\n", + " constraints = system_params.get(\"constraints\")\n", + " rigidWater = 
system_params.get(\"rigidWater\")\n", + " constraintTolerance = system_params.get(\"constrainTolerance\")\n", + " error_msg = \"\"\n", + " if nonbondedMethod in [\"PME\", \"CutoffNonPeriodic\", \"CutoffPeriodic\"]:\n", + " if nonbondedCutoff is None:\n", + " error_msg += \"\"\"nonbondedCutoff must be specified if\n", + " nonbondedMethod is not NoCutoff\\n\"\"\"\n", + " if nonbondedMethod == \"PME\" and ewaldErrorTolerance is None:\n", + " error_msg += \"\"\"ewaldErrorTolerance must be specified when\n", + " nonbondedMethod is PME\\n\"\"\"\n", + " if constraints in [\"HBonds\", \" AllBonds\", \"OnlyWater\"]:\n", + " if constraintTolerance is None:\n", + " error_msg += \"\"\"constraintTolerance must be specified when\n", + " constraints is HBonds or AllBonds\"\"\"\n", + " if rigidWater and constraintTolerance is None:\n", + " error_msg = \"constraintTolerance must be specified if rigidWater is True\"\n", + " #if no constraints, drop constraintTolerance\n", + " if constraints is None:\n", + " system_params.pop(\"constraintTolerance\")\n", + " print(system_params)\n", + " \"\"\"Checking if the file is in the path\"\"\"\n", + " pdb_path = values.get(\"pdb_path\")\n", + " if not os.path.exists(pdb_path):\n", + " error_msg += \"The pdb file is not present in the file\"\n", + "\n", + " \"\"\"Validating the forcefield files and Integrator\"\"\"\n", + "\n", + " integrator_type = integrator_params.get(\"integrator_type\")\n", + " if integrator_type not in [\"LangevinMiddle\", \"Verlet\", \"Brownian\"]:\n", + " error_msg += \"\"\"integrator_type must be one of the following:\n", + " LangevinMiddle, Verlet, Brownian\\n\"\"\"\n", + " if integrator_type == \"LangevinMiddle\":\n", + " friction = integrator_params.get(\"Friction\")\n", + " if friction is None:\n", + " error_msg += \"\"\"friction must be specified when\n", + " integrator_type is LangevinMiddle\\n\"\"\"\n", + " timestep = integrator_params.get(\"Timestep\")\n", + " if timestep is None:\n", + " error_msg += 
\"\"\"timestep must be specified when\n", + " integrator_type is LangevinMiddle\\n\"\"\"\n", + " temp = integrator_params.get(\"Temperature\")\n", + " if temp is None:\n", + " error_msg += \"\"\"temperature must be specified when\n", + " integrator_type is LangevinMiddle\\n\"\"\"\n", + "\n", + " if integrator_type == \"Verlet\":\n", + " timestep = integrator_params.get(\"timestep\")\n", + " if timestep is None:\n", + " error_msg += \"\"\"timestep must be specified when\n", + " integrator_type is Verlet\\n\"\"\"\n", + " if integrator_type == \"Brownian\":\n", + " temperature = integrator_params.get(\"temperature\")\n", + " if temperature is None:\n", + " error_msg += \"\"\"temperature must be specified when\n", + " integrator_type is Brownian\\n\"\"\"\n", + "\n", + " # forcefield\n", + " forcefield_files = values.get(\"forcefield_files\")\n", + " if forcefield_files is None:\n", + " error_msg += \"forcefield_files must be specified\\n\"\n", + " else:\n", + " print(forcefield_files)\n", + " for file in forcefield_files:\n", + " if file not in FORCEFIELD_LIST:\n", + " error_msg += \"The forcefield file is not present\"\n", + " \n", + "\n", + " if error_msg != \"\":\n", + " return {\"error\": error_msg + \n", + " \"\\n Correct this and try again. 
\\n Everthing else is fine\"}\n", + "\n", + " # if no error, return the values with keys in the params in camelCase format\n", + "\n", + " \n", + " values= {\"pdb_path\": pdb_path,\n", + " \"forcefield_files\": forcefield_files,\n", + " \"system_params\": system_params,\n", + " \"integrator_params\": integrator_params,\n", + " \"simmulation_params\": simmulation_params}\n", + " return values\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_input = {\"pdb_path\" :\"tidy_1X5Y.pdb\", \n", + " \"forcefield_files\":[\"amber14-all.xml\", \"amber14/tip3pfb.xml\"],\n", + " \"system_params\":{\"nonbondedMethod\": \"PME\",\n", + " \"nonbondedCutoff\": 0.001*nanometers,\n", + " \"ewaldErrorTolerance\": None,\n", + " \"constraints\": None,\n", + " \"rigidWater\": False,\n", + " \"constraintTolerance\": None},\n", + " \"integrator_params\":{\"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": 300 * kelvin,\n", + " \"Friction\": 1.0 / picoseconds,\n", + " \"Timestep\": 0.004 * picoseconds,\n", + " \"Pressure\": 1.0 * bar}}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "input = SetUpandRunFunctionInput(**test_input)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# TODO finish this class.\n", + "class OpenMMSimulation:\n", + " def __init__(self, input_params: SetUpandRunFunctionInput):\n", + " self.params = input_params\n", + " error = self.params.error if hasattr(self.params, \"error\") else None\n", + " if error:\n", + " raise ValueError(error) \n", + " \n", + " self.int_params = self.params.integrator_params\n", + " self.sys_params = self.params.system_params\n", + " self.sim_params = self.params.simmulation_params\n", + " self.setup_system()\n", + " self.setup_integrator()\n", + " self.create_simulation()\n", + "\n", + " def 
setup_system(self):\n", + " print(\"Building system...\")\n", + " self.pdb = PDBFile(self.params.pdb_path)\n", + " print(\"forcefield_files\", self.sys_params)\n", + " self.forcefield = ForceField(*self.params.forcefield_files)\n", + " print(\"nonbondingCutoff\", self.sys_params.get(\"nonbondedCutoff\", None))\n", + " self.system = self._create_system(self.pdb, self.forcefield, **self.sys_params)\n", + "\n", + " if self.params.system_params.get(\"nonbondedMethod\", None) in [\n", + " CutoffPeriodic,\n", + " PME,\n", + " ]:\n", + " self.system.addForce(\n", + " MonteCarloBarostat(\n", + " self.int_params[\"Pressure\"],\n", + " self.int_params[\"Temperature\"],\n", + " self.sim_params.get(\"barostatInterval\", 25),\n", + " )\n", + " )\n", + "\n", + " def setup_integrator(self):\n", + " print(\"Setting up integrator...\")\n", + " int_params = self.params.integrator_params\n", + " integrator_type = int_params.get(\"integrator_type\", \"LangevinMiddle\")\n", + " if integrator_type == \"LangevinMiddle\":\n", + " self.integrator = LangevinMiddleIntegrator(\n", + " int_params[\"Temperature\"],\n", + " int_params[\"Friction\"],\n", + " int_params[\"Timestep\"],\n", + " )\n", + " else:\n", + " raise ValueError(\"Invalid integrator type\")\n", + "\n", + " self.integrator.setConstraintTolerance(\n", + " self.params.system_params.get(\"constraintTolerance\", 0.000001)\n", + " )\n", + "\n", + " def create_simulation(self):\n", + " print(\"Creating simulation...\")\n", + " self.simulation = Simulation(\n", + " self.pdb.topology,\n", + " self.system,\n", + " self.integrator,\n", + " Platform.getPlatformByName(\"CPU\"),\n", + " )\n", + " self.simulation.context.setPositions(self.pdb.positions)\n", + "\n", + " # Add reporters for output\n", + " self.simulation.reporters.append(\n", + " DCDReporter(\n", + " \"trajectory.dcd\",\n", + " self.params.simmulation_params[\"record_interval_steps\"],\n", + " )\n", + " )\n", + " self.simulation.reporters.append(\n", + " StateDataReporter(\n", 
+ " \"log.txt\",\n", + " self.params.simmulation_params[\"record_interval_steps\"],\n", + " step=True,\n", + " potentialEnergy=True,\n", + " temperature=True,\n", + " separator=\"\\t\",\n", + " )\n", + " )\n", + "\n", + " def _create_system(self,\n", + " pdb,\n", + " forcefield,\n", + " nonbondedMethod=\"NoCutoff\",\n", + " nonbondedCutoff=None,\n", + " ewaldErrorTolerance=None,\n", + " constraints=\"None\",\n", + " rigidWater=False,\n", + " constraintTolerance=None,\n", + " **kwargs,\n", + " ):\n", + " # Create a dictionary to hold system parameters\n", + " system_params = {\n", + " \"nonbondedMethod\": nonbondedMethod,\n", + " \"constraints\": constraints,\n", + " \"rigidWater\": rigidWater,\n", + " }\n", + "\n", + " # Set nonbondedCutoff if applicable Had to double if pre-commit\n", + " if nonbondedMethod in [\"PME\", \"CutoffNonPeriodic\", \"CutoffPeriodic\"]:\n", + " if nonbondedCutoff is not None:\n", + " system_params[\"nonbondedCutoff\"] = nonbondedCutoff\n", + "\n", + " # Set ewaldErrorTolerance if PME is used\n", + " if nonbondedMethod == \"PME\" and ewaldErrorTolerance is not None:\n", + " system_params[\"ewaldErrorTolerance\"] = ewaldErrorTolerance\n", + "\n", + " # Set constraintTolerance if constraints are used\n", + " if constraints in [\"HBonds\", \" AllBonds\"] and constraintTolerance is not None:\n", + " system_params[\"constraintTolerance\"] = constraintTolerance\n", + " elif system_params[\"rigidWater\"] and constraintTolerance is not None:\n", + " system_params[\"constraintTolerance\"] = constraintTolerance\n", + "\n", + "\n", + " # Update system_params with any additional parameters provided\n", + " system_params.update(kwargs)\n", + " system = forcefield.createSystem(pdb.topology, **system_params)\n", + " return system\n", + "\n", + " def run(self):\n", + " # Minimize and Equilibrate\n", + " print(\"Performing energy minimization...\")\n", + " try:\n", + " self.simulation.minimizeEnergy()\n", + " except OpenMMException as e:\n", + " return 
\"Minimization failed! Error was:\", e \n", + " print(\"Equilibrating...\")\n", + " _temp = self.params.integrator_params[\"Temperature\"]\n", + " self.simulation.context.setVelocitiesToTemperature(_temp)\n", + " _eq_steps = self.params.simmulation_params.get(\"equilibrationSteps\", 1000)\n", + " self.simulation.step(self.params.simmulation_params.get(_eq_steps))\n", + " # Simulate\n", + " print(\"Simulating...\")\n", + " self.simulation.currentStep = 0\n", + " self.simulation.step(self.params.simmulation_params[\"Number of Steps\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SetUpandRunFunctionInput(**test_input)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sim = OpenMMSimulation(SetUpandRunFunctionInput(**test_input))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sim.run()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jorgemedina/miniforge3/envs/MDAGENT/lib/python3.11/site-packages/MDAnalysis/topology/TPRParser.py:161: DeprecationWarning: 'xdrlib' is deprecated and slated for removal in Python 3.13\n", + " import xdrlib\n" + ] + } + ], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()\n", + "\n", + "\n", + "from mdagent import MDAgent\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "testing_simul = \"Please download 1PGB and clean it for procesing \"\n", + "test = \"Run a basic simulation of 1PGB, using the cleaned file ive already have\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mrkl = MDAgent(agent_type=\"Structured\")\n", + "#mrkl.run(testing_simul)\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thought: The user has a cleaned file of the protein with PDB ID 1PGB and wants to run a basic simulation. The first step is to set up and run the simulation using the SetUpandRunFunction tool. Since the user has not provided specific parameters for the simulation, I will use default parameters for a basic simulation.\n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_id\": \"1PGB\",\n", + " \"forcefield_files\": [\"amber14-all.xml\", \"amber14/tip3p.xml\"],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"NoCutoff\",\n", + " \"constraints\": \"None\",\n", + " \"rigidWater\": false\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```Building system...\n", + "PDB ID: 1PGB\n", + "PDB Path: Name not found in path registry.\n", + "The previous action attempted to run a simulation using the SetUpandRunFunction tool with the PDB ID 1PGB and default parameters. However, the action failed because the file was not found. 
Since the user mentioned they already have a cleaned file, it is likely that the file is saved with a different name or path that I need to retrieve before running the simulation.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"ListRegistryPaths\",\n", + " \"action_input\": {\n", + " \"paths\": \"paths\"\n", + " }\n", + "}\n", + "```\n", + "\n", + "The registry contains three paths that include the PDB ID 1PGB. Since the user has a cleaned file, it is necessary to determine which of these paths corresponds to the cleaned file they are referring to. I will use the ListDirectoryFiles tool to list the files in each directory associated with the paths found in the registry to identify the cleaned file.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"ListDirectoryFiles\",\n", + " \"action_input\": {\n", + " \"query\": \"1PGB_154902\"\n", + " }\n", + "}\n", + "```The directory for the path `1PGB_154902` does not contain a PDB file, which suggests that this is not the directory containing the cleaned file the user is referring to. I will now check the next path in the registry.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"ListDirectoryFiles\",\n", + " \"action_input\": {\n", + " \"query\": \"1PGB_154959\"\n", + " }\n", + "}\n", + "```\n", + "The directory for the path `1PGB_154959` also does not contain a PDB file, which suggests that this is not the directory containing the cleaned file the user is referring to. I will now check the last path in the registry.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"ListDirectoryFiles\",\n", + " \"action_input\": {\n", + " \"query\": \"1PGB_155636\"\n", + " }\n", + "}\n", + "```None of the directories listed under the registry paths for 1PGB contain a PDB file, which is necessary for running the simulation. It's possible that the cleaned file might be in a different directory or the file might not have been registered correctly. 
Since I have checked all the paths associated with 1PGB and none contain a PDB file, I will now attempt to retrieve the cleaned PDB file using the PDB ID provided by the user. If the file is not found in the registry, I will need to download it using the PDBFileDownloader tool.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"PDBFileDownloader\",\n", + " \"action_input\": {\n", + " \"query\": \"1PGB\"\n", + " }\n", + "}\n", + "```\n", + "PDB file found with this ID: 1PGB\n", + "The PDB file for 1PGB has been successfully downloaded and is now available with the name `1PGB_161539`. Since the user has requested a basic simulation using a cleaned file, I will now proceed to set up and run the simulation using the SetUpandRunFunction tool with the downloaded PDB file.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_id\": \"1PGB_161539\",\n", + " \"forcefield_files\": [\"amber14-all.xml\", \"amber14/tip3p.xml\"],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"NoCutoff\",\n", + " \"constraints\": \"None\",\n", + " \"rigidWater\": false\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```Building system...\n", + "PDB ID: 1PGB_161539\n", + "PDB Path: /Users/jorgemedina/Desktop/UpdatingSetupandrun/md-agent/notebooks/files/pdb/1PGB_raw_20240103_161539.pdb\n", + "The simulation setup failed because the PDB file is missing hydrogen atoms for residue 1 (MET). 
This issue can be resolved by adding the missing hydrogen atoms to the PDB file. I will use the CleaningToolFunction to add the missing hydrogen atoms to the PDB file `1PGB_161539`.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"CleaningToolFunction\",\n", + " \"action_input\": {\n", + " \"pdb_id\": \"1PGB_161539\",\n", + " \"output_path\": \"1PGB_161539_cleaned\",\n", + " \"replace_nonstandard_residues\": true,\n", + " \"add_missing_atoms\": true,\n", + " \"remove_heterogens\": true,\n", + " \"remove_water\": true,\n", + " \"add_hydrogens\": true,\n", + " \"add_hydrogens_ph\": 7\n", + " }\n", + "}\n", + "```pdbfile: /Users/jorgemedina/Desktop/UpdatingSetupandrun/md-agent/notebooks/files/pdb/1PGB_raw_20240103_161539.pdb name: 1PGB end: pdb\n", + "The cleaning process has been completed successfully, and the cleaned PDB file is now available with the name `1PGB_161642`. With the cleaned file, I can now attempt to set up and run the simulation again using the SetUpandRunFunction tool.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_id\": \"1PGB_161642\",\n", + " \"forcefield_files\": [\"amber14-all.xml\", \"amber14/tip3p.xml\"],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"NoCutoff\",\n", + " \"constraints\": \"None\",\n", + " \"rigidWater\": false\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```Building system...\n", + "PDB ID: 1PGB_161642\n", + "PDB Path: 
/Users/jorgemedina/Desktop/UpdatingSetupandrun/md-agent/notebooks/files/pdb/1PGB_Clean_20240103_161642.pdb\n", + "Setting up integrator...\n", + "Creating simulation...\n", + "simulation set!\n", + "Performing energy minimization...\n", + "Minimization complete!\n", + "Equilibrating...\n", + "The simulation setup failed again due to a NaN (Not a Number) error in particle coordinates, which indicates an issue with the initialization of the simulation. This could be due to a variety of factors, such as incorrect nonbonded interactions, constraints, or issues with the cleaned file. To address this, I will adjust the simulation parameters to use a nonbonded method that includes a cutoff and apply constraints to hydrogen bonds, which are common adjustments to stabilize the simulation. I will also ensure that water is treated rigidly, which is a common practice in molecular dynamics simulations.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_id\": \"1PGB_161642\",\n", + " \"forcefield_files\": [\"amber14-all.xml\", \"amber14/tip3p.xml\"],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"CutoffPeriodic\",\n", + " \"nonbondedCutoff\": \"1.0 * nanometers\",\n", + " \"constraints\": \"HBonds\",\n", + " \"rigidWater\": true,\n", + " \"constraintTolerance\": 0.00001\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```\n", + "Building system...\n", + "PDB ID: 1PGB_161642\n", + "PDB Path: 
/Users/jorgemedina/Desktop/UpdatingSetupandrun/md-agent/notebooks/files/pdb/1PGB_Clean_20240103_161642.pdb\n", + "Setting up integrator...\n", + "Creating simulation...\n", + "simulation set!\n", + "Performing energy minimization...\n", + "Minimization complete!\n", + "Equilibrating...\n", + "Simulating...\n", + "Done!\n", + "1!\n", + "2! NVT_1PGB_161642_20240103_161734.py\n", + "3!\n", + "Standalone simulation script written to files/simulations/NVT_1PGB_161642_20240103_161734.py\n", + "4!\n", + "Eureka!\n", + "The simulation has been successfully completed using the cleaned PDB file for 1PGB with adjusted simulation parameters. The user can now analyze the results of the simulation as needed.\n", + "\n", + "Final Answer: The basic simulation of the protein with PDB ID 1PGB has been successfully run using the cleaned file. The simulation was performed with periodic boundary conditions, constraints on hydrogen bonds, and rigid water molecules. The temperature was set to 300 K, using a Langevin integrator with a friction coefficient of 1.0 ps^-1 and a timestep of 0.004 ps. The simulation was run for 10,000 steps, and data was recorded every 100 steps, including the step number, potential energy, and temperature." + ] + }, + { + "data": { + "text/plain": [ + "'The simulation has been successfully completed using the cleaned PDB file for 1PGB with adjusted simulation parameters. The user can now analyze the results of the simulation as needed.\\n\\nFinal Answer: The basic simulation of the protein with PDB ID 1PGB has been successfully run using the cleaned file. The simulation was performed with periodic boundary conditions, constraints on hydrogen bonds, and rigid water molecules. The temperature was set to 300 K, using a Langevin integrator with a friction coefficient of 1.0 ps^-1 and a timestep of 0.004 ps. 
The simulation was run for 10,000 steps, and data was recorded every 100 steps, including the step number, potential energy, and temperature.'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mrkl.run(test)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thought: Before running an NVT simulation, I need to check if the file 1PGB.pdb is available or needs to be downloaded. If it's not available, I will download it using the PDBFileDownloader tool.\n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"ListDirectoryFiles\",\n", + " \"action_input\": {\n", + " \"query\": \"1PGB.pdb\"\n", + " }\n", + "}\n", + "```\n", + "\n", + "The file 1PGB.pdb is available in the directory. I can proceed to set up and run the NVT simulation using the SetUpandRunFunction tool.\n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_path\": \"1PGB.pdb\",\n", + " \"forcefield_files\": [\"amber14-all.xml\", \"amber14/tip3p.xml\"],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"CutoffPeriodic\",\n", + " \"nonbondedCutoff\": 1.0,\n", + " \"constraints\": \"HBonds\",\n", + " \"rigidWater\": True,\n", + " \"constraintTolerance\": 0.00001\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```Action:\n", + "```\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": 
{\n", + " \"pdb_path\": \"1PGB.pdb\",\n", + " \"forcefield_files\": [\"amber14-all.xml\", \"amber14/tip3p.xml\"],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"CutoffPeriodic\",\n", + " \"nonbondedCutoff\": 1.0,\n", + " \"constraints\": \"HBonds\",\n", + " \"rigidWater\": True,\n", + " \"constraintTolerance\": 0.00001\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\"\n", + " },\n", + " \"simulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```The previous action to set up and run the NVT simulation was not successful due to an invalid or incomplete response. I need to correct the parameters and try running the simulation again. 
I will adjust the parameters and ensure they are formatted correctly before resubmitting the action.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_path\": \"1PGB.pdb\",\n", + " \"forcefield_files\": [\"amber14-all.xml\", \"amber14/tip3pfb.xml\"],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"CutoffPeriodic\",\n", + " \"nonbondedCutoff\": \"1.0 * nanometers\",\n", + " \"constraints\": \"HBonds\",\n", + " \"rigidWater\": true,\n", + " \"constraintTolerance\": 0.00001\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"Step\", \"PotentialEnergy\", \"Temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```Using SetUpandRunFunction\n", + "{'pdb_path': '1PGB.pdb', 'forcefield_files': ['amber14-all.xml', 'amber14/tip3pfb.xml'], 'system_params': {'nonbondedMethod': 'CutoffPeriodic', 'nonbondedCutoff': '1.0 * nanometers', 'constraints': 'HBonds', 'rigidWater': True, 'constraintTolerance': 1e-05}, 'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds'}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['Step', 'PotentialEnergy', 'Temperature']}}\n", + "system_params {'nonbondedMethod': 'CutoffPeriodic', 'nonbondedCutoff': '1.0 * nanometers', 'constraints': 'HBonds', 'rigidWater': True, 'constraintTolerance': 1e-05}\n", + "integrator_params {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * 
picoseconds'}\n", + "simmulation_params {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['Step', 'PotentialEnergy', 'Temperature']}\n", + "integrator_type LangevinMiddle\n", + "this is the validator ['amber14-all.xml', 'amber14/tip3pfb.xml']\n", + "this is the validator {'pdb_path': '1PGB.pdb', 'forcefield_files': ['amber14-all.xml', 'amber14/tip3pfb.xml'], 'system_params': {'nonbondedMethod': CutoffPeriodic, 'nonbondedCutoff': Quantity(value=1.0, unit=nanometer), 'constraints': HBonds, 'rigidWater': True, 'constraintTolerance': 1e-05}, 'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': Quantity(value=300.0, unit=kelvin), 'Friction': Quantity(value=1, unit=/picosecond), 'Timestep': Quantity(value=0.004, unit=picosecond)}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['Step', 'PotentialEnergy', 'Temperature']}}\n", + "Building system...\n", + "forcefield_files {'nonbondedMethod': CutoffPeriodic, 'nonbondedCutoff': Quantity(value=1.0, unit=nanometer), 'constraints': HBonds, 'rigidWater': True, 'constraintTolerance': 1e-05}\n", + "nonbondingCutoff 1.0 nm\n", + "nonbondedMethod CutoffPeriodic\n", + "constraints HBonds\n", + "rigidWater True\n", + "The simulation setup failed because the PDB file 1PGB.pdb is missing hydrogen atoms for residue 1 (MET). To resolve this, I will use the CleaningToolFunction to add the missing hydrogen atoms to the PDB file.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"CleaningToolFunction\",\n", + " \"action_input\": {\n", + " \"input_args\": {\n", + " \"input_path\": \"1PGB.pdb\",\n", + " \"operations\": {\n", + " \"add_hydrogens\": true\n", + " }\n", + " }\n", + " }\n", + "}\n", + "```The previous action failed because the input to the CleaningToolFunction was not correctly formatted. I need to provide the input as a dictionary with the key 'pdb_path'. 
I will correct the input and try again.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"CleaningToolFunction\",\n", + " \"action_input\": {\n", + " \"input_args\": {\n", + " \"pdb_path\": \"1PGB.pdb\",\n", + " \"operations\": {\n", + " \"add_hydrogens\": true\n", + " }\n", + " }\n", + " }\n", + "}\n", + "```The file 1PGB.pdb was not found by the CleaningToolFunction, which is unexpected because the file was previously listed in the directory. I will check the list of paths saved in memory to ensure the correct path is used for the PDB file.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"ListRegistryPaths\",\n", + " \"action_input\": {\n", + " \"paths\": \"paths\"\n", + " }\n", + "}\n", + "```The file 1PGB.pdb is indeed registered in the system. I will now attempt to add hydrogens to the PDB file again using the correct registered name.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"CleaningToolFunction\",\n", + " \"action_input\": {\n", + " \"input_args\": {\n", + " \"pdb_path\": \"1PGB.pdb\",\n", + " \"operations\": {\n", + " \"add_hydrogens\": true\n", + " }\n", + " }\n", + " }\n", + "}\n", + "```The file 1PGB.pdb is still not being found by the CleaningToolFunction despite being listed in the registry. It's possible that the file path might be incorrect or there is an issue with the tool accessing the file. 
To resolve this, I will map the file name to its path again to ensure that the correct path is being used.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"MapPath2Name\",\n", + " \"action_input\": {\n", + " \"file_and_path\": \"1PGB.pdb, 1PGB.pdb\"\n", + " }\n", + "}\n", + "```Now that the path has been successfully mapped to the name 1PGB.pdb, I will attempt to add hydrogens to the PDB file again using the CleaningToolFunction.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"CleaningToolFunction\",\n", + " \"action_input\": {\n", + " \"input_args\": {\n", + " \"pdb_path\": \"1PGB.pdb\",\n", + " \"operations\": {\n", + " \"add_hydrogens\": true\n", + " }\n", + " }\n", + " }\n", + "}\n", + "```The file 1PGB.pdb is still not being found by the CleaningToolFunction despite the path being successfully mapped. There may be an issue with the tool or the file system. To troubleshoot this, I will list the files in the current directory again to confirm the presence of the file.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"ListDirectoryFiles\",\n", + " \"action_input\": {\n", + " \"query\": \"1PGB.pdb\"\n", + " }\n", + "}\n", + "```The file 1PGB.pdb is confirmed to be present in the directory. 
Since the CleaningToolFunction is not recognizing the file, I will try using the registered name \"1PGB_clean.pdb\" which might be the cleaned version of the PDB file and should be suitable for simulation.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_path\": \"1PGB_clean.pdb\",\n", + " \"forcefield_files\": [\"amber14-all.xml\", \"amber14/tip3pfb.xml\"],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"CutoffPeriodic\",\n", + " \"nonbondedCutoff\": \"1.0 * nanometers\",\n", + " \"constraints\": \"HBonds\",\n", + " \"rigidWater\": true,\n", + " \"constraintTolerance\": 0.00001\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"Step\", \"PotentialEnergy\", \"Temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```Using SetUpandRunFunction\n", + "{'pdb_path': '1PGB_clean.pdb', 'forcefield_files': ['amber14-all.xml', 'amber14/tip3pfb.xml'], 'system_params': {'nonbondedMethod': 'CutoffPeriodic', 'nonbondedCutoff': '1.0 * nanometers', 'constraints': 'HBonds', 'rigidWater': True, 'constraintTolerance': 1e-05}, 'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds'}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['Step', 'PotentialEnergy', 'Temperature']}}\n", + "system_params {'nonbondedMethod': 'CutoffPeriodic', 'nonbondedCutoff': '1.0 * nanometers', 'constraints': 'HBonds', 'rigidWater': True, 'constraintTolerance': 1e-05}\n", + "integrator_params 
{'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds'}\n", + "simmulation_params {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['Step', 'PotentialEnergy', 'Temperature']}\n", + "integrator_type LangevinMiddle\n", + "this is the validator ['amber14-all.xml', 'amber14/tip3pfb.xml']\n", + "this is the validator {'pdb_path': '1PGB_clean.pdb', 'forcefield_files': ['amber14-all.xml', 'amber14/tip3pfb.xml'], 'system_params': {'nonbondedMethod': CutoffPeriodic, 'nonbondedCutoff': Quantity(value=1.0, unit=nanometer), 'constraints': HBonds, 'rigidWater': True, 'constraintTolerance': 1e-05}, 'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': Quantity(value=300.0, unit=kelvin), 'Friction': Quantity(value=1, unit=/picosecond), 'Timestep': Quantity(value=0.004, unit=picosecond)}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['Step', 'PotentialEnergy', 'Temperature']}}\n", + "Building system...\n", + "forcefield_files {'nonbondedMethod': CutoffPeriodic, 'nonbondedCutoff': Quantity(value=1.0, unit=nanometer), 'constraints': HBonds, 'rigidWater': True, 'constraintTolerance': 1e-05}\n", + "nonbondingCutoff 1.0 nm\n", + "nonbondedMethod CutoffPeriodic\n", + "constraints HBonds\n", + "rigidWater True\n", + "Setting up integrator...\n", + "integrator_type LangevinMiddle\n", + "Creating simulation...\n", + "simulation set!\n", + "running simulation in the tool\n", + "Performing energy minimization...\n", + "Minimization complete!\n", + "Equilibrating...\n", + "Simulating...\n", + "Done!\n", + "Standalone simulation script written to reproduce_simulation.py\n", + "The NVT simulation for the protein with PDB ID 1PGB has been successfully completed." 
+ ] + }, + { + "data": { + "text/plain": [ + "'Thought: Thought: Before running an NVT simulation, I need to check if the file 1PGB.pdb is available or needs to be downloaded. If it\\'s not available, I will download it using the PDBFileDownloader tool.\\n\\nAction:\\n```\\n{\\n \"action\": \"ListDirectoryFiles\",\\n \"action_input\": {\\n \"query\": \"1PGB.pdb\"\\n }\\n}\\n```\\n\\n\\nObservation: .DS_Store, trajectory.dcd, reproduce_simulation.py, 1PGB_clean.pdb, 1PGB.pdb, log.txt, 1PGB-processed.pdb, paths_registry.json, checkpoint.chk, testing_changes.ipynb, cleaned_1X5Y.pdb, 1X5Y.pdb\\nThought: Could not parse LLM output: The file 1PGB.pdb is available in the directory. I can proceed to set up and run the NVT simulation using the SetUpandRunFunction tool.\\n\\nAction:\\n```\\n{\\n \"action\": \"SetUpandRunFunction\",\\n \"action_input\": {\\n \"pdb_path\": \"1PGB.pdb\",\\n \"forcefield_files\": [\"amber14-all.xml\", \"amber14/tip3p.xml\"],\\n \"system_params\": {\\n \"nonbondedMethod\": \"CutoffPeriodic\",\\n \"nonbondedCutoff\": 1.0,\\n \"constraints\": \"HBonds\",\\n \"rigidWater\": True,\\n \"constraintTolerance\": 0.00001\\n },\\n \"integrator_params\": {\\n \"integrator_type\": \"LangevinMiddle\",\\n \"Temperature\": \"300 * kelvin\",\\n \"Friction\": \"1.0 / picoseconds\",\\n \"Timestep\": \"0.004 * picoseconds\"\\n },\\n \"simmulation_params\": {\\n \"Ensemble\": \"NVT\",\\n \"Number of Steps\": 10000,\\n \"record_interval_steps\": 100,\\n \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\\n }\\n }\\n}\\n```\\nObservation: Invalid or incomplete response\\nThought: The previous action to set up and run the NVT simulation was not successful due to an invalid or incomplete response. I need to correct the parameters and try running the simulation again. 
I will adjust the parameters and ensure they are formatted correctly before resubmitting the action.\\n\\nAction:\\n```json\\n{\\n \"action\": \"SetUpandRunFunction\",\\n \"action_input\": {\\n \"pdb_path\": \"1PGB.pdb\",\\n \"forcefield_files\": [\"amber14-all.xml\", \"amber14/tip3pfb.xml\"],\\n \"system_params\": {\\n \"nonbondedMethod\": \"CutoffPeriodic\",\\n \"nonbondedCutoff\": \"1.0 * nanometers\",\\n \"constraints\": \"HBonds\",\\n \"rigidWater\": true,\\n \"constraintTolerance\": 0.00001\\n },\\n \"integrator_params\": {\\n \"integrator_type\": \"LangevinMiddle\",\\n \"Temperature\": \"300 * kelvin\",\\n \"Friction\": \"1.0 / picoseconds\",\\n \"Timestep\": \"0.004 * picoseconds\"\\n },\\n \"simmulation_params\": {\\n \"Ensemble\": \"NVT\",\\n \"Number of Steps\": 10000,\\n \"record_interval_steps\": 100,\\n \"record_params\": [\"Step\", \"PotentialEnergy\", \"Temperature\"]\\n }\\n }\\n}\\n```\\nObservation: No template found for residue 1 (MET). The set of atoms is similar to MET, but it is missing 9 hydrogen atoms. For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#templateThis were the inputs {\\'pdb_path\\': \\'1PGB.pdb\\', \\'forcefield_files\\': [\\'amber14-all.xml\\', \\'amber14/tip3pfb.xml\\'], \\'system_params\\': {\\'nonbondedMethod\\': \\'CutoffPeriodic\\', \\'nonbondedCutoff\\': \\'1.0 * nanometers\\', \\'constraints\\': \\'HBonds\\', \\'rigidWater\\': True, \\'constraintTolerance\\': 1e-05}, \\'integrator_params\\': {\\'integrator_type\\': \\'LangevinMiddle\\', \\'Temperature\\': \\'300 * kelvin\\', \\'Friction\\': \\'1.0 / picoseconds\\', \\'Timestep\\': \\'0.004 * picoseconds\\'}, \\'simmulation_params\\': {\\'Ensemble\\': \\'NVT\\', \\'Number of Steps\\': 10000, \\'record_interval_steps\\': 100, \\'record_params\\': [\\'Step\\', \\'PotentialEnergy\\', \\'Temperature\\']}}\\nThought: The simulation setup failed because the PDB file 1PGB.pdb is missing hydrogen atoms for residue 1 (MET). 
To resolve this, I will use the CleaningToolFunction to add the missing hydrogen atoms to the PDB file.\\n\\nAction:\\n```json\\n{\\n \"action\": \"CleaningToolFunction\",\\n \"action_input\": {\\n \"input_args\": {\\n \"input_path\": \"1PGB.pdb\",\\n \"operations\": {\\n \"add_hydrogens\": true\\n }\\n }\\n }\\n}\\n```\\nObservation: No file path provided.\\n The input has to be a dictionary with the key \\'pdb_path\\'\\nThought: The previous action failed because the input to the CleaningToolFunction was not correctly formatted. I need to provide the input as a dictionary with the key \\'pdb_path\\'. I will correct the input and try again.\\n\\nAction:\\n```json\\n{\\n \"action\": \"CleaningToolFunction\",\\n \"action_input\": {\\n \"input_args\": {\\n \"pdb_path\": \"1PGB.pdb\",\\n \"operations\": {\\n \"add_hydrogens\": true\\n }\\n }\\n }\\n}\\n```\\nObservation: Check your file path. File not found.\\nThought: The file 1PGB.pdb was not found by the CleaningToolFunction, which is unexpected because the file was previously listed in the directory. I will check the list of paths saved in memory to ensure the correct path is used for the PDB file.\\n\\nAction:\\n```json\\n{\\n \"action\": \"ListRegistryPaths\",\\n \"action_input\": {\\n \"paths\": \"paths\"\\n }\\n}\\n```\\nObservation: Names found in registry: 1PGB.pdb, 1PGB_clean.pdb, modified_simul.py, 1X5Y.pdb, cleaned_1X5Y.pdb\\nThought: The file 1PGB.pdb is indeed registered in the system. I will now attempt to add hydrogens to the PDB file again using the correct registered name.\\n\\nAction:\\n```json\\n{\\n \"action\": \"CleaningToolFunction\",\\n \"action_input\": {\\n \"input_args\": {\\n \"pdb_path\": \"1PGB.pdb\",\\n \"operations\": {\\n \"add_hydrogens\": true\\n }\\n }\\n }\\n}\\n```\\nObservation: Check your file path. File not found.\\nThought: The file 1PGB.pdb is still not being found by the CleaningToolFunction despite being listed in the registry. 
It\\'s possible that the file path might be incorrect or there is an issue with the tool accessing the file. To resolve this, I will map the file name to its path again to ensure that the correct path is being used.\\n\\nAction:\\n```json\\n{\\n \"action\": \"MapPath2Name\",\\n \"action_input\": {\\n \"file_and_path\": \"1PGB.pdb, 1PGB.pdb\"\\n }\\n}\\n```\\nObservation: Path successfully mapped to name: 1PGB.pdb\\nThought: Now that the path has been successfully mapped to the name 1PGB.pdb, I will attempt to add hydrogens to the PDB file again using the CleaningToolFunction.\\n\\nAction:\\n```json\\n{\\n \"action\": \"CleaningToolFunction\",\\n \"action_input\": {\\n \"input_args\": {\\n \"pdb_path\": \"1PGB.pdb\",\\n \"operations\": {\\n \"add_hydrogens\": true\\n }\\n }\\n }\\n}\\n```\\nObservation: Check your file path. File not found.\\nThought: The file 1PGB.pdb is still not being found by the CleaningToolFunction despite the path being successfully mapped. There may be an issue with the tool or the file system. To troubleshoot this, I will list the files in the current directory again to confirm the presence of the file.\\n\\nAction:\\n```json\\n{\\n \"action\": \"ListDirectoryFiles\",\\n \"action_input\": {\\n \"query\": \"1PGB.pdb\"\\n }\\n}\\n```\\nObservation: .DS_Store, trajectory.dcd, reproduce_simulation.py, 1PGB_clean.pdb, 1PGB.pdb, log.txt, 1PGB-processed.pdb, paths_registry.json, checkpoint.chk, testing_changes.ipynb, cleaned_1X5Y.pdb, 1X5Y.pdb\\nThought: The file 1PGB.pdb is confirmed to be present in the directory. 
Since the CleaningToolFunction is not recognizing the file, I will try using the registered name \"1PGB_clean.pdb\" which might be the cleaned version of the PDB file and should be suitable for simulation.\\n\\nAction:\\n```json\\n{\\n \"action\": \"SetUpandRunFunction\",\\n \"action_input\": {\\n \"pdb_path\": \"1PGB_clean.pdb\",\\n \"forcefield_files\": [\"amber14-all.xml\", \"amber14/tip3pfb.xml\"],\\n \"system_params\": {\\n \"nonbondedMethod\": \"CutoffPeriodic\",\\n \"nonbondedCutoff\": \"1.0 * nanometers\",\\n \"constraints\": \"HBonds\",\\n \"rigidWater\": true,\\n \"constraintTolerance\": 0.00001\\n },\\n \"integrator_params\": {\\n \"integrator_type\": \"LangevinMiddle\",\\n \"Temperature\": \"300 * kelvin\",\\n \"Friction\": \"1.0 / picoseconds\",\\n \"Timestep\": \"0.004 * picoseconds\"\\n },\\n \"simmulation_params\": {\\n \"Ensemble\": \"NVT\",\\n \"Number of Steps\": 10000,\\n \"record_interval_steps\": 100,\\n \"record_params\": [\"Step\", \"PotentialEnergy\", \"Temperature\"]\\n }\\n }\\n}\\n```\\nObservation: Simulation done!\\nFinal Answer: The NVT simulation for the protein with PDB ID 1PGB has been successfully completed.'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mrkl = MDAgent(type=\"mrkl\")\n", + "mrkl.run(testing_simul)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "func = MDAgent(type=\"functions\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `ListDirectoryFiles` with `.`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3m.DS_Store, trajectory.dcd, reproduce_simulation.py, 1PGB_clean.pdb, 1PGB.pdb, log.txt, 1PGB-processed.pdb, paths_registry.json, checkpoint.chk, 
testing_changes.ipynb, cleaned_1X5Y.pdb, 1X5Y.pdb\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "Invoking: `MapPath2Name` with `1PGB,1PGB.pdb`\n", + "\n", + "\n", + "\u001b[0m\u001b[38;5;200m\u001b[1;3mPath successfully mapped to name: 1PGB\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "Invoking: `CleaningToolFunction` with `{'pdb_path': '1PGB.pdb', 'output_path': '1PGB_clean.pdb', 'replace_nonstandard_residues': True, 'add_missing_atoms': True, 'remove_heterogens': True, 'remove_water': True, 'add_hydrogens': True, 'add_hydrogens_ph': 7}`\n", + "\n", + "\n", + "\u001b[0m{'pdb_path': '1PGB.pdb', 'output_path': '1PGB_clean.pdb', 'replace_nonstandard_residues': True, 'add_missing_atoms': True, 'remove_heterogens': True, 'remove_water': True, 'add_hydrogens': True, 'add_hydrogens_ph': 7}\n", + "\u001b[38;5;200m\u001b[1;3mCleaned File: Removed Heterogens, and Water Removed. Added Hydrogens at pH 7. Missing Atoms Added and replaces nonstandard residues. written to 1PGB_clean.pdb\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "Invoking: `MapPath2Name` with `1PGB_clean,1PGB_clean.pdb`\n", + "\n", + "\n", + "\u001b[0m\u001b[38;5;200m\u001b[1;3mPath successfully mapped to name: 1PGB_clean\u001b[0mFinal Answer: The PDB file 1PGB.pdb has been cleaned, resulting in the creation of a new file named 1PGB_clean.pdb. The cleaning process involved removing heterogens and water, adding hydrogens at pH 7, adding any missing atoms, and replacing nonstandard residues. The cleaned file is now mapped with the short name \"1PGB_clean\" for future reference.\u001b[32;1m\u001b[1;3mFinal Answer: The PDB file 1PGB.pdb has been cleaned, resulting in the creation of a new file named 1PGB_clean.pdb. The cleaning process involved removing heterogens and water, adding hydrogens at pH 7, adding any missing atoms, and replacing nonstandard residues. 
The cleaned file is now mapped with the short name \"1PGB_clean\" for future reference.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\nInvoking: `ListDirectoryFiles` with `.`\\n\\n\\n\\nObservation: .DS_Store, trajectory.dcd, reproduce_simulation.py, 1PGB_clean.pdb, 1PGB.pdb, log.txt, 1PGB-processed.pdb, paths_registry.json, checkpoint.chk, testing_changes.ipynb, cleaned_1X5Y.pdb, 1X5Y.pdb\\n\\nInvoking: `MapPath2Name` with `1PGB,1PGB.pdb`\\n\\n\\n\\nObservation: Path successfully mapped to name: 1PGB\\n\\nInvoking: `CleaningToolFunction` with `{\\'pdb_path\\': \\'1PGB.pdb\\', \\'output_path\\': \\'1PGB_clean.pdb\\', \\'replace_nonstandard_residues\\': True, \\'add_missing_atoms\\': True, \\'remove_heterogens\\': True, \\'remove_water\\': True, \\'add_hydrogens\\': True, \\'add_hydrogens_ph\\': 7}`\\n\\n\\n\\nObservation: Cleaned File: Removed Heterogens, and Water Removed. Added Hydrogens at pH 7. Missing Atoms Added and replaces nonstandard residues. written to 1PGB_clean.pdb\\n\\nInvoking: `MapPath2Name` with `1PGB_clean,1PGB_clean.pdb`\\n\\n\\n\\nObservation: Path successfully mapped to name: 1PGB_clean\\nFinal Answer: Final Answer: The PDB file 1PGB.pdb has been cleaned, resulting in the creation of a new file named 1PGB_clean.pdb. The cleaning process involved removing heterogens and water, adding hydrogens at pH 7, adding any missing atoms, and replacing nonstandard residues. The cleaned file is now mapped with the short name \"1PGB_clean\" for future reference.'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "testing_simul = \"Please do a basic cleaning of the PDB file 1PGB.pdb\"\n", + "func.run(testing_simul)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "testing_simul = \"I made a script called reproduce_simulation.py. 
Please increase the temperature of the simulation by 10 degrees.\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thought: The assistant needs to modify the script to increase the temperature of the simulation by 10 degrees. The ModifyScriptTool can be used for this purpose. The assistant needs to know the current temperature in the script to increase it by 10 degrees. The assistant should first use the ListDirectoryFiles tool to confirm the existence of the script in the current directory. \n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"ListDirectoryFiles\",\n", + " \"action_input\": {\n", + " \"query\": \"\"\n", + " }\n", + "}\n", + "```The file \"reproduce_simulation.py\" exists in the current directory. Now, I need to read the script and find the current temperature setting. I can use the Python_REPL tool to open the file and read its contents. \n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"Python_REPL\",\n", + " \"action_input\": {\n", + " \"query\": \"with open('reproduce_simulation.py', 'r') as file: print(file.read())\"\n", + " }\n", + "}\n", + "```\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Python REPL can execute arbitrary code. Use with caution.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The current temperature in the script is set to 300.0 Kelvin. The assistant needs to increase this temperature by 10 degrees. The ModifyScriptTool can be used to make this change. \n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"ModifyScriptTool\",\n", + " \"action_input\": {\n", + " \"base_script\": \"reproduce_simulation.py\",\n", + " \"user_requirement\": \"increase the temperature of the simulation by 10 degrees\"\n", + " }\n", + "}\n", + "```It seems I made a mistake in the input for the ModifyScriptTool. 
The correct keys for the input are 'query' and 'script'. I will correct this and try again.\n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"ModifyScriptTool\",\n", + " \"action_input\": {\n", + " \"query\": \"increase the temperature of the simulation by 10 degrees\",\n", + " \"script\": \"reproduce_simulation.py\"\n", + " }\n", + "}\n", + "```The script has been successfully modified to increase the temperature of the simulation by 10 degrees. I can now provide the final answer to the user.\n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"Final Answer\",\n", + " \"action_input\": \"The script 'reproduce_simulation.py' has been successfully modified to increase the temperature of the simulation by 10 degrees.\"\n", + "}\n", + "```" + ] + }, + { + "data": { + "text/plain": [ + "'Thought: Thought: The assistant needs to modify the script to increase the temperature of the simulation by 10 degrees. The ModifyScriptTool can be used for this purpose. The assistant needs to know the current temperature in the script to increase it by 10 degrees. The assistant should first use the ListDirectoryFiles tool to confirm the existence of the script in the current directory. \\n\\nAction:\\n```\\n{\\n \"action\": \"ListDirectoryFiles\",\\n \"action_input\": {\\n \"query\": \"\"\\n }\\n}\\n```\\nObservation: getpdb_example.ipynb, gitissues.ipynb, .DS_Store, math_example.ipynb, trajectory.dcd, clean_example.ipynb, reproduce_simulation.py, 1PGB_clean.pdb, 1PGB.pdb, log.txt, 1PGB-processed.pdb, modified_simul2.py, paths_registry.json, checkpoint.chk, testing_changes.ipynb, pqa_example.ipynb, set_and_run_example.ipynb, Name2Simulation.ipynb, packmol_example.ipynb, 1X5Y.pdb\\nThought: The file \"reproduce_simulation.py\" exists in the current directory. Now, I need to read the script and find the current temperature setting. I can use the Python_REPL tool to open the file and read its contents. 
\\n\\nAction:\\n```\\n{\\n \"action\": \"Python_REPL\",\\n \"action_input\": {\\n \"query\": \"with open(\\'reproduce_simulation.py\\', \\'r\\') as file: print(file.read())\"\\n }\\n}\\n```\\n\\nObservation: # This script was generated by MDagent-Setup.\\n\\nfrom openmm import *\\nfrom openmm.app import *\\nfrom openmm.unit import *\\n\\n# Input Files\\npdb = PDBFile(\\'1PGB_clean.pdb\\')\\nforcefield = ForceField(\\'amber14-all.xml\\', \\'amber14/tip3pfb.xml\\')\\n\\n# System Configuration\\nnonbondedMethod = CutoffNonPeriodic\\nconstraints = HBonds\\nrigidWater = True\\nconstraintTolerance = 1e-05\\nnonbondedCutoff = 1.0*nanometer\\n\\n# Integration Options\\ndt = 0.004*picosecond\\ntemperature = 300.0*kelvin\\nfriction = 1/picosecond\\n\\n# Simulation Options\\nsteps = 10000\\nequilibrationSteps = 1000\\nplatform = Platform.getPlatformByName(\\'CPU\\')\\ndcdReporter = DCDReporter(\\'trajectory.dcd\\', 10000)\\ndataReporter = StateDataReporter(\\'log.txt\\', 100,\\ntotalSteps=steps,\\nstep=True, speed=True, progress=True, elapsedTime=True, remainingTime=True,\\npotentialEnergy=True, temperature=True, volume=True, density=True,\\nseparator=\\'\\t\\')\\ncheckpointReporter = CheckpointReporter(\\'checkpoint.chk\\', 10000)\\n\\n# Minimize and Equilibrate\\n# ... 
code for minimization and equilibration ...\\n\\n# Simulate\\n\\nprint(\\'Building system...\\')\\ntopology = pdb.topology\\npositions = pdb.positions\\n\\nsystem = forcefield.createSystem(topology,\\nnonbondedMethod=nonbondedMethod,\\nnonbondedCutoff=nonbondedCutoff, constraints=constraints,\\nrigidWater=rigidWater)\\n\\nintegrator = LangevinMiddleIntegrator(temperature, friction, dt)\\nintegrator.setConstraintTolerance(constraintTolerance)\\nsimulation = Simulation(topology, system, integrator, platform)\\nsimulation.context.setPositions(positions)\\n\\n# Minimize and Equilibrate\\n\\nprint(\\'Performing energy minimization...\\')\\nsimulation.minimizeEnergy()\\nprint(\\'Equilibrating...\\')\\nsimulation.context.setVelocitiesToTemperature(temperature)\\nsimulation.step(equilibrationSteps)\\n\\n# Simulate\\n\\nprint(\\'Simulating...\\')\\nsimulation.reporters.append(dcdReporter)\\nsimulation.reporters.append(dataReporter)\\nsimulation.reporters.append(checkpointReporter)\\nsimulation.currentStep = 0\\nsimulation.step(steps)\\n\\nThought: The current temperature in the script is set to 300.0 Kelvin. The assistant needs to increase this temperature by 10 degrees. The ModifyScriptTool can be used to make this change. \\n\\nAction:\\n```\\n{\\n \"action\": \"ModifyScriptTool\",\\n \"action_input\": {\\n \"base_script\": \"reproduce_simulation.py\",\\n \"user_requirement\": \"increase the temperature of the simulation by 10 degrees\"\\n }\\n}\\n```\\nObservation: No script provided. The keys for the input are:\\n \\'query\\' and \\'script\\'\\nThought: It seems I made a mistake in the input for the ModifyScriptTool. The correct keys for the input are \\'query\\' and \\'script\\'. 
I will correct this and try again.\\n\\nAction:\\n```\\n{\\n \"action\": \"ModifyScriptTool\",\\n \"action_input\": {\\n \"query\": \"increase the temperature of the simulation by 10 degrees\",\\n \"script\": \"reproduce_simulation.py\"\\n }\\n}\\n```\\nObservation: Script modified successfully\\nFinal Answer: The script \\'reproduce_simulation.py\\' has been successfully modified to increase the temperature of the simulation by 10 degrees.'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mrkl = MDAgent(type=\"mrkl\")\n", + "mrkl.run(testing_simul)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thought: To run a simulation of fibronectin, we first need to download the PDB file for fibronectin. After that, we can use the SetUpandRunFunction tool to run the simulation. Finally, we can calculate the average surface area of the protein using Python_REPL tool. Let's start by downloading the PDB file for fibronectin.\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"PDBFileDownloader\",\n", + " \"action_input\": {\n", + " \"query\": \"fibronectin\"\n", + " }\n", + "}\n", + "```PDB file found with this ID: 1X5Y\n", + "1X5Y.pdb is created.\n", + "Now that we have the PDB file for fibronectin, we can proceed to run a simulation. For this, we will use the SetUpandRunFunction tool. We will use the default parameters for the simulation. The PDB file path is required as an input for this tool. 
Let's run the simulation.\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_path\": \"1X5Y.pdb\",\n", + " \"forcefield_files\": [],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"NoCutoff\",\n", + " \"nonbondedCutoff\": \"1 * nanometers\",\n", + " \"ewaldErrorTolerance\": null,\n", + " \"constraints\": \"None\",\n", + " \"rigidWater\": false,\n", + " \"constraintTolerance\": null\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\",\n", + " \"Pressure\": \"1.0 * bar\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```\n", + "Using SetUpandRunFunction\n", + "{'pdb_path': '1X5Y.pdb', 'forcefield_files': [], 'system_params': {'nonbondedMethod': 'NoCutoff', 'nonbondedCutoff': '1 * nanometers', 'ewaldErrorTolerance': None, 'constraints': 'None', 'rigidWater': False, 'constraintTolerance': None}, 'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds', 'Pressure': '1.0 * bar'}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}}\n", + "system_params {'nonbondedMethod': 'NoCutoff', 'nonbondedCutoff': '1 * nanometers', 'ewaldErrorTolerance': None, 'constraints': 'None', 'rigidWater': False, 'constraintTolerance': None}\n", + "integrator_params {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds', 'Pressure': '1.0 * 
bar'}\n", + "simmulation_params {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}\n", + "integrator_type LangevinMiddle\n", + "Setting default forcefields v2\n", + "this is the validator error message Invalid ewaldErrorTolerance: float() argument must be a string or a real number, not 'NoneType'.\n", + " If you are using null or None, just dont include\n", + " as part of the parameters.\n", + "Invalid constraintTolerance. float() argument must be a string or a real number, not 'NoneType'. If\n", + " constraintTolerance is null or None,\n", + " just dont include as part of the parameters.\n", + "\n", + "It seems that the simulation failed because of invalid parameters. The error messages suggest that the parameters \"ewaldErrorTolerance\" and \"constraintTolerance\" should not be included if they are null or None. Let's correct this and try again.\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_path\": \"1X5Y.pdb\",\n", + " \"forcefield_files\": [],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"NoCutoff\",\n", + " \"nonbondedCutoff\": \"1 * nanometers\",\n", + " \"constraints\": \"None\",\n", + " \"rigidWater\": false\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\",\n", + " \"Pressure\": \"1.0 * bar\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```Using SetUpandRunFunction\n", + "{'pdb_path': '1X5Y.pdb', 'forcefield_files': [], 'system_params': {'nonbondedMethod': 'NoCutoff', 'nonbondedCutoff': '1 * 
nanometers', 'constraints': 'None', 'rigidWater': False}, 'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds', 'Pressure': '1.0 * bar'}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}}\n", + "system_params {'nonbondedMethod': 'NoCutoff', 'nonbondedCutoff': '1 * nanometers', 'constraints': 'None', 'rigidWater': False}\n", + "integrator_params {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds', 'Pressure': '1.0 * bar'}\n", + "simmulation_params {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}\n", + "integrator_type LangevinMiddle\n", + "Setting default forcefields v2\n", + "this is the validator {'pdb_path': '1X5Y.pdb', 'forcefield_files': ['amber14-all.xml', 'amber14/tip3pfb.xml'], 'system_params': {'nonbondedMethod': NoCutoff, 'nonbondedCutoff': Quantity(value=1.0, unit=nanometer), 'constraints': None, 'rigidWater': False}, 'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': Quantity(value=300.0, unit=kelvin), 'Friction': Quantity(value=1, unit=/picosecond), 'Timestep': Quantity(value=0.004, unit=picosecond), 'Pressure': Quantity(value=1.0, unit=bar)}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}}\n", + "Building system...\n", + "forcefield_files {'nonbondedMethod': NoCutoff, 'nonbondedCutoff': Quantity(value=1.0, unit=nanometer), 'constraints': None, 'rigidWater': False}\n", + "nonbondingCutoff 1.0 nm\n", + "nonbondedMethod NoCutoff\n", + "constraints None\n", + "rigidWater False\n", + "The simulation failed because no template was found for 
residue 1 (GLY). This could be due to the chain missing a terminal group. To resolve this, we can use the CleaningToolFunction to perform various cleaning operations on the PDB file, such as adding missing atoms and hydrogens, replacing nonstandard residues, and/or removing water. Let's clean the PDB file and try running the simulation again.\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"CleaningToolFunction\",\n", + " \"action_input\": {\n", + " \"input_args\": {\n", + " \"pdb_path\": \"1X5Y.pdb\",\n", + " \"output_path\": \"cleaned_1X5Y.pdb\",\n", + " \"remove_heterogens\": false,\n", + " \"add_hydrogens\": true,\n", + " \"replace_nonstandard_residues\": true,\n", + " \"remove_water\": false\n", + " }\n", + " }\n", + "}\n", + "```Now that we have cleaned the PDB file, let's try running the simulation again using the cleaned PDB file.\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_path\": \"cleaned_1X5Y.pdb\",\n", + " \"forcefield_files\": [],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"NoCutoff\",\n", + " \"nonbondedCutoff\": \"1 * nanometers\",\n", + " \"constraints\": \"None\",\n", + " \"rigidWater\": false\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\",\n", + " \"Pressure\": \"1.0 * bar\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```Using SetUpandRunFunction\n", + "{'pdb_path': 'cleaned_1X5Y.pdb', 'forcefield_files': [], 'system_params': {'nonbondedMethod': 'NoCutoff', 'nonbondedCutoff': '1 * nanometers', 'constraints': 'None', 'rigidWater': False}, 
'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds', 'Pressure': '1.0 * bar'}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}}\n", + "system_params {'nonbondedMethod': 'NoCutoff', 'nonbondedCutoff': '1 * nanometers', 'constraints': 'None', 'rigidWater': False}\n", + "integrator_params {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds', 'Pressure': '1.0 * bar'}\n", + "simmulation_params {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}\n", + "integrator_type LangevinMiddle\n", + "Setting default forcefields v2\n", + "this is the validator {'pdb_path': 'cleaned_1X5Y.pdb', 'forcefield_files': ['amber14-all.xml', 'amber14/tip3pfb.xml'], 'system_params': {'nonbondedMethod': NoCutoff, 'nonbondedCutoff': Quantity(value=1.0, unit=nanometer), 'constraints': None, 'rigidWater': False}, 'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': Quantity(value=300.0, unit=kelvin), 'Friction': Quantity(value=1, unit=/picosecond), 'Timestep': Quantity(value=0.004, unit=picosecond), 'Pressure': Quantity(value=1.0, unit=bar)}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}}\n", + "Building system...\n", + "forcefield_files {'nonbondedMethod': NoCutoff, 'nonbondedCutoff': Quantity(value=1.0, unit=nanometer), 'constraints': None, 'rigidWater': False}\n", + "nonbondingCutoff 1.0 nm\n", + "nonbondedMethod NoCutoff\n", + "constraints None\n", + "rigidWater False\n", + "Setting up integrator...\n", + "integrator_type LangevinMiddle\n", + "Creating simulation...\n", + "simulation 
set!\n", + "running simulation in the tool\n", + "Performing energy minimization...\n", + "Minimization complete!\n", + "Equilibrating...\n", + "The simulation failed again due to a NaN error in the particle coordinates. This could be due to a problem with the initial configuration of the system. We can try to resolve this by changing the parameters of the simulation, such as the nonbondedMethod and constraints. Let's try running the simulation again with different parameters.\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_path\": \"cleaned_1X5Y.pdb\",\n", + " \"forcefield_files\": [],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"CutoffNonPeriodic\",\n", + " \"nonbondedCutoff\": \"1 * nanometers\",\n", + " \"constraints\": \"HBonds\",\n", + " \"rigidWater\": true\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\",\n", + " \"Pressure\": \"1.0 * bar\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```Using SetUpandRunFunction\n", + "{'pdb_path': 'cleaned_1X5Y.pdb', 'forcefield_files': [], 'system_params': {'nonbondedMethod': 'CutoffNonPeriodic', 'nonbondedCutoff': '1 * nanometers', 'constraints': 'HBonds', 'rigidWater': True}, 'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds', 'Pressure': '1.0 * bar'}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}}\n", + "system_params 
{'nonbondedMethod': 'CutoffNonPeriodic', 'nonbondedCutoff': '1 * nanometers', 'constraints': 'HBonds', 'rigidWater': True}\n", + "integrator_params {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds', 'Pressure': '1.0 * bar'}\n", + "simmulation_params {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}\n", + "integrator_type LangevinMiddle\n", + "Setting default forcefields v2\n", + "this is the validator error message constraintTolerance must be specified if rigidWater is True\n", + "The simulation failed because the \"constraintTolerance\" parameter must be specified if \"rigidWater\" is set to true. Let's correct this and try running the simulation again.\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_path\": \"cleaned_1X5Y.pdb\",\n", + " \"forcefield_files\": [],\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"CutoffNonPeriodic\",\n", + " \"nonbondedCutoff\": \"1 * nanometers\",\n", + " \"constraints\": \"HBonds\",\n", + " \"rigidWater\": true,\n", + " \"constraintTolerance\": 0.00001\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.004 * picoseconds\",\n", + " \"Pressure\": \"1.0 * bar\"\n", + " },\n", + " \"simmulation_params\": {\n", + " \"Ensemble\": \"NVT\",\n", + " \"Number of Steps\": 10000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```Using SetUpandRunFunction\n", + "{'pdb_path': 'cleaned_1X5Y.pdb', 'forcefield_files': [], 'system_params': {'nonbondedMethod': 'CutoffNonPeriodic', 'nonbondedCutoff': '1 * nanometers', 'constraints': 
'HBonds', 'rigidWater': True, 'constraintTolerance': 1e-05}, 'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds', 'Pressure': '1.0 * bar'}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}}\n", + "system_params {'nonbondedMethod': 'CutoffNonPeriodic', 'nonbondedCutoff': '1 * nanometers', 'constraints': 'HBonds', 'rigidWater': True, 'constraintTolerance': 1e-05}\n", + "integrator_params {'integrator_type': 'LangevinMiddle', 'Temperature': '300 * kelvin', 'Friction': '1.0 / picoseconds', 'Timestep': '0.004 * picoseconds', 'Pressure': '1.0 * bar'}\n", + "simmulation_params {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}\n", + "integrator_type LangevinMiddle\n", + "Setting default forcefields v2\n", + "this is the validator {'pdb_path': 'cleaned_1X5Y.pdb', 'forcefield_files': ['amber14-all.xml', 'amber14/tip3pfb.xml'], 'system_params': {'nonbondedMethod': CutoffNonPeriodic, 'nonbondedCutoff': Quantity(value=1.0, unit=nanometer), 'constraints': HBonds, 'rigidWater': True, 'constraintTolerance': 1e-05}, 'integrator_params': {'integrator_type': 'LangevinMiddle', 'Temperature': Quantity(value=300.0, unit=kelvin), 'Friction': Quantity(value=1, unit=/picosecond), 'Timestep': Quantity(value=0.004, unit=picosecond), 'Pressure': Quantity(value=1.0, unit=bar)}, 'simmulation_params': {'Ensemble': 'NVT', 'Number of Steps': 10000, 'record_interval_steps': 100, 'record_params': ['step', 'potentialEnergy', 'temperature']}}\n", + "Building system...\n", + "forcefield_files {'nonbondedMethod': CutoffNonPeriodic, 'nonbondedCutoff': Quantity(value=1.0, unit=nanometer), 'constraints': HBonds, 'rigidWater': True, 'constraintTolerance': 1e-05}\n", + "nonbondingCutoff 1.0 nm\n", + 
"nonbondedMethod CutoffNonPeriodic\n", + "constraints HBonds\n", + "rigidWater True\n", + "Setting up integrator...\n", + "integrator_type LangevinMiddle\n", + "Creating simulation...\n", + "simulation set!\n", + "running simulation in the tool\n", + "Performing energy minimization...\n", + "Minimization complete!\n", + "Equilibrating...\n", + "Simulating...\n", + "Done!\n", + "Standalone simulation script written to reproduce_simulation.py\n", + "Now that the simulation has been successfully completed, we can proceed to calculate the average surface area of the protein. For this, we can use the Python_REPL tool to execute a Python command that calculates the surface area. However, calculating the surface area of a protein is a complex task that requires specialized algorithms and is typically done using specialized software or libraries, such as DSSP or MSMS. Unfortunately, I don't have the capability to perform such a calculation. I recommend using a specialized tool or software for this task. \n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"Final Answer\",\n", + " \"action_input\": \"The simulation of fibronectin has been successfully completed. However, calculating the average surface area of a protein is a complex task that requires specialized algorithms and is typically done using specialized software or libraries, such as DSSP or MSMS. Unfortunately, I don't have the capability to perform such a calculation. I recommend using a specialized tool or software for this task.\"\n", + "}\n", + "```" + ] + }, + { + "data": { + "text/plain": [ + "'Thought: Thought: To run a simulation of fibronectin, we first need to download the PDB file for fibronectin. After that, we can use the SetUpandRunFunction tool to run the simulation. Finally, we can calculate the average surface area of the protein using Python_REPL tool. 
Let\\'s start by downloading the PDB file for fibronectin.\\nAction:\\n```\\n{\\n \"action\": \"PDBFileDownloader\",\\n \"action_input\": {\\n \"query\": \"fibronectin\"\\n }\\n}\\n```\\nObservation: Name2PDB tool successfully downloaded the PDB file: 1X5Y.pdb\\nThought: Now that we have the PDB file for fibronectin, we can proceed to run a simulation. For this, we will use the SetUpandRunFunction tool. We will use the default parameters for the simulation. The PDB file path is required as an input for this tool. Let\\'s run the simulation.\\nAction:\\n```\\n{\\n \"action\": \"SetUpandRunFunction\",\\n \"action_input\": {\\n \"pdb_path\": \"1X5Y.pdb\",\\n \"forcefield_files\": [],\\n \"system_params\": {\\n \"nonbondedMethod\": \"NoCutoff\",\\n \"nonbondedCutoff\": \"1 * nanometers\",\\n \"ewaldErrorTolerance\": null,\\n \"constraints\": \"None\",\\n \"rigidWater\": false,\\n \"constraintTolerance\": null\\n },\\n \"integrator_params\": {\\n \"integrator_type\": \"LangevinMiddle\",\\n \"Temperature\": \"300 * kelvin\",\\n \"Friction\": \"1.0 / picoseconds\",\\n \"Timestep\": \"0.004 * picoseconds\",\\n \"Pressure\": \"1.0 * bar\"\\n },\\n \"simmulation_params\": {\\n \"Ensemble\": \"NVT\",\\n \"Number of Steps\": 10000,\\n \"record_interval_steps\": 100,\\n \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\\n }\\n }\\n}\\n```\\n\\nObservation: Invalid ewaldErrorTolerance: float() argument must be a string or a real number, not \\'NoneType\\'.\\n If you are using null or None, just dont include\\n as part of the parameters.\\nInvalid constraintTolerance. float() argument must be a string or a real number, not \\'NoneType\\'. If\\n constraintTolerance is null or None,\\n just dont include as part of the parameters.\\n\\n Correct this and try again. \\n Everthing else is fine\\nThought: It seems that the simulation failed because of invalid parameters. 
The error messages suggest that the parameters \"ewaldErrorTolerance\" and \"constraintTolerance\" should not be included if they are null or None. Let\\'s correct this and try again.\\nAction:\\n```\\n{\\n \"action\": \"SetUpandRunFunction\",\\n \"action_input\": {\\n \"pdb_path\": \"1X5Y.pdb\",\\n \"forcefield_files\": [],\\n \"system_params\": {\\n \"nonbondedMethod\": \"NoCutoff\",\\n \"nonbondedCutoff\": \"1 * nanometers\",\\n \"constraints\": \"None\",\\n \"rigidWater\": false\\n },\\n \"integrator_params\": {\\n \"integrator_type\": \"LangevinMiddle\",\\n \"Temperature\": \"300 * kelvin\",\\n \"Friction\": \"1.0 / picoseconds\",\\n \"Timestep\": \"0.004 * picoseconds\",\\n \"Pressure\": \"1.0 * bar\"\\n },\\n \"simmulation_params\": {\\n \"Ensemble\": \"NVT\",\\n \"Number of Steps\": 10000,\\n \"record_interval_steps\": 100,\\n \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\\n }\\n }\\n}\\n```\\nObservation: No template found for residue 1 (GLY). The set of atoms matches GLY, but the bonds are different. Perhaps the chain is missing a terminal group? For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#templateThis were the inputs {\\'pdb_path\\': \\'1X5Y.pdb\\', \\'forcefield_files\\': [], \\'system_params\\': {\\'nonbondedMethod\\': \\'NoCutoff\\', \\'nonbondedCutoff\\': \\'1 * nanometers\\', \\'constraints\\': \\'None\\', \\'rigidWater\\': False}, \\'integrator_params\\': {\\'integrator_type\\': \\'LangevinMiddle\\', \\'Temperature\\': \\'300 * kelvin\\', \\'Friction\\': \\'1.0 / picoseconds\\', \\'Timestep\\': \\'0.004 * picoseconds\\', \\'Pressure\\': \\'1.0 * bar\\'}, \\'simmulation_params\\': {\\'Ensemble\\': \\'NVT\\', \\'Number of Steps\\': 10000, \\'record_interval_steps\\': 100, \\'record_params\\': [\\'step\\', \\'potentialEnergy\\', \\'temperature\\']}}\\nThought: The simulation failed because no template was found for residue 1 (GLY). 
This could be due to the chain missing a terminal group. To resolve this, we can use the CleaningToolFunction to perform various cleaning operations on the PDB file, such as adding missing atoms and hydrogens, replacing nonstandard residues, and/or removing water. Let\\'s clean the PDB file and try running the simulation again.\\nAction:\\n```\\n{\\n \"action\": \"CleaningToolFunction\",\\n \"action_input\": {\\n \"input_args\": {\\n \"pdb_path\": \"1X5Y.pdb\",\\n \"output_path\": \"cleaned_1X5Y.pdb\",\\n \"remove_heterogens\": false,\\n \"add_hydrogens\": true,\\n \"replace_nonstandard_residues\": true,\\n \"remove_water\": false\\n }\\n }\\n}\\n```\\nObservation: Cleaned File: Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. written to cleaned_1X5Y.pdb\\nThought: Now that we have cleaned the PDB file, let\\'s try running the simulation again using the cleaned PDB file.\\nAction:\\n```\\n{\\n \"action\": \"SetUpandRunFunction\",\\n \"action_input\": {\\n \"pdb_path\": \"cleaned_1X5Y.pdb\",\\n \"forcefield_files\": [],\\n \"system_params\": {\\n \"nonbondedMethod\": \"NoCutoff\",\\n \"nonbondedCutoff\": \"1 * nanometers\",\\n \"constraints\": \"None\",\\n \"rigidWater\": false\\n },\\n \"integrator_params\": {\\n \"integrator_type\": \"LangevinMiddle\",\\n \"Temperature\": \"300 * kelvin\",\\n \"Friction\": \"1.0 / picoseconds\",\\n \"Timestep\": \"0.004 * picoseconds\",\\n \"Pressure\": \"1.0 * bar\"\\n },\\n \"simmulation_params\": {\\n \"Ensemble\": \"NVT\",\\n \"Number of Steps\": 10000,\\n \"record_interval_steps\": 100,\\n \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\\n }\\n }\\n}\\n```\\nObservation: An exception was found: Particle coordinate is NaN. For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#nan. Not a problem, thats one\\n purpose of this tool: to run a short simulation to check for correct\\n initialization. 
\\n\\n Try a) with different parameters like\\n nonbondedMethod, constraints, etc or b) clean file inputs depending on error\\n \\nThought: The simulation failed again due to a NaN error in the particle coordinates. This could be due to a problem with the initial configuration of the system. We can try to resolve this by changing the parameters of the simulation, such as the nonbondedMethod and constraints. Let\\'s try running the simulation again with different parameters.\\nAction:\\n```\\n{\\n \"action\": \"SetUpandRunFunction\",\\n \"action_input\": {\\n \"pdb_path\": \"cleaned_1X5Y.pdb\",\\n \"forcefield_files\": [],\\n \"system_params\": {\\n \"nonbondedMethod\": \"CutoffNonPeriodic\",\\n \"nonbondedCutoff\": \"1 * nanometers\",\\n \"constraints\": \"HBonds\",\\n \"rigidWater\": true\\n },\\n \"integrator_params\": {\\n \"integrator_type\": \"LangevinMiddle\",\\n \"Temperature\": \"300 * kelvin\",\\n \"Friction\": \"1.0 / picoseconds\",\\n \"Timestep\": \"0.004 * picoseconds\",\\n \"Pressure\": \"1.0 * bar\"\\n },\\n \"simmulation_params\": {\\n \"Ensemble\": \"NVT\",\\n \"Number of Steps\": 10000,\\n \"record_interval_steps\": 100,\\n \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\\n }\\n }\\n}\\n```\\nObservation: constraintTolerance must be specified if rigidWater is True\\n Correct this and try again. \\n Everthing else is fine\\nThought: The simulation failed because the \"constraintTolerance\" parameter must be specified if \"rigidWater\" is set to true. 
Let\\'s correct this and try running the simulation again.\\nAction:\\n```\\n{\\n \"action\": \"SetUpandRunFunction\",\\n \"action_input\": {\\n \"pdb_path\": \"cleaned_1X5Y.pdb\",\\n \"forcefield_files\": [],\\n \"system_params\": {\\n \"nonbondedMethod\": \"CutoffNonPeriodic\",\\n \"nonbondedCutoff\": \"1 * nanometers\",\\n \"constraints\": \"HBonds\",\\n \"rigidWater\": true,\\n \"constraintTolerance\": 0.00001\\n },\\n \"integrator_params\": {\\n \"integrator_type\": \"LangevinMiddle\",\\n \"Temperature\": \"300 * kelvin\",\\n \"Friction\": \"1.0 / picoseconds\",\\n \"Timestep\": \"0.004 * picoseconds\",\\n \"Pressure\": \"1.0 * bar\"\\n },\\n \"simmulation_params\": {\\n \"Ensemble\": \"NVT\",\\n \"Number of Steps\": 10000,\\n \"record_interval_steps\": 100,\\n \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\\n }\\n }\\n}\\n```\\nObservation: Simulation done!\\nFinal Answer: The simulation of fibronectin has been successfully completed. However, calculating the average surface area of a protein is a complex task that requires specialized algorithms and is typically done using specialized software or libraries, such as DSSP or MSMS. Unfortunately, I don\\'t have the capability to perform such a calculation. 
I recommend using a specialized tool or software for this task.'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test = \"Run a simple simulation of fibronectin\"\n", + "mrkl.run(test)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "\n", + "def get_timestamp():\n", + " # Get the current date and time\n", + " now = datetime.now()\n", + " # Format the date and time as \"YYYYMMDD_HHMMSS\"\n", + " timestamp = now.strftime(\"%Y%m%d_%H%M%S\")\n", + "\n", + " return timestamp\n", + "\n", + " #File Name/ID in Path Registry JSON\n", + "def _get_fileid(file_name, type):\n", + " # Drop the extension, then split the stem on underscores\n", + " parts, ending = file_name.split('.')\n", + " parts = parts.split('_')\n", + " print(parts)\n", + "\n", + " # Extract the time portion of the timestamp (assuming it's always the last part)\n", + " timestamp_part = parts[-1]\n", + " # Get the last 6 digits of the timestamp\n", + " timestamp_digits = timestamp_part[-6:]\n", + " print(timestamp_digits)\n", + "\n", + " if type == 'protein':\n", + " # Extract the PDB ID (assuming it's always the first part)\n", + " pdb_id = parts[0]\n", + " return pdb_id + timestamp_digits\n", + " if type == 'simulation':\n", + " return \"sim\" + timestamp_digits\n", + " if type == 'record':\n", + " return \"rec\" + timestamp_digits\n", + "\n", + "\n", + "def write_file_name( type:str, **kwargs):\n", + " time_stamp = get_timestamp()\n", + " if type == 'protein':\n", + " file_name = f\"{kwargs['protein_name']}_{kwargs['description']}_{time_stamp}.{kwargs['file_format']}\"\n", + " if type == 'simulation':\n", + " file_name = f\"{kwargs['type_of_sim']}_{kwargs['protein_file_id']}_{kwargs['conditions']}_{time_stamp}\"\n", + " if type == 'record':\n", + " file_name = f\"{kwargs['protein_file_id']}_{kwargs['Sim_id']}_{time_stamp}\"\n", + "\n", + " return file_name\n", + " " + ] +
}, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['AHH', 'raw', '20231220', '151938']\n", + "151938\n" + ] + } + ], + "source": [ + "filename = write_file_name(type='protein', protein_name=\"AHH\", description = 'raw', file_format=\"pdb\")\n", + "file_id = _get_fileid(filename,'protein')" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AHH_raw_20231220_151938.pdb\n", + "AHH151938\n" + ] + } + ], + "source": [ + "print(filename)\n", + "print(file_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "MDAGENT", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/test_fxns.py b/tests/test_fxns.py index ac2a9410..61c997d7 100644 --- a/tests/test_fxns.py +++ b/tests/test_fxns.py @@ -1,3 +1,4 @@ +import json import os import warnings from unittest.mock import MagicMock, mock_open, patch @@ -11,7 +12,7 @@ get_pdb, ) from mdagent.tools.base_tools.analysis_tools.plot_tools import plot_data, process_csv -from mdagent.utils import PathRegistry +from mdagent.utils import FileType, PathRegistry warnings.filterwarnings("ignore", category=DeprecationWarning, module="pkg_resources") @@ -167,5 +168,111 @@ def test_setup_simulation_from_json(mock_json_load, mock_file_open, sim_fxns): def test_getpdb(fibronectin, get_registry): - name = get_pdb(fibronectin, get_registry) + name, _ = get_pdb(fibronectin, get_registry) assert 
name.endswith(".pdb") + + +@pytest.fixture +def path_registry(): + registry = PathRegistry() + registry.get_timestamp = lambda: "20240109" + return registry + + +def test_write_to_file(): + path_registry = PathRegistry() + + with patch("builtins.open", mock_open()): + file_name = path_registry.write_file_name( + FileType.PROTEIN, + protein_name="1XYZ", + description="testing", + file_format="pdb", + ) + # assert file name starts and ends correctly + assert file_name.startswith("1XYZ") + assert file_name.endswith(".pdb") + + +def test_write_file_name_protein(path_registry): + file_name = path_registry.write_file_name( + FileType.PROTEIN, protein_name="1XYZ", description="testing", file_format="pdb" + ) + assert file_name == "1XYZ_testing_20240109.pdb" + + +def test_write_file_name_simulation_with_conditions(path_registry): + file_name = path_registry.write_file_name( + FileType.SIMULATION, + type_of_sim="MD", + protein_file_id="1XYZ", + conditions="pH7", + time_stamp="20240109", + ) + assert file_name == "MD_1XYZ_pH7_20240109" + + +def test_write_file_name_simulation_modified(path_registry): + file_name = path_registry.write_file_name( + FileType.SIMULATION, Sim_id="SIM456", modified=True, time_stamp="20240109" + ) + assert file_name == "SIM456_MOD_20240109" + + +def test_write_file_name_simulation_default(path_registry): + file_name = path_registry.write_file_name( + FileType.SIMULATION, + type_of_sim="MD", + protein_file_id="123", + time_stamp="20240109", + ) + assert file_name == "MD_123_20240109" + + +def test_write_file_name_record(path_registry): + file_name = path_registry.write_file_name( + FileType.RECORD, protein_file_id="123", Sim_id="SIM456", time_stamp="20240109" + ) + assert file_name == "123_SIM456_20240109" + + +def test_map_path(): + mock_json_data = { + "existing_name": { + "path": "existing/path", + "description": "Existing description", + } + } + new_path_dict = {"new_name": {"path": "new/path", "description": "New description"}} + 
updated_json_data = {**mock_json_data, **new_path_dict} + + path_registry = PathRegistry() + path_registry.json_file_path = "dummy_json_file.json" + + # Mocking os.path.exists to simulate the JSON file existence + with patch("os.path.exists", return_value=True): + # Mocking open for both reading and writing the JSON file + with patch( + "builtins.open", mock_open(read_data=json.dumps(mock_json_data)) + ) as mocked_file: + # Optionally, you can mock internal methods if needed + with patch.object( + path_registry, "_check_for_json", return_value=True + ), patch.object( + path_registry, "_check_json_content", return_value=True + ), patch.object( + path_registry, "_get_full_path", return_value="new/path" + ): # Mocking _get_full_path + result = path_registry.map_path( + "new_name", "new/path", "New description" + ) + # Aggregating all calls to write into a single string + written_data = "".join( + call.args[0] for call in mocked_file().write.call_args_list + ) + + # Comparing the aggregated data with the expected JSON data + assert json.loads(written_data) == updated_json_data + + # Check the result message + assert result == "Path successfully mapped to name: new_name"