diff --git a/mdagent/tools/base_tools/preprocess_tools/pdb_tools.py b/mdagent/tools/base_tools/preprocess_tools/pdb_tools.py index e7d2018b..4e562178 100644 --- a/mdagent/tools/base_tools/preprocess_tools/pdb_tools.py +++ b/mdagent/tools/base_tools/preprocess_tools/pdb_tools.py @@ -254,8 +254,9 @@ def summarize_errors(errors): class Molecule: - def __init__(self, filename, number_of_molecules=1, instructions=None): + def __init__(self, filename, file_id, number_of_molecules=1, instructions=None): self.filename = filename + self.id = file_id self.number_of_molecules = number_of_molecules self.instructions = instructions if instructions else [] self.load() @@ -276,6 +277,7 @@ def __init__( self.molecules = [] self.file_number = 1 self.file_description = file_description + self.final_name = None def add_molecule(self, molecule): self.molecules.append(molecule) @@ -284,16 +286,33 @@ def add_molecule(self, molecule): def generate_input_header(self): # Generate the header of the input file in .inp format + orig_pdbs_ids = [ + f"{molecule.number_of_molecules + molecule.id}" + for molecule in self.molecules + ] + + _final_name = f'{"_".join(orig_pdbs_ids)}' - while os.path.exists(f"packed_structures_v{self.file_number}.pdb"): + self.file_description = ( + "Packed Structures of the following molecules:\n" + + "\n".join( + [ + f"Molecule ID: {molecule.id}, " + f"Number of Molecules: {molecule.number_of_molecules}" + for molecule in self.molecules + ] + ) + ) + while os.path.exists(f"{_final_name}_v{self.file_number}.pdb"): self.file_number += 1 + self.final_name = f"{_final_name}_v{self.file_number}.pdb" with open("packmol.inp", "w") as out: out.write("##Automatically generated by LangChain\n") out.write("tolerance 2.0\n") out.write("filetype pdb\n") out.write( - f"output packed_structures_v{self.file_number}.pdb\n" + f"output {self.final_name}\n" ) # this is the name of the final file out.close() @@ -327,19 +346,23 @@ def run_packmol(self, PathRegistry): "Packmol failed to run. Please check the input file and try again." ) - PathRegistry.map_path( - f"packed_structures_v{self.file_number}.pdb", - f"packed_structures_v{self.file_number}.pdb", - self.file_description, - ) # validate final pdb - pdb_validation = validate_pdb_format(f"packed_structures{self.file_number}.pdb") + pdb_validation = validate_pdb_format(f"{self.file_name}") if pdb_validation[0] == 0: # delete .inp files os.remove("packmol.inp") for molecule in self.molecules: os.remove(molecule.filename) - return "PDB file validated successfully" + # name of packed pdb file + time_stamp = PathRegistry.get_timestamp() + PathRegistry.map_path( + f"PACKED_{time_stamp}", + f"{self.final_name}", + self.file_description, + ) + # move file to files/pdb + os.rename(self.final_name, f"files/pdb/{self.final_name}") + return f"PDB file validated successfully. FileID: PACKED_{time_stamp}" elif pdb_validation[0] == 1: # format pdb_validation[1] list of errors errors = summarize_errors(pdb_validation[1]) @@ -356,6 +379,7 @@ def run_packmol(self, PathRegistry): def packmol_wrapper( PathRegistry, pdbfiles: List, + files_id: List, number_of_molecules: List, instructions: List[List], ): @@ -365,10 +389,10 @@ def packmol_wrapper( # create a box box = PackmolBox() # add molecules to the box - for pdbfile, number_of_molecules, instructions in zip( - pdbfiles, number_of_molecules, instructions + for pdbfile, file_id, number_of_molecules, instructions in zip( + pdbfiles, files_id, number_of_molecules, instructions ): - molecule = Molecule(pdbfile, number_of_molecules, instructions) + molecule = Molecule(pdbfile, file_id, number_of_molecules, instructions) box.add_molecule(molecule) # generate input header box.generate_input_header() @@ -441,8 +465,10 @@ def _run(self, **values) -> str: except ValidationError as e: return str(e) error_msg = values.get("error", None) - pdbfiles = values.get("pdbfiles_id", []) - pdbfiles = [self.path_registry.get_mapped_path(pdbfile) for pdbfile in pdbfiles] + pdbfile_ids = values.get("pdbfiles_id", []) + pdbfiles = [ + self.path_registry.get_mapped_path(pdbfile) for pdbfile in pdbfile_ids + ] pdbfile_names = [pdbfile.split("/")[-1] for pdbfile in pdbfiles] # copy them to the current directory with temp_ names for pdbfile, pdbfile_name in zip(pdbfiles, pdbfile_names): @@ -468,6 +494,7 @@ def _run(self, **values) -> str: return packmol_wrapper( self.path_registry, pdbfiles=pdbfile_names, + files_id=pdbfile_ids, number_of_molecules=number_of_molecules, instructions=instructions, )