@job
def remove_workflow_files(
    directories: list[str | list[str]],
    file_names: list[str],
    allow_zpath: bool = True,
    **kwargs,
) -> None:
    """
    Remove files from previous jobs.

    Parameters
    ----------
    directories : list of str, or list of list of str
        Names of directories to clean output from.
        Can be a list of directories, or a list of lists; nested lists
        (or tuples) are flattened one level before processing.
    file_names : list of str
        The list of file names to remove, ex. ["WAVECAR"] rather than a
        full path. This list is never modified.
    allow_zpath : bool = True
        Whether to also target compressed variants of each file name, i.e.
        the name with one of the suffixes ".gz", ".GZ", ".bz2", ".BZ2",
        ".z" or ".Z" appended (a suffix that is already present is not
        duplicated).
    **kwargs
        Other kwargs to pass to `delete_files`

    Returns
    -------
    None
    """
    # Work on a copy: extending `file_names` in place would leak the
    # compressed variants back into the caller's list (and would fail
    # outright if a tuple were passed).
    targets = list(file_names)
    if allow_zpath:
        compressed_exts = (".gz", ".GZ", ".bz2", ".BZ2", ".z", ".Z")
        targets.extend(
            f"{name.removesuffix(ext)}{ext}"
            for name in file_names
            for ext in compressed_exts
        )
        # A name that already carries one of the suffixes yields itself
        # again; drop such repeats while keeping the original order.
        targets = list(dict.fromkeys(targets))

    # Flatten exactly one level of nesting so that callers can mix single
    # directories with lists of directories.
    flattened_dirs = []
    for entry in directories:
        if isinstance(entry, (list, tuple)):
            flattened_dirs.extend(entry)
        else:
            flattened_dirs.append(entry)

    for dir_name in flattened_dirs:
        delete_files(
            strip_hostname(dir_name),
            include_files=targets,
            allow_missing=True,
            **kwargs,
        )
100644 --- a/src/atomate2/vasp/jobs/lobster.py +++ b/src/atomate2/vasp/jobs/lobster.py @@ -9,9 +9,7 @@ from jobflow import Flow, Response, job from pymatgen.io.lobster import Lobsterin -from atomate2.common.files import delete_files from atomate2.lobster.jobs import LobsterMaker -from atomate2.utils.path import strip_hostname from atomate2.vasp.jobs.base import BaseVaspMaker from atomate2.vasp.powerups import update_user_incar_settings from atomate2.vasp.sets.core import LobsterTightStaticSetGenerator @@ -202,29 +200,3 @@ def get_lobster_jobs( flow = Flow(jobs, output=outputs) return Response(replace=flow) - - -@job -def delete_lobster_wavecar( - dirs: list[Path | str], - lobster_static_dir: Path | str = None, -) -> None: - """ - Delete all WAVECARs. - - Parameters - ---------- - dirs : list of path or str - Path to directories of lobster jobs. - lobster_static_dir : Path or str - Path to directory of static VASP run. - """ - if lobster_static_dir: - dirs.append(lobster_static_dir) - - for dir_name in dirs: - delete_files( - strip_hostname(dir_name), - include_files=["WAVECAR", "WAVECAR.gz"], - allow_missing=True, - ) diff --git a/tests/common/test_jobs.py b/tests/common/test_jobs.py index e860b4f065..8d7e93e73a 100644 --- a/tests/common/test_jobs.py +++ b/tests/common/test_jobs.py @@ -1,11 +1,14 @@ import os from datetime import datetime, timezone +from pathlib import Path from jobflow import run_locally +from monty.io import zopen from pymatgen.core import Structure from pytest import approx, mark from atomate2.common.jobs.utils import ( + remove_workflow_files, retrieve_structure_from_materials_project, structure_to_conventional, structure_to_primitive, @@ -44,9 +47,9 @@ def test_retrieve_structure_from_materials_project(): assert isinstance(output, Structure) # test stored data is in expected format - datetime.strptime(stored_data["database_version"], "%Y.%m.%d").replace( - tzinfo=timezone.utc - ) + # Note that patches use `.post` suffix in MP DB versions + 
def test_workflow_cleanup(tmp_dir):
    """Check that `remove_workflow_files` deletes plain and gzipped files."""
    placeholder_text = (
        "Lorem ipsum dolor sit amet,\n"
        "consectetur adipiscing elit,\n"
        "sed do eiusmod tempor incididunt\n"
        "ut labore et dolore magna aliqua."
    )
    file_names = ["foo.txt", "bar.txt.gz"]
    work_dirs = [Path("test_1"), Path("temp_2")]

    # Populate every directory with every file and remember what was made.
    created_paths = []
    for work_dir in work_dirs:
        assert not work_dir.exists()
        work_dir.mkdir(exist_ok=True, parents=True)
        assert work_dir.is_dir()

        for file_name in file_names:
            target = work_dir / file_name
            with zopen(target, "wt", encoding="utf8") as handle:
                handle.write(placeholder_text)
            assert target.is_file()
            assert os.path.getsize(target) > 0

            created_paths.append(target)

    # Request removal by the uncompressed names only; the job is expected to
    # pick up the ".gz" variant on its own.
    base_names = [file_name.split(".gz")[0] for file_name in file_names]
    cleanup_job = remove_workflow_files(work_dirs, base_names)
    run_locally(cleanup_job)
    assert all(not Path(path).is_file() for path in created_paths)