From b7a948daa318a2688b7514af14d23888c8bb2626 Mon Sep 17 00:00:00 2001 From: esoteric-ephemera Date: Wed, 12 Mar 2025 14:32:29 -0700 Subject: [PATCH 1/2] add fix for mp db versioning --- src/atomate2/common/jobs/utils.py | 33 ++---- ...on Kaplan's conflicted copy 2025-03-12).py | 102 ++++++++++++++++++ tests/common/test_jobs.py | 40 ++++++- 3 files changed, 146 insertions(+), 29 deletions(-) create mode 100644 tests/common/test_jobs (Aaron Kaplan's conflicted copy 2025-03-12).py diff --git a/src/atomate2/common/jobs/utils.py b/src/atomate2/common/jobs/utils.py index 3bc92583d6..3671163a1b 100644 --- a/src/atomate2/common/jobs/utils.py +++ b/src/atomate2/common/jobs/utils.py @@ -3,18 +3,18 @@ from __future__ import annotations import os +from pathlib import Path from typing import TYPE_CHECKING from jobflow import Response, job from monty.os.path import zpath -from pymatgen.command_line.bader_caller import bader_analysis_from_path from pymatgen.symmetry.analyzer import SpacegroupAnalyzer from atomate2 import SETTINGS +from atomate2.utils.path import strip_hostname if TYPE_CHECKING: from collections.abc import Sequence - from pathlib import Path from pymatgen.core import Structure @@ -169,36 +169,17 @@ def remove_workflow_files( ------- list[str] : list of removed files """ - abs_paths = [os.path.abspath(dir_name.split(":")[-1]) for dir_name in directories] + abs_paths = [Path(strip_hostname(dir_name)).absolute() for dir_name in directories] removed_files = [] for job_dir in abs_paths: for file in file_names: - file_name = os.path.join(job_dir, file) + file_name = job_dir / file if allow_zpath: - file_name = zpath(file_name) + file_name = Path(zpath(str(file_name))) - if os.path.isfile(file_name): + if file_name.is_file(): removed_files.append(file_name) os.remove(file_name) - return removed_files - - -@job -def bader_analysis(dir_name: str | Path, suffix: str | None = None) -> dict: - """Run Bader charge analysis as a job. - - Parameters - ---------- - dir_name : str or Path - The name of the directory to run Bader in. - suffix : str or None (default) - Suffixes of the files to filter by. - - Returns - ------- - dict of bader charge analysis which is JSONable. - """ - dir_name = os.path.abspath(str(dir_name).split(":")[-1]) - return bader_analysis_from_path(dir_name, suffix=suffix or "") + return [str(f) for f in removed_files] diff --git a/tests/common/test_jobs (Aaron Kaplan's conflicted copy 2025-03-12).py b/tests/common/test_jobs (Aaron Kaplan's conflicted copy 2025-03-12).py new file mode 100644 index 0000000000..d87de86446 --- /dev/null +++ b/tests/common/test_jobs (Aaron Kaplan's conflicted copy 2025-03-12).py @@ -0,0 +1,102 @@ +import os +from datetime import datetime, timezone +from pathlib import Path + +from jobflow import run_locally +from monty.io import zopen +from pymatgen.core import Structure +from pytest import approx, mark + +from atomate2.common.jobs.utils import ( + remove_workflow_files, + retrieve_structure_from_materials_project, + structure_to_conventional, + structure_to_primitive, +) + + +def test_structure_to_primitive(si_structure): + job = structure_to_primitive(si_structure) + + responses = run_locally(job) + output = responses[job.uuid][1].output + + assert output.lattice.alpha == approx(60) + + +def test_structure_to_conventional(si_structure): + job = structure_to_conventional(si_structure) + + responses = run_locally(job) + output = responses[job.uuid][1].output + + assert output.lattice.alpha == approx(90) + + +@mark.skipif( + not os.getenv("MP_API_KEY"), + reason="Materials Project API key not set in environment.", +) +def test_retrieve_structure_from_materials_project(): + job = retrieve_structure_from_materials_project("mp-149") + + responses = run_locally(job) + output = responses[job.uuid][1].output + stored_data = responses[job.uuid][1].stored_data + + assert isinstance(output, Structure) + + # test stored data is in expected format + # Note that patches use `.post` suffix in MP DB versions + db_version = stored_data["database_version"].split(".post")[0] + datetime.strptime(db_version, "%Y.%m.%d").replace(tzinfo=timezone.utc) + assert stored_data["task_id"].startswith("mp-") + + job = retrieve_structure_from_materials_project( + "mp-19009", reset_magnetic_moments=False + ) + + responses = run_locally(job) + output = responses[job.uuid][1].output + + assert "magmom" in output.site_properties + + job = retrieve_structure_from_materials_project( + "mp-19009", reset_magnetic_moments=True + ) + + responses = run_locally(job) + output = responses[job.uuid][1].output + + assert "magmom" not in output.site_properties + + +def test_workflow_cleanup(tmp_dir): + dirs = [Path(p) for p in ("test_1", "temp_2")] + + orig_files = ["foo.txt", "bar.txt.gz"] + expected_file_list = [] + for _dir in dirs: + assert not _dir.exists() + _dir.mkdir(exist_ok=True, parents=True) + assert _dir.is_dir() + + for f in orig_files: + with zopen(_dir / f, "wt", encoding="utf8") as _f: + _f.write( + "Lorem ipsum dolor sit amet,\n" + "consectetur adipiscing elit,\n" + "sed do eiusmod tempor incididunt\n" + "ut labore et dolore magna aliqua." + ) + assert (_dir / f).is_file() + assert os.path.getsize(_dir / f) > 0 + + expected_file_list.append(_dir / f) + + job = remove_workflow_files(dirs, [f.split(".gz")[0] for f in orig_files]) + resp = run_locally(job) + assert sorted([p.absolute() for p in resp[job.uuid][1].output]) == sorted( + [p.absolute() for p in expected_file_list] + ) + assert all(not Path(f).is_file() for f in expected_file_list) diff --git a/tests/common/test_jobs.py b/tests/common/test_jobs.py index e860b4f065..d87de86446 100644 --- a/tests/common/test_jobs.py +++ b/tests/common/test_jobs.py @@ -1,11 +1,14 @@ import os from datetime import datetime, timezone +from pathlib import Path from jobflow import run_locally +from monty.io import zopen from pymatgen.core import Structure from pytest import approx, mark from atomate2.common.jobs.utils import ( + remove_workflow_files, retrieve_structure_from_materials_project, structure_to_conventional, structure_to_primitive, @@ -44,9 +47,9 @@ def test_retrieve_structure_from_materials_project(): assert isinstance(output, Structure) # test stored data is in expected format - datetime.strptime(stored_data["database_version"], "%Y.%m.%d").replace( - tzinfo=timezone.utc - ) + # Note that patches use `.post` suffix in MP DB versions + db_version = stored_data["database_version"].split(".post")[0] + datetime.strptime(db_version, "%Y.%m.%d").replace(tzinfo=timezone.utc) assert stored_data["task_id"].startswith("mp-") job = retrieve_structure_from_materials_project( @@ -66,3 +69,34 @@ def test_retrieve_structure_from_materials_project(): output = responses[job.uuid][1].output assert "magmom" not in output.site_properties + + +def test_workflow_cleanup(tmp_dir): + dirs = [Path(p) for p in ("test_1", "temp_2")] + + orig_files = ["foo.txt", "bar.txt.gz"] + expected_file_list = [] + for _dir in dirs: + assert not _dir.exists() + _dir.mkdir(exist_ok=True, parents=True) + assert _dir.is_dir() + + for f in orig_files: + with zopen(_dir / f, "wt", encoding="utf8") as _f: + _f.write( + "Lorem ipsum dolor sit amet,\n" + "consectetur adipiscing elit,\n" + "sed do eiusmod tempor incididunt\n" + "ut labore et dolore magna aliqua." + ) + assert (_dir / f).is_file() + assert os.path.getsize(_dir / f) > 0 + + expected_file_list.append(_dir / f) + + job = remove_workflow_files(dirs, [f.split(".gz")[0] for f in orig_files]) + resp = run_locally(job) + assert sorted([p.absolute() for p in resp[job.uuid][1].output]) == sorted( + [p.absolute() for p in expected_file_list] + ) + assert all(not Path(f).is_file() for f in expected_file_list) From 349e9a7aa7963eb89932af0f14431905dda22810 Mon Sep 17 00:00:00 2001 From: esoteric-ephemera Date: Wed, 12 Mar 2025 15:16:54 -0700 Subject: [PATCH 2/2] unify lobster / mp file deletion in wf --- src/atomate2/common/jobs/utils.py | 60 ++++++----- src/atomate2/vasp/flows/lobster.py | 10 +- src/atomate2/vasp/jobs/lobster.py | 28 ----- ...on Kaplan's conflicted copy 2025-03-12).py | 102 ------------------ tests/common/test_jobs.py | 5 +- 5 files changed, 42 insertions(+), 163 deletions(-) delete mode 100644 tests/common/test_jobs (Aaron Kaplan's conflicted copy 2025-03-12).py diff --git a/src/atomate2/common/jobs/utils.py b/src/atomate2/common/jobs/utils.py index 3671163a1b..ef6cf9a57c 100644 --- a/src/atomate2/common/jobs/utils.py +++ b/src/atomate2/common/jobs/utils.py @@ -2,20 +2,16 @@ from __future__ import annotations -import os -from pathlib import Path from typing import TYPE_CHECKING from jobflow import Response, job -from monty.os.path import zpath from pymatgen.symmetry.analyzer import SpacegroupAnalyzer from atomate2 import SETTINGS +from atomate2.common.files import delete_files from atomate2.utils.path import strip_hostname if TYPE_CHECKING: - from collections.abc import Sequence - from pymatgen.core import Structure @@ -147,8 +143,11 @@ def retrieve_structure_from_materials_project( @job def remove_workflow_files( - directories: Sequence[str], file_names: Sequence[str], allow_zpath: bool = True -) -> list[str]: + directories: list[str | list[str]], + file_names: list[str], + allow_zpath: bool = True, + **kwargs, +) -> None: """ Remove files from previous jobs. @@ -158,28 +157,39 @@ def remove_workflow_files( Parameters ---------- - makers : Sequence[Maker, Flow, Job] - Jobs in the flow on which to remove files. - file_names : Sequence[str] + directories : list of str, or list of list of str + Names of directories to clean output from. + Can be a list of directories, or a list of lists. + file_names : list of str The list of file names to remove, ex. ["WAVECAR"] rather than a full path allow_zpath : bool = True - Whether to allow checking for gzipped output using `monty.os.zpath` + Whether to allow checking for zipped output + **kwargs + Other kwargs to pass to `delete_files` Returns ------- list[str] : list of removed files """ - abs_paths = [Path(strip_hostname(dir_name)).absolute() for dir_name in directories] - - removed_files = [] - for job_dir in abs_paths: - for file in file_names: - file_name = job_dir / file - if allow_zpath: - file_name = Path(zpath(str(file_name))) - - if file_name.is_file(): - removed_files.append(file_name) - os.remove(file_name) - - return [str(f) for f in removed_files] + if allow_zpath: + orig_files = list(file_names) + for file in orig_files: + file_names.extend( + f"{file.removesuffix(ext)}{ext}" + for ext in (".gz", ".GZ", ".bz2", ".BZ2", ".z", ".Z") + ) + + flattened_dirs = [] + for dir_name in directories: + if isinstance(dir_name, list | tuple): + flattened_dirs.extend(dir_name) + else: + flattened_dirs.append(dir_name) + + for dir_name in flattened_dirs: + delete_files( + strip_hostname(dir_name), + include_files=file_names, + allow_missing=True, + **kwargs, + ) diff --git a/src/atomate2/vasp/flows/lobster.py b/src/atomate2/vasp/flows/lobster.py index 6498f2e1df..108872f0c7 100644 --- a/src/atomate2/vasp/flows/lobster.py +++ b/src/atomate2/vasp/flows/lobster.py @@ -8,11 +8,11 @@ from jobflow import Flow, Maker from monty.dev import requires +from atomate2.common.jobs.utils import remove_workflow_files from atomate2.lobster.jobs import LobsterMaker from atomate2.vasp.flows.core import DoubleRelaxMaker, UniformBandStructureMaker from atomate2.vasp.jobs.core import NonSCFMaker, RelaxMaker, StaticMaker from atomate2.vasp.jobs.lobster import ( - delete_lobster_wavecar, get_basis_infos, get_lobster_jobs, update_user_incar_settings_maker, @@ -179,10 +179,12 @@ def make( # delete all WAVECARs that have been copied # TODO: this has to be adapted as well if self.delete_wavecars: - delete_wavecars = delete_lobster_wavecar( - dirs=lobster_jobs.output["lobster_dirs"], - lobster_static_dir=lobster_static.output.dir_name, + delete_wavecars = remove_workflow_files( + [lobster_jobs.output["lobster_dirs"], lobster_static.output.dir_name], + ["WAVECAR"], + allow_zpath=True, ) + delete_wavecars.name = "delete_lobster_wavecar" jobs.append(delete_wavecars) return Flow(jobs, output=lobster_jobs.output) diff --git a/src/atomate2/vasp/jobs/lobster.py b/src/atomate2/vasp/jobs/lobster.py index 6b4a45e15e..60d4bb7234 100644 --- a/src/atomate2/vasp/jobs/lobster.py +++ b/src/atomate2/vasp/jobs/lobster.py @@ -9,9 +9,7 @@ from jobflow import Flow, Response, job from pymatgen.io.lobster import Lobsterin -from atomate2.common.files import delete_files from atomate2.lobster.jobs import LobsterMaker -from atomate2.utils.path import strip_hostname from atomate2.vasp.jobs.base import BaseVaspMaker from atomate2.vasp.powerups import update_user_incar_settings from atomate2.vasp.sets.core import LobsterTightStaticSetGenerator @@ -202,29 +200,3 @@ def get_lobster_jobs( flow = Flow(jobs, output=outputs) return Response(replace=flow) - - -@job -def delete_lobster_wavecar( - dirs: list[Path | str], - lobster_static_dir: Path | str = None, -) -> None: - """ - Delete all WAVECARs. - - Parameters - ---------- - dirs : list of path or str - Path to directories of lobster jobs. - lobster_static_dir : Path or str - Path to directory of static VASP run. - """ - if lobster_static_dir: - dirs.append(lobster_static_dir) - - for dir_name in dirs: - delete_files( - strip_hostname(dir_name), - include_files=["WAVECAR", "WAVECAR.gz"], - allow_missing=True, - ) diff --git a/tests/common/test_jobs (Aaron Kaplan's conflicted copy 2025-03-12).py b/tests/common/test_jobs (Aaron Kaplan's conflicted copy 2025-03-12).py deleted file mode 100644 index d87de86446..0000000000 --- a/tests/common/test_jobs (Aaron Kaplan's conflicted copy 2025-03-12).py +++ /dev/null @@ -1,102 +0,0 @@ -import os -from datetime import datetime, timezone -from pathlib import Path - -from jobflow import run_locally -from monty.io import zopen -from pymatgen.core import Structure -from pytest import approx, mark - -from atomate2.common.jobs.utils import ( - remove_workflow_files, - retrieve_structure_from_materials_project, - structure_to_conventional, - structure_to_primitive, -) - - -def test_structure_to_primitive(si_structure): - job = structure_to_primitive(si_structure) - - responses = run_locally(job) - output = responses[job.uuid][1].output - - assert output.lattice.alpha == approx(60) - - -def test_structure_to_conventional(si_structure): - job = structure_to_conventional(si_structure) - - responses = run_locally(job) - output = responses[job.uuid][1].output - - assert output.lattice.alpha == approx(90) - - -@mark.skipif( - not os.getenv("MP_API_KEY"), - reason="Materials Project API key not set in environment.", -) -def test_retrieve_structure_from_materials_project(): - job = retrieve_structure_from_materials_project("mp-149") - - responses = run_locally(job) - output = responses[job.uuid][1].output - stored_data = responses[job.uuid][1].stored_data - - assert isinstance(output, Structure) - - # test stored data is in expected format - # Note that patches use `.post` suffix in MP DB versions - db_version = stored_data["database_version"].split(".post")[0] - datetime.strptime(db_version, "%Y.%m.%d").replace(tzinfo=timezone.utc) - assert stored_data["task_id"].startswith("mp-") - - job = retrieve_structure_from_materials_project( - "mp-19009", reset_magnetic_moments=False - ) - - responses = run_locally(job) - output = responses[job.uuid][1].output - - assert "magmom" in output.site_properties - - job = retrieve_structure_from_materials_project( - "mp-19009", reset_magnetic_moments=True - ) - - responses = run_locally(job) - output = responses[job.uuid][1].output - - assert "magmom" not in output.site_properties - - -def test_workflow_cleanup(tmp_dir): - dirs = [Path(p) for p in ("test_1", "temp_2")] - - orig_files = ["foo.txt", "bar.txt.gz"] - expected_file_list = [] - for _dir in dirs: - assert not _dir.exists() - _dir.mkdir(exist_ok=True, parents=True) - assert _dir.is_dir() - - for f in orig_files: - with zopen(_dir / f, "wt", encoding="utf8") as _f: - _f.write( - "Lorem ipsum dolor sit amet,\n" - "consectetur adipiscing elit,\n" - "sed do eiusmod tempor incididunt\n" - "ut labore et dolore magna aliqua." - ) - assert (_dir / f).is_file() - assert os.path.getsize(_dir / f) > 0 - - expected_file_list.append(_dir / f) - - job = remove_workflow_files(dirs, [f.split(".gz")[0] for f in orig_files]) - resp = run_locally(job) - assert sorted([p.absolute() for p in resp[job.uuid][1].output]) == sorted( - [p.absolute() for p in expected_file_list] - ) - assert all(not Path(f).is_file() for f in expected_file_list) diff --git a/tests/common/test_jobs.py b/tests/common/test_jobs.py index d87de86446..8d7e93e73a 100644 --- a/tests/common/test_jobs.py +++ b/tests/common/test_jobs.py @@ -95,8 +95,5 @@ def test_workflow_cleanup(tmp_dir): expected_file_list.append(_dir / f) job = remove_workflow_files(dirs, [f.split(".gz")[0] for f in orig_files]) - resp = run_locally(job) - assert sorted([p.absolute() for p in resp[job.uuid][1].output]) == sorted( - [p.absolute() for p in expected_file_list] - ) + run_locally(job) assert all(not Path(f).is_file() for f in expected_file_list)