Skip to content

Commit d445619

Browse files
More CI fixes (#1141)
* add fix for mp db versioning
* unify lobster / mp file deletion in wf
1 parent 96809a4 commit d445619

File tree

4 files changed

+76
-80
lines changed

4 files changed

+76
-80
lines changed

src/atomate2/common/jobs/utils.py

+36-45
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,16 @@
22

33
from __future__ import annotations
44

5-
import os
65
from typing import TYPE_CHECKING
76

87
from jobflow import Response, job
9-
from monty.os.path import zpath
10-
from pymatgen.command_line.bader_caller import bader_analysis_from_path
118
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
129

1310
from atomate2 import SETTINGS
11+
from atomate2.common.files import delete_files
12+
from atomate2.utils.path import strip_hostname
1413

1514
if TYPE_CHECKING:
16-
from collections.abc import Sequence
17-
from pathlib import Path
18-
1915
from pymatgen.core import Structure
2016

2117

@@ -147,8 +143,11 @@ def retrieve_structure_from_materials_project(
147143

148144
@job
149145
def remove_workflow_files(
150-
directories: Sequence[str], file_names: Sequence[str], allow_zpath: bool = True
151-
) -> list[str]:
146+
directories: list[str | list[str]],
147+
file_names: list[str],
148+
allow_zpath: bool = True,
149+
**kwargs,
150+
) -> None:
152151
"""
153152
Remove files from previous jobs.
154153
@@ -158,47 +157,39 @@ def remove_workflow_files(
158157
159158
Parameters
160159
----------
161-
makers : Sequence[Maker, Flow, Job]
162-
Jobs in the flow on which to remove files.
163-
file_names : Sequence[str]
160+
directories : list of str, or list of list of str
161+
Names of directories to clean output from.
162+
Can be a list of directories, or a list of lists.
163+
file_names : list of str
164164
The list of file names to remove, ex. ["WAVECAR"] rather than a full path
165165
allow_zpath : bool = True
166-
Whether to allow checking for gzipped output using `monty.os.zpath`
166+
Whether to also match compressed copies of each file
(suffixes .gz, .bz2 and .z, in lower or upper case)
167+
**kwargs
168+
Other kwargs to pass to `delete_files`
167169
168170
Returns
169171
-------
170172
None : matching files are deleted and nothing is returned
171173
"""
172-
abs_paths = [os.path.abspath(dir_name.split(":")[-1]) for dir_name in directories]
173-
174-
removed_files = []
175-
for job_dir in abs_paths:
176-
for file in file_names:
177-
file_name = os.path.join(job_dir, file)
178-
if allow_zpath:
179-
file_name = zpath(file_name)
180-
181-
if os.path.isfile(file_name):
182-
removed_files.append(file_name)
183-
os.remove(file_name)
184-
185-
return removed_files
186-
187-
188-
@job
189-
def bader_analysis(dir_name: str | Path, suffix: str | None = None) -> dict:
190-
"""Run Bader charge analysis as a job.
191-
192-
Parameters
193-
----------
194-
dir_name : str or Path
195-
The name of the directory to run Bader in.
196-
suffix : str or None (default)
197-
Suffixes of the files to filter by.
198-
199-
Returns
200-
-------
201-
dict of bader charge analysis which is JSONable.
202-
"""
203-
dir_name = os.path.abspath(str(dir_name).split(":")[-1])
204-
return bader_analysis_from_path(dir_name, suffix=suffix or "")
174+
if allow_zpath:
175+
orig_files = list(file_names)
176+
for file in orig_files:
177+
file_names.extend(
178+
f"{file.removesuffix(ext)}{ext}"
179+
for ext in (".gz", ".GZ", ".bz2", ".BZ2", ".z", ".Z")
180+
)
181+
182+
flattened_dirs = []
183+
for dir_name in directories:
184+
if isinstance(dir_name, list | tuple):
185+
flattened_dirs.extend(dir_name)
186+
else:
187+
flattened_dirs.append(dir_name)
188+
189+
for dir_name in flattened_dirs:
190+
delete_files(
191+
strip_hostname(dir_name),
192+
include_files=file_names,
193+
allow_missing=True,
194+
**kwargs,
195+
)

src/atomate2/vasp/flows/lobster.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
from jobflow import Flow, Maker
99
from monty.dev import requires
1010

11+
from atomate2.common.jobs.utils import remove_workflow_files
1112
from atomate2.lobster.jobs import LobsterMaker
1213
from atomate2.vasp.flows.core import DoubleRelaxMaker, UniformBandStructureMaker
1314
from atomate2.vasp.jobs.core import NonSCFMaker, RelaxMaker, StaticMaker
1415
from atomate2.vasp.jobs.lobster import (
15-
delete_lobster_wavecar,
1616
get_basis_infos,
1717
get_lobster_jobs,
1818
update_user_incar_settings_maker,
@@ -179,10 +179,12 @@ def make(
179179
# delete all WAVECARs that have been copied
180180
# TODO: this has to be adapted as well
181181
if self.delete_wavecars:
182-
delete_wavecars = delete_lobster_wavecar(
183-
dirs=lobster_jobs.output["lobster_dirs"],
184-
lobster_static_dir=lobster_static.output.dir_name,
182+
delete_wavecars = remove_workflow_files(
183+
[lobster_jobs.output["lobster_dirs"], lobster_static.output.dir_name],
184+
["WAVECAR"],
185+
allow_zpath=True,
185186
)
187+
delete_wavecars.name = "delete_lobster_wavecar"
186188
jobs.append(delete_wavecars)
187189

188190
return Flow(jobs, output=lobster_jobs.output)

src/atomate2/vasp/jobs/lobster.py

-28
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@
99
from jobflow import Flow, Response, job
1010
from pymatgen.io.lobster import Lobsterin
1111

12-
from atomate2.common.files import delete_files
1312
from atomate2.lobster.jobs import LobsterMaker
14-
from atomate2.utils.path import strip_hostname
1513
from atomate2.vasp.jobs.base import BaseVaspMaker
1614
from atomate2.vasp.powerups import update_user_incar_settings
1715
from atomate2.vasp.sets.core import LobsterTightStaticSetGenerator
@@ -202,29 +200,3 @@ def get_lobster_jobs(
202200

203201
flow = Flow(jobs, output=outputs)
204202
return Response(replace=flow)
205-
206-
207-
@job
208-
def delete_lobster_wavecar(
209-
dirs: list[Path | str],
210-
lobster_static_dir: Path | str = None,
211-
) -> None:
212-
"""
213-
Delete all WAVECARs.
214-
215-
Parameters
216-
----------
217-
dirs : list of path or str
218-
Path to directories of lobster jobs.
219-
lobster_static_dir : Path or str
220-
Path to directory of static VASP run.
221-
"""
222-
if lobster_static_dir:
223-
dirs.append(lobster_static_dir)
224-
225-
for dir_name in dirs:
226-
delete_files(
227-
strip_hostname(dir_name),
228-
include_files=["WAVECAR", "WAVECAR.gz"],
229-
allow_missing=True,
230-
)

tests/common/test_jobs.py

+34-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
import os
22
from datetime import datetime, timezone
3+
from pathlib import Path
34

45
from jobflow import run_locally
6+
from monty.io import zopen
57
from pymatgen.core import Structure
68
from pytest import approx, mark
79

810
from atomate2.common.jobs.utils import (
11+
remove_workflow_files,
912
retrieve_structure_from_materials_project,
1013
structure_to_conventional,
1114
structure_to_primitive,
@@ -44,9 +47,9 @@ def test_retrieve_structure_from_materials_project():
4447
assert isinstance(output, Structure)
4548

4649
# test stored data is in expected format
47-
datetime.strptime(stored_data["database_version"], "%Y.%m.%d").replace(
48-
tzinfo=timezone.utc
49-
)
50+
# Note that patches use `.post` suffix in MP DB versions
51+
db_version = stored_data["database_version"].split(".post")[0]
52+
datetime.strptime(db_version, "%Y.%m.%d").replace(tzinfo=timezone.utc)
5053
assert stored_data["task_id"].startswith("mp-")
5154

5255
job = retrieve_structure_from_materials_project(
@@ -66,3 +69,31 @@ def test_retrieve_structure_from_materials_project():
6669
output = responses[job.uuid][1].output
6770

6871
assert "magmom" not in output.site_properties
72+
73+
74+
def test_workflow_cleanup(tmp_dir):
75+
dirs = [Path(p) for p in ("test_1", "temp_2")]
76+
77+
orig_files = ["foo.txt", "bar.txt.gz"]
78+
expected_file_list = []
79+
for _dir in dirs:
80+
assert not _dir.exists()
81+
_dir.mkdir(exist_ok=True, parents=True)
82+
assert _dir.is_dir()
83+
84+
for f in orig_files:
85+
with zopen(_dir / f, "wt", encoding="utf8") as _f:
86+
_f.write(
87+
"Lorem ipsum dolor sit amet,\n"
88+
"consectetur adipiscing elit,\n"
89+
"sed do eiusmod tempor incididunt\n"
90+
"ut labore et dolore magna aliqua."
91+
)
92+
assert (_dir / f).is_file()
93+
assert os.path.getsize(_dir / f) > 0
94+
95+
expected_file_list.append(_dir / f)
96+
97+
job = remove_workflow_files(dirs, [f.split(".gz")[0] for f in orig_files])
98+
run_locally(job)
99+
assert all(not Path(f).is_file() for f in expected_file_list)

0 commit comments

Comments
 (0)