Skip to content

Commit 4500e23

Browse files
committed
make cp2k interface more robust towards large input files
1 parent 2b295e2 commit 4500e23

File tree

3 files changed

+67
-21
lines changed

3 files changed

+67
-21
lines changed

psiflow/reference/_cp2k.py

+44-20
Original file line numberDiff line numberDiff line change
@@ -139,15 +139,11 @@ def parse_cp2k_output(
139139
return geometry
140140

141141

142-
# typeguarding for some reason incompatible with WQ
143-
def cp2k_singlepoint_pre(
142+
def _prepare_input(
144143
geometry: Geometry,
145144
cp2k_input_dict: dict = {},
146145
properties: tuple = (),
147-
cp2k_command: str = "",
148-
stdout: str = "",
149-
stderr: str = "",
150-
parsl_resource_specification: Optional[dict] = None,
146+
outputs: list = [],
151147
):
152148
from psiflow.reference._cp2k import (
153149
dict_to_str,
@@ -160,18 +156,33 @@ def cp2k_singlepoint_pre(
160156
if "forces" in properties:
161157
cp2k_input_dict["force_eval"]["print"] = {"FORCES": {}}
162158
cp2k_input_str = dict_to_str(cp2k_input_dict)
159+
with open(outputs[0], 'w') as f:
160+
f.write(cp2k_input_str)
161+
162+
163+
prepare_input = python_app(_prepare_input, executors=['default_threads'])
164+
165+
166+
# typeguarding for some reason incompatible with WQ
167+
def cp2k_singlepoint_pre(
168+
cp2k_command: str = "",
169+
stdout: str = "",
170+
stderr: str = "",
171+
inputs: list = [],
172+
parsl_resource_specification: Optional[dict] = None,
173+
):
174+
tmp_command = 'mytmpdir=$(mktemp -d 2>/dev/null || mktemp -d -t "mytmpdir")'
175+
cd_command = "cd $mytmpdir"
176+
cp_command = "cp {} cp2k.inp".format(inputs[0].filepath)
163177

164-
# see https://unix.stackexchange.com/questions/30091/fix-or-alternative-for-mktemp-in-os-x
165-
tmp_command = 'mytmpdir=$(mktemp -d 2>/dev/null || mktemp -d -t "mytmpdir");'
166-
cd_command = "cd $mytmpdir;"
167-
write_command = 'echo "{}" > cp2k.inp;'.format(cp2k_input_str)
168178
command_list = [
169179
tmp_command,
170180
cd_command,
171-
write_command,
172-
cp2k_command,
181+
cp_command,
182+
cp2k_command
173183
]
174-
return " ".join(command_list)
184+
185+
return ' && '.join(command_list)
175186

176187

177188
@typeguard.typechecked
@@ -222,13 +233,26 @@ def _create_apps(self):
222233
app_pre = bash_app(cp2k_singlepoint_pre, executors=[self.executor])
223234
app_post = python_app(cp2k_singlepoint_post, executors=["default_threads"])
224235

225-
self.app_pre = partial(
226-
app_pre,
227-
cp2k_input_dict=self.cp2k_input_dict,
228-
properties=tuple(self.outputs),
229-
cp2k_command=cp2k_command,
230-
parsl_resource_specification=wq_resources,
231-
)
236+
# create a wrapped pre app which first parses the input file and writes it to
237+
# disk, then calls the actual bash app with the input file as a DataFuture dependency
238+
# This is necessary because for very large structures, the cp2k input file is
239+
# too large to pass as an argument on a command line
240+
def wrapped_app_pre(geometry, stdout: str, stderr: str):
241+
future = prepare_input(
242+
geometry,
243+
cp2k_input_dict=self.cp2k_input_dict,
244+
properties=tuple(self.outputs),
245+
outputs=[psiflow.context().new_file('cp2k_', '.inp')],
246+
)
247+
return app_pre(
248+
cp2k_command=cp2k_command,
249+
stdout=stdout,
250+
stderr=stderr,
251+
inputs=[future.outputs[0]],
252+
parsl_resource_specification=wq_resources,
253+
)
254+
255+
self.app_pre = wrapped_app_pre
232256
self.app_post = partial(
233257
app_post,
234258
properties=tuple(self.outputs),

psiflow/reference/reference.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,17 @@ class Reference(Computable):
105105
outputs: tuple
106106
batch_size: ClassVar[int] = 1 # not really used
107107

108-
def compute(self, dataset: Dataset, *outputs: Optional[Union[str, tuple]]):
108+
def compute(
109+
self,
110+
arg: Union[Dataset, Geometry, AppFuture, list],
111+
*outputs: Optional[Union[str, tuple]],
112+
):
113+
if isinstance(arg, Dataset):
114+
dataset = arg
115+
elif isinstance(arg, list):
116+
dataset = Dataset(arg)
117+
elif isinstance(arg, AppFuture) or isinstance(arg, Geometry):
118+
dataset = Dataset([arg])
109119
compute_outputs = compute_dataset(dataset, dataset.length(), self)
110120
if len(outputs) == 0:
111121
outputs_ = tuple(self.outputs)

tests/test_reference.py

+12
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,18 @@ def test_cp2k_failure(context, tmp_path):
330330
assert "ABORT" in log # verify error is captured
331331

332332

333+
def test_cp2k_memory(context, simple_cp2k_input):
334+
reference = CP2K(simple_cp2k_input)
335+
geometry = Geometry.from_data(
336+
numbers=np.ones(4000),
337+
positions=np.random.uniform(0, 20, size=(4000, 3)),
338+
cell=20 * np.eye(3), # box way too large
339+
)
340+
energy, forces = reference.compute(geometry)
341+
energy, forces = energy.result(), forces.result()
342+
assert np.all(np.isnan(energy))
343+
344+
333345
@pytest.mark.filterwarnings("ignore:Original input file not found")
334346
def test_cp2k_timeout(context, simple_cp2k_input):
335347
reference = CP2K(simple_cp2k_input)

0 commit comments

Comments
 (0)