Skip to content

Commit 59f6fb2

Browse files
committed
Move load_group function to utils.py, add get_path_by_uuid to Logger, add get_group_subpath method.
1 parent d5c2d08 commit 59f6fb2

File tree

6 files changed

+135
-89
lines changed

6 files changed

+135
-89
lines changed

src/aiida/cmdline/commands/cmd_profile.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -433,18 +433,26 @@ def profile_mirror(
433433
if incremental:
434434
msg = 'Incremental mirroring selected. Will update directory.'
435435
echo.echo_report(msg)
436+
else:
437+
msg = 'Overwriting selected. Will clean directory first.'
438+
# TODO: Maybe add y/n confirmation here?
439+
echo.echo_report(msg)
436440

437441
if num_processes_to_dump == 0:
438-
echo.echo_success('No processes to dump.')
442+
msg = 'No processes to dump.'
443+
echo.echo_success(msg)
439444
else:
440445
profile_dumper.dump_processes()
441-
echo.echo_success(f'Dumped {num_processes_to_dump} new nodes.')
446+
msg = f'Dumped {num_processes_to_dump} new nodes.'
447+
echo.echo_success(msg)
442448

443449
if delete_missing:
450+
# breakpoint()
444451
if num_processes_to_delete == 0:
445452
echo.echo_success('No processes to delete.')
446453
else:
447454
profile_dumper.delete_processes()
455+
448456
echo.echo_success(f'Deleted {num_processes_to_delete} node directories.')
449457

450458
# Append the current dump time to dumping safeguard file

src/aiida/tools/dumping/collection.py

Lines changed: 22 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
NodeDumpKeyMapper,
2626
ProcessesToDumpContainer,
2727
filter_nodes_last_dump_time,
28+
load_given_group,
2829
)
2930

3031
logger = AIIDA_LOGGER.getChild('tools.dumping')
@@ -62,7 +63,7 @@ def __init__(
6263
raise Exception(msg)
6364

6465
elif group is not None:
65-
self.group = _validate_group(group)
66+
self.group = load_given_group(group)
6667
if self.group:
6768
self._collection_nodes = [n.uuid for n in self.group.nodes]
6869

@@ -85,9 +86,11 @@ def __init__(
8586

8687
@property
8788
def collection_nodes(self) -> list[str]:
88-
"""Return collection nodes.
89+
"""Property to hold the collection nodes.
8990
90-
:return: List of collection node identifiers.
91+
Takes care of respecting the ``incremental`` attribute, and filtering by ``last_dump_time``.
92+
93+
:return: List of collection node UUIDs.
9194
"""
9295
if self.incremental and self.last_dump_time:
9396
self._collection_nodes = filter_nodes_last_dump_time(
@@ -100,7 +103,9 @@ def collection_nodes(self) -> list[str]:
100103
def processes_to_dump(self) -> ProcessesToDumpContainer:
101104
"""Get the processes to dump from the collection of nodes.
102105
103-
:return: Instance of the ``ProcessesToDump`` class containing the selected calculations and workflows.
106+
Only re-evaluates the processes, if not already set.
107+
108+
:return: Instance of a ``ProcessesToDumpContainer``, that holds the selected calculations and workflows.
104109
"""
105110
if not self._processes_to_dump:
106111
self._processes_to_dump = self._get_processes_to_dump()
@@ -109,16 +114,13 @@ def processes_to_dump(self) -> ProcessesToDumpContainer:
109114
def _get_processes_to_dump(self) -> ProcessesToDumpContainer:
110115
"""Retrieve the processeses from the collection nodes.
111116
112-
If deduplication is selected, this method takes care of only dumping top-level workflows and only dump
113-
calculations in their own designated directories if they are not part of a workflow.
117+
Depending on the attributes of the ``CollectionDumper``, this method takes care of only selecting top-level
118+
workflows and calculations if they are not part of a workflow. This requires to use the actual ORM entities,
119+
rather than UUIDs, as the ``.caller``s have to be checked. In addition, sub-calculations
114120
115-
:return: Instance of the ``ProcessesToDump`` class containing the selected calculations and workflows.
121+
:return: Instance of a ``ProcessesToDumpContainer``, that holds the selected calculations and workflows.
116122
"""
117123

118-
# Deduplication is already handled in the ``get_processes`` method, where PKs/UUIDs are used, rather than AiiDA
119-
# ORM entities as here. Specifically, calculations that are part of a workflow are not dumpid in their own,
120-
# dedicated directory if they are part of a workflow.
121-
122124
if not self.collection_nodes:
123125
return ProcessesToDumpContainer(calculations=[], workflows=[])
124126

@@ -136,8 +138,8 @@ def _get_processes_to_dump(self) -> ProcessesToDumpContainer:
136138
else:
137139
calculations = [node for node in nodes_orm if isinstance(node, orm.CalculationNode)]
138140

139-
# Get sub-calculations that were called by workflows of the group, and which are not
140-
# contained in the group.nodes directly
141+
# Get sub-calculations that were called by workflows but which might themselves not be directly contained in
142+
# the collection
141143
called_calculations = []
142144
for workflow in workflows:
143145
called_calculations += [
@@ -147,13 +149,17 @@ def _get_processes_to_dump(self) -> ProcessesToDumpContainer:
147149
# Convert to set to avoid duplicates
148150
calculations = list(set(calculations + called_calculations))
149151

152+
# Use this small helper class rather than returning a dictionary for access via dot-notation
150153
return ProcessesToDumpContainer(
151154
calculations=calculations,
152155
workflows=workflows,
153156
)
154157

155158
def _dump_processes(self, processes: list[orm.CalculationNode] | list[orm.WorkflowNode]) -> None:
156-
"""Dump a collection of processes."""
159+
"""Dump a list of AiiDA calculations or workflows to disk.
160+
161+
:param processes: List of AiiDA calculations or workflows from the ``ProcessesToDumpContainer``.
162+
"""
157163

158164
if len(list(processes)) == 0:
159165
return
@@ -164,13 +170,12 @@ def _dump_processes(self, processes: list[orm.CalculationNode] | list[orm.Workfl
164170
sub_path.mkdir(exist_ok=True, parents=True)
165171

166172
logger_attr = NodeDumpKeyMapper.get_key_from_node(node=next(iter(processes)))
167-
# ! `getattr` gives a reference to the object, thus I can update the store directly
173+
# ! `getattr` gives a reference to the actual object, thus I can update the store directly
168174
current_store = getattr(self.dump_logger.log, logger_attr)
169175

170-
# breakpoint()
176+
process_dumper = self.process_dumper
171177

172178
for process in processes:
173-
process_dumper = self.process_dumper
174179

175180
process_dump_path = sub_path / process_dumper._generate_default_dump_path(process_node=process, prefix=None)
176181

@@ -225,24 +230,3 @@ def dump(self, output_path: Path | None = None) -> None:
225230
self._dump_processes(processes=collection_processes.workflows)
226231
if len(collection_processes.calculations) > 0:
227232
self._dump_processes(processes=collection_processes.calculations)
228-
229-
230-
def _validate_group(group: orm.Group | str) -> orm.Group | None:
231-
"""Validate the given group identifier.
232-
233-
:param group: The group identifier to validate.
234-
:return: Insance of ``orm.Group``.
235-
:raises NotExistent: If no ``orm.Group`` can be loaded for a given label.
236-
"""
237-
238-
if isinstance(group, str):
239-
try:
240-
return orm.load_group(group)
241-
# `load_group` raises the corresponding errors
242-
except NotExistent:
243-
raise
244-
except:
245-
raise
246-
247-
elif isinstance(group, orm.Group):
248-
return group

src/aiida/tools/dumping/logger.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from pathlib import Path
1414
from typing import Collection
1515

16+
from aiida.common.exceptions import NotExistent
17+
1618

1719
@dataclass
1820
class DumpLog:
@@ -195,11 +197,31 @@ def deserialize_logs(category_data: dict) -> DumpLogStore:
195197

196198
return instance
197199

198-
def find_store_by_uuid(self, uuid: str) -> DumpLogStore | None:
200+
def get_store_by_uuid(self, uuid: str) -> DumpLogStore:
199201
"""Find the store that contains the given UUID."""
200202
# Iterate over the fields of the DumpLogStoreCollection dataclass for generality
203+
# TODO: Add error handling for wrong UUID
201204
for field_ in fields(self.log):
202205
store = getattr(self.log, field_.name)
203206
if uuid in store.entries:
204207
return store
205-
return None
208+
209+
msg = f"No corresponding `DumpLogStore` found for UUID: `{uuid}`."
210+
raise NotExistent(msg)
211+
212+
def get_path_by_uuid(self, uuid: str) -> Path | None:
213+
"""Find the store that contains the given UUID."""
214+
# Delegate the store lookup to ``get_store_by_uuid``, then resolve the entry's path
215+
216+
try:
217+
current_store = self.get_store_by_uuid(uuid=uuid)
218+
path = current_store.entries[uuid].path
219+
return path
220+
except NotExistent as exc:
221+
raise NotExistent(exc.args[0]) from exc
222+
except KeyError as exc:
223+
msg = f"UUID: `{uuid}` not contained in store `{current_store}`."
224+
raise KeyError(msg) from exc
225+
except:
226+
# For debugging
227+
raise

src/aiida/tools/dumping/process.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def __init__(
4848

4949
super().__init__(base_dump_config=self.base_dump_config, dump_logger=dump_logger)
5050

51-
# Unpack arguments for ProcessDumper for easier access
51+
# Unpack arguments for easier access
5252
self.include_inputs = self.process_dump_config.include_inputs
5353
self.include_outputs = self.process_dump_config.include_outputs
5454
self.include_attributes = self.process_dump_config.include_attributes

0 commit comments

Comments
 (0)