25
25
NodeDumpKeyMapper ,
26
26
ProcessesToDumpContainer ,
27
27
filter_nodes_last_dump_time ,
28
+ load_given_group ,
28
29
)
29
30
30
31
logger = AIIDA_LOGGER .getChild ('tools.dumping' )
@@ -62,7 +63,7 @@ def __init__(
62
63
raise Exception (msg )
63
64
64
65
elif group is not None :
65
- self .group = _validate_group (group )
66
+ self .group = load_given_group (group )
66
67
if self .group :
67
68
self ._collection_nodes = [n .uuid for n in self .group .nodes ]
68
69
@@ -85,9 +86,11 @@ def __init__(
85
86
86
87
@property
87
88
def collection_nodes (self ) -> list [str ]:
88
- """Return collection nodes.
89
+ """Property to hold the collection nodes.
89
90
90
- :return: List of collection node identifiers.
91
+ Takes care of respecting the ``incremental`` attribute, and filtering by ``last_dump_time``.
92
+
93
+ :return: List of collection node UUIDs.
91
94
"""
92
95
if self .incremental and self .last_dump_time :
93
96
self ._collection_nodes = filter_nodes_last_dump_time (
@@ -100,7 +103,9 @@ def collection_nodes(self) -> list[str]:
100
103
def processes_to_dump (self ) -> ProcessesToDumpContainer :
101
104
"""Get the processes to dump from the collection of nodes.
102
105
103
- :return: Instance of the ``ProcessesToDump`` class containing the selected calculations and workflows.
106
+ Only re-evaluates the processes if they have not already been set.
107
+
108
+ :return: Instance of a ``ProcessesToDumpContainer``, that holds the selected calculations and workflows.
104
109
"""
105
110
if not self ._processes_to_dump :
106
111
self ._processes_to_dump = self ._get_processes_to_dump ()
@@ -109,16 +114,13 @@ def processes_to_dump(self) -> ProcessesToDumpContainer:
109
114
def _get_processes_to_dump (self ) -> ProcessesToDumpContainer :
110
115
"""Retrieve the processes from the collection nodes.
111
116
112
- If deduplication is selected, this method takes care of only dumping top-level workflows and only dump
113
- calculations in their own designated directories if they are not part of a workflow.
117
+ Depending on the attributes of the ``CollectionDumper``, this method takes care of only selecting top-level
118
+ workflows and calculations if they are not part of a workflow. This requires using the actual ORM entities,
119
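+ rather than UUIDs, as the ``.caller``s have to be checked. In addition, sub-calculations called by the selected workflows are collected, even if they are not themselves directly contained in the collection.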
114
120
115
- :return: Instance of the ``ProcessesToDump`` class containing the selected calculations and workflows.
121
+ :return: Instance of a ``ProcessesToDumpContainer``, that holds the selected calculations and workflows.
116
122
"""
117
123
118
- # Deduplication is already handled in the ``get_processes`` method, where PKs/UUIDs are used, rather than AiiDA
119
- # ORM entities as here. Specifically, calculations that are part of a workflow are not dumped in their own,
120
- # dedicated directory if they are part of a workflow.
121
-
122
124
if not self .collection_nodes :
123
125
return ProcessesToDumpContainer (calculations = [], workflows = [])
124
126
@@ -136,8 +138,8 @@ def _get_processes_to_dump(self) -> ProcessesToDumpContainer:
136
138
else :
137
139
calculations = [node for node in nodes_orm if isinstance (node , orm .CalculationNode )]
138
140
139
- # Get sub-calculations that were called by workflows of the group, and which are not
140
- # contained in the group.nodes directly
141
+ # Get sub-calculations that were called by workflows but which might themselves not be directly contained in
142
+ # the collection
141
143
called_calculations = []
142
144
for workflow in workflows :
143
145
called_calculations += [
@@ -147,13 +149,17 @@ def _get_processes_to_dump(self) -> ProcessesToDumpContainer:
147
149
# Convert to set to avoid duplicates
148
150
calculations = list (set (calculations + called_calculations ))
149
151
152
+ # Use this small helper class rather than returning a dictionary for access via dot-notation
150
153
return ProcessesToDumpContainer (
151
154
calculations = calculations ,
152
155
workflows = workflows ,
153
156
)
154
157
155
158
def _dump_processes (self , processes : list [orm .CalculationNode ] | list [orm .WorkflowNode ]) -> None :
156
- """Dump a collection of processes."""
159
+ """Dump a list of AiiDA calculations or workflows to disk.
160
+
161
+ :param processes: List of AiiDA calculations or workflows from the ``ProcessesToDumpContainer``.
162
+ """
157
163
158
164
if len (list (processes )) == 0 :
159
165
return
@@ -164,13 +170,12 @@ def _dump_processes(self, processes: list[orm.CalculationNode] | list[orm.Workfl
164
170
sub_path .mkdir (exist_ok = True , parents = True )
165
171
166
172
logger_attr = NodeDumpKeyMapper .get_key_from_node (node = next (iter (processes )))
167
- # ! `getattr` gives a reference to the object, thus I can update the store directly
173
+ # ! `getattr` gives a reference to the actual object, thus I can update the store directly
168
174
current_store = getattr (self .dump_logger .log , logger_attr )
169
175
170
- # breakpoint()
176
+ process_dumper = self . process_dumper
171
177
172
178
for process in processes :
173
- process_dumper = self .process_dumper
174
179
175
180
process_dump_path = sub_path / process_dumper ._generate_default_dump_path (process_node = process , prefix = None )
176
181
@@ -225,24 +230,3 @@ def dump(self, output_path: Path | None = None) -> None:
225
230
self ._dump_processes (processes = collection_processes .workflows )
226
231
if len (collection_processes .calculations ) > 0 :
227
232
self ._dump_processes (processes = collection_processes .calculations )
228
-
229
-
230
- def _validate_group (group : orm .Group | str ) -> orm .Group | None :
231
- """Validate the given group identifier.
232
-
233
- :param group: The group identifier to validate.
234
- :return: Instance of ``orm.Group``.
235
- :raises NotExistent: If no ``orm.Group`` can be loaded for a given label.
236
- """
237
-
238
- if isinstance (group , str ):
239
- try :
240
- return orm .load_group (group )
241
- # `load_group` raises the corresponding errors
242
- except NotExistent :
243
- raise
244
- except :
245
- raise
246
-
247
- elif isinstance (group , orm .Group ):
248
- return group
0 commit comments