31
31
build_unique_component_name ,
32
32
)
33
33
from reana_db .database import Session
34
- from reana_db .models import Job , JobCache , Workflow , RunStatus
34
+ from reana_db .models import Job , JobCache , Workflow , RunStatus , JobStatus
35
35
from sqlalchemy .exc import SQLAlchemyError
36
36
from sqlalchemy .orm .attributes import flag_modified
37
37
@@ -118,11 +118,21 @@ def on_message(self, body, message):
118
118
)
119
119
elif workflow and workflow .status not in ALIVE_STATUSES :
120
120
logging .warning (
121
- f"Event for not alive workflow { workflow .id_ } with DB status { workflow .status } received:\n "
122
- f"{ body } \n Ignoring..."
121
+ f"Event for not alive workflow { workflow .id_ } with DB status { workflow .status } received."
123
122
)
123
+ try :
124
+ _delete_workflow_batch_pod (workflow )
125
+ logging .info (
126
+ f"Remove batch-pod for not alive { workflow .id_ } workflow."
127
+ )
128
+ except REANAWorkflowControllerError as exception :
129
+ logging .error (
130
+ f"Could not clean up not alive workflow { workflow .id_ } batch pod for workflow."
131
+ f" Error: { exception } "
132
+ )
133
+ _delete_workflow_jobs (workflow )
124
134
else :
125
- logging .warning (
135
+ logging .error (
126
136
f"Event for workflow { workflow_uuid } that doesn't exist in DB received:\n "
127
137
f"{ body } \n Ignoring..."
128
138
)
@@ -162,7 +172,7 @@ def _update_workflow_status(workflow, status, logs):
162
172
163
173
if RunStatus .should_cleanup_job (status ):
164
174
try :
165
- _delete_workflow_job (workflow )
175
+ _delete_workflow_batch_pod (workflow )
166
176
except REANAWorkflowControllerError as exception :
167
177
logging .error (
168
178
f"Could not clean up workflow job for workflow { workflow .id_ } ."
@@ -281,7 +291,7 @@ def _update_job_cache(msg):
281
291
Session .add (cached_job )
282
292
283
293
284
- def _delete_workflow_job (workflow : Workflow ) -> None :
294
+ def _delete_workflow_batch_pod (workflow : Workflow ) -> None :
285
295
job_name = build_unique_component_name ("run-batch" , workflow .id_ )
286
296
try :
287
297
current_k8s_batchv1_api_client .delete_namespaced_job (
@@ -295,6 +305,36 @@ def _delete_workflow_job(workflow: Workflow) -> None:
295
305
)
296
306
297
307
308
def _delete_workflow_jobs(workflow: Workflow) -> None:
    """Remove the Kubernetes jobs of a workflow's unfinished jobs.

    Every ``Job`` row whose ``workflow_uuid`` equals ``workflow.id_`` is
    inspected. Rows whose status is running, queued or created get their
    ``run-job`` Kubernetes job deleted with background propagation; after a
    successful deletion the row is marked ``JobStatus.stopped``.

    A Kubernetes ``ApiException`` is logged and otherwise ignored, leaving
    the status of that job untouched. The session is committed once all
    jobs have been processed.

    :param workflow: Workflow whose jobs should be cleaned up.
    """
    deletable_statuses = (
        JobStatus.running,
        JobStatus.queued,
        JobStatus.created,
    )
    workflow_jobs = Session.query(Job).filter(
        Job.workflow_uuid == workflow.id_,
    )
    for workflow_job in workflow_jobs:
        k8s_job_name = build_unique_component_name("run-job", workflow_job.id_)
        if workflow_job.status not in deletable_statuses:
            # Job is not in one of the deletable statuses; leave it alone.
            continue
        try:
            current_k8s_batchv1_api_client.delete_namespaced_job(
                name=k8s_job_name,
                namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
                propagation_policy="Background",
            )
        except ApiException as api_error:
            # Best-effort cleanup: report the failure and move on to the next job.
            logging.error(
                f"run-job pod {k8s_job_name} for {workflow.id_} could not be deleted. Error: {api_error} "
            )
        else:
            # Only record the job as stopped when Kubernetes accepted the deletion.
            workflow_job.status = JobStatus.stopped
            Session.add(workflow_job)
    Session.commit()
336
+
337
+
298
338
def _get_workflow_engine_pod_logs (workflow : Workflow ) -> str :
299
339
try :
300
340
pods = current_k8s_corev1_api_client .list_namespaced_pod (
0 commit comments