Skip to content

Commit 7e8674f

Browse files
Vladyslav Moisieienkov committed
consumer: remove pods for deleted workflows
closes reanahub#437
1 parent f2d2be9 commit 7e8674f

File tree

1 file changed: +46 -6 lines changed

reana_workflow_controller/consumer.py

Lines changed: 46 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
build_unique_component_name,
3232
)
3333
from reana_db.database import Session
34-
from reana_db.models import Job, JobCache, Workflow, RunStatus
34+
from reana_db.models import Job, JobCache, Workflow, RunStatus, JobStatus
3535
from sqlalchemy.exc import SQLAlchemyError
3636
from sqlalchemy.orm.attributes import flag_modified
3737

@@ -118,11 +118,21 @@ def on_message(self, body, message):
118118
)
119119
elif workflow and workflow.status not in ALIVE_STATUSES:
120120
logging.warning(
121-
f"Event for not alive workflow {workflow.id_} with DB status {workflow.status} received:\n"
122-
f"{body}\nIgnoring..."
121+
f"Event for not alive workflow {workflow.id_} with DB status {workflow.status} received."
123122
)
123+
try:
124+
_delete_workflow_batch_pod(workflow)
125+
logging.info(
126+
f"Remove batch-pod for not alive {workflow.id_} workflow."
127+
)
128+
except REANAWorkflowControllerError as exception:
129+
logging.error(
130+
f"Could not clean up not alive workflow {workflow.id_} batch pod for workflow."
131+
f" Error: {exception}"
132+
)
133+
_delete_workflow_jobs(workflow)
124134
else:
125-
logging.warning(
135+
logging.error(
126136
f"Event for workflow {workflow_uuid} that doesn't exist in DB received:\n"
127137
f"{body}\nIgnoring..."
128138
)
@@ -162,7 +172,7 @@ def _update_workflow_status(workflow, status, logs):
162172

163173
if RunStatus.should_cleanup_job(status):
164174
try:
165-
_delete_workflow_job(workflow)
175+
_delete_workflow_batch_pod(workflow)
166176
except REANAWorkflowControllerError as exception:
167177
logging.error(
168178
f"Could not clean up workflow job for workflow {workflow.id_}."
@@ -281,7 +291,7 @@ def _update_job_cache(msg):
281291
Session.add(cached_job)
282292

283293

284-
def _delete_workflow_job(workflow: Workflow) -> None:
294+
def _delete_workflow_batch_pod(workflow: Workflow) -> None:
285295
job_name = build_unique_component_name("run-batch", workflow.id_)
286296
try:
287297
current_k8s_batchv1_api_client.delete_namespaced_job(
@@ -295,6 +305,36 @@ def _delete_workflow_job(workflow: Workflow) -> None:
295305
)
296306

297307

308+
def _delete_workflow_jobs(workflow: Workflow) -> None:
    """Delete the Kubernetes jobs still pending for the given workflow.

    Every ``Job`` row whose ``workflow_uuid`` equals ``workflow.id_`` is
    visited. Rows whose status is running, queued or created get their
    ``run-job`` Kubernetes job deleted with background propagation and are
    then marked as ``JobStatus.stopped``. A Kubernetes ``ApiException`` is
    logged and the corresponding row is left unchanged. The session is
    committed once after all jobs have been processed.

    :param workflow: Workflow whose jobs should be cleaned up.
    """
    deletable_statuses = (JobStatus.running, JobStatus.queued, JobStatus.created)
    workflow_jobs = Session.query(Job).filter(Job.workflow_uuid == workflow.id_)

    for job in workflow_jobs:
        job_name = build_unique_component_name("run-job", job.id_)
        if job.status not in deletable_statuses:
            # Job already reached a final state; nothing to delete.
            continue
        try:
            current_k8s_batchv1_api_client.delete_namespaced_job(
                name=job_name,
                namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
                propagation_policy="Background",
            )
        except ApiException as e:
            # Best effort: report the failure and keep going with other jobs.
            logging.error(
                f"run-job pod {job_name} for {workflow.id_} could not be deleted. Error: {e}"
            )
        else:
            # Only mark the job as stopped once Kubernetes accepted the delete.
            job.status = JobStatus.stopped
            Session.add(job)

    Session.commit()
336+
337+
298338
def _get_workflow_engine_pod_logs(workflow: Workflow) -> str:
299339
try:
300340
pods = current_k8s_corev1_api_client.list_namespaced_pod(

0 commit comments

Comments (0)