Skip to content

Commit e2e5677

Browse files
authored
Merge pull request #44 from dabapps/stopping-state
Add STOPPING state
2 parents d90e9fc + e870812 commit e2e5677

File tree

6 files changed

+145
-88
lines changed

6 files changed

+145
-88
lines changed

README.md

+5
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,14 @@ Jobs have a `state` field which can have one of the following values:
223223
* `NEW` (has been created, waiting for a worker process to run the next task)
224224
* `READY` (has run a task before, awaiting a worker process to run the next task)
225225
* `PROCESSING` (a task is currently being processed by a worker)
226+
* `STOPPING` (the worker process received a signal from the OS requesting it to exit while it was processing this job)
226227
* `COMPLETE` (all job tasks have completed successfully)
227228
* `FAILED` (a job task failed)
228229

230+
#### State diagram
231+
232+
![state diagram](states.png)
233+
229234
### API
230235

231236
#### Model methods

django_dbq/management/commands/worker.py

+62-61
Original file line numberDiff line numberDiff line change
@@ -14,72 +14,13 @@
1414
DEFAULT_QUEUE_NAME = "default"
1515

1616

17-
def process_job(queue_name):
18-
"""This function grabs the next available job for a given queue, and runs its next task."""
19-
20-
with transaction.atomic():
21-
job = Job.objects.get_ready_or_none(queue_name)
22-
if not job:
23-
return
24-
25-
logger.info(
26-
'Processing job: name="%s" queue="%s" id=%s state=%s next_task=%s',
27-
job.name,
28-
queue_name,
29-
job.pk,
30-
job.state,
31-
job.next_task,
32-
)
33-
job.state = Job.STATES.PROCESSING
34-
job.save()
35-
36-
try:
37-
task_function = import_string(job.next_task)
38-
task_function(job)
39-
job.update_next_task()
40-
if not job.next_task:
41-
job.state = Job.STATES.COMPLETE
42-
else:
43-
job.state = Job.STATES.READY
44-
except Exception as exception:
45-
logger.exception("Job id=%s failed", job.pk)
46-
job.state = Job.STATES.FAILED
47-
48-
failure_hook_name = job.get_failure_hook_name()
49-
if failure_hook_name:
50-
logger.info(
51-
"Running failure hook %s for job id=%s", failure_hook_name, job.pk
52-
)
53-
failure_hook_function = import_string(failure_hook_name)
54-
failure_hook_function(job, exception)
55-
else:
56-
logger.info("No failure hook for job id=%s", job.pk)
57-
58-
logger.info(
59-
'Updating job: name="%s" id=%s state=%s next_task=%s',
60-
job.name,
61-
job.pk,
62-
job.state,
63-
job.next_task or "none",
64-
)
65-
66-
try:
67-
job.save()
68-
except:
69-
logger.error(
70-
"Failed to save job: id=%s org=%s",
71-
job.pk,
72-
job.workspace.get("organisation_id"),
73-
)
74-
raise
75-
76-
7717
class Worker:
7818
def __init__(self, name, rate_limit_in_seconds):
7919
self.queue_name = name
8020
self.rate_limit_in_seconds = rate_limit_in_seconds
8121
self.alive = True
8222
self.last_job_finished = None
23+
self.current_job = None
8324
self.init_signals()
8425

8526
def init_signals(self):
@@ -93,6 +34,9 @@ def init_signals(self):
9334

9435
def shutdown(self, signum, frame):
    """Stop the worker loop and flag any in-flight job as ``STOPPING``.

    Takes the ``(signum, frame)`` arguments of a signal handler; neither
    is used. (Presumably registered by ``init_signals`` - confirm there.)
    """
    # ``run`` loops on ``self.alive``, so clearing it ends the loop.
    self.alive = False

    in_flight = self.current_job
    if not in_flight:
        return

    # Persist only the state column; other fields are left untouched.
    in_flight.state = Job.STATES.STOPPING
    in_flight.save(update_fields=["state"])
9640

9741
def run(self):
9842
while self.alive:
@@ -107,9 +51,66 @@ def process_job(self):
10751
):
10852
return
10953

110-
process_job(self.queue_name)
54+
self._process_job()
55+
11156
self.last_job_finished = timezone.now()
11257

58+
def _process_job(self):
    """Grab the next available job for this worker's queue and run its next task.

    The job is saved as ``PROCESSING`` and recorded in ``self.current_job``
    while its task runs, which is what lets ``shutdown`` mark it as
    ``STOPPING``. Afterwards the job is saved as:

    * ``COMPLETE`` - the task succeeded and no further task is queued;
    * ``READY``    - the task succeeded and another task remains;
    * ``FAILED``   - the task raised an ``Exception``; the job's failure
      hook, if one is configured, is then called with ``(job, exception)``.

    Does nothing when no job is ready. ``self.current_job`` is always reset
    to ``None`` on exit, including when an exception propagates.

    Raises:
        Any exception raised by the failure hook or by the final
        ``job.save()`` (the latter is logged first, then re-raised).
    """
    try:
        # Claim the job and flip it to PROCESSING in a single transaction.
        with transaction.atomic():
            job = Job.objects.get_ready_or_none(self.queue_name)
            if not job:
                return

            logger.info(
                'Processing job: name="%s" queue="%s" id=%s state=%s next_task=%s',
                job.name,
                self.queue_name,
                job.pk,
                job.state,
                job.next_task,
            )
            job.state = Job.STATES.PROCESSING
            job.save()
            self.current_job = job

        try:
            task_function = import_string(job.next_task)
            task_function(job)
            job.update_next_task()
            if not job.next_task:
                job.state = Job.STATES.COMPLETE
            else:
                job.state = Job.STATES.READY
        except Exception as exception:
            logger.exception("Job id=%s failed", job.pk)
            job.state = Job.STATES.FAILED

            failure_hook_name = job.get_failure_hook_name()
            if failure_hook_name:
                logger.info(
                    "Running failure hook %s for job id=%s", failure_hook_name, job.pk
                )
                failure_hook_function = import_string(failure_hook_name)
                failure_hook_function(job, exception)
            else:
                logger.info("No failure hook for job id=%s", job.pk)

        logger.info(
            'Updating job: name="%s" id=%s state=%s next_task=%s',
            job.name,
            job.pk,
            job.state,
            job.next_task or "none",
        )

        try:
            job.save()
        except BaseException:
            # Explicit equivalent of the former bare ``except:``: log
            # whatever interrupted the save, then let it propagate.
            logger.exception("Failed to save job: id=%s", job.pk)
            raise
    finally:
        # Drop the reference on every exit path. Otherwise a failure hook
        # or save that raises would leave a stale job here, and a later
        # ``shutdown`` would overwrite its state with STOPPING.
        self.current_job = None
113+
113114

114115
class Command(BaseCommand):
115116

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Generated by Django 3.2rc1 on 2021-11-29 04:48
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Alter the ``state`` field of the ``job`` model so its choices include ``STOPPING``."""

    # Applied after the previous django_dbq migration.
    dependencies = [
        ("django_dbq", "0005_job_run_after"),
    ]

    operations = [
        migrations.AlterField(
            model_name="job",
            name="state",
            field=models.CharField(
                # Full list of (value, label) pairs allowed for the field.
                choices=[
                    ("NEW", "New"),
                    ("READY", "Ready"),
                    ("PROCESSING", "Processing"),
                    ("STOPPING", "Stopping"),
                    ("FAILED", "Failed"),
                    ("COMPLETE", "Complete"),
                ],
                db_index=True,
                default="NEW",
                max_length=20,
            ),
        ),
    ]

django_dbq/models.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,11 @@ def delete_old(self):
5353
"""
5454
Delete all jobs older than DELETE_JOBS_AFTER_HOURS
5555
"""
56-
delete_jobs_in_states = [Job.STATES.FAILED, Job.STATES.COMPLETE]
56+
delete_jobs_in_states = [
57+
Job.STATES.FAILED,
58+
Job.STATES.COMPLETE,
59+
Job.STATES.STOPPING,
60+
]
5761
delete_jobs_created_before = timezone.now() - datetime.timedelta(
5862
hours=DELETE_JOBS_AFTER_HOURS
5963
)
@@ -82,6 +86,7 @@ class STATES(TextChoices):
8286
NEW = "NEW"
8387
READY = "READY"
8488
PROCESSING = "PROCESSING"
89+
STOPPING = "STOPPING"
8590
FAILED = "FAILED"
8691
COMPLETE = "COMPLETE"
8792

django_dbq/tests.py

+42-26
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from django.test.utils import override_settings
88
from django.utils import timezone
99

10-
from django_dbq.management.commands.worker import process_job, Worker
10+
from django_dbq.management.commands.worker import Worker
1111
from django_dbq.models import Job
1212

1313
from io import StringIO
@@ -123,41 +123,53 @@ def test_queue_depth_for_queue_with_zero_jobs(self):
123123

124124
@freezegun.freeze_time()
125125
@mock.patch("django_dbq.management.commands.worker.sleep")
126-
@mock.patch("django_dbq.management.commands.worker.process_job")
127126
class WorkerProcessProcessJobTestCase(TestCase):
128127
def setUp(self):
129128
super().setUp()
130-
self.MockWorker = mock.MagicMock()
131-
self.MockWorker.queue_name = "default"
132-
self.MockWorker.rate_limit_in_seconds = 5
133-
self.MockWorker.last_job_finished = None
129+
self.mock_worker = mock.MagicMock()
130+
self.mock_worker.queue_name = "default"
131+
self.mock_worker.rate_limit_in_seconds = 5
132+
self.mock_worker.last_job_finished = None
134133

135-
def test_process_job_no_previous_job_run(self, mock_process_job, mock_sleep):
136-
Worker.process_job(self.MockWorker)
134+
def test_process_job_no_previous_job_run(self, mock_sleep):
135+
Worker.process_job(self.mock_worker)
137136
self.assertEqual(mock_sleep.call_count, 1)
138-
self.assertEqual(mock_process_job.call_count, 1)
139-
self.assertEqual(self.MockWorker.last_job_finished, timezone.now())
137+
self.assertEqual(self.mock_worker._process_job.call_count, 1)
138+
self.assertEqual(self.mock_worker.last_job_finished, timezone.now())
140139

141-
def test_process_job_previous_job_too_soon(self, mock_process_job, mock_sleep):
142-
self.MockWorker.last_job_finished = timezone.now() - timezone.timedelta(
140+
def test_process_job_previous_job_too_soon(self, mock_sleep):
141+
self.mock_worker.last_job_finished = timezone.now() - timezone.timedelta(
143142
seconds=2
144143
)
145-
Worker.process_job(self.MockWorker)
144+
Worker.process_job(self.mock_worker)
146145
self.assertEqual(mock_sleep.call_count, 1)
147-
self.assertEqual(mock_process_job.call_count, 0)
146+
self.assertEqual(self.mock_worker._process_job.call_count, 0)
148147
self.assertEqual(
149-
self.MockWorker.last_job_finished,
148+
self.mock_worker.last_job_finished,
150149
timezone.now() - timezone.timedelta(seconds=2),
151150
)
152151

153-
def test_process_job_previous_job_long_time_ago(self, mock_process_job, mock_sleep):
154-
self.MockWorker.last_job_finished = timezone.now() - timezone.timedelta(
152+
def test_process_job_previous_job_long_time_ago(self, mock_sleep):
153+
self.mock_worker.last_job_finished = timezone.now() - timezone.timedelta(
155154
seconds=7
156155
)
157-
Worker.process_job(self.MockWorker)
156+
Worker.process_job(self.mock_worker)
158157
self.assertEqual(mock_sleep.call_count, 1)
159-
self.assertEqual(mock_process_job.call_count, 1)
160-
self.assertEqual(self.MockWorker.last_job_finished, timezone.now())
158+
self.assertEqual(self.mock_worker._process_job.call_count, 1)
159+
self.assertEqual(self.mock_worker.last_job_finished, timezone.now())
160+
161+
162+
@override_settings(JOBS={"testjob": {"tasks": ["a"]}})
class ShutdownTestCase(TestCase):
    """Checks ``Worker.shutdown`` when the worker holds a current job."""

    def test_shutdown_sets_state_to_stopping(self):
        """Shutting down with a current job persists the STOPPING state."""
        in_flight = Job.objects.create(name="testjob")

        worker = Worker("default", 1)
        worker.current_job = in_flight
        worker.shutdown(None, None)

        # Re-read the row so the assertion checks what was saved,
        # not the in-memory instance.
        in_flight.refresh_from_db()
        self.assertEqual(in_flight.state, Job.STATES.STOPPING)
161173

162174

163175
@override_settings(JOBS={"testjob": {"tasks": ["a"]}})
@@ -267,7 +279,7 @@ def test_task_sequence(self):
267279
class ProcessJobTestCase(TestCase):
268280
def test_process_job(self):
269281
job = Job.objects.create(name="testjob")
270-
process_job("default")
282+
Worker("default", 1)._process_job()
271283
job = Job.objects.get()
272284
self.assertEqual(job.state, Job.STATES.COMPLETE)
273285

@@ -276,7 +288,7 @@ def test_process_job_wrong_queue(self):
276288
Processing a different queue shouldn't touch our other job
277289
"""
278290
job = Job.objects.create(name="testjob", queue_name="lol")
279-
process_job("default")
291+
Worker("default", 1)._process_job()
280292
job = Job.objects.get()
281293
self.assertEqual(job.state, Job.STATES.NEW)
282294

@@ -315,7 +327,7 @@ def test_creation_hook_only_runs_on_create(self):
315327
class JobFailureHookTestCase(TestCase):
316328
def test_failure_hook(self):
317329
job = Job.objects.create(name="testjob")
318-
process_job("default")
330+
Worker("default", 1)._process_job()
319331
job = Job.objects.get()
320332
self.assertEqual(job.state, Job.STATES.FAILED)
321333
self.assertEqual(job.workspace["output"], "failure hook ran")
@@ -334,14 +346,18 @@ def test_delete_old_jobs(self):
334346
j2.created = two_days_ago
335347
j2.save()
336348

337-
j3 = Job.objects.create(name="testjob", state=Job.STATES.NEW)
349+
j3 = Job.objects.create(name="testjob", state=Job.STATES.STOPPING)
338350
j3.created = two_days_ago
339351
j3.save()
340352

341-
j4 = Job.objects.create(name="testjob", state=Job.STATES.COMPLETE)
353+
j4 = Job.objects.create(name="testjob", state=Job.STATES.NEW)
354+
j4.created = two_days_ago
355+
j4.save()
356+
357+
j5 = Job.objects.create(name="testjob", state=Job.STATES.COMPLETE)
342358

343359
Job.objects.delete_old()
344360

345361
self.assertEqual(Job.objects.count(), 2)
346-
self.assertTrue(j3 in Job.objects.all())
347362
self.assertTrue(j4 in Job.objects.all())
363+
self.assertTrue(j5 in Job.objects.all())

states.png

99.1 KB
Loading

0 commit comments

Comments
 (0)