@@ -215,7 +215,7 @@ def __init__(self, *,
215
215
216
216
self ._mp_manager = SpawnContext .Manager () # Starts a server process
217
217
self ._tasks_in_progress = self ._mp_manager .dict ()
218
- self ._kill_event = threading .Event ()
218
+ self ._stop_event = threading .Event () # when set, will begin shutdown process
219
219
220
220
self .monitoring_queue = self ._mp_manager .Queue ()
221
221
self .pending_task_queue = SpawnContext .Queue ()
@@ -298,14 +298,9 @@ def drain_to_incoming(self):
298
298
logger .debug ("Sent drain" )
299
299
300
300
@wrap_with_logs
301
- def pull_tasks (self , kill_event ):
301
+ def pull_tasks (self ):
302
302
""" Pull tasks from the incoming tasks zmq pipe onto the internal
303
303
pending task queue
304
-
305
- Parameters:
306
- -----------
307
- kill_event : threading.Event
308
- Event to let the thread know when it is time to die.
309
304
"""
310
305
logger .info ("starting" )
311
306
poller = zmq .Poller ()
@@ -319,7 +314,7 @@ def pull_tasks(self, kill_event):
319
314
last_interchange_contact = time .time ()
320
315
task_recv_counter = 0
321
316
322
- while not kill_event .is_set ():
317
+ while not self . _stop_event .is_set ():
323
318
324
319
# This loop will sit inside poller.poll until either a message
325
320
# arrives or one of these event times is reached. This code
@@ -367,7 +362,7 @@ def pull_tasks(self, kill_event):
367
362
logger .debug ("Got heartbeat from interchange" )
368
363
elif tasks == DRAINED_CODE :
369
364
logger .info ("Got fully drained message from interchange - setting kill flag" )
370
- kill_event .set ()
365
+ self . _stop_event .set ()
371
366
else :
372
367
task_recv_counter += len (tasks )
373
368
logger .debug ("Got executor tasks: {}, cumulative count of tasks: {}" .format (
@@ -383,20 +378,14 @@ def pull_tasks(self, kill_event):
383
378
# Only check if no messages were received.
384
379
if time .time () >= last_interchange_contact + self .heartbeat_threshold :
385
380
logger .critical ("Missing contact with interchange beyond heartbeat_threshold" )
386
- kill_event .set ()
381
+ self . _stop_event .set ()
387
382
logger .critical ("Exiting" )
388
383
break
389
384
390
385
@wrap_with_logs
391
- def push_results (self , kill_event ):
386
+ def push_results (self ):
392
387
""" Listens on the pending_result_queue and sends out results via zmq
393
-
394
- Parameters:
395
- -----------
396
- kill_event : threading.Event
397
- Event to let the thread know when it is time to die.
398
388
"""
399
-
400
389
logger .debug ("Starting result push thread" )
401
390
402
391
push_poll_period = max (10 , self .poll_period ) / 1000 # push_poll_period must be atleast 10 ms
@@ -406,7 +395,7 @@ def push_results(self, kill_event):
406
395
last_result_beat = time .time ()
407
396
items = []
408
397
409
- while not kill_event .is_set ():
398
+ while not self . _stop_event .is_set ():
410
399
try :
411
400
logger .debug ("Starting pending_result_queue get" )
412
401
r = self .task_scheduler .get_result (block = True , timeout = push_poll_period )
@@ -438,18 +427,11 @@ def push_results(self, kill_event):
438
427
logger .critical ("Exiting" )
439
428
440
429
@wrap_with_logs
441
- def worker_watchdog (self , kill_event : threading .Event ):
442
- """Keeps workers alive.
443
-
444
- Parameters:
445
- -----------
446
- kill_event : threading.Event
447
- Event to let the thread know when it is time to die.
448
- """
449
-
430
+ def worker_watchdog (self ):
431
+ """Keeps workers alive."""
450
432
logger .debug ("Starting worker watchdog" )
451
433
452
- while not kill_event .wait (self .heartbeat_period ):
434
+ while not self . _stop_event .wait (self .heartbeat_period ):
453
435
for worker_id , p in self .procs .items ():
454
436
if not p .is_alive ():
455
437
logger .error ("Worker {} has died" .format (worker_id ))
@@ -475,7 +457,7 @@ def worker_watchdog(self, kill_event: threading.Event):
475
457
logger .critical ("Exiting" )
476
458
477
459
@wrap_with_logs
478
- def handle_monitoring_messages (self , kill_event : threading . Event ):
460
+ def handle_monitoring_messages (self ):
479
461
"""Transfer messages from the managed monitoring queue to the result queue.
480
462
481
463
We separate the queues so that the result queue does not rely on a manager
@@ -489,7 +471,7 @@ def handle_monitoring_messages(self, kill_event: threading.Event):
489
471
490
472
poll_period_s = max (10 , self .poll_period ) / 1000 # Must be at least 10 ms
491
473
492
- while not kill_event .is_set ():
474
+ while not self . _stop_event .is_set ():
493
475
try :
494
476
logger .debug ("Starting monitor_queue.get()" )
495
477
msg = self .monitoring_queue .get (block = True , timeout = poll_period_s )
@@ -516,18 +498,16 @@ def start(self):
516
498
517
499
logger .debug ("Workers started" )
518
500
519
- thr_task_puller = threading .Thread (target = self .pull_tasks ,
520
- args = (self ._kill_event ,),
521
- name = "Task-Puller" )
522
- thr_result_pusher = threading .Thread (target = self .push_results ,
523
- args = (self ._kill_event ,),
524
- name = "Result-Pusher" )
525
- thr_worker_watchdog = threading .Thread (target = self .worker_watchdog ,
526
- args = (self ._kill_event ,),
527
- name = "worker-watchdog" )
528
- thr_monitoring_handler = threading .Thread (target = self .handle_monitoring_messages ,
529
- args = (self ._kill_event ,),
530
- name = "Monitoring-Handler" )
501
+ thr_task_puller = threading .Thread (target = self .pull_tasks , name = "Task-Puller" )
502
+ thr_result_pusher = threading .Thread (
503
+ target = self .push_results , name = "Result-Pusher"
504
+ )
505
+ thr_worker_watchdog = threading .Thread (
506
+ target = self .worker_watchdog , name = "worker-watchdog"
507
+ )
508
+ thr_monitoring_handler = threading .Thread (
509
+ target = self .handle_monitoring_messages , name = "Monitoring-Handler"
510
+ )
531
511
532
512
thr_task_puller .start ()
533
513
thr_result_pusher .start ()
@@ -537,7 +517,7 @@ def start(self):
537
517
logger .info ("Manager threads started" )
538
518
539
519
# This might need a multiprocessing event to signal back.
540
- self ._kill_event .wait ()
520
+ self ._stop_event .wait ()
541
521
logger .critical ("Received kill event, terminating worker processes" )
542
522
543
523
thr_task_puller .join ()
0 commit comments