1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- # vim: ai ts=4 sts=4 et sw=4 nu
4
-
5
1
import datetime
6
2
import queue
7
3
import threading
8
- from typing import Callable
4
+ from collections . abc import Callable
9
5
10
- from . shared import logger
6
+ from zimscraperlib import logger
11
7
12
8
_shutdown = False
13
9
# Lock that ensures that new workers are not created while the interpreter is
14
10
# shutting down. Must be held while mutating _threads_queues and _shutdown.
15
11
_global_shutdown_lock = threading .Lock ()
16
- thread_deadline_sec = 60
17
12
18
13
19
def excepthook(args):  # pragma: no cover
    """Log exceptions that escaped a thread's run() instead of losing them.

    Meant to be installed as ``threading.excepthook``.

    Args:
        args: the hook argument provided by the threading module; the
            attributes read here are ``thread``, ``exc_type`` and ``exc_value``.
    """
    # threading documents that `thread` can be None; don't let the hook itself
    # fail with an AttributeError while reporting another failure
    thread_name = args.thread.name if args.thread is not None else "<unknown>"
    logger.error(f"UNHANDLED Exception in {thread_name}: {args.exc_type}")
    logger.exception(args.exc_value)
22
17
23
18
24
19
threading .excepthook = excepthook
25
20
26
21
27
- class SotokiExecutor (queue .Queue ):
22
+ class ScraperExecutor (queue .Queue ):
28
23
"""Custom FIFO queue based Executor that's less generic than ThreadPoolExec one
29
24
30
25
Providing more flexibility for the use cases we're interested about:
@@ -34,12 +29,19 @@ class SotokiExecutor(queue.Queue):
34
29
See: https://github.com/python/cpython/blob/3.8/Lib/concurrent/futures/thread.py
35
30
"""
36
31
37
def __init__(
    self,
    queue_size: int = 10,
    nb_workers: int = 1,
    executor_name: str = "executor",
    thread_deadline_sec: int = 60,
):
    """Configure the executor; no worker thread is created here (see start()).

    Args:
        queue_size: forwarded to the underlying queue.Queue as its max size.
        nb_workers: number of worker threads start() will spawn.
        executor_name: label used in worker thread names and log messages.
        thread_deadline_sec: seconds join() grants each worker to finish.
    """
    super().__init__(queue_size)
    self.executor_name = executor_name
    # guards changes of this executor's own shutdown state
    self._shutdown_lock = threading.Lock()
    self.nb_workers = nb_workers
    # exceptions collected from failed tasks (appended by worker())
    self.exceptions: list[Exception] = []
    self.thread_deadline_sec = thread_deadline_sec
43
45
44
46
@property
45
47
def exception (self ):
@@ -59,30 +61,38 @@ def submit(self, task: Callable, **kwargs):
59
61
with self ._shutdown_lock , _global_shutdown_lock :
60
62
if not self .alive :
61
63
raise RuntimeError ("cannot submit task to dead executor" )
64
+ if self .no_more :
65
+ raise RuntimeError (
66
+ "cannot submit task to a joined executor, restart it first"
67
+ )
62
68
if _shutdown :
63
- raise RuntimeError ("cannot submit task after " "interpreter shutdown" )
69
+ raise RuntimeError ( # pragma: no cover
70
+ "cannot submit task after interpreter shutdown"
71
+ )
64
72
65
73
while True :
66
74
try :
67
75
self .put ((task , kwargs ), block = True , timeout = 3.0 )
68
76
except queue .Full :
69
77
if self .no_more :
70
- break
78
+ # rarely happens except if submit and join are done in different
79
+ # threads, but we need this to escape the while loop
80
+ break # pragma: no cover
71
81
else :
72
82
break
73
83
74
84
def start(self):
    """Enable executor, starting requested amount of workers

    Workers are started always, not provisioned dynamically.

    drain() is called first, then the per-run state is reset (`no_more`,
    `_shutdown`, collected `exceptions`) so that a previously joined or
    shut-down executor can be started again.
    """
    self.drain()
    self._workers: set[threading.Thread] = set()
    self.no_more = False
    self._shutdown = False
    # cleared in place so existing references to the list stay valid
    self.exceptions[:] = []

    for n in range(self.nb_workers):
        t = threading.Thread(
            target=self.worker,
            name=f"{self.executor_name}-{n}",
            daemon=True,  # never block interpreter exit on a stuck worker
        )
        t.start()
        self._workers.add(t)
@@ -95,7 +105,7 @@ def worker(self):
95
105
if self .no_more :
96
106
break
97
107
continue
98
- except TypeError :
108
+ except TypeError : # pragma: no cover
99
109
# received None from the queue. most likely shuting down
100
110
return
101
111
@@ -108,7 +118,7 @@ def worker(self):
108
118
except Exception as exc :
109
119
logger .error (f"Error processing { func } with { kwargs = } " )
110
120
logger .exception (exc )
111
- if raises :
121
+ if raises : # to cover when raises = False
112
122
self .exceptions .append (exc )
113
123
self .shutdown ()
114
124
finally :
@@ -129,30 +139,30 @@ def drain(self):
129
139
130
140
def join(self):
    """Await completion of workers, requesting them to stop taking new task

    Sets `no_more`, then waits for every worker in turn. Each worker gets its
    own `thread_deadline_sec` budget, counted from the moment its turn starts;
    a worker still alive after that budget is skipped, not killed.
    """
    logger.debug(f"joining all threads for {self.executor_name}")
    self.no_more = True
    for t in self._workers:
        deadline = datetime.datetime.now(tz=datetime.UTC) + datetime.timedelta(
            seconds=self.thread_deadline_sec
        )
        # _workers is a set: an enumerate() index would not match the `-{n}`
        # suffix given at creation, so the thread's real name is logged
        logger.debug(f"Giving {t.name} {self.thread_deadline_sec}s to join")
        # this event is never set: wait() only acts as a pause between attempts
        e = threading.Event()
        while t.is_alive() and datetime.datetime.now(tz=datetime.UTC) < deadline:
            t.join(1)
            e.wait(timeout=2)
        if t.is_alive():
            logger.debug(f"Thread {t.name} is not joining. Skipping…")
        else:
            logger.debug(f"Thread {t.name} joined")
    logger.debug(f"all threads joined for {self.executor_name}")
152
162
153
- def shutdown (self , wait = True ):
163
+ def shutdown (self , * , wait = True ):
154
164
"""stop the executor, either somewhat immediately or awaiting completion"""
155
- logger .debug (f"shutting down executor { self .prefix } with { wait = } " )
165
+ logger .debug (f"shutting down { self .executor_name } with { wait = } " )
156
166
with self ._shutdown_lock :
157
167
self ._shutdown = True
158
168
0 commit comments