Skip to content

Commit ae8edb7

Browse files
committed
Adapt executor to zimscraperlib
1 parent 711deaf commit ae8edb7

File tree

2 files changed

+329
-33
lines changed

2 files changed

+329
-33
lines changed

src/zimscraperlib/executor.py

Lines changed: 43 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,25 @@
1-
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*-
3-
# vim: ai ts=4 sts=4 et sw=4 nu
4-
51
import datetime
62
import queue
73
import threading
8-
from typing import Callable
4+
from collections.abc import Callable
95

10-
from .shared import logger
6+
from zimscraperlib import logger
117

128
_shutdown = False
139
# Lock that ensures that new workers are not created while the interpreter is
1410
# shutting down. Must be held while mutating _threads_queues and _shutdown.
1511
_global_shutdown_lock = threading.Lock()
16-
thread_deadline_sec = 60
1712

1813

19-
def excepthook(args):
14+
def excepthook(args): # pragma: no cover
2015
logger.error(f"UNHANDLED Exception in {args.thread.name}: {args.exc_type}")
2116
logger.exception(args.exc_value)
2217

2318

2419
threading.excepthook = excepthook
2520

2621

27-
class SotokiExecutor(queue.Queue):
22+
class ScraperExecutor(queue.Queue):
2823
"""Custom FIFO queue-based Executor that's less generic than the ThreadPoolExecutor one
2924
3025
Providing more flexibility for the use cases we're interested in:
@@ -34,12 +29,19 @@ class SotokiExecutor(queue.Queue):
3429
See: https://github.com/python/cpython/blob/3.8/Lib/concurrent/futures/thread.py
3530
"""
3631

37-
def __init__(self, queue_size: int = 10, nb_workers: int = 1, prefix: str = "T-"):
32+
def __init__(
33+
self,
34+
queue_size: int = 10,
35+
nb_workers: int = 1,
36+
executor_name: str = "executor",
37+
thread_deadline_sec: int = 60,
38+
):
3839
super().__init__(queue_size)
39-
self.prefix = prefix
40+
self.executor_name = executor_name
4041
self._shutdown_lock = threading.Lock()
4142
self.nb_workers = nb_workers
4243
self.exceptions = []
44+
self.thread_deadline_sec = thread_deadline_sec
4345

4446
@property
4547
def exception(self):
@@ -59,30 +61,38 @@ def submit(self, task: Callable, **kwargs):
5961
with self._shutdown_lock, _global_shutdown_lock:
6062
if not self.alive:
6163
raise RuntimeError("cannot submit task to dead executor")
64+
if self.no_more:
65+
raise RuntimeError(
66+
"cannot submit task to a joined executor, restart it first"
67+
)
6268
if _shutdown:
63-
raise RuntimeError("cannot submit task after " "interpreter shutdown")
69+
raise RuntimeError( # pragma: no cover
70+
"cannot submit task after interpreter shutdown"
71+
)
6472

6573
while True:
6674
try:
6775
self.put((task, kwargs), block=True, timeout=3.0)
6876
except queue.Full:
6977
if self.no_more:
70-
break
78+
# rarely happens except if submit and join are done in different
79+
# threads, but we need this to escape the while loop
80+
break # pragma: no cover
7181
else:
7282
break
7383

7484
def start(self):
7585
"""Enable executor, starting requested amount of workers
7686
77-
Workers are started always, not provisioned dynamicaly"""
87+
Workers are started always, not provisioned dynamically"""
7888
self.drain()
79-
self.release_halt()
80-
self._workers = set()
89+
self._workers: set[threading.Thread] = set()
90+
self.no_more = False
8191
self._shutdown = False
8292
self.exceptions[:] = []
8393

8494
for n in range(self.nb_workers):
85-
t = threading.Thread(target=self.worker, name=f"{self.prefix}{n}")
95+
t = threading.Thread(target=self.worker, name=f"{self.executor_name}-{n}")
8696
t.daemon = True
8797
t.start()
8898
self._workers.add(t)
@@ -95,7 +105,7 @@ def worker(self):
95105
if self.no_more:
96106
break
97107
continue
98-
except TypeError:
108+
except TypeError: # pragma: no cover
99109
# received None from the queue. most likely shutting down
100110
return
101111

@@ -108,7 +118,7 @@ def worker(self):
108118
except Exception as exc:
109119
logger.error(f"Error processing {func} with {kwargs=}")
110120
logger.exception(exc)
111-
if raises:
121+
if raises: # to cover when raises = False
112122
self.exceptions.append(exc)
113123
self.shutdown()
114124
finally:
@@ -129,30 +139,30 @@ def drain(self):
129139

130140
def join(self):
131141
"""Await completion of workers, requesting them to stop taking new tasks"""
132-
logger.debug(f"joining all threads for {self.prefix}")
142+
logger.debug(f"joining all threads for {self.executor_name}")
133143
self.no_more = True
134144
for num, t in enumerate(self._workers):
135-
deadline = datetime.datetime.now() + datetime.timedelta(
136-
seconds=thread_deadline_sec
145+
deadline = datetime.datetime.now(tz=datetime.UTC) + datetime.timedelta(
146+
seconds=self.thread_deadline_sec
147+
)
148+
logger.debug(
149+
f"Giving {self.executor_name}-{num} {self.thread_deadline_sec}s to join"
137150
)
138-
logger.debug(f"Giving {self.prefix}{num} {thread_deadline_sec}s to join")
139151
e = threading.Event()
140-
while t.is_alive() and datetime.datetime.now() < deadline:
152+
while t.is_alive() and datetime.datetime.now(tz=datetime.UTC) < deadline:
141153
t.join(1)
142154
e.wait(timeout=2)
143155
if t.is_alive():
144-
logger.debug(f"Thread {self.prefix}{num} is not joining. Skipping…")
156+
logger.debug(
157+
f"Thread {self.executor_name}-{num} is not joining. Skipping…"
158+
)
145159
else:
146-
logger.debug(f"Thread {self.prefix}{num} joined")
147-
logger.debug(f"all threads joined for {self.prefix}")
148-
149-
def release_halt(self):
150-
"""release the `no_more` flag preventing workers from taking up tasks"""
151-
self.no_more = False
160+
logger.debug(f"Thread {self.executor_name}-{num} joined")
161+
logger.debug(f"all threads joined for {self.executor_name}")
152162

153-
def shutdown(self, wait=True):
163+
def shutdown(self, *, wait=True):
154164
"""stop the executor, either somewhat immediately or awaiting completion"""
155-
logger.debug(f"shutting down executor {self.prefix} with {wait=}")
165+
logger.debug(f"shutting down {self.executor_name} with {wait=}")
156166
with self._shutdown_lock:
157167
self._shutdown = True
158168

0 commit comments

Comments
 (0)