|
1 | 1 | import asyncio
|
2 |
| -import concurrent |
3 | 2 | import logging
|
4 | 3 | import platform
|
5 | 4 | import threading
|
@@ -98,34 +97,66 @@ async def _get_header_value(
|
98 | 97 |
|
99 | 98 | if platform.system() == "Windows":
|
100 | 99 |
|
101 |
| - class _WindowsAdapter: |
102 |
| - """Utility class to redirect coroutines to an asyncio event loop running |
103 |
| - in a different thread. This allows to use a ProactorEventLoop, which is |
104 |
| - supported by Playwright on Windows. |
class _ThreadedLoopAdapter:
    """Utility class to start an asyncio event loop in a new thread and redirect coroutines.
    This allows to run Playwright in a different loop than the Scrapy crawler, allowing to
    use ProactorEventLoop which is supported by Playwright on Windows.

    All state lives on the class: ``start()`` spawns the loop thread,
    ``_deferred_from_coro()`` hands coroutines over to it and ``stop()`` shuts it down.
    ``start()`` and ``stop()`` are expected to be called from a thread other than
    the worker thread, since both block until the worker loop has reacted.
    """

    _loop: asyncio.AbstractEventLoop
    _thread: threading.Thread
    # The three attributes below are (re)created by ``_bootstrap`` *on the worker loop*
    # every time ``start()`` runs. Building them at class-definition time would bind
    # them, on Python versions before 3.10, to whatever loop is current at import, and
    # would leave a stale "stopped" flag behind after the first ``stop()``.
    _coro_queue: asyncio.Queue
    _stop_event: asyncio.Event
    _tasks: set

    @classmethod
    def _is_running(cls) -> bool:
        """Return True while the worker thread created by ``start()`` is alive."""
        thread = getattr(cls, "_thread", None)
        return thread is not None and thread.is_alive()

    @classmethod
    def _track(cls, task) -> None:
        """Keep a strong reference to ``task`` until it finishes.

        The event loop only holds weak references to tasks, so an otherwise
        unreferenced task may be garbage-collected before it completes.
        """
        cls._tasks.add(task)
        task.add_done_callback(cls._tasks.discard)

    @staticmethod
    def _settle(future, outcome, is_error: bool) -> None:
        """Resolve ``future`` with ``outcome`` on the loop that owns the future.

        asyncio futures are not thread-safe and this is invoked from the worker
        thread, so the state change is scheduled with ``call_soon_threadsafe``.
        """

        def _apply() -> None:
            if future.done():  # e.g. cancelled by the caller in the meantime
                return
            if is_error:
                future.set_exception(outcome)
            else:
                future.set_result(outcome)

        future.get_loop().call_soon_threadsafe(_apply)

    @classmethod
    async def _handle_coro(cls, coro, future) -> None:
        """Await ``coro`` on the worker loop and forward its result or exception to ``future``."""
        try:
            result = await coro
        except Exception as exc:
            cls._settle(future, exc, is_error=True)
        else:
            cls._settle(future, result, is_error=False)

    @classmethod
    async def _process_queue(cls) -> None:
        """Take ``(coro, future)`` pairs off the queue and run each one in its own task."""
        while not cls._stop_event.is_set():
            coro, future = await cls._coro_queue.get()
            cls._track(asyncio.create_task(cls._handle_coro(coro, future)))
            cls._coro_queue.task_done()

    @classmethod
    async def _bootstrap(cls) -> None:
        """Create the loop-bound state and launch the queue processor (runs on the worker loop)."""
        cls._coro_queue = asyncio.Queue()
        cls._stop_event = asyncio.Event()
        cls._tasks = set()
        cls._track(asyncio.create_task(cls._process_queue()))

    @classmethod
    async def _shutdown(cls) -> None:
        """Cancel outstanding work so the loop can be halted cleanly (runs on the worker loop).

        Futures belonging to cancelled or never-dispatched coroutines are left
        unresolved, which matches what happened when the loop was simply stopped.
        """
        cls._stop_event.set()
        pending = list(cls._tasks)  # queue processor plus in-flight handlers
        for task in pending:
            task.cancel()
        await asyncio.gather(*pending, return_exceptions=True)
        # Whatever is still queued will never run; close the coroutine objects so
        # they do not trigger "coroutine ... was never awaited" warnings.
        while not cls._coro_queue.empty():
            coro, _future = cls._coro_queue.get_nowait()
            cls._coro_queue.task_done()
            close = getattr(coro, "close", None)
            if callable(close):
                close()

    @classmethod
    def _deferred_from_coro(cls, coro) -> Deferred:
        """Schedule ``coro`` on the worker loop and return a Deferred for its outcome.

        The future is created in the calling thread, so it belongs to the caller's
        loop; ``_handle_coro`` resolves it once ``coro`` has finished.
        """
        future: asyncio.Future = asyncio.Future()
        asyncio.run_coroutine_threadsafe(cls._coro_queue.put((coro, future)), cls._loop)
        return scrapy.utils.defer.deferred_from_coro(future)

    @classmethod
    def start(cls) -> None:
        """Start the worker loop in a daemon thread; a no-op if it is already running."""
        if cls._is_running():
            # A second loop/thread would orphan the one that is already serving requests.
            return

        policy = asyncio.WindowsProactorEventLoopPolicy()  # type: ignore[attr-defined]
        # Deliberately NOT installed with asyncio.set_event_loop(): that would replace
        # the calling thread's current loop with one that runs in another thread.
        cls._loop = policy.new_event_loop()

        cls._thread = threading.Thread(target=cls._loop.run_forever, daemon=True)
        cls._thread.start()
        logger.info("Started loop on separate thread: %s", cls._loop)

        # Block until the queue exists so _deferred_from_coro() can be used right away.
        asyncio.run_coroutine_threadsafe(cls._bootstrap(), cls._loop).result()

    @classmethod
    def stop(cls) -> None:
        """Shut the worker loop down and join its thread; a no-op if it is not running."""
        if not cls._is_running():
            return
        try:
            # Wait for the shutdown coroutine: merely scheduling it before halting the
            # loop would never give it a chance to run.
            asyncio.run_coroutine_threadsafe(cls._shutdown(), cls._loop).result()
        finally:
            cls._loop.call_soon_threadsafe(cls._loop.stop)
            cls._thread.join()
129 | 149 |
|
| 150 | + _deferred_from_coro = _ThreadedLoopAdapter._deferred_from_coro |
130 | 151 | else:
|
| 152 | + |
| 153 | + class _ThreadedLoopAdapter: # type: ignore[no-redef] |
| 154 | + @classmethod |
| 155 | + def start(cls) -> None: |
| 156 | + pass |
| 157 | + |
| 158 | + @classmethod |
| 159 | + def stop(cls) -> None: |
| 160 | + pass |
| 161 | + |
131 | 162 | _deferred_from_coro = scrapy.utils.defer.deferred_from_coro
|
0 commit comments