Skip to content

Commit b1079d7

Browse files
committed
Merge remote-tracking branch 'origin/main' into close-inactive-contexts
2 parents 5ed0764 + d825d6f commit b1079d7

File tree

12 files changed

+117
-50
lines changed

12 files changed

+117
-50
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.0.36
2+
current_version = 0.0.37
33
commit = True
44
tag = True
55

.github/workflows/checks.yml

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,29 @@ jobs:
55
checks:
66
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
77
runs-on: ubuntu-latest
8+
timeout-minutes: 5
89
strategy:
910
fail-fast: false
1011
matrix:
1112
include:
12-
- python-version: 3.8
13-
env:
13+
- env:
1414
TOXENV: bandit
15-
- python-version: 3.8
16-
env:
15+
- env:
1716
TOXENV: black
18-
- python-version: 3.8
19-
env:
17+
- env:
2018
TOXENV: flake8
21-
- python-version: 3.8
22-
env:
19+
- env:
2320
TOXENV: typing
24-
- python-version: 3.8
25-
env:
21+
- env:
2622
TOXENV: pylint
2723

2824
steps:
2925
- uses: actions/checkout@v4
3026

31-
- name: Set up Python ${{ matrix.python-version }}
27+
- name: Set up Python
3228
uses: actions/setup-python@v5
3329
with:
34-
python-version: ${{ matrix.python-version }}
30+
python-version: 3.11
3531

3632
- name: Run check
3733
env: ${{ matrix.env }}

.github/workflows/publish.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@ on:
66
jobs:
77
publish:
88
runs-on: ubuntu-latest
9+
timeout-minutes: 5
910

1011
steps:
1112
- uses: actions/checkout@v4
1213

13-
- name: Set up Python 3
14+
- name: Set up Python
1415
uses: actions/setup-python@v5
1516
with:
16-
python-version: 3
17+
python-version: 3.11
1718

1819
- name: Publish to PyPI
1920
run: |

.github/workflows/tests.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ jobs:
55
tests:
66
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
77
runs-on: ${{ matrix.os }}
8+
timeout-minutes: 20
89
strategy:
910
fail-fast: false
1011
matrix:

README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1014,7 +1014,12 @@ asyncio.run(main())
10141014

10151015
### Software versions
10161016

1017-
Be sure to include which versions of Scrapy and scrapy-playwright you are using:
1017+
Be sure to include which versions of Scrapy, Playwright and scrapy-playwright you are using:
1018+
1019+
```
1020+
$ playwright --version
1021+
Version 1.44.0
1022+
```
10181023

10191024
```
10201025
$ python -c "import scrapy_playwright; print(scrapy_playwright.__version__)"
@@ -1067,6 +1072,12 @@ class ExampleSpider(scrapy.Spider):
10671072
)
10681073
```
10691074

1075+
#### Minimal code
1076+
Please make the effort to reduce the code to the minimum that still displays the issue.
1077+
It is very rare that a complete project (including middlewares, pipelines, item processing, etc.)
1078+
is really needed to reproduce an issue. Reports that do not show an actual debugging attempt
1079+
will not be considered.
1080+
10701081
### Logs and stats
10711082

10721083
Logs for spider jobs displaying the issue in detail are extremely useful

docs/changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# scrapy-playwright changelog
22

3+
### [v0.0.37](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.37) (2024-07-03)
4+
5+
* Improve Windows concurrency (#286)
6+
7+
38
### [v0.0.36](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.36) (2024-06-24)
49

510
* Windows support (#276)

scrapy_playwright/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.36"
1+
__version__ = "0.0.37"

scrapy_playwright/_utils.py

Lines changed: 53 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import asyncio
2-
import concurrent
32
import logging
43
import platform
54
import threading
@@ -106,34 +105,66 @@ async def _get_header_value(
106105

107106
if platform.system() == "Windows":
108107

109-
class _WindowsAdapter:
110-
"""Utility class to redirect coroutines to an asyncio event loop running
111-
in a different thread. This allows to use a ProactorEventLoop, which is
112-
supported by Playwright on Windows.
108+
class _ThreadedLoopAdapter:
109+
"""Utility class to start an asyncio event loop in a new thread and redirect coroutines.
110+
This allows running Playwright in a different loop than the Scrapy crawler, making it possible to
111+
use ProactorEventLoop, which is supported by Playwright on Windows.
113112
"""
114113

115-
loop = None
116-
thread = None
114+
_loop: asyncio.AbstractEventLoop
115+
_thread: threading.Thread
116+
_coro_queue: asyncio.Queue = asyncio.Queue()
117+
_stop_event: asyncio.Event = asyncio.Event()
117118

118119
@classmethod
119-
def get_event_loop(cls) -> asyncio.AbstractEventLoop:
120-
if cls.thread is None:
121-
if cls.loop is None:
122-
policy = asyncio.WindowsProactorEventLoopPolicy() # type: ignore
123-
cls.loop = policy.new_event_loop()
124-
asyncio.set_event_loop(cls.loop)
125-
if not cls.loop.is_running():
126-
cls.thread = threading.Thread(target=cls.loop.run_forever, daemon=True)
127-
cls.thread.start()
128-
logger.info("Started loop on separate thread: %s", cls.loop)
129-
return cls.loop
120+
async def _handle_coro(cls, coro, future) -> None:
121+
try:
122+
future.set_result(await coro)
123+
except Exception as exc:
124+
future.set_exception(exc)
130125

131126
@classmethod
132-
async def get_result(cls, coro) -> concurrent.futures.Future:
133-
return asyncio.run_coroutine_threadsafe(coro=coro, loop=cls.get_event_loop()).result()
127+
async def _process_queue(cls) -> None:
128+
while not cls._stop_event.is_set():
129+
coro, future = await cls._coro_queue.get()
130+
asyncio.create_task(cls._handle_coro(coro, future))
131+
cls._coro_queue.task_done()
134132

135-
def _deferred_from_coro(coro) -> Deferred:
136-
return scrapy.utils.defer.deferred_from_coro(_WindowsAdapter.get_result(coro))
133+
@classmethod
134+
def _deferred_from_coro(cls, coro) -> Deferred:
135+
future: asyncio.Future = asyncio.Future()
136+
asyncio.run_coroutine_threadsafe(cls._coro_queue.put((coro, future)), cls._loop)
137+
return scrapy.utils.defer.deferred_from_coro(future)
138+
139+
@classmethod
140+
def start(cls) -> None:
141+
policy = asyncio.WindowsProactorEventLoopPolicy() # type: ignore[attr-defined]
142+
cls._loop = policy.new_event_loop()
143+
asyncio.set_event_loop(cls._loop)
144+
145+
cls._thread = threading.Thread(target=cls._loop.run_forever, daemon=True)
146+
cls._thread.start()
147+
logger.info("Started loop on separate thread: %s", cls._loop)
148+
149+
asyncio.run_coroutine_threadsafe(cls._process_queue(), cls._loop)
150+
151+
@classmethod
152+
def stop(cls) -> None:
153+
cls._stop_event.set()
154+
asyncio.run_coroutine_threadsafe(cls._coro_queue.join(), cls._loop)
155+
cls._loop.call_soon_threadsafe(cls._loop.stop)
156+
cls._thread.join()
137157

158+
_deferred_from_coro = _ThreadedLoopAdapter._deferred_from_coro
138159
else:
160+
161+
class _ThreadedLoopAdapter: # type: ignore[no-redef]
162+
@classmethod
163+
def start(cls) -> None:
164+
pass
165+
166+
@classmethod
167+
def stop(cls) -> None:
168+
pass
169+
139170
_deferred_from_coro = scrapy.utils.defer.deferred_from_coro

scrapy_playwright/handler.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from scrapy_playwright.headers import use_scrapy_headers
3434
from scrapy_playwright.page import PageMethod
3535
from scrapy_playwright._utils import (
36+
_ThreadedLoopAdapter,
3637
_deferred_from_coro,
3738
_encode_body,
3839
_get_float_setting,
@@ -109,6 +110,7 @@ class ScrapyPlaywrightDownloadHandler(HTTPDownloadHandler):
109110

110111
def __init__(self, crawler: Crawler) -> None:
111112
super().__init__(settings=crawler.settings, crawler=crawler)
113+
_ThreadedLoopAdapter.start()
112114
if platform.system() != "Windows":
113115
verify_installed_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
114116
crawler.signals.connect(self._engine_started, signals.engine_started)
@@ -324,6 +326,7 @@ def close(self) -> Deferred:
324326
logger.info("Closing download handler")
325327
yield super().close()
326328
yield _deferred_from_coro(self._close())
329+
_ThreadedLoopAdapter.stop()
327330

328331
async def _close(self) -> None:
329332
logger.info("Closing %i contexts", len(self.context_wrappers))

tests/__init__.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
import inspect
23
import logging
34
import platform
@@ -13,17 +14,19 @@
1314

1415

1516
if platform.system() == "Windows":
16-
from scrapy_playwright._utils import _WindowsAdapter
17+
from scrapy_playwright._utils import _ThreadedLoopAdapter
1718

1819
def allow_windows(test_method):
19-
"""Wrap tests with the _WindowsAdapter class on Windows."""
20+
"""Wrap tests with the _ThreadedLoopAdapter class on Windows."""
2021
if not inspect.iscoroutinefunction(test_method):
2122
raise RuntimeError(f"{test_method} must be an async def method")
2223

2324
@wraps(test_method)
2425
async def wrapped(self, *args, **kwargs):
25-
logger.debug("Calling _WindowsAdapter.get_result for %r", self)
26-
await _WindowsAdapter.get_result(test_method(self, *args, **kwargs))
26+
_ThreadedLoopAdapter.start()
27+
coro = test_method(self, *args, **kwargs)
28+
asyncio.run_coroutine_threadsafe(coro=coro, loop=_ThreadedLoopAdapter._loop).result()
29+
_ThreadedLoopAdapter.stop()
2730

2831
return wrapped
2932

0 commit comments

Comments
 (0)