Skip to content

Commit f4a32e8

Browse files
committed
snake_case
1 parent 20302ba commit f4a32e8

File tree

7 files changed

+42
-46
lines changed

7 files changed

+42
-46
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ jobs:
5959
strategy:
6060
matrix:
6161
os: [ubuntu-18.04, macos-10.15]
62-
python-version: [3.7, 3.8]
62+
python-version: [3.7, 3.8, 3.9]
6363

6464
steps:
6565
- uses: actions/checkout@v2

README.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77

88
This project provides a Scrapy Download Handler which performs requests using
9-
[Playwright](https://github.com/microsoft/playwright-python). It can be used to handle
9+
[Playwright for Python](https://github.com/microsoft/playwright-python). It can be used to handle
1010
pages that require JavaScript. This package does not interfere with regular
1111
Scrapy workflows such as request scheduling or item processing.
1212

@@ -59,23 +59,22 @@ TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
5959

6060
* `PLAYWRIGHT_BROWSER_TYPE` (type `str`, default `chromium`)
6161
The browser type to be launched. Valid values are `chromium`, `firefox` and `webkit`.
62-
See the docs for the [`BrowserType` class](https://microsoft.github.io/playwright-python/async_api/index.html#playwright.async_api.BrowserType).
6362

6463
* `PLAYWRIGHT_LAUNCH_OPTIONS` (type `dict`, default `{}`)
6564

6665
A dictionary with options to be passed when launching the Browser.
67-
See the docs for [`BrowserType.launch`](https://microsoft.github.io/playwright-python/async_api/index.html#playwright.async_api.BrowserType.launch).
66+
See the docs for [`BrowserType.launch`](https://playwright.dev/python/docs/api/class-browsertype#browser_typelaunchkwargs).
6867

6968
* `PLAYWRIGHT_CONTEXT_ARGS` (type `dict`, default `{}`)
7069

7170
A dictionary with keyword arguments to be passed when creating the default Browser context.
72-
See the docs for [`Browser.new_context`](https://microsoft.github.io/playwright-python/async_api/index.html#playwright.async_api.Browser.new_context).
71+
See the docs for [`Browser.new_context`](https://playwright.dev/python/docs/api/class-browser#browsernew_contextkwargs).
7372

7473
* `PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT` (type `Optional[int]`, default `None`)
7574

7675
The timeout used by Playwright when requesting pages. If `None` or unset,
7776
the default value will be used (30000 ms at the time of writing).
78-
See the docs for [page.setDefaultNavigationTimeout](https://playwright.dev/#version=v1.6.2&path=docs%2Fapi.md&q=pagesetdefaultnavigationtimeouttimeout).
77+
See the docs for [`BrowserContext.set_default_navigation_timeout`](https://playwright.dev/python/docs/api/class-browsercontext#browser_contextset_default_navigation_timeouttimeout).
7978

8079

8180
## Basic usage
@@ -217,9 +216,9 @@ class ScrollSpider(scrapy.Spider):
217216
playwright=True,
218217
playwright_include_page=True,
219218
playwright_page_coroutines=[
220-
PageCoroutine("waitForSelector", "div.quote"),
219+
PageCoroutine("wait_for_selector", "div.quote"),
221220
PageCoroutine("evaluate", "window.scrollBy(0, document.body.scrollHeight)"),
222-
PageCoroutine("waitForSelector", "div.quote:nth-child(11)"), # 10 per page
221+
PageCoroutine("wait_for_selector", "div.quote:nth-child(11)"), # 10 per page
223222
],
224223
),
225224
)

scrapy_playwright/handler.py

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
from typing import Callable, Optional, Type, TypeVar
55
from urllib.parse import urlparse
66

7-
import playwright
8-
from playwright.async_api import Page
7+
from playwright.async_api import Page, PlaywrightContextManager, Request as PwRequest, Route
98
from scrapy import Spider, signals
109
from scrapy.core.downloader.handlers.http import HTTPDownloadHandler
1110
from scrapy.crawler import Crawler
@@ -34,14 +33,11 @@ def _make_request_handler(
3433
scrapy_request: Request,
3534
stats: StatsCollector,
3635
) -> Callable:
37-
def request_handler(
38-
route: playwright.async_api.Route,
39-
request: playwright.async_api.Request,
40-
) -> None:
36+
def request_handler(route: Route, pw_request: PwRequest) -> None:
4137
"""
4238
Override request headers, method and body
4339
"""
44-
if request.url == scrapy_request.url:
40+
if pw_request.url == scrapy_request.url:
4541
overrides = {
4642
"method": scrapy_request.method,
4743
"headers": {
@@ -50,21 +46,21 @@ def request_handler(
5046
},
5147
}
5248
if scrapy_request.body:
53-
overrides["postData"] = scrapy_request.body.decode(scrapy_request.encoding)
49+
overrides["post_data"] = scrapy_request.body.decode(scrapy_request.encoding)
5450
# otherwise this fails with playwright.helper.Error: NS_ERROR_NET_RESET
5551
if browser_type == "firefox":
56-
overrides["headers"]["host"] = urlparse(request.url).netloc
52+
overrides["headers"]["host"] = urlparse(pw_request.url).netloc
5753
else:
58-
overrides = {"headers": request.headers.copy()}
54+
overrides = {"headers": pw_request.headers.copy()}
5955
# override user agent, for consistency with other requests
6056
if scrapy_request.headers.get("user-agent"):
6157
user_agent = scrapy_request.headers["user-agent"].decode("utf-8")
6258
overrides["headers"]["user-agent"] = user_agent
6359
asyncio.create_task(route.continue_(**overrides))
6460
# increment stats
65-
stats.inc_value("playwright/request_method_count/{}".format(request.method))
61+
stats.inc_value("playwright/request_method_count/{}".format(pw_request.method))
6662
stats.inc_value("playwright/request_count")
67-
if request.isNavigationRequest():
63+
if pw_request.is_navigation_request():
6864
stats.inc_value("playwright/request_count/navigation")
6965

7066
return request_handler
@@ -102,15 +98,15 @@ def _engine_started(self) -> Deferred:
10298
return deferred_from_coro(self._launch_browser())
10399

104100
async def _launch_browser(self) -> None:
105-
self.playwright_context_manager = playwright.AsyncPlaywrightContextManager()
101+
self.playwright_context_manager = PlaywrightContextManager()
106102
self.playwright = await self.playwright_context_manager.start()
107103
browser_launcher = getattr(self.playwright, self.browser_type).launch
108104
self.browser = await browser_launcher(**self.launch_options)
109105
logger.info(f"Browser {self.browser_type} launched")
110-
self.context = await self.browser.newContext(**self.context_args)
106+
self.context = await self.browser.new_context(**self.context_args)
111107
logger.info("Browser context started")
112108
if self.default_navigation_timeout:
113-
self.context.setDefaultNavigationTimeout(self.default_navigation_timeout)
109+
self.context.set_default_navigation_timeout(self.default_navigation_timeout)
114110

115111
@inlineCallbacks
116112
def close(self) -> Deferred:
@@ -143,18 +139,18 @@ async def _download_request(self, request: Request, spider: Spider) -> Response:
143139
try:
144140
result = await self._download_request_with_page(request, spider, page)
145141
except Exception:
146-
if not page.isClosed():
142+
if not page.is_closed():
147143
await page.close()
148144
self.stats.inc_value("playwright/page_count/closed")
149145
raise
150146
else:
151147
return result
152148

153149
async def _create_page_for_request(self, request: Request) -> Page:
154-
page = await self.context.newPage() # type: ignore
150+
page = await self.context.new_page() # type: ignore
155151
self.stats.inc_value("playwright/page_count")
156152
if self.default_navigation_timeout:
157-
page.setDefaultNavigationTimeout(self.default_navigation_timeout)
153+
page.set_default_navigation_timeout(self.default_navigation_timeout)
158154
return page
159155

160156
async def _download_request_with_page(
@@ -170,7 +166,7 @@ async def _download_request_with_page(
170166
if isinstance(pc, PageCoroutine):
171167
method = getattr(page, pc.method)
172168
pc.result = await method(*pc.args, **pc.kwargs)
173-
await page.waitForLoadState(timeout=self.default_navigation_timeout)
169+
await page.wait_for_load_state(timeout=self.default_navigation_timeout)
174170

175171
body = (await page.content()).encode("utf8")
176172
request.meta["download_latency"] = time() - start_time

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
"Programming Language :: Python",
2525
"Programming Language :: Python :: 3.7",
2626
"Programming Language :: Python :: 3.8",
27+
"Programming Language :: Python :: 3.9",
2728
"Framework :: Scrapy",
2829
"Intended Audience :: Developers",
2930
"Topic :: Internet :: WWW/HTTP",
@@ -32,6 +33,6 @@
3233
],
3334
install_requires=[
3435
"scrapy>=2.0,!=2.4.0",
35-
"playwright>=0.7.0",
36+
"playwright>=1.8.0a1",
3637
],
3738
)

tests/test_page_coroutines.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55

66
@pytest.mark.asyncio
77
async def test_page_coroutines():
8-
screenshot = PageCoroutine("screenshot", options={"path": "/tmp/file", "type": "png"})
8+
screenshot = PageCoroutine("screenshot", "foo", 123, path="/tmp/file", type="png")
99
assert screenshot.method == "screenshot"
10-
assert screenshot.args == ()
11-
assert screenshot.kwargs == {"options": {"path": "/tmp/file", "type": "png"}}
10+
assert screenshot.args == ("foo", 123)
11+
assert screenshot.kwargs == {"path": "/tmp/file", "type": "png"}
1212
assert screenshot.result is None
1313
assert str(screenshot) == "<PageCoroutine for method 'screenshot'>"

tests/test_playwright_requests.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
from tempfile import NamedTemporaryFile
44

55
import pytest
6-
from playwright import TimeoutError
7-
from playwright.async_api import Page as PlaywrightPage
6+
from playwright.async_api import Page as PlaywrightPage, TimeoutError
87
from scrapy import Spider, Request, FormRequest
98
from scrapy.http.response.html import HtmlResponse
109
from scrapy.utils.test import get_crawler
@@ -113,11 +112,11 @@ async def test_page_coroutine_infinite_scroll(self):
113112
meta={
114113
"playwright": True,
115114
"playwright_page_coroutines": [
116-
PageCoro("waitForSelector", selector="div.quote"),
115+
PageCoro("wait_for_selector", selector="div.quote"),
117116
PageCoro("evaluate", "window.scrollBy(0, document.body.scrollHeight)"),
118-
PageCoro("waitForSelector", selector="div.quote:nth-child(11)"),
117+
PageCoro("wait_for_selector", selector="div.quote:nth-child(11)"),
119118
PageCoro("evaluate", "window.scrollBy(0, document.body.scrollHeight)"),
120-
PageCoro("waitForSelector", selector="div.quote:nth-child(21)"),
119+
PageCoro("wait_for_selector", selector="div.quote:nth-child(21)"),
121120
],
122121
},
123122
)
@@ -157,7 +156,7 @@ async def test_context_args(self):
157156
get_crawler(
158157
settings_dict={
159158
"PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
160-
"PLAYWRIGHT_CONTEXT_ARGS": {"javaScriptEnabled": False},
159+
"PLAYWRIGHT_CONTEXT_ARGS": {"java_script_enabled": False},
161160
}
162161
)
163162
)
@@ -169,7 +168,7 @@ async def test_context_args(self):
169168
meta={
170169
"playwright": True,
171170
"playwright_page_coroutines": [
172-
PageCoro("waitForSelector", selector="div.quote", timeout=1000),
171+
PageCoro("wait_for_selector", selector="div.quote", timeout=1000),
173172
],
174173
},
175174
)

tox.ini

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
11
[tox]
2-
envlist = py37,py38,flake8,typing,black
2+
envlist = py,flake8,typing,black
33

44

55
[testenv]
66
deps =
7-
-rrequirements.txt
8-
-rtests/requirements.txt
7+
playwright>=1.8.0a1
8+
scrapy>=2.0,!=2.4.0
9+
pytest<5.4
10+
pytest-asyncio<0.11
11+
pytest-cov>=2.8
12+
pytest-twisted>=1.11
913
commands =
1014
py.test --reactor=asyncio --cov=scrapy_playwright --cov-report=term-missing --cov-report=html --cov-report=xml {posargs: scrapy_playwright tests}
1115
setenv =
1216
DEBUG=pw:api
1317

14-
[testenv:py37]
15-
basepython = python3.7
16-
17-
[testenv:py38]
18-
basepython = python3.8
18+
[testenv:py]
19+
basepython = python3
1920

2021
[testenv:flake8]
2122
basepython = python3.8

0 commit comments

Comments
 (0)