snake_case

elacuesta · elacuesta · commit f4a32e8c8a41 · 2021-02-22T13:43:46.000-03:00
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -59,7 +59,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-18.04, macos-10.15]
-        python-version: [3.7, 3.8]
+        python-version: [3.7, 3.8, 3.9]
 
     steps:
     - uses: actions/checkout@v2
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@
 
 
 This project provides a Scrapy Download Handler which performs requests using
-[Playwright](https://github.com/microsoft/playwright-python). It can be used to handle
+[Playwright for Python](https://github.com/microsoft/playwright-python). It can be used to handle
 pages that require JavaScript. This package does not interfere with regular
 Scrapy workflows such as request scheduling or item processing.
 
@@ -59,23 +59,22 @@ TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
 
 * `PLAYWRIGHT_BROWSER_TYPE` (type `str`, default `chromium`)
     The browser type to be launched. Valid values are (`chromium`, `firefox`, `webkit`).
-    See the docs for the [`BrowserType` class](https://microsoft.github.io/playwright-python/async_api/index.html#playwright.async_api.BrowserType).
 
 * `PLAYWRIGHT_LAUNCH_OPTIONS` (type `dict`, default `{}`)
 
     A dictionary with options to be passed when launching the Browser.
-    See the docs for [`BrowserType.launch`](https://microsoft.github.io/playwright-python/async_api/index.html#playwright.async_api.BrowserType.launch).
+    See the docs for [`BrowserType.launch`](https://playwright.dev/python/docs/api/class-browsertype#browser_typelaunchkwargs).
 
 * `PLAYWRIGHT_CONTEXT_ARGS` (type `dict`, default `{}`)
 
     A dictionary with keyword arguments to be passed when creating the default Browser context.
-    See the docs for [`Browser.new_context`](https://microsoft.github.io/playwright-python/async_api/index.html#playwright.async_api.Browser.new_context).
+    See the docs for [`Browser.new_context`](https://playwright.dev/python/docs/api/class-browser#browsernew_contextkwargs).
 
 * `PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT` (type `Optional[int]`, default `None`)
 
     The timeout used when requesting pages by Playwright. If `None` or unset,
     the default value will be used (30000 ms at the time of writing this).
-    See the docs for [page.setDefaultNavigationTimeout](https://playwright.dev/#version=v1.6.2&path=docs%2Fapi.md&q=pagesetdefaultnavigationtimeouttimeout).
+    See the docs for [`BrowserContext.set_default_navigation_timeout`](https://playwright.dev/python/docs/api/class-browsercontext#browser_contextset_default_navigation_timeouttimeout).
 
 
 ## Basic usage
@@ -217,9 +216,9 @@ class ScrollSpider(scrapy.Spider):
                 playwright=True,
                 playwright_include_page=True,
                 playwright_page_coroutines=[
-                    PageCoroutine("waitForSelector", "div.quote"),
+                    PageCoroutine("wait_for_selector", "div.quote"),
                     PageCoroutine("evaluate", "window.scrollBy(0, document.body.scrollHeight)"),
-                    PageCoroutine("waitForSelector", "div.quote:nth-child(11)"),  # 10 per page
+                    PageCoroutine("wait_for_selector", "div.quote:nth-child(11)"),  # 10 per page
                 ],
             ),
         )
diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py
@@ -4,8 +4,7 @@
 from typing import Callable, Optional, Type, TypeVar
 from urllib.parse import urlparse
 
-import playwright
-from playwright.async_api import Page
+from playwright.async_api import Page, PlaywrightContextManager, Request as PwRequest, Route
 from scrapy import Spider, signals
 from scrapy.core.downloader.handlers.http import HTTPDownloadHandler
 from scrapy.crawler import Crawler
@@ -34,14 +33,11 @@ def _make_request_handler(
     scrapy_request: Request,
     stats: StatsCollector,
 ) -> Callable:
-    def request_handler(
-        route: playwright.async_api.Route,
-        request: playwright.async_api.Request,
-    ) -> None:
+    def request_handler(route: Route, pw_request: PwRequest) -> None:
         """
         Override request headers, method and body
         """
-        if request.url == scrapy_request.url:
+        if pw_request.url == scrapy_request.url:
             overrides = {
                 "method": scrapy_request.method,
                 "headers": {
@@ -50,21 +46,21 @@ def request_handler(
                 },
             }
             if scrapy_request.body:
-                overrides["postData"] = scrapy_request.body.decode(scrapy_request.encoding)
+                overrides["post_data"] = scrapy_request.body.decode(scrapy_request.encoding)
             # otherwise this fails with playwright.helper.Error: NS_ERROR_NET_RESET
             if browser_type == "firefox":
-                overrides["headers"]["host"] = urlparse(request.url).netloc
+                overrides["headers"]["host"] = urlparse(pw_request.url).netloc
         else:
-            overrides = {"headers": request.headers.copy()}
+            overrides = {"headers": pw_request.headers.copy()}
             # override user agent, for consistency with other requests
             if scrapy_request.headers.get("user-agent"):
                 user_agent = scrapy_request.headers["user-agent"].decode("utf-8")
                 overrides["headers"]["user-agent"] = user_agent
         asyncio.create_task(route.continue_(**overrides))
         # increment stats
-        stats.inc_value("playwright/request_method_count/{}".format(request.method))
+        stats.inc_value("playwright/request_method_count/{}".format(pw_request.method))
         stats.inc_value("playwright/request_count")
-        if request.isNavigationRequest():
+        if pw_request.is_navigation_request():
             stats.inc_value("playwright/request_count/navigation")
 
     return request_handler
@@ -102,15 +98,15 @@ def _engine_started(self) -> Deferred:
         return deferred_from_coro(self._launch_browser())
 
     async def _launch_browser(self) -> None:
-        self.playwright_context_manager = playwright.AsyncPlaywrightContextManager()
+        self.playwright_context_manager = PlaywrightContextManager()
         self.playwright = await self.playwright_context_manager.start()
         browser_launcher = getattr(self.playwright, self.browser_type).launch
         self.browser = await browser_launcher(**self.launch_options)
         logger.info(f"Browser {self.browser_type} launched")
-        self.context = await self.browser.newContext(**self.context_args)
+        self.context = await self.browser.new_context(**self.context_args)
         logger.info("Browser context started")
         if self.default_navigation_timeout:
-            self.context.setDefaultNavigationTimeout(self.default_navigation_timeout)
+            self.context.set_default_navigation_timeout(self.default_navigation_timeout)
 
     @inlineCallbacks
     def close(self) -> Deferred:
@@ -143,18 +139,18 @@ async def _download_request(self, request: Request, spider: Spider) -> Response:
         try:
             result = await self._download_request_with_page(request, spider, page)
         except Exception:
-            if not page.isClosed():
+            if not page.is_closed():
                 await page.close()
                 self.stats.inc_value("playwright/page_count/closed")
             raise
         else:
             return result
 
     async def _create_page_for_request(self, request: Request) -> Page:
-        page = await self.context.newPage()  # type: ignore
+        page = await self.context.new_page()  # type: ignore
         self.stats.inc_value("playwright/page_count")
         if self.default_navigation_timeout:
-            page.setDefaultNavigationTimeout(self.default_navigation_timeout)
+            page.set_default_navigation_timeout(self.default_navigation_timeout)
         return page
 
     async def _download_request_with_page(
@@ -170,7 +166,7 @@ async def _download_request_with_page(
             if isinstance(pc, PageCoroutine):
                 method = getattr(page, pc.method)
                 pc.result = await method(*pc.args, **pc.kwargs)
-                await page.waitForLoadState(timeout=self.default_navigation_timeout)
+                await page.wait_for_load_state(timeout=self.default_navigation_timeout)
 
         body = (await page.content()).encode("utf8")
         request.meta["download_latency"] = time() - start_time
diff --git a/setup.py b/setup.py
@@ -24,6 +24,7 @@
         "Programming Language :: Python",
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
         "Framework :: Scrapy",
         "Intended Audience :: Developers",
         "Topic :: Internet :: WWW/HTTP",
@@ -32,6 +33,6 @@
     ],
     install_requires=[
         "scrapy>=2.0,!=2.4.0",
-        "playwright>=0.7.0",
+        "playwright>=1.8.0a1",
     ],
 )
diff --git a/tests/test_page_coroutines.py b/tests/test_page_coroutines.py
@@ -5,9 +5,9 @@
 
 @pytest.mark.asyncio
 async def test_page_coroutines():
-    screenshot = PageCoroutine("screenshot", options={"path": "/tmp/file", "type": "png"})
+    screenshot = PageCoroutine("screenshot", "foo", 123, path="/tmp/file", type="png")
     assert screenshot.method == "screenshot"
-    assert screenshot.args == ()
-    assert screenshot.kwargs == {"options": {"path": "/tmp/file", "type": "png"}}
+    assert screenshot.args == ("foo", 123)
+    assert screenshot.kwargs == {"path": "/tmp/file", "type": "png"}
     assert screenshot.result is None
     assert str(screenshot) == "<PageCoroutine for method 'screenshot'>"
diff --git a/tests/test_playwright_requests.py b/tests/test_playwright_requests.py
@@ -3,8 +3,7 @@
 from tempfile import NamedTemporaryFile
 
 import pytest
-from playwright import TimeoutError
-from playwright.async_api import Page as PlaywrightPage
+from playwright.async_api import Page as PlaywrightPage, TimeoutError
 from scrapy import Spider, Request, FormRequest
 from scrapy.http.response.html import HtmlResponse
 from scrapy.utils.test import get_crawler
@@ -113,11 +112,11 @@ async def test_page_coroutine_infinite_scroll(self):
                 meta={
                     "playwright": True,
                     "playwright_page_coroutines": [
-                        PageCoro("waitForSelector", selector="div.quote"),
+                        PageCoro("wait_for_selector", selector="div.quote"),
                         PageCoro("evaluate", "window.scrollBy(0, document.body.scrollHeight)"),
-                        PageCoro("waitForSelector", selector="div.quote:nth-child(11)"),
+                        PageCoro("wait_for_selector", selector="div.quote:nth-child(11)"),
                         PageCoro("evaluate", "window.scrollBy(0, document.body.scrollHeight)"),
-                        PageCoro("waitForSelector", selector="div.quote:nth-child(21)"),
+                        PageCoro("wait_for_selector", selector="div.quote:nth-child(21)"),
                     ],
                 },
             )
@@ -157,7 +156,7 @@ async def test_context_args(self):
             get_crawler(
                 settings_dict={
                     "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
-                    "PLAYWRIGHT_CONTEXT_ARGS": {"javaScriptEnabled": False},
+                    "PLAYWRIGHT_CONTEXT_ARGS": {"java_script_enabled": False},
                 }
             )
         )
@@ -169,7 +168,7 @@ async def test_context_args(self):
                 meta={
                     "playwright": True,
                     "playwright_page_coroutines": [
-                        PageCoro("waitForSelector", selector="div.quote", timeout=1000),
+                        PageCoro("wait_for_selector", selector="div.quote", timeout=1000),
                     ],
                 },
             )
diff --git a/tox.ini b/tox.ini
@@ -1,21 +1,22 @@
 [tox]
-envlist = py37,py38,flake8,typing,black
+envlist = py,flake8,typing,black
 
 
 [testenv]
 deps =
-    -rrequirements.txt
-    -rtests/requirements.txt
+    playwright>=1.8.0a1
+    scrapy>=2.0,!=2.4.0
+    pytest<5.4
+    pytest-asyncio<0.11
+    pytest-cov>=2.8
+    pytest-twisted>=1.11
 commands =
     py.test --reactor=asyncio --cov=scrapy_playwright --cov-report=term-missing --cov-report=html --cov-report=xml {posargs: scrapy_playwright tests}
 setenv =
     DEBUG=pw:api
 
-[testenv:py37]
-basepython = python3.7
-
-[testenv:py38]
-basepython = python3.8
+[testenv:py]
+basepython = python3
 
 [testenv:flake8]
 basepython = python3.8