
Commit e3ee615

Add playwright.async_api.Page type hint to some examples

Parent: d825d6f

6 files changed: 41 additions, 8 deletions
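The change itself is mechanical but worthwhile: the object stored under `response.meta["playwright_page"]` comes out of a plain dict, so type checkers and IDEs see it as `Any`. Annotating it as `playwright.async_api.Page` restores completion and static checking for the Playwright page API. A minimal sketch of the effect (the spider name and URL are illustrative, not part of this commit):

```python
from playwright.async_api import Page

import scrapy


class TypedPageSpider(scrapy.Spider):
    name = "typed_page"  # illustrative name, not from this commit

    def start_requests(self):
        yield scrapy.Request(
            url="https://example.com",
            meta={"playwright": True, "playwright_include_page": True},
        )

    async def parse(self, response, **kwargs):
        # Without the annotation this value is Any; with it, mypy and IDE
        # completion know the full Page API and can flag typos such as
        # page.titel().
        page: Page = response.meta["playwright_page"]
        title = await page.title()
        await page.close()
        return {"title": title}
```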

README.md

Lines changed: 7 additions & 4 deletions
````diff
@@ -448,14 +448,16 @@ This key could be used in conjunction with `playwright_include_page` to make a c
 requests using the same page. For instance:
 
 ```python
+from playwright.async_api import Page
+
 def start_requests(self):
     yield scrapy.Request(
         url="https://httpbin.org/get",
         meta={"playwright": True, "playwright_include_page": True},
     )
 
 def parse(self, response, **kwargs):
-    page = response.meta["playwright_page"]
+    page: Page = response.meta["playwright_page"]
     yield scrapy.Request(
         url="https://httpbin.org/headers",
         callback=self.parse_headers,
@@ -514,6 +516,7 @@ necessary the spider job could get stuck because of the limit set by the
 `PLAYWRIGHT_MAX_PAGES_PER_CONTEXT` setting.
 
 ```python
+from playwright.async_api import Page
 import scrapy
 
 class AwesomeSpiderWithPage(scrapy.Spider):
@@ -528,7 +531,7 @@ class AwesomeSpiderWithPage(scrapy.Spider):
         )
 
     def parse_first(self, response):
-        page = response.meta["playwright_page"]
+        page: Page = response.meta["playwright_page"]
         return scrapy.Request(
             url="https://example.com",
             callback=self.parse_second,
@@ -537,13 +540,13 @@ class AwesomeSpiderWithPage(scrapy.Spider):
         )
 
     async def parse_second(self, response):
-        page = response.meta["playwright_page"]
+        page: Page = response.meta["playwright_page"]
         title = await page.title()  # "Example Domain"
         await page.close()
         return {"title": title}
 
     async def errback_close_page(self, failure):
-        page = failure.request.meta["playwright_page"]
+        page: Page = failure.request.meta["playwright_page"]
         await page.close()
 ```
 
````

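The first README hunk is cut off by the diff context right after `callback=self.parse_headers,`. For readability, here is a hedged reconstruction of how that snippet continues; the essential point is that the page is handed to the follow-up request via the `playwright_page` meta key so both requests share it (exact wording may differ from the actual README):

```python
# Hedged reconstruction, not part of this diff:
def parse(self, response, **kwargs):
    page: Page = response.meta["playwright_page"]
    yield scrapy.Request(
        url="https://httpbin.org/headers",
        callback=self.parse_headers,
        meta={"playwright": True, "playwright_page": page},
    )

async def parse_headers(self, response):
    page: Page = response.meta["playwright_page"]
    await page.close()  # pages received this way must be closed explicitly
```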
examples/books.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -3,6 +3,7 @@
 from pathlib import Path
 from typing import Generator, Optional
 
+from playwright.async_api import Page
 from scrapy import Spider
 from scrapy.http.response import Response
 
@@ -51,7 +52,7 @@ def parse(self, response: Response, current_page: Optional[int] = None) -> Gener
 
     async def parse_book(self, response: Response) -> dict:
         url_sha256 = hashlib.sha256(response.url.encode("utf-8")).hexdigest()
-        page = response.meta["playwright_page"]
+        page: Page = response.meta["playwright_page"]
         await page.screenshot(
             path=Path(__file__).parent / "books" / f"{url_sha256}.png", full_page=True
         )
```
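A side note on the surrounding context: the screenshot filename is the SHA-256 of the page URL, which gives a stable, filesystem-safe name per book page. For instance:

```python
import hashlib

# Same derivation as in the example: hex digest of the UTF-8 encoded URL.
url = "http://books.toscrape.com/"  # illustrative URL
print(hashlib.sha256(url.encode("utf-8")).hexdigest())  # 64 hex chars
```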

examples/contexts.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from pathlib import Path
22

3+
from playwright.async_api import Page
34
from scrapy import Spider, Request
45

56

@@ -96,7 +97,7 @@ def start_requests(self):
9697
)
9798

9899
async def parse(self, response, **kwargs):
99-
page = response.meta["playwright_page"]
100+
page: Page = response.meta["playwright_page"]
100101
context_name = response.meta["playwright_context"]
101102
storage_state = await page.context.storage_state()
102103
await page.close()

examples/max_pages.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -1,3 +1,4 @@
+from playwright.async_api import Page
 from scrapy import Spider, Request
 
 
@@ -45,5 +46,5 @@ def parse(self, response, **kwargs):
         return {"url": response.url}
 
     async def errback(self, failure):
-        page = failure.request.meta["playwright_page"]
+        page: Page = failure.request.meta["playwright_page"]
         await page.close()
```

examples/remote.py

Lines changed: 26 additions & 0 deletions
```diff
@@ -0,0 +1,26 @@
+import asyncio
+
+from scrapy import Spider, Request
+
+
+class RemoteSpider(Spider):
+    """Connect to a remote chromium instance."""
+
+    name = "scroll"
+    custom_settings = {
+        "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor",
+        "DOWNLOAD_HANDLERS": {
+            "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
+            # "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
+        },
+        # "PLAYWRIGHT_CDP_URL": "ws://localhost:3000",
+        "PLAYWRIGHT_CONNECT_URL": "ws:/localhost:61915/377758c4-4b49-41fe-9187-e4114197dea4",
+    }
+
+    def start_requests(self):
+        yield Request(url="https://example.com", meta={"playwright": True})
+
+    async def parse(self, response, **kwargs):
+        await asyncio.sleep(6)
+        yield {"url": response.url}
+        yield Request(url="https://example.org", meta={"playwright": True})
```

examples/storage.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -1,3 +1,4 @@
+from playwright.async_api import Page
 from scrapy import Spider, Request
 from scrapy_playwright.page import PageMethod
 
@@ -27,7 +28,7 @@ def start_requests(self):
         )
 
     async def parse(self, response, **kwargs):
-        page = response.meta["playwright_page"]
+        page: Page = response.meta["playwright_page"]
         storage_state = await page.context.storage_state()
         await page.close()
         return {
```
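For context on the value being extracted here: `page.context.storage_state()` returns a plain dict snapshot of the context's cookies and per-origin storage. An illustrative shape (field values are made up; structure per the Playwright docs):

```python
# Illustrative storage_state() result, not real data:
state = {
    "cookies": [
        {"name": "session", "value": "abc123", "domain": "example.org", "path": "/"},
    ],
    "origins": [
        {
            "origin": "https://example.org",
            "localStorage": [{"name": "theme", "value": "dark"}],
        },
    ],
}
```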
