@@ -11,6 +11,7 @@
     Page,
     PlaywrightContextManager,
     Request as PlaywrightRequest,
+    Response as PlaywrightResponse,
     Route,
 )
 from scrapy import Spider, signals
@@ -35,6 +36,26 @@
 logger = logging.getLogger("scrapy-playwright")
 
 
+def _make_request_logger(context_name: str) -> Callable:
+    def _log_request(request: PlaywrightRequest) -> None:
+        logger.debug(
+            f"[Context={context_name}] Request: <{request.method.upper()} {request.url}> "
+            f"(resource type: {request.resource_type}, referrer: {request.headers.get('referer')})"
+        )
+
+    return _log_request
+
+
+def _make_response_logger(context_name: str) -> Callable:
+    def _log_response(response: PlaywrightResponse) -> None:
+        logger.debug(
+            f"[Context={context_name}] Response: <{response.status} {response.url}> "
+            f"(referrer: {response.headers.get('referer')})"
+        )
+
+    return _log_response
+
+
 class ScrapyPlaywrightDownloadHandler(HTTPDownloadHandler):
     def __init__(self, crawler: Crawler) -> None:
         super().__init__(settings=crawler.settings, crawler=crawler)
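The two helpers added above are small factories: each one closes over the browser-context name and returns a callback whose argument matches the Playwright event it will later be attached to, so every debug line can report which context produced the traffic without relying on global state. Below is a minimal standalone sketch of that closure pattern; the names make_event_logger and log_event and the plain-string event are illustrative stand-ins, not part of the patch.

import logging

logging.basicConfig(level=logging.DEBUG, format="%(levelname)s %(message)s")
logger = logging.getLogger("closure-demo")


def make_event_logger(context_name: str):
    # The factory closes over context_name, so the callback it returns
    # already knows which browser context it belongs to by the time an
    # event emitter (e.g. page.on("request", ...)) invokes it.
    def log_event(event) -> None:
        logger.debug("[Context=%s] event: %r", context_name, event)

    return log_event


if __name__ == "__main__":
    handler = make_event_logger("default")
    handler("GET https://example.org")  # stand-in for a Playwright Request object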
@@ -107,6 +128,8 @@ async def _create_page(self, request: Request) -> Page:
         context = await self._create_browser_context(context_name, context_kwargs)
         self.contexts[context_name] = context
         page = await context.new_page()
+        page.on("request", _make_request_logger(context_name))
+        page.on("response", _make_response_logger(context_name))
         self.stats.inc_value("playwright/page_count")
         if self.default_navigation_timeout:
             page.set_default_navigation_timeout(self.default_navigation_timeout)
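With the two page.on() registrations above, every request and response on a page is reported through the "scrapy-playwright" logger at DEBUG level, tagged with the page's context name. A rough sketch of Scrapy project settings that would surface those lines follows; the DOWNLOAD_HANDLERS and TWISTED_REACTOR values are the ones scrapy-playwright's documentation describes for enabling the handler, and LOG_LEVEL is a standard Scrapy setting.

# settings.py -- sketch of a configuration that makes the new
# per-request/response DEBUG messages visible in the crawl log.
DOWNLOAD_HANDLERS = {
    "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
    "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
}
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
LOG_LEVEL = "DEBUG"  # the new lines are emitted at DEBUG, so INFO and above hide them

If the extra chatter is unwanted for a particular crawl, raising just this logger's level (logging.getLogger("scrapy-playwright").setLevel(logging.INFO)) silences these messages while leaving the rest of the DEBUG output intact.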