Skip to content

Commit a8ea54c

Browse files
committed
Slight refactor
1 parent 17ce02f commit a8ea54c

File tree

1 file changed

+21
-18
lines changed

1 file changed

+21
-18
lines changed

scrapy_playwright/handler.py

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ class Download:
106106
exception: Optional[Exception] = None
107107
response_status: int = 200
108108

109+
def __bool__(self) -> bool:
110+
return bool(self.body) or bool(self.exception)
111+
109112

110113
class ScrapyPlaywrightDownloadHandler(HTTPDownloadHandler):
111114
playwright_context_manager: Optional[PlaywrightContextManager] = None
@@ -380,15 +383,24 @@ async def _download_request_with_page(
380383
if request.meta.get("playwright_include_page"):
381384
request.meta["playwright_page"] = page
382385

386+
# default response values
387+
server_ip_address = None
388+
headers = Headers()
389+
383390
start_time = time()
384-
response, download = await self._get_response_and_download(
385-
request=request, page=page, spider=spider
386-
)
387-
if isinstance(response, PlaywrightResponse):
391+
392+
response, download = await self._get_response_and_download(request, page, spider)
393+
if response:
388394
await _set_redirect_meta(request=request, response=response)
389395
headers = Headers(await response.all_headers())
390396
headers.pop("Content-Encoding", None)
391-
elif not download.url:
397+
request.meta["playwright_security_details"] = await response.security_details()
398+
with suppress(KeyError, TypeError, ValueError):
399+
server_addr = await response.server_addr()
400+
server_ip_address = ip_address(server_addr["ipAddress"])
401+
elif download:
402+
request.meta["playwright_suggested_filename"] = download.suggested_filename
403+
else:
392404
logger.warning(
393405
"Navigating to %s returned None, the response"
394406
" will have empty headers and status 200",
@@ -400,7 +412,6 @@ async def _download_request_with_page(
400412
"scrapy_request_method": request.method,
401413
},
402414
)
403-
headers = Headers()
404415

405416
await self._apply_page_methods(page, request, spider)
406417
body_str = await _get_page_content(
@@ -412,22 +423,14 @@ async def _download_request_with_page(
412423
)
413424
request.meta["download_latency"] = time() - start_time
414425

415-
server_ip_address = None
416-
if response is not None:
417-
request.meta["playwright_security_details"] = await response.security_details()
418-
with suppress(KeyError, TypeError, ValueError):
419-
server_addr = await response.server_addr()
420-
server_ip_address = ip_address(server_addr["ipAddress"])
421-
422-
if download.exception:
426+
if download and download.exception:
423427
raise download.exception
424428

425429
if not request.meta.get("playwright_include_page"):
426430
await page.close()
427431
self.stats.inc_value("playwright/page_count/closed")
428432

429-
if download.url:
430-
request.meta["playwright_suggested_filename"] = download.suggested_filename
433+
if download and download.url:
431434
respcls = responsetypes.from_args(url=download.url, body=download.body)
432435
return respcls(
433436
url=download.url,
@@ -452,7 +455,7 @@ async def _download_request_with_page(
452455

453456
async def _get_response_and_download(
454457
self, request: Request, page: Page, spider: Spider
455-
) -> Tuple[Optional[PlaywrightResponse], Download]:
458+
) -> Tuple[Optional[PlaywrightResponse], Optional[Download]]:
456459
response: Optional[PlaywrightResponse] = None
457460
download: Download = Download() # updated in-place in _handle_download
458461
download_started = asyncio.Event()
@@ -521,7 +524,7 @@ async def _handle_response(response: PlaywrightResponse) -> None:
521524
page.remove_listener("download", _handle_download)
522525
page.remove_listener("response", _handle_response)
523526

524-
return response, download
527+
return response, download if download else None
525528

526529
async def _apply_page_methods(self, page: Page, request: Request, spider: Spider) -> None:
527530
context_name = request.meta.get("playwright_context")

0 commit comments

Comments
 (0)