Skip to content

Commit 725bf90

Browse files
committed
Wait for download started event with timeout
1 parent e3ee615 commit 725bf90

File tree

1 file changed

+32
-2
lines changed

1 file changed

+32
-2
lines changed

scrapy_playwright/handler.py

Lines changed: 32 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -372,7 +372,9 @@ async def _download_request_with_page(
372372
request.meta["playwright_page"] = page
373373

374374
start_time = time()
375-
response, download = await self._get_response_and_download(request=request, page=page)
375+
response, download = await self._get_response_and_download(
376+
request=request, page=page, spider=spider
377+
)
376378
if isinstance(response, PlaywrightResponse):
377379
await _set_redirect_meta(request=request, response=response)
378380
headers = Headers(await response.all_headers())
@@ -440,13 +442,15 @@ async def _download_request_with_page(
440442
)
441443

442444
async def _get_response_and_download(
443-
self, request: Request, page: Page
445+
self, request: Request, page: Page, spider: Spider
444446
) -> Tuple[Optional[PlaywrightResponse], dict]:
445447
response: Optional[PlaywrightResponse] = None
446448
download: dict = {} # updated in-place in _handle_download
449+
download_started = asyncio.Event()
447450
download_ready = asyncio.Event()
448451

449452
async def _handle_download(dwnld: Download) -> None:
453+
download_started.set()
450454
self.stats.inc_value("playwright/download_count")
451455
try:
452456
if failure := await dwnld.failure():
@@ -473,6 +477,32 @@ async def _handle_download(dwnld: Download) -> None:
473477
and "net::ERR_ABORTED" in err.message
474478
):
475479
raise
480+
try:
481+
logger.debug(
482+
"Waiting on dowload to start for %s",
483+
request.url,
484+
extra={
485+
"spider": spider,
486+
"context_name": request.meta.get("playwright_context"),
487+
"scrapy_request_url": request.url,
488+
"scrapy_request_method": request.method,
489+
},
490+
)
491+
# TODO: make the download-start timeout configurable via a setting (currently hard-coded to 1 second)
492+
await asyncio.wait_for(download_started.wait(), timeout=1)
493+
except asyncio.exceptions.TimeoutError:
494+
raise err
495+
496+
logger.debug(
497+
"Waiting on dowload to finish for %s",
498+
request.url,
499+
extra={
500+
"spider": spider,
501+
"context_name": request.meta.get("playwright_context"),
502+
"scrapy_request_url": request.url,
503+
"scrapy_request_method": request.method,
504+
},
505+
)
476506
await download_ready.wait()
477507
finally:
478508
page.remove_listener("download", _handle_download)

0 commit comments

Comments
 (0)