@@ -372,7 +372,9 @@ async def _download_request_with_page(
372
372
request .meta ["playwright_page" ] = page
373
373
374
374
start_time = time ()
375
- response , download = await self ._get_response_and_download (request = request , page = page )
375
+ response , download = await self ._get_response_and_download (
376
+ request = request , page = page , spider = spider
377
+ )
376
378
if isinstance (response , PlaywrightResponse ):
377
379
await _set_redirect_meta (request = request , response = response )
378
380
headers = Headers (await response .all_headers ())
@@ -440,13 +442,15 @@ async def _download_request_with_page(
440
442
)
441
443
442
444
async def _get_response_and_download (
443
- self , request : Request , page : Page
445
+ self , request : Request , page : Page , spider : Spider
444
446
) -> Tuple [Optional [PlaywrightResponse ], dict ]:
445
447
response : Optional [PlaywrightResponse ] = None
446
448
download : dict = {} # updated in-place in _handle_download
449
+ download_started = asyncio .Event ()
447
450
download_ready = asyncio .Event ()
448
451
449
452
async def _handle_download (dwnld : Download ) -> None :
453
+ download_started .set ()
450
454
self .stats .inc_value ("playwright/download_count" )
451
455
try :
452
456
if failure := await dwnld .failure ():
@@ -473,6 +477,32 @@ async def _handle_download(dwnld: Download) -> None:
473
477
and "net::ERR_ABORTED" in err .message
474
478
):
475
479
raise
480
+ try :
481
+ logger .debug (
482
+ "Waiting on dowload to start for %s" ,
483
+ request .url ,
484
+ extra = {
485
+ "spider" : spider ,
486
+ "context_name" : request .meta .get ("playwright_context" ),
487
+ "scrapy_request_url" : request .url ,
488
+ "scrapy_request_method" : request .method ,
489
+ },
490
+ )
491
+ # TODO: make the download-start timeout configurable via a setting instead of hard-coding 1 second
492
+ await asyncio .wait_for (download_started .wait (), timeout = 1 )
493
+ except asyncio .exceptions .TimeoutError :
494
+ raise err
495
+
496
+ logger .debug (
497
+ "Waiting on dowload to finish for %s" ,
498
+ request .url ,
499
+ extra = {
500
+ "spider" : spider ,
501
+ "context_name" : request .meta .get ("playwright_context" ),
502
+ "scrapy_request_url" : request .url ,
503
+ "scrapy_request_method" : request .method ,
504
+ },
505
+ )
476
506
await download_ready .wait ()
477
507
finally :
478
508
page .remove_listener ("download" , _handle_download )
0 commit comments