@@ -378,7 +378,7 @@ where
378
378
fn remove_cycle (
379
379
query_map : & QueryMap ,
380
380
jobs : & mut Vec < QueryJobId > ,
381
- wakelist : & mut Vec < Arc < QueryWaiter > > ,
381
+ wakelist : & Mutex < Vec < Arc < QueryWaiter > > > ,
382
382
) -> bool {
383
383
let mut visited = FxHashSet :: default ( ) ;
384
384
let mut stack = Vec :: new ( ) ;
@@ -466,7 +466,7 @@ fn remove_cycle(
466
466
* waiter. cycle . lock ( ) = Some ( error) ;
467
467
468
468
// Put the waiter on the list of things to resume
469
- wakelist. push ( waiter) ;
469
+ wakelist. lock ( ) . push ( waiter) ;
470
470
471
471
true
472
472
} else {
@@ -478,36 +478,37 @@ fn remove_cycle(
478
478
/// If a query cycle is found it will break the cycle by finding an edge which
479
479
/// uses a query latch and then resuming that waiter.
480
480
/// There may be multiple cycles involved in a deadlock, so this searches
481
- /// all active queries for cycles before finally resuming all the waiters at once.
481
+ /// all active queries for cycles, but resumes only one waiter per call.
482
482
pub fn break_query_cycles ( query_map : QueryMap , registry : & rayon_core:: Registry ) {
483
- let mut wakelist = Vec :: new ( ) ;
483
+ static WAKELIST : Mutex < Vec < Arc < QueryWaiter > > > = Mutex :: new ( Vec :: new ( ) ) ;
484
484
let mut jobs: Vec < QueryJobId > = query_map. keys ( ) . cloned ( ) . collect ( ) ;
485
485
486
486
let mut found_cycle = false ;
487
487
488
488
while jobs. len ( ) > 0 {
489
- if remove_cycle ( & query_map, & mut jobs, & mut wakelist ) {
489
+ if remove_cycle ( & query_map, & mut jobs, & WAKELIST ) {
490
490
found_cycle = true ;
491
491
}
492
492
}
493
-
493
+ let mut wake = WAKELIST . lock ( ) ;
494
494
// Check that a cycle was found. It is possible for a deadlock to occur without
495
495
// a query cycle if a query which can be waited on uses Rayon to do multithreading
496
496
// internally. Such a query (X) may be executing on 2 threads (A and B) and A may
497
497
// wait using Rayon on B. Rayon may then switch to executing another query (Y)
498
498
// which in turn will wait on X causing a deadlock. We have a false dependency from
499
499
// X to Y due to Rayon waiting and a true dependency from Y to X. The algorithm here
500
500
// only considers the true dependency and won't detect a cycle.
501
- if !found_cycle {
501
+ if !found_cycle && wake . is_empty ( ) {
502
502
panic ! (
503
503
"deadlock detected as we're unable to find a query cycle to break\n \
504
504
current query map:\n {:#?}",
505
505
query_map
506
506
) ;
507
507
}
508
508
509
- // FIXME: Ensure this won't cause a deadlock before we return
510
- for waiter in wakelist. into_iter ( ) {
509
+ // Resume only one waiter per call; any others stay queued in WAKELIST. Waking
510
+ // several waiters at once can cause a deadlock due to thread grabbing.
511
+ if let Some ( waiter) = wake. pop ( ) {
511
512
waiter. notify ( registry) ;
512
513
}
513
514
}
0 commit comments