@@ -928,6 +928,12 @@ module Redirector = struct
  let nested_parallel_queues =
    {queues= Queues.create (); mutex= Mutex.create ()}

+ (* We create another queue only for VM_receive_memory operations for the same reason again.
+    Migration spawns 2 operations, send and receive, so if there is limited available worker space
+    a deadlock can happen when VMs are migrating between hosts or on localhost migration
+    as the receiver has no free workers to receive memory. *)
+ let receive_memory_queues = {queues= Queues.create (); mutex= Mutex.create ()}
+
  (* we do not want to use = when comparing queues: queues can contain
     (uncomparable) functions, and we are only interested in comparing the
     equality of their static references *)
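The comment above carries the core reasoning of the change, so here is a minimal, self-contained toy model of the scenario it describes. This is illustrative OCaml using the stdlib threads library, not xenopsd code, and all names are placeholders: the send half of a migration holds a worker until the receive half has run, so if the receiver had to wait on the same exhausted pool the two halves would starve each other, whereas a dedicated receive pool always lets the receiver make progress.

(* Toy model only: build with the threads library, e.g.
   ocamlfind ocamlopt -package threads.posix -linkpkg sketch.ml *)
let () =
  let send_pool = Semaphore.Counting.make 1 in    (* the shared workers, all busy sending *)
  let receive_pool = Semaphore.Counting.make 1 in (* the dedicated receive-memory worker *)
  let received = Semaphore.Binary.make false in
  let receiver =
    Thread.create
      (fun () ->
        Semaphore.Counting.acquire receive_pool ;
        (* VM_receive_memory would run here *)
        Semaphore.Binary.release received ;
        Semaphore.Counting.release receive_pool
      )
      ()
  in
  (* The send half occupies a shared worker until the receiver has finished.
     If the receiver needed a worker from send_pool instead of its own pool,
     neither side could ever proceed. *)
  Semaphore.Counting.acquire send_pool ;
  Semaphore.Binary.acquire received ;
  Semaphore.Counting.release send_pool ;
  Thread.join receiver ;
  print_endline "send and receive both completed"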
@@ -1062,6 +1068,7 @@ module Redirector = struct
       (default.queues
       :: parallel_queues.queues
       :: nested_parallel_queues.queues
+      :: receive_memory_queues.queues
       :: List.map snd (StringMap.bindings !overrides)
       )
     )
@@ -1297,7 +1304,8 @@ module WorkerPool = struct
    for _i = 1 to size do
      incr Redirector.default ;
      incr Redirector.parallel_queues ;
-     incr Redirector.nested_parallel_queues
+     incr Redirector.nested_parallel_queues ;
+     incr Redirector.receive_memory_queues
    done

  let set_size size =
@@ -1313,7 +1321,8 @@ module WorkerPool = struct
    in
    inner Redirector.default ;
    inner Redirector.parallel_queues ;
-   inner Redirector.nested_parallel_queues
+   inner Redirector.nested_parallel_queues ;
+   inner Redirector.receive_memory_queues
end

(* Keep track of which VMs we're rebooting so we avoid transient glitches where
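The two WorkerPool hunks above keep every redirector's pool sized in lockstep, so the new receive_memory_queues also gets its own set of size workers at startup and on resize. A minimal, self-contained sketch of that invariant, using placeholder names and a mutable counter rather than the real WorkerPool internals:

(* Placeholder model: each redirector owns an independent worker count; both the
   start-up loop and set_size must cover every pool, otherwise operations pushed
   to a forgotten queue would never be picked up. *)
type pool = {name: string; mutable workers: int}

let pools =
  [ {name= "default"; workers= 0}
  ; {name= "parallel_queues"; workers= 0}
  ; {name= "nested_parallel_queues"; workers= 0}
  ; {name= "receive_memory_queues"; workers= 0}
  ]

let set_size size = List.iter (fun p -> p.workers <- size) pools

let () =
  set_size 4 ;
  List.iter (fun p -> Printf.printf "%s: %d workers\n" p.name p.workers) pools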
@@ -3360,19 +3369,20 @@ let uses_mxgpu id =
    )
    (VGPU_DB.ids id)

-let queue_operation_int ?traceparent dbg id op =
+let queue_operation_int ?traceparent ?(redirector = Redirector.default) dbg id
+    op =
  let task =
    Xenops_task.add ?traceparent tasks dbg
      (let r = ref None in
       fun t -> perform ~result:r op t ; !r
      )
  in
  let tag = if uses_mxgpu id then "mxgpu" else id in
-  Redirector.push Redirector.default tag (op, task) ;
+  Redirector.push redirector tag (op, task) ;
  task

-let queue_operation ?traceparent dbg id op =
-  let task = queue_operation_int ?traceparent dbg id op in
+let queue_operation ?traceparent ?redirector dbg id op =
+  let task = queue_operation_int ?traceparent ?redirector dbg id op in
  Xenops_task.id_of_handle task

let queue_operation_and_wait dbg id op =
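The optional-argument threading in this hunk is worth spelling out: queue_operation forwards ?redirector as an option, so the Redirector.default fallback is applied exactly once, inside queue_operation_int, and existing call sites keep their old behaviour. A minimal, self-contained sketch of that idiom, with placeholder types and names rather than the xenopsd API:

type redirector = Default | Receive_memory

let queue_operation_int ?(redirector = Default) id op =
  (* xenopsd would push (op, task) onto the chosen redirector's queues here *)
  Printf.printf "queued %s for %s on the %s queues\n" op id
    (match redirector with Default -> "default" | Receive_memory -> "receive_memory")

let queue_operation ?redirector id op =
  (* ?redirector forwards the caller's choice without forcing the default *)
  queue_operation_int ?redirector id op

let () =
  queue_operation "vm-1" "VM_start" ;
  queue_operation ~redirector:Receive_memory "vm-2" "VM_receive_memory"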
@@ -3821,7 +3831,12 @@ module VM = struct
          ; vmr_compressed= compressed_memory
          }
        in
-       let task = Some (queue_operation ?traceparent dbg id op) in
+       let task =
+         Some
+           (queue_operation ?traceparent
+              ~redirector:Redirector.receive_memory_queues dbg id op
+           )
+       in
       Option.iter
         (fun t -> t |> Xenops_client.wait_for_task dbg |> ignore)
         task