@@ -916,6 +916,12 @@ module Redirector = struct
916
916
Parallel atoms, creating a deadlock. *)
917
917
let parallel_queues = {queues= Queues. create () ; mutex= Mutex. create () }
918
918
919
+ (* We create another queue only for VM_receive_memory operations for the same reason as Parallel atoms.
920
+ Migration spawns 2 operations, send and receive, so if there is limited available worker space
921
+ a deadlock can happen when VMs are migrating between hosts or on localhost migration
922
+ as the receiver has no free workers to receive memory. *)
923
+ let receive_memory_queues = {queues= Queues. create () ; mutex= Mutex. create () }
924
+
919
925
(* When a thread is actively processing a queue, items are redirected to a
920
926
thread-private queue *)
921
927
let overrides = ref StringMap. empty
@@ -1035,6 +1041,7 @@ module Redirector = struct
1035
1041
List. concat_map one
1036
1042
(default.queues
1037
1043
:: parallel_queues.queues
1044
+ :: receive_memory_queues.queues
1038
1045
:: List. map snd (StringMap. bindings ! overrides)
1039
1046
)
1040
1047
)
@@ -1268,7 +1275,8 @@ module WorkerPool = struct
1268
1275
let start size =
1269
1276
for _i = 1 to size do
1270
1277
incr Redirector. default ;
1271
- incr Redirector. parallel_queues
1278
+ incr Redirector. parallel_queues ;
1279
+ incr Redirector. receive_memory_queues
1272
1280
done
1273
1281
1274
1282
let set_size size =
@@ -1283,7 +1291,8 @@ module WorkerPool = struct
1283
1291
done
1284
1292
in
1285
1293
inner Redirector. default ;
1286
- inner Redirector. parallel_queues
1294
+ inner Redirector. parallel_queues ;
1295
+ inner Redirector. receive_memory_queues
1287
1296
end
1288
1297
1289
1298
(* Keep track of which VMs we're rebooting so we avoid transient glitches where
@@ -3276,19 +3285,20 @@ let uses_mxgpu id =
3276
3285
)
3277
3286
(VGPU_DB. ids id)
3278
3287
3279
- let queue_operation_int ?traceparent dbg id op =
3288
+ let queue_operation_int ?traceparent ?(redirector = Redirector. default) dbg id
3289
+ op =
3280
3290
let task =
3281
3291
Xenops_task. add ?traceparent tasks dbg
3282
3292
(let r = ref None in
3283
3293
fun t -> perform ~result: r op t ; ! r
3284
3294
)
3285
3295
in
3286
3296
let tag = if uses_mxgpu id then " mxgpu" else id in
3287
- Redirector. push Redirector. default tag (op, task) ;
3297
+ Redirector. push redirector tag (op, task) ;
3288
3298
task
3289
3299
3290
- let queue_operation ?traceparent dbg id op =
3291
- let task = queue_operation_int ?traceparent dbg id op in
3300
+ let queue_operation ?traceparent ? redirector dbg id op =
3301
+ let task = queue_operation_int ?traceparent ?redirector dbg id op in
3292
3302
Xenops_task. id_of_handle task
3293
3303
3294
3304
let queue_operation_and_wait dbg id op =
@@ -3737,7 +3747,12 @@ module VM = struct
3737
3747
; vmr_compressed= compressed_memory
3738
3748
}
3739
3749
in
3740
- let task = Some (queue_operation ?traceparent dbg id op) in
3750
+ let task =
3751
+ Some
3752
+ (queue_operation ?traceparent
3753
+ ~redirector: Redirector. receive_memory_queues dbg id op
3754
+ )
3755
+ in
3741
3756
Option. iter
3742
3757
(fun t -> t |> Xenops_client. wait_for_task dbg |> ignore)
3743
3758
task
0 commit comments