@@ -706,14 +706,16 @@ function create_worker(manager, wconfig)
706706 join_message = JoinPGRPMsg(w. id, all_locs, PGRP. topology, enable_threaded_blas, isclusterlazy())
707707 send_msg_now(w, MsgHeader(RRID(0 ,0 ), ntfy_oid), join_message)
708708
709- @async manage(w. manager, w. id, w. config, :register)
709+ errormonitor( @async manage(w. manager, w. id, w. config, :register) )
710710 # wait for rr_ntfy_join with timeout
711711 timedout = false
712- @async begin
713- sleep($ timeout)
714- timedout = true
715- put!(rr_ntfy_join, 1 )
716- end
712+ errormonitor(
713+ @async begin
714+ sleep($ timeout)
715+ timedout = true
716+ put!(rr_ntfy_join, 1 )
717+ end
718+ )
717719 wait(rr_ntfy_join)
718720 if timedout
719721 error(" worker did not connect within $timeout seconds" )
@@ -763,17 +765,20 @@ function check_master_connect()
763765 if ccall(:jl_running_on_valgrind,Cint,()) != 0
764766 return
765767 end
766- @async begin
767- start = time_ns()
768- while ! haskey(map_pid_wrkr, 1 ) && (time_ns() - start) < timeout
769- sleep(1.0 )
770- end
771768
772- if ! haskey(map_pid_wrkr, 1 )
773- print(stderr , " Master process (id 1) could not connect within $(timeout/ 1e9 ) seconds.\n exiting.\n " )
774- exit(1 )
769+ errormonitor(
770+ @async begin
771+ start = time_ns()
772+ while ! haskey(map_pid_wrkr, 1 ) && (time_ns() - start) < timeout
773+ sleep(1.0 )
774+ end
775+
776+ if ! haskey(map_pid_wrkr, 1 )
777+ print(stderr , " Master process (id 1) could not connect within $(timeout/ 1e9 ) seconds.\n exiting.\n " )
778+ exit(1 )
779+ end
775780 end
776- end
781+ )
777782end
778783
779784
0 commit comments