@@ -99,10 +99,10 @@ mutable struct Worker
99
99
del_msgs:: Array{Any,1} # XXX : Could del_msgs and add_msgs be Channels?
100
100
add_msgs:: Array{Any,1}
101
101
@atomic gcflag:: Bool
102
- state:: WorkerState
103
- c_state:: Condition # wait for state changes
104
- ct_time:: Float64 # creation time
105
- conn_func:: Any # used to setup connections lazily
102
+ @atomic state:: WorkerState
103
+ c_state:: Threads. Condition # wait for state changes, lock for state
104
+ ct_time:: Float64 # creation time
105
+ conn_func:: Any # used to setup connections lazily
106
106
107
107
r_stream:: IO
108
108
w_stream:: IO
@@ -134,7 +134,7 @@ mutable struct Worker
134
134
if haskey (map_pid_wrkr, id)
135
135
return map_pid_wrkr[id]
136
136
end
137
- w= new (id, Threads. ReentrantLock (), [], [], false , W_CREATED, Condition (), time (), conn_func)
137
+ w= new (id, Threads. ReentrantLock (), [], [], false , W_CREATED, Threads . Condition (), time (), conn_func)
138
138
w. initialized = Event ()
139
139
register_worker (w)
140
140
w
@@ -144,8 +144,10 @@ mutable struct Worker
144
144
end
145
145
146
146
# Store `state` as the new state of worker `w` and wake every task waiting on
# `w.c_state`. Both the atomic store and the notification are performed while
# `w.c_state` is locked; the value returned is whatever `notify` returns.
function set_worker_state(w, state)
    cond = w.c_state
    return @lock cond begin
        @atomic w.state = state
        notify(cond; all=true)
    end
end
150
152
151
153
function check_worker_state (w:: Worker )
@@ -161,15 +163,16 @@ function check_worker_state(w::Worker)
161
163
else
162
164
w. ct_time = time ()
163
165
if myid () > w. id
164
- t = @async exec_conn_func (w)
166
+ t = Threads . @spawn Threads . threadpool () exec_conn_func (w)
165
167
else
166
168
# route request via node 1
167
- t = @async remotecall_fetch ((p,to_id) -> remotecall_fetch (exec_conn_func, p, to_id), 1 , w. id, myid ())
169
+ t = Threads . @spawn Threads . threadpool () remotecall_fetch ((p,to_id) -> remotecall_fetch (exec_conn_func, p, to_id), 1 , w. id, myid ())
168
170
end
169
171
errormonitor (t)
170
172
wait_for_conn (w)
171
173
end
172
174
end
175
+ return nothing
173
176
end
174
177
175
178
exec_conn_func (id:: Int ) = exec_conn_func (worker_from_id (id):: Worker )
@@ -191,9 +194,17 @@ function wait_for_conn(w)
191
194
timeout = worker_timeout () - (time () - w. ct_time)
192
195
timeout <= 0 && error (" peer $(w. id) has not connected to $(myid ()) " )
193
196
194
- @async (sleep (timeout); notify (w. c_state; all= true ))
195
- wait (w. c_state)
196
- w. state === W_CREATED && error (" peer $(w. id) didn't connect to $(myid ()) within $timeout seconds" )
197
+ T = Threads. @spawn Threads. threadpool () begin
198
+ sleep ($ timeout)
199
+ lock (w. c_state) do
200
+ notify (w. c_state; all= true )
201
+ end
202
+ end
203
+ errormonitor (T)
204
+ lock (w. c_state) do
205
+ wait (w. c_state)
206
+ w. state === W_CREATED && error (" peer $(w. id) didn't connect to $(myid ()) within $timeout seconds" )
207
+ end
197
208
end
198
209
nothing
199
210
end
@@ -247,7 +258,7 @@ function start_worker(out::IO, cookie::AbstractString=readline(stdin); close_std
247
258
else
248
259
sock = listen (interface, LPROC. bind_port)
249
260
end
250
- errormonitor (@async while isopen (sock)
261
+ errormonitor (Threads . @spawn while isopen (sock)
251
262
client = accept (sock)
252
263
process_messages (client, client, true )
253
264
end )
279
290
280
291
281
292
function redirect_worker_output (ident, stream)
282
- t = @async while ! eof (stream)
293
+ t = Threads . @spawn while ! eof (stream)
283
294
line = readline (stream)
284
295
if startswith (line, " From worker " )
285
296
# stdout's of "additional" workers started from an initial worker on a host are not available
@@ -318,7 +329,7 @@ function read_worker_host_port(io::IO)
318
329
leader = String[]
319
330
try
320
331
while ntries > 0
321
- readtask = @async readline (io)
332
+ readtask = Threads . @spawn Threads . threadpool () readline (io)
322
333
yield ()
323
334
while ! istaskdone (readtask) && ((time_ns () - t0) < timeout)
324
335
sleep (0.05 )
@@ -419,7 +430,7 @@ if launching workers programmatically, execute `addprocs` in its own task.
419
430
420
431
```julia
421
432
# On busy clusters, call `addprocs` asynchronously
422
- t = @async addprocs(...)
433
+ t = Threads.@spawn addprocs(...)
423
434
```
424
435
425
436
```julia
@@ -485,20 +496,23 @@ function addprocs_locked(manager::ClusterManager; kwargs...)
485
496
# call manager's `launch` in a separate task. This allows the master
486
497
# process to initiate the connection setup process as and when workers come
487
498
# online
488
- t_launch = @async launch (manager, params, launched, launch_ntfy)
499
+ t_launch = Threads . @spawn Threads . threadpool () launch (manager, params, launched, launch_ntfy)
489
500
490
501
@sync begin
491
502
while true
492
503
if isempty (launched)
493
504
istaskdone (t_launch) && break
494
- @async (sleep (1 ); notify (launch_ntfy))
505
+ Threads. @spawn Threads. threadpool () begin
506
+ sleep (1 )
507
+ notify (launch_ntfy)
508
+ end
495
509
wait (launch_ntfy)
496
510
end
497
511
498
512
if ! isempty (launched)
499
513
wconfig = popfirst! (launched)
500
514
let wconfig= wconfig
501
- @async setup_launched_worker (manager, wconfig, launched_q)
515
+ Threads . @spawn Threads . threadpool () setup_launched_worker (manager, wconfig, launched_q)
502
516
end
503
517
end
504
518
end
@@ -578,7 +592,7 @@ function launch_n_additional_processes(manager, frompid, fromconfig, cnt, launch
578
592
wconfig. port = port
579
593
580
594
let wconfig= wconfig
581
- @async begin
595
+ Threads . @spawn Threads . threadpool () begin
582
596
pid = create_worker (manager, wconfig)
583
597
remote_do (redirect_output_from_additional_worker, frompid, pid, port)
584
598
push! (launched_q, pid)
@@ -645,7 +659,12 @@ function create_worker(manager, wconfig)
645
659
# require the value of config.connect_at which is set only upon connection completion
646
660
for jw in PGRP. workers
647
661
if (jw. id != 1 ) && (jw. id < w. id)
648
- (jw. state === W_CREATED) && wait (jw. c_state)
662
+ # wait for jw to join
663
+ if jw. state === W_CREATED
664
+ lock (jw. c_state) do
665
+ wait (jw. c_state)
666
+ end
667
+ end
649
668
push! (join_list, jw)
650
669
end
651
670
end
@@ -668,7 +687,12 @@ function create_worker(manager, wconfig)
668
687
end
669
688
670
689
for wl in wlist
671
- (wl. state === W_CREATED) && wait (wl. c_state)
690
+ lock (wl. c_state) do
691
+ if wl. state === W_CREATED
692
+ # wait for wl to join
693
+ wait (wl. c_state)
694
+ end
695
+ end
672
696
push! (join_list, wl)
673
697
end
674
698
end
@@ -727,23 +751,21 @@ function redirect_output_from_additional_worker(pid, port)
727
751
end
728
752
729
753
"""
    check_master_connect()

Start a background watchdog task that terminates this process with exit code 1
when process 1 has not been registered in `map_pid_wrkr` within
`worker_timeout()` seconds.

Returns immediately. When running under valgrind no watchdog is started and
`nothing` is returned; otherwise the spawned task (wrapped by `errormonitor`)
is returned.
"""
function check_master_connect()
    # If we do not have at least process 1 connect to us within timeout
    # we log an error and exit, unless we're running on valgrind
    if ccall(:jl_running_on_valgrind, Cint, ()) != 0
        return
    end

    errormonitor(
        Threads.@spawn begin
            timeout = worker_timeout()
            # `timedwait` polls until the callback returns `true`, so the callback
            # must express the success condition (master is registered). Passing the
            # negation would report `:ok` immediately while the master is absent and
            # `:timed_out` once it is present.
            if timedwait(() -> haskey(map_pid_wrkr, 1), timeout) === :timed_out
                print(stderr, "Master process (id 1) could not connect within $(timeout) seconds.\nexiting.\n")
                exit(1)
            end
        end
    )
end
748
770
749
771
@@ -1028,13 +1050,13 @@ function rmprocs(pids...; waitfor=typemax(Int))
1028
1050
1029
1051
pids = vcat (pids... )
1030
1052
if waitfor == 0
1031
- t = @async _rmprocs (pids, typemax (Int))
1053
+ t = Threads . @spawn Threads . threadpool () _rmprocs (pids, typemax (Int))
1032
1054
yield ()
1033
1055
return t
1034
1056
else
1035
1057
_rmprocs (pids, waitfor)
1036
1058
# return a dummy task object that user code can wait on.
1037
- return @async nothing
1059
+ return Threads . @spawn Threads . threadpool () nothing
1038
1060
end
1039
1061
end
1040
1062
@@ -1217,7 +1239,7 @@ function interrupt(pids::AbstractVector=workers())
1217
1239
@assert myid () == 1
1218
1240
@sync begin
1219
1241
for pid in pids
1220
- @async interrupt (pid)
1242
+ Threads . @spawn Threads . threadpool () interrupt (pid)
1221
1243
end
1222
1244
end
1223
1245
end
@@ -1288,18 +1310,16 @@ end
1288
1310
1289
1311
using Random: randstring
1290
1312
1291
# One-time setup that is only needed when there is more than 1 processor.
# `inited` starts out `false`; the compare-and-swap below flips it to `true` and
# hands back the previous value, so only the first caller runs the setup steps.
const inited = Threads.Atomic{Bool}(false)
function init_multi()
    already_done = Threads.atomic_cas!(inited, false, true)
    already_done && return nothing
    push!(Base.package_callbacks, _require_callback)
    atexit(terminate_all_workers)
    init_bind_addr()
    cluster_cookie(randstring(HDR_COOKIE_LEN))
    return nothing
end
1304
1324
1305
1325
function init_parallel ()
0 commit comments