@@ -151,7 +151,7 @@ function set_worker_state(w, state)
151
151
end
152
152
153
153
function check_worker_state (w:: Worker )
154
- if w. state === W_CREATED
154
+ if ( @atomic w. state) === W_CREATED
155
155
if ! isclusterlazy ()
156
156
if PGRP. topology === :all_to_all
157
157
# Since higher pids connect with lower pids, the remote worker
@@ -163,10 +163,10 @@ function check_worker_state(w::Worker)
163
163
else
164
164
w. ct_time = time ()
165
165
if myid () > w. id
166
- t = Threads . @spawn Threads . threadpool () exec_conn_func (w)
166
+ t = @async exec_conn_func (w)
167
167
else
168
168
# route request via node 1
169
- t = Threads . @spawn Threads . threadpool () remotecall_fetch ((p,to_id) -> remotecall_fetch (exec_conn_func, p, to_id), 1 , w. id, myid ())
169
+ t = @async remotecall_fetch ((p,to_id) -> remotecall_fetch (exec_conn_func, p, to_id), 1 , w. id, myid ())
170
170
end
171
171
errormonitor (t)
172
172
wait_for_conn (w)
@@ -190,20 +190,14 @@ function exec_conn_func(w::Worker)
190
190
end
191
191
192
192
function wait_for_conn (w)
193
- if w. state === W_CREATED
193
+ if ( @atomic w. state) === W_CREATED
194
194
timeout = worker_timeout () - (time () - w. ct_time)
195
195
timeout <= 0 && error (" peer $(w. id) has not connected to $(myid ()) " )
196
196
197
- T = Threads. @spawn Threads. threadpool () begin
198
- sleep ($ timeout)
199
- lock (w. c_state) do
200
- notify (w. c_state; all= true )
201
- end
202
- end
203
- errormonitor (T)
204
- lock (w. c_state) do
205
- wait (w. c_state)
206
- w. state === W_CREATED && error (" peer $(w. id) didn't connect to $(myid ()) within $timeout seconds" )
197
+ if timedwait (() -> (@atomic w. state) === W_CONNECTED, timeout) === :timed_out
198
+ # Notify any waiters on the state and throw
199
+ @lock w. c_state notify (w. c_state)
200
+ error (" peer $(w. id) didn't connect to $(myid ()) within $timeout seconds" )
207
201
end
208
202
end
209
203
nothing
@@ -258,7 +252,7 @@ function start_worker(out::IO, cookie::AbstractString=readline(stdin); close_std
258
252
else
259
253
sock = listen (interface, LPROC. bind_port)
260
254
end
261
- errormonitor (Threads . @spawn while isopen (sock)
255
+ errormonitor (@async while isopen (sock)
262
256
client = accept (sock)
263
257
process_messages (client, client, true )
264
258
end )
290
284
291
285
292
286
function redirect_worker_output (ident, stream)
293
- t = Threads . @spawn while ! eof (stream)
287
+ t = @async while ! eof (stream)
294
288
line = readline (stream)
295
289
if startswith (line, " From worker " )
296
290
# stdout's of "additional" workers started from an initial worker on a host are not available
@@ -329,7 +323,7 @@ function read_worker_host_port(io::IO)
329
323
leader = String[]
330
324
try
331
325
while ntries > 0
332
- readtask = Threads . @spawn Threads . threadpool () readline (io)
326
+ readtask = @async readline (io)
333
327
yield ()
334
328
while ! istaskdone (readtask) && ((time_ns () - t0) < timeout)
335
329
sleep (0.05 )
@@ -430,7 +424,7 @@ if launching workers programmatically, execute `addprocs` in its own task.
430
424
431
425
```julia
432
426
# On busy clusters, call `addprocs` asynchronously
433
- t = Threads.@spawn addprocs(...)
427
+ t = @async addprocs(...)
434
428
```
435
429
436
430
```julia
@@ -496,13 +490,14 @@ function addprocs_locked(manager::ClusterManager; kwargs...)
496
490
# call manager's `launch` in a separate task. This allows the master
497
491
# process to initiate the connection setup process as and when workers come
498
492
# online
499
- t_launch = Threads. @spawn Threads. threadpool () launch (manager, params, launched, launch_ntfy)
493
+ # NOTE: Must be `@async`. See FIXME above
494
+ t_launch = @async launch (manager, params, launched, launch_ntfy)
500
495
501
496
@sync begin
502
497
while true
503
498
if isempty (launched)
504
499
istaskdone (t_launch) && break
505
- Threads . @spawn Threads . threadpool () begin
500
+ @async begin # NOTE: Must be `@async`. See FIXME above
506
501
sleep (1 )
507
502
notify (launch_ntfy)
508
503
end
@@ -512,7 +507,8 @@ function addprocs_locked(manager::ClusterManager; kwargs...)
512
507
if ! isempty (launched)
513
508
wconfig = popfirst! (launched)
514
509
let wconfig= wconfig
515
- Threads. @spawn Threads. threadpool () setup_launched_worker (manager, wconfig, launched_q)
510
+ # NOTE: Must be `@async`. See FIXME above
511
+ @async setup_launched_worker (manager, wconfig, launched_q)
516
512
end
517
513
end
518
514
end
@@ -592,7 +588,7 @@ function launch_n_additional_processes(manager, frompid, fromconfig, cnt, launch
592
588
wconfig. port = port
593
589
594
590
let wconfig= wconfig
595
- Threads . @spawn Threads . threadpool () begin
591
+ @async begin
596
592
pid = create_worker (manager, wconfig)
597
593
remote_do (redirect_output_from_additional_worker, frompid, pid, port)
598
594
push! (launched_q, pid)
@@ -660,7 +656,7 @@ function create_worker(manager, wconfig)
660
656
for jw in PGRP. workers
661
657
if (jw. id != 1 ) && (jw. id < w. id)
662
658
# wait for jw to join
663
- if jw. state === W_CREATED
659
+ if ( @atomic jw. state) === W_CREATED
664
660
lock (jw. c_state) do
665
661
wait (jw. c_state)
666
662
end
@@ -688,7 +684,7 @@ function create_worker(manager, wconfig)
688
684
689
685
for wl in wlist
690
686
lock (wl. c_state) do
691
- if wl. state === W_CREATED
687
+ if ( @atomic wl. state) === W_CREATED
692
688
# wait for wl to join
693
689
wait (wl. c_state)
694
690
end
@@ -758,7 +754,7 @@ function check_master_connect()
758
754
end
759
755
760
756
errormonitor (
761
- Threads . @spawn begin
757
+ @async begin
762
758
timeout = worker_timeout ()
763
759
if timedwait (() -> ! haskey (map_pid_wrkr, 1 ), timeout) === :timed_out
764
760
print (stderr , " Master process (id 1) could not connect within $(timeout) seconds.\n exiting.\n " )
@@ -890,7 +886,7 @@ function nprocs()
890
886
n = length (PGRP. workers)
891
887
# filter out workers in the process of being setup/shutdown.
892
888
for jw in PGRP. workers
893
- if ! isa (jw, LocalProcess) && (jw. state != = W_CONNECTED)
889
+ if ! isa (jw, LocalProcess) && (( @atomic jw. state) != = W_CONNECTED)
894
890
n = n - 1
895
891
end
896
892
end
@@ -941,7 +937,7 @@ julia> procs()
941
937
function procs ()
942
938
if myid () == 1 || (PGRP. topology === :all_to_all && ! isclusterlazy ())
943
939
# filter out workers in the process of being setup/shutdown.
944
- return Int[x. id for x in PGRP. workers if isa (x, LocalProcess) || (x. state === W_CONNECTED)]
940
+ return Int[x. id for x in PGRP. workers if isa (x, LocalProcess) || (( @atomic x. state) === W_CONNECTED)]
945
941
else
946
942
return Int[x. id for x in PGRP. workers]
947
943
end
950
946
function id_in_procs (id) # faster version of `id in procs()`
951
947
if myid () == 1 || (PGRP. topology === :all_to_all && ! isclusterlazy ())
952
948
for x in PGRP. workers
953
- if (x. id:: Int ) == id && (isa (x, LocalProcess) || (x:: Worker ). state === W_CONNECTED)
949
+ if (x. id:: Int ) == id && (isa (x, LocalProcess) || (@atomic ( x:: Worker ). state) === W_CONNECTED)
954
950
return true
955
951
end
956
952
end
@@ -972,7 +968,7 @@ Specifically all workers bound to the same ip-address as `pid` are returned.
972
968
"""
973
969
function procs (pid:: Integer )
974
970
if myid () == 1
975
- all_workers = [x for x in PGRP. workers if isa (x, LocalProcess) || (x. state === W_CONNECTED)]
971
+ all_workers = [x for x in PGRP. workers if isa (x, LocalProcess) || (( @atomic x. state) === W_CONNECTED)]
976
972
if (pid == 1 ) || (isa (map_pid_wrkr[pid]. manager, LocalManager))
977
973
Int[x. id for x in filter (w -> (w. id== 1 ) || (isa (w. manager, LocalManager)), all_workers)]
978
974
else
@@ -1050,13 +1046,13 @@ function rmprocs(pids...; waitfor=typemax(Int))
1050
1046
1051
1047
pids = vcat (pids... )
1052
1048
if waitfor == 0
1053
- t = Threads . @spawn Threads . threadpool () _rmprocs (pids, typemax (Int))
1049
+ t = @async _rmprocs (pids, typemax (Int))
1054
1050
yield ()
1055
1051
return t
1056
1052
else
1057
1053
_rmprocs (pids, waitfor)
1058
1054
# return a dummy task object that user code can wait on.
1059
- return Threads . @spawn Threads . threadpool () nothing
1055
+ return @async nothing
1060
1056
end
1061
1057
end
1062
1058
@@ -1079,11 +1075,11 @@ function _rmprocs(pids, waitfor)
1079
1075
1080
1076
start = time_ns ()
1081
1077
while (time_ns () - start) < waitfor* 1e9
1082
- all (w -> w. state === W_TERMINATED, rmprocset) && break
1078
+ all (w -> ( @atomic w. state) === W_TERMINATED, rmprocset) && break
1083
1079
sleep (min (0.1 , waitfor - (time_ns () - start)/ 1e9 ))
1084
1080
end
1085
1081
1086
- unremoved = [wrkr. id for wrkr in filter (w -> w. state != = W_TERMINATED, rmprocset)]
1082
+ unremoved = [wrkr. id for wrkr in filter (w -> ( @atomic w. state) != = W_TERMINATED, rmprocset)]
1087
1083
if length (unremoved) > 0
1088
1084
estr = string (" rmprocs: pids " , unremoved, " not terminated after " , waitfor, " seconds." )
1089
1085
throw (ErrorException (estr))
@@ -1239,7 +1235,7 @@ function interrupt(pids::AbstractVector=workers())
1239
1235
@assert myid () == 1
1240
1236
@sync begin
1241
1237
for pid in pids
1242
- Threads . @spawn Threads . threadpool () interrupt (pid)
1238
+ @async interrupt (pid)
1243
1239
end
1244
1240
end
1245
1241
end
0 commit comments