@@ -637,27 +637,38 @@ function deregister_worker(pg, pid)
637
637
# delete this worker from our remote reference client sets
638
638
ids = []
639
639
tonotify = []
640
- for (id,rv) in pg. refs
641
- if in (pid,rv. clientset)
642
- push! (ids, id)
640
+ lock (client_refs) do
641
+ for (id,rv) in pg. refs
642
+ if in (pid,rv. clientset)
643
+ push! (ids, id)
644
+ end
645
+ if rv. waitingfor == pid
646
+ push! (tonotify, (id,rv))
647
+ end
643
648
end
644
- if rv . waitingfor == pid
645
- push! (tonotify, ( id,rv) )
649
+ for id in ids
650
+ del_client (pg, id, pid )
646
651
end
647
- end
648
- for id in ids
649
- del_client (pg, id, pid)
650
- end
651
652
652
- # throw exception to tasks waiting for this pid
653
- for (id,rv) in tonotify
654
- notify_error (rv. c, ProcessExitedException ())
655
- delete! (pg. refs, id)
653
+ # throw exception to tasks waiting for this pid
654
+ for (id,rv) in tonotify
655
+ notify_error (rv. c, ProcessExitedException ())
656
+ delete! (pg. refs, id)
657
+ end
656
658
end
657
659
end
658
660
659
661
# # remote refs ##
660
- const client_refs = WeakKeyDict ()
662
+
663
+ """
664
+ client_refs
665
+
666
+ Tracks whether a particular AbstractRemoteRef
667
+ (identified by its RRID) exists on this worker.
668
+
669
+ The client_refs lock is also used to synchronize access to `.refs` and associated clientset state
670
+ """
671
+ const client_refs = WeakKeyDict {Any, Void} () # used as a WeakKeySet
661
672
662
673
abstract AbstractRemoteRef
663
674
@@ -680,34 +691,26 @@ type RemoteChannel{T<:AbstractChannel} <: AbstractRemoteRef
680
691
end
681
692
682
693
function test_existing_ref (r:: AbstractRemoteRef )
683
- found = getkey (client_refs, r, false )
684
- if found != = false
685
- if client_refs[r] == true
686
- @assert r. where > 0
687
- if isa (r, Future) && isnull (found. v) && ! isnull (r. v)
688
- # we have recd the value from another source, probably a deserialized ref, send a del_client message
689
- send_del_client (r)
690
- found. v = r. v
691
- end
692
- return found
693
- else
694
- # just delete the entry.
695
- delete! (client_refs, found)
696
- end
694
+ found = getkey (client_refs, r, nothing )
695
+ if found != = nothing
696
+ @assert r. where > 0
697
+ if isa (r, Future) && isnull (found. v) && ! isnull (r. v)
698
+ # we have recd the value from another source, probably a deserialized ref, send a del_client message
699
+ send_del_client (r)
700
+ found. v = r. v
701
+ end
702
+ return found:: typeof (r)
697
703
end
698
704
699
- client_refs[r] = true
705
+ client_refs[r] = nothing
700
706
finalizer (r, finalize_ref)
701
707
return r
702
708
end
703
709
704
710
function finalize_ref (r:: AbstractRemoteRef )
705
- if r. where > 0 # Handle the case of the finalizer having being called manually
706
- if haskey (client_refs, r)
707
- # NOTE: Change below line to deleting the entry once issue https://github.com/JuliaLang/julia/issues/14445
708
- # is fixed.
709
- client_refs[r] = false
710
- end
711
+ if r. where > 0 # Handle the case of the finalizer having been called manually
712
+ islocked (client_refs) && return finalizer (r, finalize_ref) # delay finalizer for later, when it's not already locked
713
+ delete! (client_refs, r)
711
714
if isa (r, RemoteChannel)
712
715
send_del_client (r)
713
716
else
@@ -717,7 +720,7 @@ function finalize_ref(r::AbstractRemoteRef)
717
720
end
718
721
r. where = 0
719
722
end
720
- return r
723
+ nothing
721
724
end
722
725
723
726
Future (w:: LocalProcess ) = Future (w. id)
@@ -791,23 +794,27 @@ A low-level API which returns the backing `AbstractChannel` for an `id` returned
791
794
The call is valid only on the node where the backing channel exists.
792
795
"""
793
796
function channel_from_id (id)
794
- rv = get (PGRP. refs, id, false )
797
+ rv = lock (client_refs) do
798
+ return get (PGRP. refs, id, false )
799
+ end
795
800
if rv === false
796
801
throw (ErrorException (" Local instance of remote reference not found" ))
797
802
end
798
- rv. c
803
+ return rv. c
799
804
end
800
805
801
806
lookup_ref (rrid:: RRID , f= def_rv_channel) = lookup_ref (PGRP, rrid, f)
802
807
function lookup_ref (pg, rrid, f)
803
- rv = get (pg. refs, rrid, false )
804
- if rv === false
805
- # first we've heard of this ref
806
- rv = RemoteValue (f ())
807
- pg. refs[rrid] = rv
808
- push! (rv. clientset, rrid. whence)
809
- end
810
- rv
808
+ return lock (client_refs) do
809
+ rv = get (pg. refs, rrid, false )
810
+ if rv === false
811
+ # first we've heard of this ref
812
+ rv = RemoteValue (f ())
813
+ pg. refs[rrid] = rv
814
+ push! (rv. clientset, rrid. whence)
815
+ end
816
+ return rv
817
+ end :: RemoteValue
811
818
end
812
819
813
820
"""
@@ -827,7 +834,7 @@ function isready(rr::Future)
827
834
! isnull (rr. v) && return true
828
835
829
836
rid = remoteref_id (rr)
830
- if rr. where == myid ()
837
+ return if rr. where == myid ()
831
838
isready (lookup_ref (rid). c)
832
839
else
833
840
remotecall_fetch (rid-> isready (lookup_ref (rid). c), rr. where, rid)
@@ -844,7 +851,7 @@ it can be safely used on a `Future` since they are assigned only once.
844
851
"""
845
852
function isready (rr:: RemoteChannel , args... )
846
853
rid = remoteref_id (rr)
847
- if rr. where == myid ()
854
+ return if rr. where == myid ()
848
855
isready (lookup_ref (rid). c, args... )
849
856
else
850
857
remotecall_fetch (rid-> isready (lookup_ref (rid). c, args... ), rr. where, rid)
@@ -855,11 +862,7 @@ del_client(rr::AbstractRemoteRef) = del_client(remoteref_id(rr), myid())
855
862
856
863
del_client (id, client) = del_client (PGRP, id, client)
857
864
function del_client (pg, id, client)
858
- # As a workaround to issue https://github.com/JuliaLang/julia/issues/14445
859
- # the dict/set updates are executed asynchronously so that they do
860
- # not occur in the midst of a gc. The `@async` prefix must be removed once
861
- # 14445 is fixed.
862
- @async begin
865
+ lock (client_refs) do
863
866
rv = get (pg. refs, id, false )
864
867
if rv != = false
865
868
delete! (rv. clientset, client)
@@ -898,8 +901,10 @@ function send_del_client(rr)
898
901
end
899
902
900
903
function add_client (id, client)
901
- rv = lookup_ref (id)
902
- push! (rv. clientset, client)
904
+ lock (client_refs) do
905
+ rv = lookup_ref (id)
906
+ push! (rv. clientset, client)
907
+ end
903
908
nothing
904
909
end
905
910
@@ -999,19 +1004,21 @@ function run_work_thunk(thunk, print_error)
999
1004
result = RemoteException (ce)
1000
1005
print_error && showerror (STDERR, ce)
1001
1006
end
1002
- result
1007
+ return result
1003
1008
end
1004
1009
function run_work_thunk (rv:: RemoteValue , thunk)
1005
1010
put! (rv, run_work_thunk (thunk, false ))
1006
1011
nothing
1007
1012
end
1008
1013
1009
1014
function schedule_call (rid, thunk)
1010
- rv = RemoteValue (def_rv_channel ())
1011
- (PGRP:: ProcessGroup ). refs[rid] = rv
1012
- push! (rv. clientset, rid. whence)
1013
- schedule (@task (run_work_thunk (rv,thunk)))
1014
- rv
1015
+ return lock (client_refs) do
1016
+ rv = RemoteValue (def_rv_channel ())
1017
+ (PGRP:: ProcessGroup ). refs[rid] = rv
1018
+ push! (rv. clientset, rid. whence)
1019
+ @schedule run_work_thunk (rv, thunk)
1020
+ return rv
1021
+ end
1015
1022
end
1016
1023
1017
1024
# make a thunk to call f on args in a way that simulates what would happen if
@@ -1026,13 +1033,13 @@ end
1026
1033
function remotecall (f, w:: LocalProcess , args... ; kwargs... )
1027
1034
rr = Future (w)
1028
1035
schedule_call (remoteref_id (rr), local_remotecall_thunk (f, args, kwargs))
1029
- rr
1036
+ return rr
1030
1037
end
1031
1038
1032
1039
function remotecall (f, w:: Worker , args... ; kwargs... )
1033
1040
rr = Future (w)
1034
1041
send_msg (w, MsgHeader (remoteref_id (rr)), CallMsg {:call} (f, args, kwargs))
1035
- rr
1042
+ return rr
1036
1043
end
1037
1044
1038
1045
"""
@@ -1046,7 +1053,7 @@ remotecall(f, id::Integer, args...; kwargs...) = remotecall(f, worker_from_id(id
1046
1053
1047
1054
function remotecall_fetch (f, w:: LocalProcess , args... ; kwargs... )
1048
1055
v= run_work_thunk (local_remotecall_thunk (f,args, kwargs), false )
1049
- isa (v, RemoteException) ? throw (v) : v
1056
+ return isa (v, RemoteException) ? throw (v) : v
1050
1057
end
1051
1058
1052
1059
function remotecall_fetch (f, w:: Worker , args... ; kwargs... )
@@ -1057,8 +1064,10 @@ function remotecall_fetch(f, w::Worker, args...; kwargs...)
1057
1064
rv. waitingfor = w. id
1058
1065
send_msg (w, MsgHeader (RRID (0 ,0 ), oid), CallMsg {:call_fetch} (f, args, kwargs))
1059
1066
v = take! (rv)
1060
- delete! (PGRP. refs, oid)
1061
- isa (v, RemoteException) ? throw (v) : v
1067
+ lock (client_refs) do
1068
+ delete! (PGRP. refs, oid)
1069
+ end
1070
+ return isa (v, RemoteException) ? throw (v) : v
1062
1071
end
1063
1072
1064
1073
"""
@@ -1080,9 +1089,11 @@ function remotecall_wait(f, w::Worker, args...; kwargs...)
1080
1089
rr = Future (w)
1081
1090
send_msg (w, MsgHeader (remoteref_id (rr), prid), CallWaitMsg (f, args, kwargs))
1082
1091
v = fetch (rv. c)
1083
- delete! (PGRP. refs, prid)
1092
+ lock (client_refs) do
1093
+ delete! (PGRP. refs, prid)
1094
+ end
1084
1095
isa (v, RemoteException) && throw (v)
1085
- rr
1096
+ return rr
1086
1097
end
1087
1098
1088
1099
"""
@@ -1834,9 +1845,11 @@ function create_worker(manager, wconfig)
1834
1845
1835
1846
@schedule manage (w. manager, w. id, w. config, :register )
1836
1847
wait (rr_ntfy_join)
1837
- delete! (PGRP. refs, ntfy_oid)
1848
+ lock (client_refs) do
1849
+ delete! (PGRP. refs, ntfy_oid)
1850
+ end
1838
1851
1839
- w. id
1852
+ return w. id
1840
1853
end
1841
1854
1842
1855
@@ -1859,7 +1872,7 @@ function launch_additional(np::Integer, cmd::Cmd)
1859
1872
additional_io_objs[port] = io
1860
1873
end
1861
1874
1862
- addresses
1875
+ return addresses
1863
1876
end
1864
1877
1865
1878
function redirect_output_from_additional_worker (pid, port)
0 commit comments