Skip to content

Commit 0c123a8

Browse files
authored
Merge pull request #462 from JuliaParallel/jps/set-task-tid-loop
Retry jl_set_task_tid on failure
2 parents e3c6d5e + 0295887 commit 0c123a8

File tree

4 files changed

+24
-10
lines changed

4 files changed

+24
-10
lines changed

Diff for: src/Dagger.jl

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ include("utils/dagdebug.jl")
2525

2626
# Distributed data
2727
include("utils/locked-object.jl")
28+
include("utils/tasks.jl")
2829
include("options.jl")
2930
include("processor.jl")
3031
include("scopes.jl")

Diff for: src/processor.jl

+1-6
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,7 @@ function execute!(proc::ThreadProc, @nospecialize(f), @nospecialize(args...); @n
166166
TimespanLogging.prof_task_put!(tls.sch_handle.thunk_id.id)
167167
@invokelatest f(args...; kwargs...)
168168
end
169-
task.sticky = true
170-
ret = ccall(:jl_set_task_tid, Cint, (Any, Cint), task, proc.tid-1)
171-
if ret == 0
172-
error("jl_set_task_tid == 0")
173-
end
174-
@assert Threads.threadid(task) == proc.tid
169+
set_task_tid!(task, proc.tid)
175170
schedule(task)
176171
try
177172
fetch(task)

Diff for: src/sch/Sch.jl

+2-4
Original file line numberDiff line numberDiff line change
@@ -1286,8 +1286,7 @@ function start_processor_runner!(istate::ProcessorInternalState, uid::UInt64, re
12861286
lock(istate.queue) do _
12871287
tid = task_tid_for_processor(to_proc)
12881288
if tid !== nothing
1289-
t.sticky = true
1290-
ret = ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid-1)
1289+
Dagger.set_task_tid!(t, tid)
12911290
else
12921291
t.sticky = false
12931292
end
@@ -1299,8 +1298,7 @@ function start_processor_runner!(istate::ProcessorInternalState, uid::UInt64, re
12991298
end
13001299
tid = task_tid_for_processor(to_proc)
13011300
if tid !== nothing
1302-
proc_run_task.sticky = true
1303-
ret = ccall(:jl_set_task_tid, Cint, (Any, Cint), proc_run_task, tid-1)
1301+
Dagger.set_task_tid!(proc_run_task, tid)
13041302
else
13051303
proc_run_task.sticky = false
13061304
end

Diff for: src/utils/tasks.jl

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
"""
    set_task_tid!(task::Task, tid::Integer)

Mark `task` as sticky and ask the Julia runtime to assign it to the thread
whose 1-based ID is `tid`.

The runtime call (`jl_set_task_tid`) is retried, yielding to the scheduler
between attempts, for as long as it reports failure (return code `0`). After
more than 10 failed attempts a warning is logged and the function gives up
without throwing; in that case `task` stays sticky but its thread assignment
has not been confirmed.

# Arguments
- `task`: the task to pin.
- `tid`: 1-based thread ID; it is passed to the runtime as `tid - 1`.

# Returns
`nothing`, both on success and when giving up after repeated failures.

# Throws
- `ArgumentError` if `tid < 1`.
- `ErrorException` if the runtime returns a code other than `0` or `1`.
"""
function set_task_tid!(task::Task, tid::Integer)
    # Thread IDs are 1-based (the final check compares against
    # `Threads.threadid`), so anything lower cannot name a real thread.
    # Reject it before mutating `task` or handing a negative ID to the runtime.
    tid >= 1 || throw(ArgumentError("Thread ID must be >= 1, got $tid"))

    task.sticky = true
    ctr = 0
    while true
        ret = ccall(:jl_set_task_tid, Cint, (Any, Cint), task, tid-1)
        if ret == 1
            break
        elseif ret == 0
            # Assignment failed; let other tasks run before trying again.
            yield()
        else
            error("Unexpected retcode from jl_set_task_tid: $ret")
        end
        ctr += 1
        if ctr > 10
            # Best-effort: do not throw, just report and leave the task as-is.
            @warn "Setting task TID to $tid failed, giving up!"
            return nothing
        end
    end
    # Internal sanity check that the runtime really recorded the assignment.
    @assert Threads.threadid(task) == tid "jl_set_task_tid failed!"
    return nothing
end

0 commit comments

Comments
 (0)