Skip to content

Commit a66f893

Browse files
committed
threads: avoid deadlock from recursive lock acquire (PR #38487)
Finalizers can't safely acquire many essential locks (such as the iolock, to cleanup libuv objects) if they are run inside another lock. Therefore, inhibit all finalizers on the thread until all locks are released (previously, this was only true for our internal locks). (cherry-picked from 59aedd1)
1 parent 599ecd8 commit a66f893

File tree

12 files changed

+140
-31
lines changed

12 files changed

+140
-31
lines changed

NEWS.md

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ Multi-threading changes
8888
* `@threads` now allows an optional schedule argument. Use `@threads :static ...` to
8989
ensure that the same schedule will be used as in past versions; the default schedule
9090
is likely to change in the future.
91+
* New function `Base.Threads.foreach(f, channel::Channel)` for multithreaded `Channel` consumption ([#34543]).
9192

9293
Build system changes
9394
--------------------

base/gcutils.jl

+10
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,16 @@ Control whether garbage collection is enabled using a boolean argument (`true` f
9191
"""
9292
enable(on::Bool) = ccall(:jl_gc_enable, Int32, (Int32,), on) != 0
9393

94+
"""
95+
GC.enable_finalizers(on::Bool)
96+
97+
Increment or decrement the counter that controls the running of finalizers on
98+
the current Task. Finalizers will only run when the counter is at zero. (Set
99+
`true` for enabling, `false` for disabling). They may still run concurrently on
100+
another Task or thread.
101+
"""
102+
enable_finalizers(on::Bool) = ccall(:jl_gc_enable_finalizers, Cvoid, (Ptr{Cvoid}, Int32,), C_NULL, on)
103+
94104
"""
95105
GC.@preserve x1 x2 ... xn expr
96106

base/lock.jl

+22-3
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,24 @@
44
"""
55
ReentrantLock()
66
7-
Creates a re-entrant lock for synchronizing [`Task`](@ref)s.
8-
The same task can acquire the lock as many times as required.
9-
Each [`lock`](@ref) must be matched with an [`unlock`](@ref).
7+
Creates a re-entrant lock for synchronizing [`Task`](@ref)s. The same task can
8+
acquire the lock as many times as required. Each [`lock`](@ref) must be matched
9+
with an [`unlock`](@ref).
10+
11+
Calling 'lock' will also inhibit running of finalizers on that thread until the
12+
corresponding 'unlock'. Use of the standard lock pattern illustrated below
13+
should naturally be supported, but beware of inverting the try/lock order or
14+
missing the try block entirely (e.g. attempting to return with the lock still
15+
held):
16+
17+
```
18+
lock(l)
19+
try
20+
<atomic work>
21+
finally
22+
unlock(l)
23+
end
24+
```
1025
"""
1126
mutable struct ReentrantLock <: AbstractLock
1227
locked_by::Union{Task, Nothing}
@@ -44,6 +59,7 @@ function trylock(rl::ReentrantLock)
4459
if rl.reentrancy_cnt == 0
4560
rl.locked_by = t
4661
rl.reentrancy_cnt = 1
62+
GC.enable_finalizers(false)
4763
got = true
4864
elseif t === notnothing(rl.locked_by)
4965
rl.reentrancy_cnt += 1
@@ -71,6 +87,7 @@ function lock(rl::ReentrantLock)
7187
if rl.reentrancy_cnt == 0
7288
rl.locked_by = t
7389
rl.reentrancy_cnt = 1
90+
GC.enable_finalizers(false)
7491
break
7592
elseif t === notnothing(rl.locked_by)
7693
rl.reentrancy_cnt += 1
@@ -111,6 +128,7 @@ function unlock(rl::ReentrantLock)
111128
rethrow()
112129
end
113130
end
131+
GC.enable_finalizers(true)
114132
end
115133
unlock(rl.cond_wait)
116134
return
@@ -132,6 +150,7 @@ function unlockall(rl::ReentrantLock)
132150
rethrow()
133151
end
134152
end
153+
GC.enable_finalizers(true)
135154
unlock(rl.cond_wait)
136155
return n
137156
end

base/locks-mt.jl

+10-1
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,12 @@ Base.assert_havelock(l::SpinLock) = islocked(l) ? nothing : Base.concurrency_vio
6161
function lock(l::SpinLock)
6262
while true
6363
if _get(l) == 0
64+
GC.enable_finalizers(false)
6465
p = _xchg!(l, 1)
6566
if p == 0
6667
return
6768
end
69+
GC.enable_finalizers(true)
6870
end
6971
ccall(:jl_cpu_pause, Cvoid, ())
7072
# Temporary solution before we have gc transition support in codegen.
@@ -74,13 +76,20 @@ end
7476

7577
function trylock(l::SpinLock)
7678
if _get(l) == 0
77-
return _xchg!(l, 1) == 0
79+
GC.enable_finalizers(false)
80+
p = _xchg!(l, 1)
81+
if p == 0
82+
return true
83+
end
84+
GC.enable_finalizers(true)
7885
end
7986
return false
8087
end
8188

8289
function unlock(l::SpinLock)
90+
_get(l) == 0 && error("unlock count must match lock count")
8391
_set!(l, 0)
92+
GC.enable_finalizers(true)
8493
ccall(:jl_cpu_wake, Cvoid, ())
8594
return
8695
end

src/gc.c

+27-3
Original file line numberDiff line numberDiff line change
@@ -392,12 +392,36 @@ static void run_finalizers(jl_ptls_t ptls)
392392
arraylist_free(&copied_list);
393393
}
394394

395+
JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
396+
{
397+
if (ptls == NULL)
398+
ptls = jl_get_ptls_states();
399+
return ptls->finalizers_inhibited;
400+
}
401+
395402
JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on)
396403
{
404+
if (ptls == NULL)
405+
ptls = jl_get_ptls_states();
397406
int old_val = ptls->finalizers_inhibited;
398407
int new_val = old_val + (on ? -1 : 1);
408+
if (new_val < 0) {
409+
JL_TRY {
410+
jl_error(""); // get a backtrace
411+
}
412+
JL_CATCH {
413+
jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
414+
// Only print the backtrace once, to avoid spamming the logs
415+
static int backtrace_printed = 0;
416+
if (backtrace_printed == 0) {
417+
backtrace_printed = 1;
418+
jlbacktrace(); // written to STDERR_FILENO
419+
}
420+
}
421+
return;
422+
}
399423
ptls->finalizers_inhibited = new_val;
400-
if (!new_val && old_val && !ptls->in_finalizer) {
424+
if (!new_val && old_val && !ptls->in_finalizer && ptls->current_task->locks.len == 0) {
401425
ptls->in_finalizer = 1;
402426
run_finalizers(ptls);
403427
ptls->in_finalizer = 0;
@@ -1580,7 +1604,7 @@ STATIC_INLINE uintptr_t gc_read_stack(void *_addr, uintptr_t offset,
15801604
JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t *vt,
15811605
jl_gc_mark_sp_t sp)
15821606
{
1583-
jl_printf(JL_STDOUT, "GC error (probable corruption) :\n");
1607+
jl_safe_printf("GC error (probable corruption) :\n");
15841608
gc_debug_print_status();
15851609
jl_(vt);
15861610
gc_debug_critical_error();
@@ -3121,7 +3145,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
31213145
// Only disable finalizers on current thread
31223146
// Doing this on all threads is racy (it's impossible to check
31233147
// or wait for finalizers on other threads without dead lock).
3124-
if (!ptls->finalizers_inhibited) {
3148+
if (!ptls->finalizers_inhibited && ptls->current_task->locks.len == 0) {
31253149
int8_t was_in_finalizer = ptls->in_finalizer;
31263150
ptls->in_finalizer = 1;
31273151
run_finalizers(ptls);

src/julia.h

-3
Original file line numberDiff line numberDiff line change
@@ -1719,7 +1719,6 @@ typedef struct _jl_handler_t {
17191719
int8_t gc_state;
17201720
size_t locks_len;
17211721
sig_atomic_t defer_signal;
1722-
int finalizers_inhibited;
17231722
jl_timing_block_t *timing_stack;
17241723
size_t world_age;
17251724
} jl_handler_t;
@@ -1751,8 +1750,6 @@ typedef struct _jl_task_t {
17511750
jl_gcframe_t *gcstack;
17521751
// saved exception stack
17531752
jl_excstack_t *excstack;
1754-
// current world age
1755-
size_t world_age;
17561753

17571754
// id of owning thread
17581755
// does not need to be defined until the task runs

src/locks.h

+4-5
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,9 @@ static inline void jl_lock_frame_pop(void)
8989

9090
static inline void jl_mutex_lock(jl_mutex_t *lock)
9191
{
92-
jl_ptls_t ptls = jl_get_ptls_states();
9392
JL_SIGATOMIC_BEGIN();
9493
jl_mutex_wait(lock, 1);
9594
jl_lock_frame_push(lock);
96-
jl_gc_enable_finalizers(ptls, 0);
9795
}
9896

9997
static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock)
@@ -116,10 +114,8 @@ static inline int jl_mutex_trylock(jl_mutex_t *lock)
116114
{
117115
int got = jl_mutex_trylock_nogc(lock);
118116
if (got) {
119-
jl_ptls_t ptls = jl_get_ptls_states();
120117
JL_SIGATOMIC_BEGIN();
121118
jl_lock_frame_push(lock);
122-
jl_gc_enable_finalizers(ptls, 0);
123119
}
124120
return got;
125121
}
@@ -139,9 +135,12 @@ static inline void jl_mutex_unlock(jl_mutex_t *lock)
139135
{
140136
jl_ptls_t ptls = jl_get_ptls_states();
141137
jl_mutex_unlock_nogc(lock);
142-
jl_gc_enable_finalizers(ptls, 1);
143138
jl_lock_frame_pop();
144139
JL_SIGATOMIC_END();
140+
if (ptls->current_task->locks.len == 0 && ptls->finalizers_inhibited == 0) {
141+
ptls->finalizers_inhibited = 1;
142+
jl_gc_enable_finalizers(ptls, 1); // call run_finalizers (may GC)
143+
}
145144
}
146145

147146
static inline void jl_mutex_init(jl_mutex_t *lock) JL_NOTSAFEPOINT

src/rtutils.c

+8-4
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,6 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
215215
eh->gc_state = ptls->gc_state;
216216
eh->locks_len = current_task->locks.len;
217217
eh->defer_signal = ptls->defer_signal;
218-
eh->finalizers_inhibited = ptls->finalizers_inhibited;
219218
eh->world_age = ptls->world_age;
220219
current_task->eh = eh;
221220
#ifdef ENABLE_TIMINGS
@@ -246,21 +245,26 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
246245
current_task->eh = eh->prev;
247246
ptls->pgcstack = eh->gcstack;
248247
arraylist_t *locks = &current_task->locks;
249-
if (locks->len > eh->locks_len) {
250-
for (size_t i = locks->len;i > eh->locks_len;i--)
248+
int unlocks = locks->len > eh->locks_len;
249+
if (unlocks) {
250+
for (size_t i = locks->len; i > eh->locks_len; i--)
251251
jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]);
252252
locks->len = eh->locks_len;
253253
}
254254
ptls->world_age = eh->world_age;
255255
ptls->defer_signal = eh->defer_signal;
256256
ptls->gc_state = eh->gc_state;
257-
ptls->finalizers_inhibited = eh->finalizers_inhibited;
258257
if (old_gc_state && !eh->gc_state) {
259258
jl_gc_safepoint_(ptls);
260259
}
261260
if (old_defer_signal && !eh->defer_signal) {
262261
jl_sigint_safepoint(ptls);
263262
}
263+
if (unlocks && eh->locks_len == 0 && ptls->finalizers_inhibited == 0) {
264+
// call run_finalizers
265+
ptls->finalizers_inhibited = 1;
266+
jl_gc_enable_finalizers(ptls, 1);
267+
}
264268
}
265269

266270
JL_DLLEXPORT void jl_pop_handler(int n)

src/task.c

+14-3
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,8 @@ static void ctx_switch(jl_ptls_t ptls)
319319
}
320320

321321
// set up global state for new task
322-
lastt->world_age = ptls->world_age;
323322
ptls->pgcstack = t->gcstack;
324-
ptls->world_age = t->world_age;
323+
ptls->world_age = 0;
325324
t->gcstack = NULL;
326325
#ifdef MIGRATE_TASKS
327326
ptls->previous_task = lastt;
@@ -404,8 +403,14 @@ JL_DLLEXPORT void jl_switch(void)
404403
else if (t->tid != ptls->tid) {
405404
jl_error("cannot switch to task running on another thread");
406405
}
406+
407+
// Store old values on the stack and reset
407408
sig_atomic_t defer_signal = ptls->defer_signal;
408409
int8_t gc_state = jl_gc_unsafe_enter(ptls);
410+
size_t world_age = ptls->world_age;
411+
int finalizers_inhibited = ptls->finalizers_inhibited;
412+
ptls->world_age = 0;
413+
ptls->finalizers_inhibited = 0;
409414

410415
#ifdef ENABLE_TIMINGS
411416
jl_timing_block_t *blk = ct->timing_stack;
@@ -427,7 +432,12 @@ JL_DLLEXPORT void jl_switch(void)
427432
assert(ptls == refetch_ptls());
428433
#endif
429434

430-
ct = ptls->current_task;
435+
// Pop old values back off the stack
436+
assert(ct == ptls->current_task &&
437+
0 == ptls->world_age &&
438+
0 == ptls->finalizers_inhibited);
439+
ptls->world_age = world_age;
440+
ptls->finalizers_inhibited = finalizers_inhibited;
431441

432442
#ifdef ENABLE_TIMINGS
433443
assert(blk == ct->timing_stack);
@@ -680,6 +690,7 @@ STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void)
680690
jl_ptls_t ptls = jl_get_ptls_states();
681691
jl_task_t *t = ptls->current_task;
682692
jl_value_t *res;
693+
assert(ptls->finalizers_inhibited == 0);
683694

684695
#ifdef MIGRATE_TASKS
685696
jl_task_t *pt = ptls->previous_task;

stdlib/Distributed/src/messages.jl

+3-2
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ function flush_gc_msgs(w::Worker)
147147
end
148148

149149
# del_msgs gets populated by finalizers, so be very careful here about ordering of allocations
150+
# XXX: threading requires this to be atomic
150151
new_array = Any[]
151152
msgs = w.del_msgs
152153
w.del_msgs = new_array
@@ -178,7 +179,7 @@ function send_msg_(w::Worker, header, msg, now::Bool)
178179
wait(w.initialized)
179180
end
180181
io = w.w_stream
181-
lock(io.lock)
182+
lock(io)
182183
try
183184
reset_state(w.w_serializer)
184185
serialize_hdr_raw(io, header)
@@ -191,7 +192,7 @@ function send_msg_(w::Worker, header, msg, now::Bool)
191192
flush(io)
192193
end
193194
finally
194-
unlock(io.lock)
195+
unlock(io)
195196
end
196197
end
197198

test/core.jl

+6-4
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ using Random, SparseArrays, InteractiveUtils
66

77
const Bottom = Union{}
88

9-
109
# For curmod_*
1110
include("testenv.jl")
1211

@@ -2071,7 +2070,7 @@ mutable struct A6142 <: AbstractMatrix{Float64}; end
20712070
+(x::A6142, y::AbstractRange) = "AbstractRange method called" #16324 ambiguity
20722071

20732072
# issue #6175
2074-
function g6175(); print(""); (); end
2073+
function g6175(); GC.safepoint(); (); end
20752074
g6175(i::Real, I...) = g6175(I...)
20762075
g6175(i, I...) = tuple(length(i), g6175(I...)...)
20772076
@test g6175(1:5) === (5,)
@@ -2211,7 +2210,7 @@ day_in(obj6387)
22112210
function segfault6793(;gamma=1)
22122211
A = 1
22132212
B = 1
2214-
print()
2213+
GC.safepoint()
22152214
return
22162215
-gamma
22172216
nothing
@@ -3317,7 +3316,7 @@ function f11065()
33173316
if i == 1
33183317
z = "z is defined"
33193318
elseif i == 2
3320-
print(z)
3319+
print(z) # z is undefined
33213320
end
33223321
end
33233322
end
@@ -4234,7 +4233,10 @@ end
42344233
end
42354234
# disable GC to make sure no collection/promotion happens
42364235
# when we are constructing the objects
4236+
get_finalizers_inhibited() = ccall(:jl_gc_get_finalizers_inhibited, Int32, (Ptr{Cvoid},), C_NULL)
42374237
let gc_enabled13995 = GC.enable(false)
4238+
@assert gc_enabled13995
4239+
@assert get_finalizers_inhibited() == 0
42384240
finalized13995 = [false, false, false, false]
42394241
create_dead_object13995(finalized13995)
42404242
GC.enable(true)

0 commit comments

Comments
 (0)