Skip to content

Commit 59aedd1

Browse files
committedDec 3, 2020
threads: avoid deadlock from recursive lock acquire
Finalizers can't safely acquire many essential locks (such as the iolock, to cleanup libuv objects) if they are run inside another lock. Therefore, inhibit all finalizers on the thread until all locks are released (previously, this was only true for our internal locks).
1 parent 49b8e61 commit 59aedd1

File tree

12 files changed

+142
-33
lines changed

12 files changed

+142
-33
lines changed
 

‎NEWS.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ Command-line option changes
7272
Multi-threading changes
7373
-----------------------
7474

75+
* Locks now automatically inhibit finalizers from running, to avoid deadlock ([#TBD]).
76+
* New function `Base.Threads.foreach(f, channel::Channel)` for multithreaded `Channel` consumption ([#34543]).
7577

7678
Build system changes
7779
--------------------
@@ -85,7 +87,6 @@ New library functions
8587
---------------------
8688

8789
* New function `Base.kron!` and corresponding overloads for various matrix types for performing Kronecker product in-place ([#31069]).
88-
* New function `Base.Threads.foreach(f, channel::Channel)` for multithreaded `Channel` consumption ([#34543]).
8990
* New function `Base.readeach(io, T)` for iteratively performing `read(io, T)` ([#36150]).
9091
* `Iterators.map` is added. It provides another syntax `Iterators.map(f, iterators...)`
9192
for writing `(f(args...) for args in zip(iterators...))`, i.e. a lazy `map` ([#34352]).

‎base/gcutils.jl

+10
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,16 @@ Control whether garbage collection is enabled using a boolean argument (`true` f
106106
"""
107107
enable(on::Bool) = ccall(:jl_gc_enable, Int32, (Int32,), on) != 0
108108

109+
"""
110+
GC.enable_finalizers(on::Bool)
111+
112+
Increment or decrement the counter that controls the running of finalizers on
113+
the current Task. Finalizers will only run when the counter is at zero. (Set
114+
`true` for enabling, `false` for disabling). They may still run concurrently on
115+
another Task or thread.
116+
"""
117+
enable_finalizers(on::Bool) = ccall(:jl_gc_enable_finalizers, Cvoid, (Ptr{Cvoid}, Int32,), C_NULL, on)
118+
109119
"""
110120
GC.@preserve x1 x2 ... xn expr
111121

‎base/lock.jl

+22-3
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,24 @@ const ThreadSynchronizer = GenericCondition{Threads.SpinLock}
66
"""
77
ReentrantLock()
88
9-
Creates a re-entrant lock for synchronizing [`Task`](@ref)s.
10-
The same task can acquire the lock as many times as required.
11-
Each [`lock`](@ref) must be matched with an [`unlock`](@ref).
9+
Creates a re-entrant lock for synchronizing [`Task`](@ref)s. The same task can
10+
acquire the lock as many times as required. Each [`lock`](@ref) must be matched
11+
with an [`unlock`](@ref).
12+
13+
Calling 'lock' will also inhibit running of finalizers on that thread until the
14+
corresponding 'unlock'. Use of the standard lock pattern illustrated below
15+
should naturally be supported, but beware of inverting the try/lock order or
16+
missing the try block entirely (e.g. attempting to return with the lock still
17+
held):
18+
19+
```
20+
lock(l)
21+
try
22+
<atomic work>
23+
finally
24+
unlock(l)
25+
end
26+
```
1227
"""
1328
mutable struct ReentrantLock <: AbstractLock
1429
locked_by::Union{Task, Nothing}
@@ -50,6 +65,7 @@ function trylock(rl::ReentrantLock)
5065
if rl.reentrancy_cnt == 0
5166
rl.locked_by = t
5267
rl.reentrancy_cnt = 1
68+
GC.enable_finalizers(false)
5369
got = true
5470
else
5571
got = false
@@ -77,6 +93,7 @@ function lock(rl::ReentrantLock)
7793
if rl.reentrancy_cnt == 0
7894
rl.locked_by = t
7995
rl.reentrancy_cnt = 1
96+
GC.enable_finalizers(false)
8097
break
8198
end
8299
try
@@ -118,6 +135,7 @@ function unlock(rl::ReentrantLock)
118135
rethrow()
119136
end
120137
end
138+
GC.enable_finalizers(true)
121139
unlock(rl.cond_wait)
122140
end
123141
return
@@ -139,6 +157,7 @@ function unlockall(rl::ReentrantLock)
139157
rethrow()
140158
end
141159
end
160+
GC.enable_finalizers(true)
142161
unlock(rl.cond_wait)
143162
return n
144163
end

‎base/locks-mt.jl

+10-1
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,12 @@ Base.assert_havelock(l::SpinLock) = islocked(l) ? nothing : Base.concurrency_vio
6161
function lock(l::SpinLock)
6262
while true
6363
if _get(l) == 0
64+
GC.enable_finalizers(false)
6465
p = _xchg!(l, 1)
6566
if p == 0
6667
return
6768
end
69+
GC.enable_finalizers(true)
6870
end
6971
ccall(:jl_cpu_pause, Cvoid, ())
7072
# Temporary solution before we have gc transition support in codegen.
@@ -74,13 +76,20 @@ end
7476

7577
function trylock(l::SpinLock)
7678
if _get(l) == 0
77-
return _xchg!(l, 1) == 0
79+
GC.enable_finalizers(false)
80+
p = _xchg!(l, 1)
81+
if p == 0
82+
return true
83+
end
84+
GC.enable_finalizers(true)
7885
end
7986
return false
8087
end
8188

8289
function unlock(l::SpinLock)
90+
_get(l) == 0 && error("unlock count must match lock count")
8391
_set!(l, 0)
92+
GC.enable_finalizers(true)
8493
ccall(:jl_cpu_wake, Cvoid, ())
8594
return
8695
end

‎src/gc.c

+28-4
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ static void run_finalizer(jl_ptls_t ptls, jl_value_t *o, jl_value_t *ff)
280280
jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: ");
281281
jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
282282
jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
283-
jlbacktrace(); // writen to STDERR_FILENO
283+
jlbacktrace(); // written to STDERR_FILENO
284284
}
285285
}
286286

@@ -392,12 +392,36 @@ static void run_finalizers(jl_ptls_t ptls)
392392
arraylist_free(&copied_list);
393393
}
394394

395+
JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
396+
{
397+
if (ptls == NULL)
398+
ptls = jl_get_ptls_states();
399+
return ptls->finalizers_inhibited;
400+
}
401+
395402
JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on)
396403
{
404+
if (ptls == NULL)
405+
ptls = jl_get_ptls_states();
397406
int old_val = ptls->finalizers_inhibited;
398407
int new_val = old_val + (on ? -1 : 1);
408+
if (new_val < 0) {
409+
JL_TRY {
410+
jl_error(""); // get a backtrace
411+
}
412+
JL_CATCH {
413+
jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
414+
// Only print the backtrace once, to avoid spamming the logs
415+
static int backtrace_printed = 0;
416+
if (backtrace_printed == 0) {
417+
backtrace_printed = 1;
418+
jlbacktrace(); // written to STDERR_FILENO
419+
}
420+
}
421+
return;
422+
}
399423
ptls->finalizers_inhibited = new_val;
400-
if (!new_val && old_val && !ptls->in_finalizer) {
424+
if (!new_val && old_val && !ptls->in_finalizer && ptls->locks.len == 0) {
401425
ptls->in_finalizer = 1;
402426
run_finalizers(ptls);
403427
ptls->in_finalizer = 0;
@@ -1581,7 +1605,7 @@ STATIC_INLINE uintptr_t gc_read_stack(void *_addr, uintptr_t offset,
15811605
JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t *vt,
15821606
jl_gc_mark_sp_t sp)
15831607
{
1584-
jl_printf(JL_STDOUT, "GC error (probable corruption) :\n");
1608+
jl_safe_printf("GC error (probable corruption) :\n");
15851609
gc_debug_print_status();
15861610
jl_(vt);
15871611
gc_debug_critical_error();
@@ -3192,7 +3216,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
31923216
// Only disable finalizers on current thread
31933217
// Doing this on all threads is racy (it's impossible to check
31943218
// or wait for finalizers on other threads without dead lock).
3195-
if (!ptls->finalizers_inhibited) {
3219+
if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
31963220
int8_t was_in_finalizer = ptls->in_finalizer;
31973221
ptls->in_finalizer = 1;
31983222
run_finalizers(ptls);

‎src/julia.h

-3
Original file line numberDiff line numberDiff line change
@@ -1730,7 +1730,6 @@ typedef struct _jl_handler_t {
17301730
int8_t gc_state;
17311731
size_t locks_len;
17321732
sig_atomic_t defer_signal;
1733-
int finalizers_inhibited;
17341733
jl_timing_block_t *timing_stack;
17351734
size_t world_age;
17361735
} jl_handler_t;
@@ -1753,8 +1752,6 @@ typedef struct _jl_task_t {
17531752
int16_t tid;
17541753
// multiqueue priority
17551754
int16_t prio;
1756-
// current world age
1757-
size_t world_age;
17581755
// saved exception stack
17591756
jl_excstack_t *excstack;
17601757
// current exception handler

‎src/locks.h

+4-5
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,9 @@ static inline void jl_lock_frame_pop(void)
8484

8585
static inline void jl_mutex_lock(jl_mutex_t *lock)
8686
{
87-
jl_ptls_t ptls = jl_get_ptls_states();
8887
JL_SIGATOMIC_BEGIN();
8988
jl_mutex_wait(lock, 1);
9089
jl_lock_frame_push(lock);
91-
jl_gc_enable_finalizers(ptls, 0);
9290
}
9391

9492
static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock)
@@ -111,10 +109,8 @@ static inline int jl_mutex_trylock(jl_mutex_t *lock)
111109
{
112110
int got = jl_mutex_trylock_nogc(lock);
113111
if (got) {
114-
jl_ptls_t ptls = jl_get_ptls_states();
115112
JL_SIGATOMIC_BEGIN();
116113
jl_lock_frame_push(lock);
117-
jl_gc_enable_finalizers(ptls, 0);
118114
}
119115
return got;
120116
}
@@ -134,9 +130,12 @@ static inline void jl_mutex_unlock(jl_mutex_t *lock)
134130
{
135131
jl_ptls_t ptls = jl_get_ptls_states();
136132
jl_mutex_unlock_nogc(lock);
137-
jl_gc_enable_finalizers(ptls, 1);
138133
jl_lock_frame_pop();
139134
JL_SIGATOMIC_END();
135+
if (ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) {
136+
ptls->finalizers_inhibited = 1;
137+
jl_gc_enable_finalizers(ptls, 1); // call run_finalizers (may GC)
138+
}
140139
}
141140

142141
static inline void jl_mutex_init(jl_mutex_t *lock) JL_NOTSAFEPOINT

‎src/rtutils.c

+8-4
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,6 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
218218
eh->gc_state = ptls->gc_state;
219219
eh->locks_len = ptls->locks.len;
220220
eh->defer_signal = ptls->defer_signal;
221-
eh->finalizers_inhibited = ptls->finalizers_inhibited;
222221
eh->world_age = ptls->world_age;
223222
current_task->eh = eh;
224223
#ifdef ENABLE_TIMINGS
@@ -249,14 +248,14 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
249248
current_task->eh = eh->prev;
250249
ptls->pgcstack = eh->gcstack;
251250
small_arraylist_t *locks = &ptls->locks;
252-
if (locks->len > eh->locks_len) {
253-
for (size_t i = locks->len;i > eh->locks_len;i--)
251+
int unlocks = locks->len > eh->locks_len;
252+
if (unlocks) {
253+
for (size_t i = locks->len; i > eh->locks_len; i--)
254254
jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]);
255255
locks->len = eh->locks_len;
256256
}
257257
ptls->world_age = eh->world_age;
258258
ptls->defer_signal = eh->defer_signal;
259-
ptls->finalizers_inhibited = eh->finalizers_inhibited;
260259
if (old_gc_state != eh->gc_state) {
261260
jl_atomic_store_release(&ptls->gc_state, eh->gc_state);
262261
if (old_gc_state) {
@@ -266,6 +265,11 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
266265
if (old_defer_signal && !eh->defer_signal) {
267266
jl_sigint_safepoint(ptls);
268267
}
268+
if (unlocks && eh->locks_len == 0 && ptls->finalizers_inhibited == 0) {
269+
// call run_finalizers
270+
ptls->finalizers_inhibited = 1;
271+
jl_gc_enable_finalizers(ptls, 1);
272+
}
269273
}
270274

271275
JL_DLLEXPORT void jl_pop_handler(int n)

‎src/task.c

+14-3
Original file line numberDiff line numberDiff line change
@@ -393,9 +393,8 @@ static void ctx_switch(jl_ptls_t ptls)
393393
}
394394

395395
// set up global state for new task
396-
lastt->world_age = ptls->world_age;
397396
ptls->pgcstack = t->gcstack;
398-
ptls->world_age = t->world_age;
397+
ptls->world_age = 0;
399398
t->gcstack = NULL;
400399
#ifdef MIGRATE_TASKS
401400
ptls->previous_task = lastt;
@@ -510,8 +509,14 @@ JL_DLLEXPORT void jl_switch(void)
510509
else if (t->tid != ptls->tid) {
511510
jl_error("cannot switch to task running on another thread");
512511
}
512+
513+
// Store old values on the stack and reset
513514
sig_atomic_t defer_signal = ptls->defer_signal;
514515
int8_t gc_state = jl_gc_unsafe_enter(ptls);
516+
size_t world_age = ptls->world_age;
517+
int finalizers_inhibited = ptls->finalizers_inhibited;
518+
ptls->world_age = 0;
519+
ptls->finalizers_inhibited = 0;
515520

516521
#ifdef ENABLE_TIMINGS
517522
jl_timing_block_t *blk = ct->timing_stack;
@@ -533,7 +538,12 @@ JL_DLLEXPORT void jl_switch(void)
533538
assert(ptls == refetch_ptls());
534539
#endif
535540

536-
ct = ptls->current_task;
541+
// Pop old values back off the stack
542+
assert(ct == ptls->current_task &&
543+
0 == ptls->world_age &&
544+
0 == ptls->finalizers_inhibited);
545+
ptls->world_age = world_age;
546+
ptls->finalizers_inhibited = finalizers_inhibited;
537547

538548
#ifdef ENABLE_TIMINGS
539549
assert(blk == ct->timing_stack);
@@ -796,6 +806,7 @@ STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void)
796806
jl_ptls_t ptls = jl_get_ptls_states();
797807
jl_task_t *t = ptls->current_task;
798808
jl_value_t *res;
809+
assert(ptls->finalizers_inhibited == 0);
799810

800811
#ifdef MIGRATE_TASKS
801812
jl_task_t *pt = ptls->previous_task;

‎stdlib/Distributed/src/messages.jl

+3-2
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ function flush_gc_msgs(w::Worker)
135135
end
136136

137137
# del_msgs gets populated by finalizers, so be very careful here about ordering of allocations
138+
# XXX: threading requires this to be atomic
138139
new_array = Any[]
139140
msgs = w.del_msgs
140141
w.del_msgs = new_array
@@ -166,7 +167,7 @@ function send_msg_(w::Worker, header, msg, now::Bool)
166167
wait(w.initialized)
167168
end
168169
io = w.w_stream
169-
lock(io.lock)
170+
lock(io)
170171
try
171172
reset_state(w.w_serializer)
172173
serialize_hdr_raw(io, header)
@@ -179,7 +180,7 @@ function send_msg_(w::Worker, header, msg, now::Bool)
179180
flush(io)
180181
end
181182
finally
182-
unlock(io.lock)
183+
unlock(io)
183184
end
184185
end
185186

0 commit comments

Comments
 (0)
Please sign in to comment.