Skip to content

Commit 0253ebf

Browse files
authored
Merge pull request #38487 from JuliaLang/jn/lock-finalizers
threads: avoid deadlock from recursive lock acquire
2 parents 5948ee6 + 72bebdf commit 0253ebf

16 files changed

+162
-57
lines changed

NEWS.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ Command-line option changes
7272
Multi-threading changes
7373
-----------------------
7474

75+
* Locks now automatically inhibit finalizers from running, to avoid deadlock ([#TBD]).
76+
* New function `Base.Threads.foreach(f, channel::Channel)` for multithreaded `Channel` consumption ([#34543]).
7577

7678
Build system changes
7779
--------------------
@@ -85,7 +87,6 @@ New library functions
8587
---------------------
8688

8789
* New function `Base.kron!` and corresponding overloads for various matrix types for performing Kronecker product in-place ([#31069]).
88-
* New function `Base.Threads.foreach(f, channel::Channel)` for multithreaded `Channel` consumption ([#34543]).
8990
* New function `Base.readeach(io, T)` for iteratively performing `read(io, T)` ([#36150]).
9091
* `Iterators.map` is added. It provides another syntax `Iterators.map(f, iterators...)`
9192
for writing `(f(args...) for args in zip(iterators...))`, i.e. a lazy `map` ([#34352]).

base/gcutils.jl

+10
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,16 @@ Control whether garbage collection is enabled using a boolean argument (`true` f
106106
"""
107107
enable(on::Bool) = ccall(:jl_gc_enable, Int32, (Int32,), on) != 0
108108

109+
"""
110+
GC.enable_finalizers(on::Bool)
111+
112+
Increment or decrement the counter that controls the running of finalizers on
113+
the current Task. Finalizers will only run when the counter is at zero. (Set
114+
`true` for enabling, `false` for disabling). They may still run concurrently on
115+
another Task or thread.
116+
"""
117+
enable_finalizers(on::Bool) = ccall(:jl_gc_enable_finalizers, Cvoid, (Ptr{Cvoid}, Int32,), C_NULL, on)
118+
109119
"""
110120
GC.@preserve x1 x2 ... xn expr
111121

base/lock.jl

+22-3
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,24 @@ const ThreadSynchronizer = GenericCondition{Threads.SpinLock}
66
"""
77
ReentrantLock()
88
9-
Creates a re-entrant lock for synchronizing [`Task`](@ref)s.
10-
The same task can acquire the lock as many times as required.
11-
Each [`lock`](@ref) must be matched with an [`unlock`](@ref).
9+
Creates a re-entrant lock for synchronizing [`Task`](@ref)s. The same task can
10+
acquire the lock as many times as required. Each [`lock`](@ref) must be matched
11+
with an [`unlock`](@ref).
12+
13+
Calling 'lock' will also inhibit running of finalizers on that thread until the
14+
corresponding 'unlock'. Use of the standard lock pattern illustrated below
15+
should naturally be supported, but beware of inverting the try/lock order or
16+
missing the try block entirely (e.g. attempting to return with the lock still
17+
held):
18+
19+
```
20+
lock(l)
21+
try
22+
<atomic work>
23+
finally
24+
unlock(l)
25+
end
26+
```
1227
"""
1328
mutable struct ReentrantLock <: AbstractLock
1429
locked_by::Union{Task, Nothing}
@@ -50,6 +65,7 @@ function trylock(rl::ReentrantLock)
5065
if rl.reentrancy_cnt == 0
5166
rl.locked_by = t
5267
rl.reentrancy_cnt = 1
68+
GC.enable_finalizers(false)
5369
got = true
5470
else
5571
got = false
@@ -77,6 +93,7 @@ function lock(rl::ReentrantLock)
7793
if rl.reentrancy_cnt == 0
7894
rl.locked_by = t
7995
rl.reentrancy_cnt = 1
96+
GC.enable_finalizers(false)
8097
break
8198
end
8299
try
@@ -118,6 +135,7 @@ function unlock(rl::ReentrantLock)
118135
rethrow()
119136
end
120137
end
138+
GC.enable_finalizers(true)
121139
unlock(rl.cond_wait)
122140
end
123141
return
@@ -139,6 +157,7 @@ function unlockall(rl::ReentrantLock)
139157
rethrow()
140158
end
141159
end
160+
GC.enable_finalizers(true)
142161
unlock(rl.cond_wait)
143162
return n
144163
end

base/locks-mt.jl

+10-1
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,12 @@ Base.assert_havelock(l::SpinLock) = islocked(l) ? nothing : Base.concurrency_vio
6161
function lock(l::SpinLock)
6262
while true
6363
if _get(l) == 0
64+
GC.enable_finalizers(false)
6465
p = _xchg!(l, 1)
6566
if p == 0
6667
return
6768
end
69+
GC.enable_finalizers(true)
6870
end
6971
ccall(:jl_cpu_pause, Cvoid, ())
7072
# Temporary solution before we have gc transition support in codegen.
@@ -74,13 +76,20 @@ end
7476

7577
function trylock(l::SpinLock)
7678
if _get(l) == 0
77-
return _xchg!(l, 1) == 0
79+
GC.enable_finalizers(false)
80+
p = _xchg!(l, 1)
81+
if p == 0
82+
return true
83+
end
84+
GC.enable_finalizers(true)
7885
end
7986
return false
8087
end
8188

8289
function unlock(l::SpinLock)
90+
_get(l) == 0 && error("unlock count must match lock count")
8391
_set!(l, 0)
92+
GC.enable_finalizers(true)
8493
ccall(:jl_cpu_wake, Cvoid, ())
8594
return
8695
end

src/gc.c

+28-4
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ static void run_finalizer(jl_ptls_t ptls, jl_value_t *o, jl_value_t *ff)
280280
jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: ");
281281
jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
282282
jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
283-
jlbacktrace(); // writen to STDERR_FILENO
283+
jlbacktrace(); // written to STDERR_FILENO
284284
}
285285
}
286286

@@ -392,12 +392,36 @@ static void run_finalizers(jl_ptls_t ptls)
392392
arraylist_free(&copied_list);
393393
}
394394

395+
JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
396+
{
397+
if (ptls == NULL)
398+
ptls = jl_get_ptls_states();
399+
return ptls->finalizers_inhibited;
400+
}
401+
395402
JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on)
396403
{
404+
if (ptls == NULL)
405+
ptls = jl_get_ptls_states();
397406
int old_val = ptls->finalizers_inhibited;
398407
int new_val = old_val + (on ? -1 : 1);
408+
if (new_val < 0) {
409+
JL_TRY {
410+
jl_error(""); // get a backtrace
411+
}
412+
JL_CATCH {
413+
jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
414+
// Only print the backtrace once, to avoid spamming the logs
415+
static int backtrace_printed = 0;
416+
if (backtrace_printed == 0) {
417+
backtrace_printed = 1;
418+
jlbacktrace(); // written to STDERR_FILENO
419+
}
420+
}
421+
return;
422+
}
399423
ptls->finalizers_inhibited = new_val;
400-
if (!new_val && old_val && !ptls->in_finalizer) {
424+
if (!new_val && old_val && !ptls->in_finalizer && ptls->locks.len == 0) {
401425
ptls->in_finalizer = 1;
402426
run_finalizers(ptls);
403427
ptls->in_finalizer = 0;
@@ -1581,7 +1605,7 @@ STATIC_INLINE uintptr_t gc_read_stack(void *_addr, uintptr_t offset,
15811605
JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t *vt,
15821606
jl_gc_mark_sp_t sp)
15831607
{
1584-
jl_printf(JL_STDOUT, "GC error (probable corruption) :\n");
1608+
jl_safe_printf("GC error (probable corruption) :\n");
15851609
gc_debug_print_status();
15861610
jl_(vt);
15871611
gc_debug_critical_error();
@@ -3192,7 +3216,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
31923216
// Only disable finalizers on current thread
31933217
// Doing this on all threads is racy (it's impossible to check
31943218
// or wait for finalizers on other threads without dead lock).
3195-
if (!ptls->finalizers_inhibited) {
3219+
if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
31963220
int8_t was_in_finalizer = ptls->in_finalizer;
31973221
ptls->in_finalizer = 1;
31983222
run_finalizers(ptls);

src/init.c

-3
Original file line numberDiff line numberDiff line change
@@ -745,9 +745,6 @@ void _julia_init(JL_IMAGE_SEARCH rel)
745745

746746
jl_init_tasks();
747747
jl_init_root_task(stack_lo, stack_hi);
748-
#ifdef ENABLE_TIMINGS
749-
jl_root_task->timing_stack = jl_root_timing;
750-
#endif
751748
jl_init_common_symbols();
752749
jl_init_flisp();
753750
jl_init_serializer();

src/julia.h

-5
Original file line numberDiff line numberDiff line change
@@ -1730,7 +1730,6 @@ typedef struct _jl_handler_t {
17301730
int8_t gc_state;
17311731
size_t locks_len;
17321732
sig_atomic_t defer_signal;
1733-
int finalizers_inhibited;
17341733
jl_timing_block_t *timing_stack;
17351734
size_t world_age;
17361735
} jl_handler_t;
@@ -1753,8 +1752,6 @@ typedef struct _jl_task_t {
17531752
int16_t tid;
17541753
// multiqueue priority
17551754
int16_t prio;
1756-
// current world age
1757-
size_t world_age;
17581755
// saved exception stack
17591756
jl_excstack_t *excstack;
17601757
// current exception handler
@@ -1768,8 +1765,6 @@ typedef struct _jl_task_t {
17681765

17691766
// saved gc stack top for context switches
17701767
jl_gcframe_t *gcstack;
1771-
1772-
jl_timing_block_t *timing_stack;
17731768
} jl_task_t;
17741769

17751770
#define JL_TASK_STATE_RUNNABLE 0

src/julia_threads.h

+1
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ struct _jl_tls_states_t {
207207
struct _jl_task_t *previous_task;
208208
#endif
209209
struct _jl_task_t *root_task;
210+
struct _jl_timing_block_t *timing_stack;
210211
void *stackbase;
211212
size_t stacksize;
212213
jl_ucontext_t base_ctx; // base context of stack

src/locks.h

+4-5
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,9 @@ static inline void jl_lock_frame_pop(void)
8484

8585
static inline void jl_mutex_lock(jl_mutex_t *lock)
8686
{
87-
jl_ptls_t ptls = jl_get_ptls_states();
8887
JL_SIGATOMIC_BEGIN();
8988
jl_mutex_wait(lock, 1);
9089
jl_lock_frame_push(lock);
91-
jl_gc_enable_finalizers(ptls, 0);
9290
}
9391

9492
static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock)
@@ -111,10 +109,8 @@ static inline int jl_mutex_trylock(jl_mutex_t *lock)
111109
{
112110
int got = jl_mutex_trylock_nogc(lock);
113111
if (got) {
114-
jl_ptls_t ptls = jl_get_ptls_states();
115112
JL_SIGATOMIC_BEGIN();
116113
jl_lock_frame_push(lock);
117-
jl_gc_enable_finalizers(ptls, 0);
118114
}
119115
return got;
120116
}
@@ -134,9 +130,12 @@ static inline void jl_mutex_unlock(jl_mutex_t *lock)
134130
{
135131
jl_ptls_t ptls = jl_get_ptls_states();
136132
jl_mutex_unlock_nogc(lock);
137-
jl_gc_enable_finalizers(ptls, 1);
138133
jl_lock_frame_pop();
139134
JL_SIGATOMIC_END();
135+
if (ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) {
136+
ptls->finalizers_inhibited = 1;
137+
jl_gc_enable_finalizers(ptls, 1); // call run_finalizers (may GC)
138+
}
140139
}
141140

142141
static inline void jl_mutex_init(jl_mutex_t *lock) JL_NOTSAFEPOINT

src/rtutils.c

+9-5
Original file line numberDiff line numberDiff line change
@@ -218,11 +218,10 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
218218
eh->gc_state = ptls->gc_state;
219219
eh->locks_len = ptls->locks.len;
220220
eh->defer_signal = ptls->defer_signal;
221-
eh->finalizers_inhibited = ptls->finalizers_inhibited;
222221
eh->world_age = ptls->world_age;
223222
current_task->eh = eh;
224223
#ifdef ENABLE_TIMINGS
225-
eh->timing_stack = current_task->timing_stack;
224+
eh->timing_stack = ptls->timing_stack;
226225
#endif
227226
}
228227

@@ -249,14 +248,14 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
249248
current_task->eh = eh->prev;
250249
ptls->pgcstack = eh->gcstack;
251250
small_arraylist_t *locks = &ptls->locks;
252-
if (locks->len > eh->locks_len) {
253-
for (size_t i = locks->len;i > eh->locks_len;i--)
251+
int unlocks = locks->len > eh->locks_len;
252+
if (unlocks) {
253+
for (size_t i = locks->len; i > eh->locks_len; i--)
254254
jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]);
255255
locks->len = eh->locks_len;
256256
}
257257
ptls->world_age = eh->world_age;
258258
ptls->defer_signal = eh->defer_signal;
259-
ptls->finalizers_inhibited = eh->finalizers_inhibited;
260259
if (old_gc_state != eh->gc_state) {
261260
jl_atomic_store_release(&ptls->gc_state, eh->gc_state);
262261
if (old_gc_state) {
@@ -266,6 +265,11 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
266265
if (old_defer_signal && !eh->defer_signal) {
267266
jl_sigint_safepoint(ptls);
268267
}
268+
if (unlocks && eh->locks_len == 0 && ptls->finalizers_inhibited == 0) {
269+
// call run_finalizers
270+
ptls->finalizers_inhibited = 1;
271+
jl_gc_enable_finalizers(ptls, 1);
272+
}
269273
}
270274

271275
JL_DLLEXPORT void jl_pop_handler(int n)

src/task.c

+19-9
Original file line numberDiff line numberDiff line change
@@ -393,9 +393,8 @@ static void ctx_switch(jl_ptls_t ptls)
393393
}
394394

395395
// set up global state for new task
396-
lastt->world_age = ptls->world_age;
397396
ptls->pgcstack = t->gcstack;
398-
ptls->world_age = t->world_age;
397+
ptls->world_age = 0;
399398
t->gcstack = NULL;
400399
#ifdef MIGRATE_TASKS
401400
ptls->previous_task = lastt;
@@ -510,13 +509,20 @@ JL_DLLEXPORT void jl_switch(void)
510509
else if (t->tid != ptls->tid) {
511510
jl_error("cannot switch to task running on another thread");
512511
}
512+
513+
// Store old values on the stack and reset
513514
sig_atomic_t defer_signal = ptls->defer_signal;
514515
int8_t gc_state = jl_gc_unsafe_enter(ptls);
516+
size_t world_age = ptls->world_age;
517+
int finalizers_inhibited = ptls->finalizers_inhibited;
518+
ptls->world_age = 0;
519+
ptls->finalizers_inhibited = 0;
515520

516521
#ifdef ENABLE_TIMINGS
517-
jl_timing_block_t *blk = ct->timing_stack;
522+
jl_timing_block_t *blk = ptls->timing_stack;
518523
if (blk)
519524
jl_timing_block_stop(blk);
525+
ptls->timing_stack = NULL;
520526
#endif
521527

522528
ctx_switch(ptls);
@@ -533,10 +539,16 @@ JL_DLLEXPORT void jl_switch(void)
533539
assert(ptls == refetch_ptls());
534540
#endif
535541

536-
ct = ptls->current_task;
542+
// Pop old values back off the stack
543+
assert(ct == ptls->current_task &&
544+
0 == ptls->world_age &&
545+
0 == ptls->finalizers_inhibited);
546+
ptls->world_age = world_age;
547+
ptls->finalizers_inhibited = finalizers_inhibited;
537548

538549
#ifdef ENABLE_TIMINGS
539-
assert(blk == ct->timing_stack);
550+
assert(ptls->timing_stack == NULL);
551+
ptls->timing_stack = blk;
540552
if (blk)
541553
jl_timing_block_start(blk);
542554
#else
@@ -595,7 +607,7 @@ static void JL_NORETURN throw_internal(jl_value_t *exception JL_MAYBE_UNROOTED)
595607
jl_handler_t *eh = ptls->current_task->eh;
596608
if (eh != NULL) {
597609
#ifdef ENABLE_TIMINGS
598-
jl_timing_block_t *cur_block = ptls->current_task->timing_stack;
610+
jl_timing_block_t *cur_block = ptls->timing_stack;
599611
while (cur_block && eh->timing_stack != cur_block) {
600612
cur_block = jl_pop_timing_block(cur_block);
601613
}
@@ -696,9 +708,6 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
696708
t->started = 0;
697709
t->prio = -1;
698710
t->tid = -1;
699-
#ifdef ENABLE_TIMINGS
700-
t->timing_stack = jl_root_timing;
701-
#endif
702711

703712
#if defined(JL_DEBUG_BUILD)
704713
if (!t->copy_stack)
@@ -801,6 +810,7 @@ STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void)
801810
jl_ptls_t ptls = jl_get_ptls_states();
802811
jl_task_t *t = ptls->current_task;
803812
jl_value_t *res;
813+
assert(ptls->finalizers_inhibited == 0);
804814

805815
#ifdef MIGRATE_TASKS
806816
jl_task_t *pt = ptls->previous_task;

0 commit comments

Comments
 (0)