Skip to content

Commit 272eaf9

Browse files
authored
Merge pull request #41733 from NHDaly/nhd-allow-reentrant-compilation-time-measurement
Make `jl_cumulative_compile_time_ns` global (and reentrant).
2 parents cc4be25 + b4ca196 commit 272eaf9

File tree

8 files changed

+64
-42
lines changed

8 files changed

+64
-42
lines changed

base/timing.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ function gc_alloc_count(diff::GC_Diff)
5555
diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
5656
end
5757

58-
# cumulative total time spent on compilation
58+
# cumulative total time spent on compilation, in nanoseconds
5959
cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ())
6060
cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())
6161

src/aotcompile.cpp

+8-8
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
278278
JL_GC_PUSH1(&src);
279279
JL_LOCK(&codegen_lock);
280280
uint64_t compiler_start_time = 0;
281-
int tid = jl_threadid();
282-
if (jl_measure_compile_time[tid])
281+
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
282+
if (measure_compile_time_enabled)
283283
compiler_start_time = jl_hrtime();
284284

285285
CompilationPolicy policy = (CompilationPolicy) _policy;
@@ -407,8 +407,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
407407
}
408408

409409
data->M = std::move(clone);
410-
if (jl_measure_compile_time[tid])
411-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
410+
if (measure_compile_time_enabled)
411+
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
412412
if (policy == CompilationPolicy::ImagingMode)
413413
imaging_mode = 0;
414414
JL_UNLOCK(&codegen_lock); // Might GC
@@ -908,8 +908,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
908908
jl_llvm_functions_t decls;
909909
JL_LOCK(&codegen_lock);
910910
uint64_t compiler_start_time = 0;
911-
int tid = jl_threadid();
912-
if (jl_measure_compile_time[tid])
911+
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
912+
if (measure_compile_time_enabled)
913913
compiler_start_time = jl_hrtime();
914914
std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);
915915

@@ -934,8 +934,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
934934
m.release(); // the return object `llvmf` will be the owning pointer
935935
}
936936
JL_GC_POP();
937-
if (jl_measure_compile_time[tid])
938-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
937+
if (measure_compile_time_enabled)
938+
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
939939
JL_UNLOCK(&codegen_lock); // Might GC
940940
if (F)
941941
return F;

src/gf.c

+8-4
Original file line numberDiff line numberDiff line change
@@ -3161,19 +3161,23 @@ int jl_has_concrete_subtype(jl_value_t *typ)
31613161
#define typeinf_lock codegen_lock
31623162

31633163
static uint64_t inference_start_time = 0;
3164+
static uint8_t inference_is_measuring_compile_time = 0;
31643165

31653166
JL_DLLEXPORT void jl_typeinf_begin(void)
31663167
{
31673168
JL_LOCK(&typeinf_lock);
3168-
if (jl_measure_compile_time[jl_threadid()])
3169+
if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
31693170
inference_start_time = jl_hrtime();
3171+
inference_is_measuring_compile_time = 1;
3172+
}
31703173
}
31713174

31723175
JL_DLLEXPORT void jl_typeinf_end(void)
31733176
{
3174-
int tid = jl_threadid();
3175-
if (typeinf_lock.count == 1 && jl_measure_compile_time[tid])
3176-
jl_cumulative_compile_time[tid] += (jl_hrtime() - inference_start_time);
3177+
if (typeinf_lock.count == 1 && inference_is_measuring_compile_time) {
3178+
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time));
3179+
inference_is_measuring_compile_time = 0;
3180+
}
31773181
JL_UNLOCK(&typeinf_lock);
31783182
}
31793183

src/jitlayers.cpp

+22-22
Original file line numberDiff line numberDiff line change
@@ -76,16 +76,16 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
7676
extern "C" JL_DLLEXPORT
7777
uint64_t jl_cumulative_compile_time_ns_before()
7878
{
79-
int tid = jl_threadid();
80-
jl_measure_compile_time[tid] = 1;
81-
return jl_cumulative_compile_time[tid];
79+
// Increment the flag to allow reentrant callers to `@time`.
80+
jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1);
81+
return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
8282
}
8383
extern "C" JL_DLLEXPORT
8484
uint64_t jl_cumulative_compile_time_ns_after()
8585
{
86-
int tid = jl_threadid();
87-
jl_measure_compile_time[tid] = 0;
88-
return jl_cumulative_compile_time[tid];
86+
// Decrement the flag when done measuring, allowing other callers to continue measuring.
87+
jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1);
88+
return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
8989
}
9090

9191
// this generates llvm code for the lambda info
@@ -231,8 +231,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
231231
{
232232
JL_LOCK(&codegen_lock);
233233
uint64_t compiler_start_time = 0;
234-
int tid = jl_threadid();
235-
if (jl_measure_compile_time[tid])
234+
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
235+
if (measure_compile_time_enabled)
236236
compiler_start_time = jl_hrtime();
237237
jl_codegen_params_t params;
238238
jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
@@ -256,8 +256,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
256256
if (success && llvmmod == NULL)
257257
jl_add_to_ee(std::unique_ptr<Module>(into));
258258
}
259-
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
260-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
259+
if (codegen_lock.count == 1 && measure_compile_time_enabled)
260+
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
261261
JL_UNLOCK(&codegen_lock);
262262
return success;
263263
}
@@ -313,8 +313,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
313313
{
314314
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
315315
uint64_t compiler_start_time = 0;
316-
int tid = jl_threadid();
317-
if (jl_measure_compile_time[tid])
316+
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
317+
if (measure_compile_time_enabled)
318318
compiler_start_time = jl_hrtime();
319319
// if we don't have any decls already, try to generate it now
320320
jl_code_info_t *src = NULL;
@@ -352,8 +352,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
352352
else {
353353
codeinst = NULL;
354354
}
355-
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
356-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
355+
if (codegen_lock.count == 1 && measure_compile_time_enabled)
356+
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
357357
JL_UNLOCK(&codegen_lock);
358358
JL_GC_POP();
359359
return codeinst;
@@ -367,8 +367,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
367367
}
368368
JL_LOCK(&codegen_lock);
369369
uint64_t compiler_start_time = 0;
370-
int tid = jl_threadid();
371-
if (jl_measure_compile_time[tid])
370+
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
371+
if (measure_compile_time_enabled)
372372
compiler_start_time = jl_hrtime();
373373
if (unspec->invoke == NULL) {
374374
jl_code_info_t *src = NULL;
@@ -396,8 +396,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
396396
}
397397
JL_GC_POP();
398398
}
399-
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
400-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
399+
if (codegen_lock.count == 1 && measure_compile_time_enabled)
400+
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
401401
JL_UNLOCK(&codegen_lock); // Might GC
402402
}
403403

@@ -420,8 +420,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
420420
// so create an exception here so we can print pretty our lies
421421
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
422422
uint64_t compiler_start_time = 0;
423-
int tid = jl_threadid();
424-
if (jl_measure_compile_time[tid])
423+
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
424+
if (measure_compile_time_enabled)
425425
compiler_start_time = jl_hrtime();
426426
specfptr = (uintptr_t)codeinst->specptr.fptr;
427427
if (specfptr == 0) {
@@ -446,8 +446,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
446446
}
447447
JL_GC_POP();
448448
}
449-
if (jl_measure_compile_time[tid])
450-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
449+
if (measure_compile_time_enabled)
450+
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
451451
JL_UNLOCK(&codegen_lock);
452452
}
453453
if (specfptr != 0)

src/julia_internal.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,9 @@ static inline uint64_t cycleclock(void)
154154

155155
#include "timing.h"
156156

157-
extern uint8_t *jl_measure_compile_time;
158-
extern uint64_t *jl_cumulative_compile_time;
157+
// Global *atomic* integers controlling *process-wide* measurement of compilation time.
158+
extern uint8_t jl_measure_compile_time_enabled;
159+
extern uint64_t jl_cumulative_compile_time;
159160

160161
#ifdef _COMPILER_MICROSOFT_
161162
# define jl_return_address() ((uintptr_t)_ReturnAddress())

src/task.c

+4-1
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,10 @@ static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_M
562562
ptls->io_wait = 0;
563563
// @time needs its compile timer disabled on error,
564564
// and cannot use a try-finally as it would break scope for assignments
565-
jl_measure_compile_time[ptls->tid] = 0;
565+
// We blindly disable compilation time tracking here, for all running Tasks, even though
566+
// it may cause some incorrect measurements. This is a known bug, and is being tracked
567+
// here: https://github.com/JuliaLang/julia/pull/39138
568+
jl_atomic_store_relaxed(&jl_measure_compile_time_enabled, 0);
566569
JL_GC_PUSH1(&exception);
567570
jl_gc_unsafe_enter(ptls);
568571
if (exception) {

src/threading.c

+2-4
Original file line numberDiff line numberDiff line change
@@ -287,8 +287,8 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
287287
#endif
288288

289289
jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
290-
uint8_t *jl_measure_compile_time = NULL;
291-
uint64_t *jl_cumulative_compile_time = NULL;
290+
uint8_t jl_measure_compile_time_enabled = 0;
291+
uint64_t jl_cumulative_compile_time = 0;
292292

293293
// return calling thread's ID
294294
// Also update the suspended_threads list in signals-mach when changing the
@@ -467,8 +467,6 @@ void jl_init_threading(void)
467467
}
468468
if (jl_n_threads <= 0)
469469
jl_n_threads = 1;
470-
jl_measure_compile_time = (uint8_t*)calloc(jl_n_threads, sizeof(*jl_measure_compile_time));
471-
jl_cumulative_compile_time = (uint64_t*)calloc(jl_n_threads, sizeof(*jl_cumulative_compile_time));
472470
#ifndef __clang_analyzer__
473471
jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
474472
#endif

test/misc.jl

+16
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,22 @@ function timev_macro_scope()
263263
end
264264
@test timev_macro_scope() == 1
265265

266+
before = Base.cumulative_compile_time_ns_before();
267+
268+
# exercise concurrent calls to `@time` for reentrant compilation time measurement.
269+
t1 = @async @time begin
270+
sleep(2)
271+
@eval module M ; f(x,y) = x+y ; end
272+
@eval M.f(2,3)
273+
end
274+
t2 = @async begin
275+
sleep(1)
276+
@time 2 + 2
277+
end
278+
279+
after = Base.cumulative_compile_time_ns_after();
280+
@test after >= before;
281+
266282
# interactive utilities
267283

268284
struct ambigconvert; end # inject a problematic `convert` method to ensure it still works

0 commit comments

Comments
 (0)