From df32b326d6fef5c6d05d8dff7544a5198f5b1282 Mon Sep 17 00:00:00 2001 From: Nathan Daly <nhdaly@gmail.com> Date: Thu, 26 Aug 2021 17:37:21 -0400 Subject: [PATCH 1/4] Allow for permanent tracking of compilation times In situations where we run long-running server that spends non-trivial amount of time in the compilation we are happy to pay the cost to have this instrumentation always on. This PR allows us to enable permanent tracking (effectively switching the before/after switches moot) by calling `jl_track_compile_time_permanently()` method, and then providing `jl_cumulative_compile_time_ns_total()` to return the total time. Co-Authored-By: janrous-rai <jan.rous@relational.ai> --- src/jitlayers.cpp | 27 +++++++++++++++++++++++---- src/julia_internal.h | 7 ++++--- src/task.c | 12 ++++++++---- src/threading.c | 1 + 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 60b1903aaa802..8444a155c75ac 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -74,20 +74,39 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals) } extern "C" JL_DLLEXPORT -uint64_t jl_cumulative_compile_time_ns_before() +void jl_track_compile_time_permanently() { - // Increment the flag to allow reentrant callers to `@time`. + jl_always_measure_compile_time = 1; jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1); +} + +extern "C" JL_DLLEXPORT +uint64_t jl_cumulative_compile_time_ns_before() +{ + if (!jl_always_measure_compile_time) { + // Increment the flag to allow reentrant callers to `@time`. + jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1); + } return jl_atomic_load_relaxed(&jl_cumulative_compile_time); } + extern "C" JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_after() { - // Decrement the flag when done measuring, allowing other callers to continue measuring. - jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1); + if (!jl_always_measure_compile_time) { + // Decrement the flag when done measuring, allowing other callers to continue measuring. + jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1); + } return jl_atomic_load_relaxed(&jl_cumulative_compile_time); } +extern "C" JL_DLLEXPORT +uint64_t jl_cumulative_compile_time_ns_total() +{ + return jl_atomic_load_relaxed(&jl_cumulative_compile_time); +} + + // this generates llvm code for the lambda info // and adds the result to the jitlayers // (and the shadow module), diff --git a/src/julia_internal.h b/src/julia_internal.h index a7943e11a8067..760e74047733e 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -154,9 +154,10 @@ static inline uint64_t cycleclock(void) #include "timing.h" -// Global *atomic* integers controlling *process-wide* measurement of compilation time. -extern uint8_t jl_measure_compile_time_enabled; -extern uint64_t jl_cumulative_compile_time; +// Global variables controlling *process-wide* measurement of compilation time. +extern uint8_t jl_always_measure_compile_time; +extern uint8_t jl_measure_compile_time_enabled; // *atomic* +extern uint64_t jl_cumulative_compile_time; // *atomic* #ifdef _COMPILER_MICROSOFT_ # define jl_return_address() ((uintptr_t)_ReturnAddress()) diff --git a/src/task.c b/src/task.c index 88d4eac0863c9..ba39ce69b4b13 100644 --- a/src/task.c +++ b/src/task.c @@ -562,10 +562,14 @@ static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_M ptls->io_wait = 0; // @time needs its compile timer disabled on error, // and cannot use a try-finally as it would break scope for assignments - // We blindly disable compilation time tracking here, for all running Tasks, even though - // it may cause some incorrect measurements. This is a known bug, and is being tracked - // here: https://github.com/JuliaLang/julia/pull/39138 - jl_atomic_store_relaxed(&jl_measure_compile_time_enabled, 0); + // Though if the user has requested to _always_ measure compile time, we don't need to + // enable or disable it. + if (!jl_always_measure_compile_time) { + // We blindly disable compilation time tracking here, for all running Tasks, even + // though it may cause some incorrect measurements. This is a known bug, and is + // being tracked here: https://github.com/JuliaLang/julia/pull/39138 + jl_atomic_store_relaxed(&jl_measure_compile_time_enabled, 0); + } JL_GC_PUSH1(&exception); jl_gc_unsafe_enter(ptls); if (exception) { diff --git a/src/threading.c b/src/threading.c index ffe53c07b45ee..dddde238682c3 100644 --- a/src/threading.c +++ b/src/threading.c @@ -287,6 +287,7 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) #endif jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED; +uint8_t jl_always_measure_compile_time = 0; uint8_t jl_measure_compile_time_enabled = 0; uint64_t jl_cumulative_compile_time = 0; From e4c5b95e4de7f18f46d39be341f1896c53222049 Mon Sep 17 00:00:00 2001 From: Nathan Daly <nhdaly@gmail.com> Date: Thu, 26 Aug 2021 17:46:30 -0400 Subject: [PATCH 2/4] Add julia bindings & tests for cumulative_compile_time_ns_total() and track_compile_time_permanently() --- base/timing.jl | 3 +++ test/misc.jl | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/base/timing.jl b/base/timing.jl index 45a27e3378977..22daf14cfc7fc 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -59,6 +59,9 @@ end cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ()) cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ()) +track_compile_time_permanently() = ccall(:jl_track_compile_time_permanently, UInt64, ()) +cumulative_compile_time_ns_total() = ccall(:jl_cumulative_compile_time_ns_total, UInt64, ()) + # total time spend in garbage collection, in nanoseconds gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ()) diff --git a/test/misc.jl b/test/misc.jl index 94c35c43ffaec..b8ba13001c0a2 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -264,6 +264,7 @@ end @test timev_macro_scope() == 1 before = Base.cumulative_compile_time_ns_before(); +@test Base.cumulative_compile_time_ns_total() >= before; # exercise concurrent calls to `@time` for reentrant compilation time measurement. t1 = @async @time begin @@ -279,6 +280,28 @@ end after = Base.cumulative_compile_time_ns_after(); @test after >= before; +@test Base.cumulative_compile_time_ns_total() >= after; + +@testset "cumulative_compile_time_ns_total()" begin + # Global compilation tracking is disabled, so total time doesn't increment + ct1 = Base.cumulative_compile_time_ns_total() + @eval module M ; f(x,y) = x+y ; end + @eval M.f(2,3) + ct2 = Base.cumulative_compile_time_ns_total() + @test ct2 == ct1 + + # !!! Note that this cannot be undone !!! + Base.track_compile_time_permanently() + + # With global compilation tracking *enabled*, total time does increment + ct1 = Base.cumulative_compile_time_ns_total() + @eval module M ; f(x,y) = x+y ; end + @eval M.f(2,3) + ct2 = Base.cumulative_compile_time_ns_total() + @test ct2 > ct1 +end + + # interactive utilities struct ambigconvert; end # inject a problematic `convert` method to ensure it still works From 42c6b6ec7b4527cbb3414a4ff4d742fb146865bb Mon Sep 17 00:00:00 2001 From: Nathan Daly <nhdaly@gmail.com> Date: Thu, 26 Aug 2021 17:42:12 -0400 Subject: [PATCH 3/4] Add support for measuring compile time per thread Restores the ability to also track per-thread compilation time in addition to the global time we created in #41733. Co-Authored-By: janrous-rai <jan.rous@relational.ai> --- src/jitlayers.cpp | 6 ++++++ src/julia_internal.h | 1 + src/threading.c | 2 ++ 3 files changed, 9 insertions(+) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 8444a155c75ac..5e28ef204c7e1 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -106,6 +106,12 @@ uint64_t jl_cumulative_compile_time_ns_total() return jl_atomic_load_relaxed(&jl_cumulative_compile_time); } +extern "C" JL_DLLEXPORT +uint64_t jl_cumulative_compile_time_ns_for_current_thread() +{ + return jl_cumulative_compile_time_per_thread[jl_threadid()]; +} + // this generates llvm code for the lambda info // and adds the result to the jitlayers diff --git a/src/julia_internal.h b/src/julia_internal.h index 760e74047733e..99c25824c8885 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -158,6 +158,7 @@ static inline uint64_t cycleclock(void) extern uint8_t jl_always_measure_compile_time; extern uint8_t jl_measure_compile_time_enabled; // *atomic* extern uint64_t jl_cumulative_compile_time; // *atomic* +extern uint64_t *jl_cumulative_compile_time_per_thread; #ifdef _COMPILER_MICROSOFT_ # define jl_return_address() ((uintptr_t)_ReturnAddress()) diff --git a/src/threading.c b/src/threading.c index dddde238682c3..0f663e14a0386 100644 --- a/src/threading.c +++ b/src/threading.c @@ -290,6 +290,7 @@ jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED; uint8_t jl_always_measure_compile_time = 0; uint8_t jl_measure_compile_time_enabled = 0; uint64_t jl_cumulative_compile_time = 0; +uint64_t *jl_cumulative_compile_time_per_thread = NULL; // return calling thread's ID // Also update the suspended_threads list in signals-mach when changing the @@ -468,6 +469,7 @@ void jl_init_threading(void) } if (jl_n_threads <= 0) jl_n_threads = 1; + jl_cumulative_compile_time_per_thread = (uint64_t*)calloc(jl_n_threads, sizeof(*jl_cumulative_compile_time_per_thread)); #ifndef __clang_analyzer__ jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*)); #endif From d3e1850d879834edff1d3c19d5177492ba01a22e Mon Sep 17 00:00:00 2001 From: Nathan Daly <nhdaly@gmail.com> Date: Thu, 26 Aug 2021 17:59:33 -0400 Subject: [PATCH 4/4] Add docstrings --- base/timing.jl | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/base/timing.jl b/base/timing.jl index 22daf14cfc7fc..2132657c864c7 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -59,7 +59,32 @@ end cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ()) cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ()) +""" + Base.track_compile_time_permanently() + +Permanently enable tracking of time spent in compilation by Julia. + +Julia has the ability to measure the amount of time spent during compilation (including +type-inference, optimization, llvm optimizaiton, codegen, etc). However, on some systems +(FreeBSD-based systems are the known problems), this measurment can be too expensive, so +it is not enabled by default. + +Calling this function will globally enable tracking the cumulative compilation time. + +You can fetch the current total cumulative time spent in compilation by calling: +- [`Base.cumulative_compile_time_ns_total()`](@ref) +""" track_compile_time_permanently() = ccall(:jl_track_compile_time_permanently, UInt64, ()) +""" + Base.cumulative_compile_time_ns_total() + +The current total cumulative time Julia has spent in compilation, in nanoseconds. + +To enable this global measurement, you must call +[`Base.track_compile_time_permanently()`](@ref) after starting this julia process. +Otherwise, this function will only return the total time spent in compilation during calls +to [`Base.@time`](@ref). +""" cumulative_compile_time_ns_total() = ccall(:jl_cumulative_compile_time_ns_total, UInt64, ()) # total time spend in garbage collection, in nanoseconds