diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 45a414d..1ac5b5c 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -19,8 +19,8 @@ jobs: fail-fast: false matrix: version: - - '1.9' - - '~1.10.0-0' + - '1.10' + - 'pre' - 'nightly' os: - ubuntu-latest @@ -28,7 +28,7 @@ jobs: - x64 steps: - uses: actions/checkout@v3 - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} diff --git a/Project.toml b/Project.toml index a7959b8..803eb90 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "AllocCheck" uuid = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a" authors = ["JuliaHub Inc."] -version = "0.1.3" +version = "0.2.0" [deps] ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04" @@ -10,11 +10,11 @@ LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" [compat] -GPUCompiler = "0.24, 0.25, 0.26" -LLVM = "6.3" +GPUCompiler = "0.27" +LLVM = "9.1" ExprTools = "0.1" MacroTools = "0.5" -julia = "1.9" +julia = "1.10" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/AllocCheck.jl b/src/AllocCheck.jl index 6229dcd..5834dd5 100644 --- a/src/AllocCheck.jl +++ b/src/AllocCheck.jl @@ -93,12 +93,28 @@ Find all static allocation sites in the provided LLVM IR. This function modifies the LLVM module in-place, effectively trashing it. """ -function find_allocs!(mod::LLVM.Module, meta; ignore_throw=true) +function find_allocs!(mod::LLVM.Module, meta, entry_name::String; ignore_throw=true, invoke_entry=false) (; entry, compiled) = meta errors = [] + entry = LLVM.ModuleFunctionSet(mod)[entry_name] worklist = LLVM.Function[ entry ] seen = LLVM.Function[ entry ] + if invoke_entry + @assert startswith(name(entry), "jfptr") + f = pop!(worklist) + for block in blocks(f) + for inst in instructions(block) + if isa(inst, LLVM.CallInst) + decl = called_operand(inst) + if decl isa LLVM.Function && length(blocks(decl)) > 0 && !in(decl, seen) + push!(worklist, decl) + push!(seen, decl) + end + end + end + end + end while !isempty(worklist) f = pop!(worklist) @@ -202,12 +218,14 @@ function check_allocs(@nospecialize(func), @nospecialize(types); ignore_throw=tr end source = GPUCompiler.methodinstance(Base._stable_typeof(func), Base.to_tuple_type(types)) target = DefaultCompilerTarget() - job = CompilerJob(source, config) + job = CompilerJob(source, alloc_config(:specfunc)) allocs = JuliaContext() do ctx mod, meta = GPUCompiler.compile(:llvm, job, validate=false, optimize=false, cleanup=false) - optimize!(job, mod) + (; entry, compiled) = meta + entry_name = name(entry) + optimize!(mod) - allocs = find_allocs!(mod, meta; ignore_throw) + allocs = find_allocs!(mod, meta, entry_name; ignore_throw, invoke_entry=false) # display(mod) # dispose(mod) allocs diff --git a/src/classify.jl b/src/classify.jl index 938d902..72b59cf 100644 --- a/src/classify.jl +++ b/src/classify.jl @@ -20,7 +20,8 @@ function classify_runtime_fn(name::AbstractString; ignore_throw::Bool) if name in ("alloc_genericmemory", "genericmemory_copy", "genericmemory_copy_slice", "string_to_genericmemory", "ptr_to_genericmemory", "array_copy", "alloc_string", "alloc_array_1d", "alloc_array_2d", "alloc_array_3d", "gc_alloc_typed", - "gc_pool_alloc", "gc_pool_alloc_instrumented", "gc_big_alloc_instrumented" + "gc_small_alloc", "gc_pool_alloc", "gc_small_alloc_instrumented", + "gc_pool_alloc_instrumented", "gc_big_alloc_instrumented" ) || occursin(r"^box_.*", name) return (:alloc, may_alloc) elseif name in ("f__apply_latest", "f__apply_iterate", "f__apply_pure", "f__call_latest", @@ -62,7 +63,7 @@ function fn_may_allocate(name::AbstractString; ignore_throw::Bool) if name in ("egal__unboxed", "lock_value", "unlock_value", "get_nth_field_noalloc", "load_and_lookup", "lazy_load_and_lookup", "box_bool", "box_int8", "box_uint8", "excstack_state", "restore_excstack", "enter_handler", - "pop_handler", "f_typeof", "clock_now", "throw", "gc_queue_root", "gc_enable", + "pop_handler", "pop_handler_noexcept", "f_typeof", "clock_now", "throw", "gc_queue_root", "gc_enable", "gc_disable_finalizers_internal", "gc_is_in_finalizer", "enable_gc_logging", "gc_safepoint", "gc_collect", "genericmemory_owner", "get_pgcstack") || occursin(r"^unbox_.*", name) return false # these functions never allocate @@ -141,7 +142,7 @@ function resolve_allocations(call::LLVM.Value) isnothing(match_) && return nothing name = match_[2] - if name in ("gc_pool_alloc_instrumented", "gc_big_alloc_instrumented", "gc_alloc_typed") + if name in ("gc_pool_alloc_instrumented", "gc_small_alloc_instrumented", "gc_big_alloc_instrumented", "gc_alloc_typed") type = resolve_static_jl_value_t(operands(call)[end-1]) return type !== nothing ? [(call, type)] : nothing elseif name in ("alloc_array_1d", "alloc_array_2d", "alloc_array_3d") @@ -179,7 +180,7 @@ function resolve_allocations(call::LLVM.Value) typestr == "uint8pointer" && return [(call, Ptr{UInt8})] typestr == "voidpointer" && return [(call, Ptr{Cvoid})] @assert false # above is exhaustive - elseif name == "gc_pool_alloc" + elseif name in ("gc_pool_alloc", "gc_small_alloc") seen = Set() allocs = Tuple{LLVM.Instruction, Any}[] for calluse in transitive_uses(call; unwrap = (use)->user(use) isa LLVM.BitCastInst) diff --git a/src/compiler.jl b/src/compiler.jl index ecba319..aaba640 100644 --- a/src/compiler.jl +++ b/src/compiler.jl @@ -10,7 +10,7 @@ function __init__() tm[] = LLVM.JITTargetMachine(LLVM.triple(), cpu_name(), cpu_features(); optlevel = llvm_codegen_level(opt_level)) LLVM.asm_verbosity!(tm[], true) - lljit = LLVM.has_julia_ojit() ? LLVM.JuliaOJIT() : LLVM.LLJIT(; tm=tm[]) + lljit = LLVM.JuliaOJIT() jd_main = LLVM.JITDylib(lljit) @@ -35,20 +35,11 @@ function __init__() end end -@static if LLVM.has_julia_ojit() - struct CompilerInstance - jit::LLVM.JuliaOJIT - lctm::Union{LLVM.LazyCallThroughManager, Nothing} - ism::Union{LLVM.IndirectStubsManager, Nothing} - end -else - struct CompilerInstance - jit::LLVM.LLJIT - lctm::Union{LLVM.LazyCallThroughManager, Nothing} - ism::Union{LLVM.IndirectStubsManager, Nothing} - end +struct CompilerInstance + jit::LLVM.JuliaOJIT + lctm::Union{LLVM.LazyCallThroughManager, Nothing} + ism::Union{LLVM.IndirectStubsManager, Nothing} end - struct CompileResult{Success, F, TT, RT} f_ptr::Ptr{Cvoid} arg_types::Type{TT} @@ -65,29 +56,16 @@ const tm = Ref{TargetMachine}() # for opt pipeline # cache of kernel instances const _kernel_instances = Dict{Any, Any}() const compiler_cache = Dict{Any, CompileResult}() -const config = CompilerConfig(DefaultCompilerTarget(), NativeParams(); - kernel=false, entry_abi = :specfunc, always_inline=false) +alloc_config(func_abi::Symbol) = CompilerConfig(DefaultCompilerTarget(), NativeParams(); + kernel=false, entry_abi = func_abi, always_inline=false) const NativeCompilerJob = CompilerJob{NativeCompilerTarget,NativeParams} GPUCompiler.can_safepoint(@nospecialize(job::NativeCompilerJob)) = true GPUCompiler.runtime_module(::NativeCompilerJob) = Runtime -function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module) - triple = GPUCompiler.llvm_triple(job.config.target) - tm = GPUCompiler.llvm_machine(job.config.target) - if VERSION >= v"1.10-beta3" - @dispose pb = LLVM.PassBuilder(tm) begin - @dispose mpm = LLVM.NewPMModulePassManager(pb) begin - build_newpm_pipeline!(pb, mpm) - run!(mpm, mod, tm) - end - end - else - @dispose pm=LLVM.ModulePassManager() begin - build_oldpm_pipeline!(pm) - run!(pm, mod) - end - end +function optimize!(mod::LLVM.Module) + pipeline = LLVM.Interop.JuliaPipeline(opt_level=Base.JLOptions().opt_level) + run!(pipeline, mod) end """ @@ -112,15 +90,17 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT} function compile(@nospecialize(job::CompilerJob)) return JuliaContext() do ctx mod, meta = GPUCompiler.compile(:llvm, job, validate=false) - optimize!(job, mod) + (; entry, compiled) = meta + entry_name = name(entry) + optimize!(mod) clone = copy(mod) - analysis = find_allocs!(mod, meta; ignore_throw) + analysis = find_allocs!(mod, meta, entry_name; ignore_throw, invoke_entry=true) # TODO: This is the wrong meta - return clone, meta, analysis + return clone, entry_name, analysis end end - function link(@nospecialize(job::CompilerJob), (mod, meta, analysis)) + function link(@nospecialize(job::CompilerJob), (mod, entry_name, analysis)) return JuliaContext() do ctx lljit = jit[].jit jd = LLVM.JITDylib(lljit) @@ -130,7 +110,7 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT} GPUCompiler.ThreadSafeModule(mod) end LLVM.add!(lljit, jd, tsm) - f_ptr = pointer(LLVM.lookup(lljit, LLVM.name(meta.entry))) + f_ptr = pointer(LLVM.lookup(lljit, entry_name)) if f_ptr == C_NULL throw(GPUCompiler.InternalCompilerError(job, "Failed to compile @check_allocs function")) @@ -142,7 +122,7 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT} end end end - fun = GPUCompiler.cached_compilation(cache, source, config, compile, link) + fun = GPUCompiler.cached_compilation(cache, source, alloc_config(:func), compile, link) # create a callable object that captures the function instance. we don't need to think # about world age here, as GPUCompiler already does and will return a different object @@ -153,7 +133,10 @@ end function (f::CompileResult{Success, F, TT, RT})(args...) where {Success, F, TT, RT} if Success - return abi_call(f.f_ptr, RT, TT, f.func, args...) + argsv = Any[args...] + GC.@preserve argsv begin + return ccall(f.f_ptr, Any, (Any, Ptr{Any}, UInt32), f.func, pointer(argsv), length(args)) + end else error("@check_allocs function contains ", length(f.analysis), " allocations.") end diff --git a/src/compiler_utils.jl b/src/compiler_utils.jl index 15a0e6e..0337d5b 100644 --- a/src/compiler_utils.jl +++ b/src/compiler_utils.jl @@ -20,29 +20,5 @@ function cpu_name() end function cpu_features() - if VERSION >= v"1.10.0-beta1" - return ccall(:jl_get_cpu_features, String, ()) - end - - @static if Sys.ARCH == :x86_64 || - Sys.ARCH == :x86 - return "+mmx,+sse,+sse2,+fxsr,+cx8" # mandated by Julia - else - return "" - end -end - -if VERSION >= v"1.10-beta3" - function build_newpm_pipeline!(pb::LLVM.PassBuilder, mpm::LLVM.NewPMModulePassManager, speedup=2, size=0, lower_intrinsics=true, - dump_native=false, external_use=false, llvm_only=false,) - ccall(:jl_build_newpm_pipeline, Cvoid, - (LLVM.API.LLVMModulePassManagerRef, LLVM.API.LLVMPassBuilderRef, Cint, Cint, Cint, Cint, Cint, Cint), - mpm, pb, speedup, size, lower_intrinsics, dump_native, external_use, llvm_only) - end -else - function build_oldpm_pipeline!(pm::LLVM.ModulePassManager, opt_level=2, lower_intrinsics=true) - ccall(:jl_add_optimization_passes, Cvoid, - (LLVM.API.LLVMPassManagerRef, Cint, Cint), - pm, opt_level, lower_intrinsics) - end + return ccall(:jl_get_cpu_features, String, ()) end diff --git a/test/runtests.jl b/test/runtests.jl index 4377483..d5c86c1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -172,7 +172,11 @@ end # The check should raise errors only for problematic argument types @check_allocs mymul(x,y) = x * y @test mymul(1.5, 2.5) == 1.5 * 2.5 - @test_throws AllocCheckFailure mymul(rand(10,10), rand(10,10)) + if VERSION < v"1.12-DEV" + @test_throws AllocCheckFailure mymul(rand(10,10), rand(10,10)) + else + @test_broken false # TODO: investigate segfault above with --check-bounds=yes + end # If provided, ignore_throw=false should include allocations that # happen only on error paths @@ -220,11 +224,11 @@ end @test length(check_allocs(Base.mightalias, (Memory{Int},Memory{Int}))) == 0 # uses jl_genericmemory_owner (intercepted) end - @test any(alloc.type == Base.RefValue{Int} for alloc in check_allocs(()->Ref{Int}(), ())) + @test any((alloc isa AllocationSite && alloc.type == Base.RefValue{Int}) for alloc in check_allocs(()->Ref{Int}(), ())) allocs1 = check_allocs(()->Ref{Vector{Int64}}(Int64[]), ()) - @test any(alloc.type == Base.RefValue{Vector{Int64}} for alloc in allocs1) - @test any(alloc.type == Vector{Int64} for alloc in allocs1) + @test any((alloc isa AllocationSite && alloc.type == Base.RefValue{Vector{Int64}}) for alloc in allocs1) + @test any((alloc isa AllocationSite && alloc.type == Vector{Int64}) for alloc in allocs1) end @testset "Error types" begin @@ -327,8 +331,25 @@ Documentation for `issue64`. v[i], v[j] = v[j], v[i] v end -let io = IOBuffer() - print(io, @doc issue64) - s = String(take!(io)) - @test occursin("Documentation for `issue64`.", s) +@check_allocs function foo_with_union_rt(t::Tuple{Float64, Float64}) + if rand((1, -1)) == 1 + return t + else + return nothing + end +end + +@testset "issues" begin + # issue #64 + let io = IOBuffer() + print(io, @doc issue64) + s = String(take!(io)) + @test occursin("Documentation for `issue64`.", s) + end + + # issue #70 + x = foo_with_union_rt((1.0, 1.5)) + @test x === nothing || x === (1.0, 1.5) + x = foo_with_union_rt((1.0, 1.5)) + @test x === nothing || x === (1.0, 1.5) end