diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 45a414d..d456c57 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -19,16 +19,16 @@ jobs: fail-fast: false matrix: version: + - 'pre' - '1.9' - - '~1.10.0-0' - - 'nightly' + - '1.10' os: - ubuntu-latest arch: - x64 steps: - uses: actions/checkout@v3 - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} diff --git a/src/AllocCheck.jl b/src/AllocCheck.jl index 6229dcd..4bf296c 100644 --- a/src/AllocCheck.jl +++ b/src/AllocCheck.jl @@ -204,8 +204,8 @@ function check_allocs(@nospecialize(func), @nospecialize(types); ignore_throw=tr target = DefaultCompilerTarget() job = CompilerJob(source, config) allocs = JuliaContext() do ctx - mod, meta = GPUCompiler.compile(:llvm, job, validate=false, optimize=false, cleanup=false) - optimize!(job, mod) + mod, meta = GPUCompiler.compile(:llvm, job, validate=false, optimize=true, cleanup=false) + # GPUCompiler.optimize!(job, mod) # TODO: Why is this broken? allocs = find_allocs!(mod, meta; ignore_throw) # display(mod) diff --git a/src/classify.jl b/src/classify.jl index 938d902..b0cc6c8 100644 --- a/src/classify.jl +++ b/src/classify.jl @@ -62,7 +62,7 @@ function fn_may_allocate(name::AbstractString; ignore_throw::Bool) if name in ("egal__unboxed", "lock_value", "unlock_value", "get_nth_field_noalloc", "load_and_lookup", "lazy_load_and_lookup", "box_bool", "box_int8", "box_uint8", "excstack_state", "restore_excstack", "enter_handler", - "pop_handler", "f_typeof", "clock_now", "throw", "gc_queue_root", "gc_enable", + "pop_handler", "pop_handler_noexcept", "f_typeof", "clock_now", "throw", "gc_queue_root", "gc_enable", "gc_disable_finalizers_internal", "gc_is_in_finalizer", "enable_gc_logging", "gc_safepoint", "gc_collect", "genericmemory_owner", "get_pgcstack") || occursin(r"^unbox_.*", name) return false # these functions never allocate diff --git a/src/compiler.jl b/src/compiler.jl index ecba319..a3f2f96 100644 --- a/src/compiler.jl +++ b/src/compiler.jl @@ -111,8 +111,8 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT} function compile(@nospecialize(job::CompilerJob)) return JuliaContext() do ctx - mod, meta = GPUCompiler.compile(:llvm, job, validate=false) - optimize!(job, mod) + mod, meta = GPUCompiler.compile(:llvm, job, validate=false, optimize=true) + # optimize!(job, mod) # TODO clone = copy(mod) analysis = find_allocs!(mod, meta; ignore_throw) diff --git a/src/compiler_utils.jl b/src/compiler_utils.jl index 15a0e6e..5c5d6dd 100644 --- a/src/compiler_utils.jl +++ b/src/compiler_utils.jl @@ -32,7 +32,64 @@ function cpu_features() end end -if VERSION >= v"1.10-beta3" +mutable struct PipelineConfig + Speedup::Cint + Size::Cint + lower_intrinsics::Cint + dump_native::Cint + external_use::Cint + llvm_only::Cint + always_inline::Cint + enable_early_simplifications::Cint + enable_early_optimizations::Cint + enable_scalar_optimizations::Cint + enable_loop_optimizations::Cint + enable_vector_pipeline::Cint + remove_ni::Cint + cleanup::Cint +end + +if VERSION >= v"1.11-alpha1" + function build_newpm_pipeline!( + pb::LLVM.PassBuilder, + mpm::LLVM.NewPMModulePassManager, + speedup=2, + size=0, + lower_intrinsics=true, + dump_native=false, + external_use=false, + llvm_only=false, + always_inline=true, + enable_early_simplifications=true, + enable_early_optimizations=true, + enable_scalar_optimizations=true, + enable_loop_optimizations=true, + enable_vector_pipeline=true, + remove_ni=true, + cleanup=false # note: modified vs. base + ) + cfg = PipelineConfig( + speedup, + size, + lower_intrinsics, + dump_native, + external_use, + llvm_only, + always_inline, + enable_early_simplifications, + enable_early_optimizations, + enable_scalar_optimizations, + enable_loop_optimizations, + enable_vector_pipeline, + remove_ni, + cleanup, + ) + ccall(:jl_build_newpm_pipeline, Cvoid, + (LLVM.API.LLVMModulePassManagerRef, LLVM.API.LLVMPassBuilderRef, Ref{PipelineConfig}), + mpm, pb, cfg + ) + end +elseif VERSION >= v"1.10-beta3" function build_newpm_pipeline!(pb::LLVM.PassBuilder, mpm::LLVM.NewPMModulePassManager, speedup=2, size=0, lower_intrinsics=true, dump_native=false, external_use=false, llvm_only=false,) ccall(:jl_build_newpm_pipeline, Cvoid,