Skip to content

Commit af09340

Browse files
committed
optimizer: allow EA-powered finalizer inlining
E.g. this allows `finalizer` inlining in the following case: ```julia mutable struct ForeignBuffer{T} const ptr::Ptr{T} end const foreign_buffer_finalized = Ref(false) function foreign_alloc(::Type{T}, length) where T ptr = Libc.malloc(sizeof(T) * length) ptr = Base.unsafe_convert(Ptr{T}, ptr) obj = ForeignBuffer{T}(ptr) return finalizer(obj) do obj Base.@assume_effects :notaskstate :nothrow foreign_buffer_finalized[] = true Libc.free(obj.ptr) end end function f_EA_finalizer(N::Int) workspace = foreign_alloc(Float64, N) GC.@preserve workspace begin (;ptr) = workspace Base.@assume_effects :nothrow @noinline println(devnull, "ptr = ", ptr) end end ``` ```julia julia> @code_typed f_EA_finalizer(42) CodeInfo( 1 ── %1 = Base.mul_int(8, N)::Int64 │ %2 = Core.lshr_int(%1, 63)::Int64 │ %3 = Core.trunc_int(Core.UInt8, %2)::UInt8 │ %4 = Core.eq_int(%3, 0x01)::Bool └─── goto #3 if not %4 2 ── invoke Core.throw_inexacterror(:convert::Symbol, UInt64::Type, %1::Int64)::Union{} └─── unreachable 3 ── goto #4 4 ── %9 = Core.bitcast(Core.UInt64, %1)::UInt64 └─── goto #5 5 ── goto #6 6 ── goto #7 7 ── goto #8 8 ── %14 = $(Expr(:foreigncall, :(:malloc), Ptr{Nothing}, svec(UInt64), 0, :(:ccall), :(%9), :(%9)))::Ptr{Nothing} └─── goto #9 9 ── %16 = Base.bitcast(Ptr{Float64}, %14)::Ptr{Float64} │ %17 = %new(ForeignBuffer{Float64}, %16)::ForeignBuffer{Float64} └─── goto #10 10 ─ %19 = $(Expr(:gc_preserve_begin, :(%17))) │ %20 = Base.getfield(%17, :ptr)::Ptr{Float64} │ invoke Main.println(Main.devnull::Base.DevNull, "ptr = "::String, %20::Ptr{Float64})::Nothing │ $(Expr(:gc_preserve_end, :(%19))) │ %23 = Main.foreign_buffer_finalized::Base.RefValue{Bool} │ Base.setfield!(%23, :x, true)::Bool │ %25 = Base.getfield(%17, :ptr)::Ptr{Float64} │ %26 = Base.bitcast(Ptr{Nothing}, %25)::Ptr{Nothing} │ $(Expr(:foreigncall, :(:free), Nothing, svec(Ptr{Nothing}), 0, :(:ccall), :(%26), :(%25)))::Nothing └─── return nothing ) => Nothing ``` However, this is still a WIP. Before merging, I want to improve EA's precision a bit and at least fix the test case that is currently marked as `broken`. I also need to check its impact on compiler performance. Additionally, I believe this feature is not yet practical. In particular, there is still significant room for improvement in the following areas: - EA's interprocedural capabilities: currently EA is performed ad-hoc for limited frames because of latency reasons, which significantly reduces its precision in the presence of interprocedural calls. - Relaxing the `:nothrow` check for finalizer inlining: the current algorithm requires `:nothrow`-ness on all paths from the allocation of the mutable struct to its last use, which is not practical for real-world cases. Even when `:nothrow` cannot be guaranteed, auxiliary optimizations such as inserting a `finalize` call after the last use might still be possible.
1 parent dd31084 commit af09340

File tree

8 files changed

+134
-43
lines changed

8 files changed

+134
-43
lines changed

base/compiler/optimize.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@ function refine_effects!(interp::AbstractInterpreter, sv::PostOptAnalysisState)
648648
if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending)
649649
ir = sv.ir
650650
nargs = let def = sv.result.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end
651-
estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), GetNativeEscapeCache(interp))
651+
estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), get_escape_cache(interp))
652652
argescapes = EscapeAnalysis.ArgEscapeCache(estate)
653653
stack_analysis_result!(sv.result, argescapes)
654654
validate_mutable_arg_escapes!(estate, sv)

base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl

+38-18
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import ._TOP_MOD: ==, getindex, setindex!
1818
using Core: MethodMatch, SimpleVector, ifelse, sizeof
1919
using Core.IR
2020
using ._TOP_MOD: # Base definitions
21-
@__MODULE__, @assert, @eval, @goto, @inbounds, @inline, @label, @noinline,
21+
@__MODULE__, @assert, @eval, @goto, @inbounds, @inline, @label, @noinline, @show,
2222
@nospecialize, @specialize, BitSet, Callable, Csize_t, IdDict, IdSet, UnitRange, Vector,
2323
copy, delete!, empty!, enumerate, error, first, get, get!, haskey, in, isassigned,
2424
isempty, ismutabletype, keys, last, length, max, min, missing, pop!, push!, pushfirst!,
@@ -657,11 +657,13 @@ function analyze_escapes(ir::IRCode, nargs::Int, 𝕃ₒ::AbstractLattice, get_e
657657
# `escape_exception!` conservatively propagates `AllEscape` anyway,
658658
# and so escape information imposed on `:the_exception` isn't computed
659659
continue
660+
elseif head === :gc_preserve_begin
661+
# GC preserve is handled by `escape_gc_preserve!`
662+
elseif head === :gc_preserve_end
663+
escape_gc_preserve!(astate, pc, stmt.args)
660664
elseif head === :static_parameter || # this exists statically, not interested in its escape
661-
head === :copyast || # XXX can this account for some escapes?
662-
head === :isdefined || # just returns `Bool`, nothing accounts for any escapes
663-
head === :gc_preserve_begin || # `GC.@preserve` expressions themselves won't be used anywhere
664-
head === :gc_preserve_end # `GC.@preserve` expressions themselves won't be used anywhere
665+
head === :copyast || # XXX escape something?
666+
head === :isdefined # just returns `Bool`, nothing accounts for any escapes
665667
continue
666668
else
667669
add_conservative_changes!(astate, pc, stmt.args)
@@ -1062,17 +1064,24 @@ end
10621064
function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any})
10631065
mi = first(args)::MethodInstance
10641066
first_idx, last_idx = 2, length(args)
1067+
add_liveness_changes!(astate, pc, args, first_idx, last_idx)
10651068
# TODO inspect `astate.ir.stmts[pc][:info]` and use const-prop'ed `InferenceResult` if available
10661069
cache = astate.get_escape_cache(mi)
1070+
ret = SSAValue(pc)
10671071
if cache isa Bool
10681072
if cache
1069-
return nothing # guaranteed to have no escape
1073+
# This method call is very simple and has good effects, so there's no need to
1074+
# escape its arguments. However, since the arguments might be returned, we need
1075+
# to consider the possibility of aliasing between them and the return value.
1076+
for argidx = first_idx:last_idx
1077+
add_alias_change!(astate, ret, args[argidx])
1078+
end
1079+
return nothing
10701080
else
10711081
return add_conservative_changes!(astate, pc, args, 2)
10721082
end
10731083
end
10741084
cache = cache::ArgEscapeCache
1075-
ret = SSAValue(pc)
10761085
retinfo = astate.estate[ret] # escape information imposed on the call statement
10771086
method = mi.def::Method
10781087
nargs = Int(method.nargs)
@@ -1160,6 +1169,17 @@ function escape_foreigncall!(astate::AnalysisState, pc::Int, args::Vector{Any})
11601169
end
11611170
end
11621171

1172+
function escape_gc_preserve!(astate::AnalysisState, pc::Int, args::Vector{Any})
1173+
@assert length(args) == 1 "invalid :gc_preserve_end"
1174+
val = args[1]
1175+
@assert val isa SSAValue "invalid :gc_preserve_end"
1176+
beginstmt = astate.ir[val][:stmt]
1177+
@assert isexpr(beginstmt, :gc_preserve_begin) "invalid :gc_preserve_end"
1178+
beginargs = beginstmt.args
1179+
# COMBAK we might need to add liveness for all statements from `:gc_preserve_begin` to `:gc_preserve_end`
1180+
add_liveness_changes!(astate, pc, beginargs)
1181+
end
1182+
11631183
normalize(@nospecialize x) = isa(x, QuoteNode) ? x.value : x
11641184

11651185
function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any})
@@ -1185,20 +1205,12 @@ function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any})
11851205
if result === missing
11861206
# if this call hasn't been handled by any of pre-defined handlers, escape it conservatively
11871207
add_conservative_changes!(astate, pc, args)
1188-
return
11891208
elseif result === true
11901209
add_liveness_changes!(astate, pc, args, 2)
1191-
return # ThrownEscape is already checked
1210+
elseif is_nothrow(astate.ir, pc)
1211+
add_liveness_changes!(astate, pc, args, 2)
11921212
else
1193-
# we escape statements with the `ThrownEscape` property using the effect-freeness
1194-
# computed by `stmt_effect_flags` invoked within inlining
1195-
# TODO throwness ≠ "effect-free-ness"
1196-
if is_nothrow(astate.ir, pc)
1197-
add_liveness_changes!(astate, pc, args, 2)
1198-
else
1199-
add_fallback_changes!(astate, pc, args, 2)
1200-
end
1201-
return
1213+
add_fallback_changes!(astate, pc, args, 2)
12021214
end
12031215
end
12041216

@@ -1526,4 +1538,12 @@ function escape_array_copy!(astate::AnalysisState, pc::Int, args::Vector{Any})
15261538
add_liveness_changes!(astate, pc, args, 6)
15271539
end
15281540

1541+
function escape_builtin!(::typeof(Core.finalizer), astate::AnalysisState, pc::Int, args::Vector{Any})
1542+
if length(args) 3
1543+
obj = args[3]
1544+
add_liveness_change!(astate, obj, pc) # TODO setup a proper FinalizerEscape?
1545+
end
1546+
return false
1547+
end
1548+
15291549
end # baremodule EscapeAnalysis

base/compiler/ssair/passes.jl

+57-22
Original file line numberDiff line numberDiff line change
@@ -1300,7 +1300,13 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
13001300
# Inlining performs legality checks on the finalizer to determine
13011301
# whether or not we may inline it. If so, it appends extra arguments
13021302
# at the end of the intrinsic. Detect that here.
1303-
length(stmt.args) == 5 || continue
1303+
if length(stmt.args) == 4 && stmt.args[4] === nothing
1304+
# constant case
1305+
elseif length(stmt.args) == 5 && stmt.args[4] isa Bool && stmt.args[5] isa MethodInstance
1306+
# inlining case
1307+
else
1308+
continue
1309+
end
13041310
end
13051311
is_finalizer = true
13061312
elseif isexpr(stmt, :foreigncall)
@@ -1685,7 +1691,32 @@ end
16851691
function sroa_mutables!(ir::IRCode, defuses::IdDict{Int,Tuple{SPCSet,SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing,InliningState})
16861692
𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp)
16871693
lazypostdomtree = LazyPostDomtree(ir)
1694+
function find_finalizer_idx(defuse::SSADefUse)
1695+
finalizer_idx = nothing
1696+
for use in defuse.uses
1697+
if use.kind === :finalizer
1698+
# For now: Only allow one finalizer per allocation
1699+
finalizer_idx !== nothing && return false
1700+
finalizer_idx = use.idx
1701+
end
1702+
end
1703+
if finalizer_idx === nothing
1704+
return true
1705+
elseif inlining === nothing
1706+
return false
1707+
end
1708+
return finalizer_idx
1709+
end
16881710
for (defidx, (intermediaries, defuse)) in defuses
1711+
# Find the type for this allocation
1712+
defexpr = ir[SSAValue(defidx)][:stmt]
1713+
isexpr(defexpr, :new) || continue
1714+
typ = unwrap_unionall(ir.stmts[defidx][:type])
1715+
# Could still end up here if we tried to setfield! on an immutable, which would
1716+
# error at runtime, but is not illegal to have in the IR.
1717+
typ = widenconst(typ)
1718+
ismutabletype(typ) || continue
1719+
typ = typ::DataType
16891720
# Check if there are any uses we did not account for. If so, the variable
16901721
# escapes and we cannot eliminate the allocation. This works, because we're guaranteed
16911722
# not to include any intermediaries that have dead uses. As a result, missing uses will only ever
@@ -1696,29 +1727,33 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int,Tuple{SPCSet,SSADefUse}}
16961727
nuses += used_ssas[iidx]
16971728
end
16981729
nuses_total = used_ssas[defidx] + nuses - length(intermediaries)
1699-
nleaves == nuses_total || continue
1700-
# Find the type for this allocation
1701-
defexpr = ir[SSAValue(defidx)][:stmt]
1702-
isexpr(defexpr, :new) || continue
1703-
typ = unwrap_unionall(ir.stmts[defidx][:type])
1704-
# Could still end up here if we tried to setfield! on an immutable, which would
1705-
# error at runtime, but is not illegal to have in the IR.
1706-
typ = widenconst(typ)
1707-
ismutabletype(typ) || continue
1708-
typ = typ::DataType
1709-
# First check for any finalizer calls
1710-
finalizer_idx = nothing
1711-
for use in defuse.uses
1712-
if use.kind === :finalizer
1713-
# For now: Only allow one finalizer per allocation
1714-
finalizer_idx !== nothing && @goto skip
1715-
finalizer_idx = use.idx
1730+
if nleaves nuses_total
1731+
finalizer_idx = find_finalizer_idx(defuse)
1732+
if finalizer_idx isa Int
1733+
nargs = length(ir.argtypes) # TODO
1734+
estate = EscapeAnalysis.analyze_escapes(ir, nargs, 𝕃ₒ, get_escape_cache(inlining.interp))
1735+
einfo = estate[SSAValue(defidx)]
1736+
if EscapeAnalysis.has_no_escape(einfo)
1737+
already = BitSet(use.idx for use in defuse.uses)
1738+
for idx = einfo.Liveness
1739+
if idx already
1740+
push!(defuse.uses, SSAUse(:EALiveness, idx))
1741+
end
1742+
end
1743+
try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining::InliningState,
1744+
lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info])
1745+
end
17161746
end
1717-
end
1718-
if finalizer_idx !== nothing && inlining !== nothing
1719-
try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining,
1720-
lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info])
17211747
continue
1748+
else
1749+
finalizer_idx = find_finalizer_idx(defuse)
1750+
if finalizer_idx isa Int
1751+
try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining::InliningState,
1752+
lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info])
1753+
continue
1754+
elseif !finalizer_idx
1755+
continue
1756+
end
17221757
end
17231758
# Partition defuses by field
17241759
fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]

base/compiler/types.jl

+2
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,8 @@ typeinf_lattice(::AbstractInterpreter) = InferenceLattice(BaseInferenceLattice.i
452452
ipo_lattice(::AbstractInterpreter) = InferenceLattice(IPOResultLattice.instance)
453453
optimizer_lattice(::AbstractInterpreter) = SimpleInferenceLattice.instance
454454

455+
get_escape_cache(interp::AbstractInterpreter) = GetNativeEscapeCache(interp)
456+
455457
abstract type CallInfo end
456458

457459
@nospecialize

test/compiler/EscapeAnalysis/EAUtils.jl

+1
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ CC.OptimizationParams(interp::EscapeAnalyzer) = interp.opt_params
115115
CC.get_inference_world(interp::EscapeAnalyzer) = interp.world
116116
CC.get_inference_cache(interp::EscapeAnalyzer) = interp.inf_cache
117117
CC.cache_owner(::EscapeAnalyzer) = EAToken()
118+
CC.get_escape_cache(interp::EscapeAnalyzer) = GetEscapeCache(interp)
118119

119120
function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, ir::IRCode, caller::InferenceResult)
120121
# run EA on all frames that have been optimized

test/compiler/codegen.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ if opt_level > 0
266266
@test occursin("ret $Iptr %\"x::$(Int)\"", load_dummy_ref_ir)
267267
end
268268

269-
# Issue 22770
269+
# Issue JuliaLang/julia#22770
270270
let was_gced = false
271271
@noinline make_tuple(x) = tuple(x)
272272
@noinline use(x) = ccall(:jl_breakpoint, Cvoid, ())

test/compiler/effects.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -1135,7 +1135,7 @@ end
11351135

11361136
# These need EA
11371137
@test Core.Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_1, (Base.RefValue{Int},)))
1138-
@test Core.Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_2, (Base.RefValue{Int},)))
1138+
@test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_2, (Base.RefValue{Int},)))
11391139
@test Core.Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(set_arg_ref!, (Base.RefValue{Int},)))
11401140
@test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_1, (Vector{Int},)))
11411141
@test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_2, (Vector{Int},)))

test/compiler/inline.jl

+33
Original file line numberDiff line numberDiff line change
@@ -2223,3 +2223,36 @@ let src = code_typed1(bar_split_error, Tuple{})
22232223
@test count(iscall((src, foo_split)), src.code) == 0
22242224
@test count(iscall((src, Core.throw_methoderror)), src.code) > 0
22252225
end
2226+
2227+
# finalizer inlining with EA
2228+
mutable struct ForeignBuffer{T}
2229+
const ptr::Ptr{T}
2230+
end
2231+
mutable struct ForeignBufferChecker
2232+
@atomic finalized::Bool
2233+
end
2234+
const foreign_buffer_checker = ForeignBufferChecker(false)
2235+
function foreign_alloc(::Type{T}, length) where T
2236+
ptr = Libc.malloc(sizeof(T) * length)
2237+
ptr = Base.unsafe_convert(Ptr{T}, ptr)
2238+
obj = ForeignBuffer{T}(ptr)
2239+
return finalizer(obj) do obj
2240+
Base.@assume_effects :notaskstate :nothrow
2241+
@atomic foreign_buffer_checker.finalized = true
2242+
Libc.free(obj.ptr)
2243+
end
2244+
end
2245+
function f_EA_finalizer(N::Int)
2246+
workspace = foreign_alloc(Float64, N)
2247+
GC.@preserve workspace begin
2248+
(;ptr) = workspace
2249+
Base.@assume_effects :nothrow @noinline println(devnull, "ptr = ", ptr)
2250+
end
2251+
end
2252+
let src = code_typed1(f_EA_finalizer, (Int,))
2253+
@test count(iscall((src, Core.finalizer)), src.code) == 0
2254+
end
2255+
let;Base.Experimental.@force_compile
2256+
f_EA_finalizer(42000)
2257+
@test foreign_buffer_checker.finalized
2258+
end

0 commit comments

Comments
 (0)