Skip to content

Commit 41e603e

Browse files
authored
Refine out callee rooted values from live set at the call site (#37197)
1 parent 0336f67 commit 41e603e

File tree

2 files changed

+56
-5
lines changed

2 files changed

+56
-5
lines changed

src/llvm-late-gc-lowering.cpp

+37-5
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,8 @@ struct State {
295295
// Those values that - if live out from our parent basic block - are live
296296
// at this safepoint.
297297
std::vector<std::vector<int>> LiveIfLiveOut;
298+
// The set of values that are kept alive by the callee.
299+
std::vector<std::vector<int>> CalleeRoots;
298300
// We don't bother doing liveness on Allocas that were not mem2reg'ed.
299301
// they just get directly sunk into the root array.
300302
std::vector<AllocaInst *> Allocas;
@@ -359,7 +361,7 @@ struct LateLowerGCFrame: public FunctionPass, private JuliaPassContext {
359361
void NoteUseChain(State &S, BBState &BBS, User *TheUser);
360362
SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
361363
void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
362-
void RefineLiveSet(BitVector &LS, State &S);
364+
void RefineLiveSet(BitVector &LS, State &S, const std::vector<int> &CalleeRoots);
363365
Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V);
364366
Value *EmitLoadTag(IRBuilder<> &builder, Value *V);
365367
};
@@ -1002,7 +1004,7 @@ void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const st
10021004
}
10031005
}
10041006

1005-
static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI) {
1007+
static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, std::vector<int> CalleeRoots) {
10061008
int Number = ++S.MaxSafepointNumber;
10071009
S.SafepointNumbering[CI] = Number;
10081010
S.ReverseSafepointNumbering.push_back(CI);
@@ -1012,6 +1014,7 @@ static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI) {
10121014
// computation)
10131015
S.LiveSets.push_back(BBS.UpExposedUses);
10141016
S.LiveIfLiveOut.push_back(std::vector<int>{});
1017+
S.CalleeRoots.push_back(std::move(CalleeRoots));
10151018
return Number;
10161019
}
10171020

@@ -1515,7 +1518,25 @@ State LateLowerGCFrame::LocalScan(Function &F) {
15151518
// Intrinsics are never safepoints.
15161519
continue;
15171520
}
1518-
int SafepointNumber = NoteSafepoint(S, BBS, CI);
1521+
std::vector<int> CalleeRoots;
1522+
for (Use &U : CI->arg_operands()) {
1523+
// Find all callee rooted arguments.
1524+
// Record them instead of simply removing them from live values here
1525+
// since they can be useful during refinement
1526+
// (e.g. to remove roots of objects that are refined to these)
1527+
Value *V = U;
1528+
if (isa<Constant>(V) || !isa<PointerType>(V->getType()) ||
1529+
getValueAddrSpace(V) != AddressSpace::CalleeRooted)
1530+
continue;
1531+
V = V->stripPointerCasts();
1532+
if (!isTrackedValue(V))
1533+
continue;
1534+
auto Num = Number(S, V);
1535+
if (Num < 0)
1536+
continue;
1537+
CalleeRoots.push_back(Num);
1538+
}
1539+
int SafepointNumber = NoteSafepoint(S, BBS, CI, std::move(CalleeRoots));
15191540
BBS.HasSafepoint = true;
15201541
BBS.TopmostSafepoint = SafepointNumber;
15211542
BBS.Safepoints.push_back(SafepointNumber);
@@ -1845,12 +1866,18 @@ JL_USED_FUNC static void dumpSafepointsForBBName(Function &F, State &S, const ch
18451866
}
18461867
}
18471868

1848-
void LateLowerGCFrame::RefineLiveSet(BitVector &LS, State &S)
1869+
void LateLowerGCFrame::RefineLiveSet(BitVector &LS, State &S, const std::vector<int> &CalleeRoots)
18491870
{
18501871
BitVector FullLS(S.MaxPtrNumber + 1, false);
18511872
FullLS |= LS;
18521873
// First expand the live set according to the refinement map
18531874
// so that we can see all the values that are effectively live.
1875+
for (auto Num: CalleeRoots) {
1876+
// For callee rooted values, they are all kept alive at the safepoint.
1877+
// Make sure they are marked (even though they probably are already)
1878+
// so that other values can be refined to them.
1879+
FullLS[Num] = 1;
1880+
}
18541881
bool changed;
18551882
do {
18561883
changed = false;
@@ -1891,6 +1918,11 @@ void LateLowerGCFrame::RefineLiveSet(BitVector &LS, State &S)
18911918
LS[Idx] = 0;
18921919
}
18931920
}
1921+
for (auto Num: CalleeRoots) {
1922+
// Now unmark all values that are rooted by the callee after
1923+
// refining other values to them.
1924+
LS[Num] = 0;
1925+
}
18941926
}
18951927

18961928
void LateLowerGCFrame::ComputeLiveSets(State &S) {
@@ -1909,7 +1941,7 @@ void LateLowerGCFrame::ComputeLiveSets(State &S) {
19091941
if (HasBitSet(BBS.LiveOut, Live))
19101942
LS[Live] = 1;
19111943
}
1912-
RefineLiveSet(LS, S);
1944+
RefineLiveSet(LS, S, S.CalleeRoots[idx]);
19131945
// If the function has GC preserves, figure out whether we need to
19141946
// add in any extra live values.
19151947
if (!S.GCPreserves.empty()) {

test/llvmpasses/late-lower-gc.ll

+19
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ declare {}*** @julia.ptls_states()
88
declare void @jl_safepoint()
99
declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32)
1010
declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*)
11+
declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*)
1112

1213
define void @gc_frame_lowering(i64 %a, i64 %b) {
1314
top:
@@ -74,6 +75,24 @@ top:
7475
ret void
7576
}
7677

78+
define i32 @callee_root({} addrspace(10)* %v0, {} addrspace(10)* %v1) {
79+
top:
80+
; CHECK-LABEL: @callee_root
81+
; CHECK-NOT: @julia.new_gc_frame
82+
%v2 = call {}*** @julia.ptls_states()
83+
%v3 = bitcast {} addrspace(10)* %v0 to {} addrspace(10)* addrspace(10)*
84+
%v4 = addrspacecast {} addrspace(10)* addrspace(10)* %v3 to {} addrspace(10)* addrspace(11)*
85+
%v5 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v4 unordered, align 8
86+
%v6 = bitcast {} addrspace(10)* %v1 to {} addrspace(10)* addrspace(10)*
87+
%v7 = addrspacecast {} addrspace(10)* addrspace(10)* %v6 to {} addrspace(10)* addrspace(11)*
88+
%v8 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v7 unordered, align 8
89+
%v9 = addrspacecast {} addrspace(10)* %v5 to {} addrspace(12)*
90+
%v10 = addrspacecast {} addrspace(10)* %v8 to {} addrspace(12)*
91+
%v11 = call i32 @rooting_callee({} addrspace(12)* %v9, {} addrspace(12)* %v10)
92+
ret i32 %v11
93+
; CHECK: ret i32
94+
}
95+
7796
!0 = !{i64 0, i64 23}
7897
!1 = !{}
7998
!2 = distinct !{!2}

0 commit comments

Comments
 (0)