Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: JuliaLang/julia
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 327dd1be04871da306b6679d63ad996d466cb465
Choose a base ref
..
head repository: JuliaLang/julia
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 97b56660dca4299a4a07f9ea24416bd815d29029
Choose a head ref
Showing with 67 additions and 23 deletions.
  1. +66 −22 src/llvm-alloc-opt.cpp
  2. +1 −1 src/llvm-late-gc-lowering.cpp
88 changes: 66 additions & 22 deletions src/llvm-alloc-opt.cpp
Original file line number Diff line number Diff line change
@@ -178,16 +178,16 @@ struct Optimizer {
ssize_t getGCAllocSize(Instruction *I);
void pushInstruction(Instruction *I);

void insertLifetimeEnd(Instruction *ptr, Constant *sz, Instruction *insert);
void insertLifetimeEnd(Value *ptr, Constant *sz, Instruction *insert);
// insert llvm.lifetime.* calls for `ptr` with size `sz` based on the use of `orig`.
void insertLifetime(Instruction *ptr, Constant *sz, Instruction *orig);
void insertLifetime(Value *ptr, Constant *sz, Instruction *orig);

void checkInst(Instruction *I);

void replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID,
Instruction *orig_i, Instruction *new_i);
void removeAlloc(CallInst *orig_inst);
void moveToStack(CallInst *orig_inst, size_t sz);
void moveToStack(CallInst *orig_inst, size_t sz, bool has_ref);
void splitOnStack(CallInst *orig_inst);

Function &F;
@@ -262,13 +262,15 @@ struct Optimizer {
bool hasaggr:1;
bool multiloc:1;
bool hasload:1;
Type *elty;
SmallVector<MemOp,4> accesses;
Field(uint32_t size)
Field(uint32_t size, Type *elty)
: size(size),
hasobjref(false),
hasaggr(false),
multiloc(false),
hasload(false)
hasload(false),
elty(elty)
{
}
};
@@ -311,7 +313,7 @@ struct Optimizer {
void dump();
bool addMemOp(Instruction *inst, unsigned opno, uint32_t offset, Type *elty,
bool isstore, const DataLayout &DL);
std::pair<const uint32_t,Field> &getField(uint32_t offset, uint32_t size);
std::pair<const uint32_t,Field> &getField(uint32_t offset, uint32_t size, Type *elty);
std::map<uint32_t,Field>::iterator findLowerField(uint32_t offset)
{
// Find the last field that starts no higher than `offset`.
@@ -383,9 +385,14 @@ void Optimizer::optimizeAll()
splitOnStack(orig);
continue;
}
if (has_ref)
continue;
moveToStack(orig, sz);
if (has_ref) {
if (use_info.memops.size() != 1 || has_refaggr ||
use_info.memops.begin()->second.size != sz) {
continue;
}
// The object only has a single field that's a reference with only one kind of access.
}
moveToStack(orig, sz, has_ref);
}
}

@@ -445,16 +452,19 @@ ssize_t Optimizer::getGCAllocSize(Instruction *I)
}

std::pair<const uint32_t,Optimizer::Field>&
Optimizer::AllocUseInfo::getField(uint32_t offset, uint32_t size)
Optimizer::AllocUseInfo::getField(uint32_t offset, uint32_t size, Type *elty)
{
auto it = findLowerField(offset);
auto end = memops.end();
auto lb = end; // first overlap
auto ub = end; // last overlap
if (it != end) {
// The slot found contains the current location
if (it->first + it->second.size >= offset + size)
if (it->first + it->second.size >= offset + size) {
if (it->second.elty != elty)
it->second.elty = nullptr;
return *it;
}
if (it->first + it->second.size > offset) {
lb = it;
ub = it;
@@ -472,12 +482,12 @@ Optimizer::AllocUseInfo::getField(uint32_t offset, uint32_t size)
}
// no overlap found just create a new one.
if (lb == end)
return *memops.emplace(offset, Field(size)).first;
return *memops.emplace(offset, Field(size, elty)).first;
// We find overlapping but not containing slot we need to merge slot/create new one
uint32_t new_offset = std::min(offset, lb->first);
uint32_t new_addrub = std::max(offset + uint32_t(size), ub->first + ub->second.size);
uint32_t new_size = new_addrub - new_offset;
Field field(new_size);
Field field(new_size, nullptr);
field.multiloc = true;
++ub;
for (it = lb; it != ub; ++it) {
@@ -501,7 +511,7 @@ bool Optimizer::AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_
memop.size = size;
memop.isaggr = isa<CompositeType>(elty);
memop.isobjref = hasObjref(elty);
auto &field = getField(offset, size);
auto &field = getField(offset, size, elty);
if (field.first != offset || field.second.size != size)
field.second.multiloc = true;
if (!isstore)
@@ -688,7 +698,7 @@ void Optimizer::checkInst(Instruction *I)
}
}

void Optimizer::insertLifetimeEnd(Instruction *ptr, Constant *sz, Instruction *insert)
void Optimizer::insertLifetimeEnd(Value *ptr, Constant *sz, Instruction *insert)
{
BasicBlock::iterator it(insert);
BasicBlock::iterator begin(insert->getParent()->begin());
@@ -710,7 +720,7 @@ void Optimizer::insertLifetimeEnd(Instruction *ptr, Constant *sz, Instruction *i
CallInst::Create(pass.lifetime_end, {sz, ptr}, "", insert);
}

void Optimizer::insertLifetime(Instruction *ptr, Constant *sz, Instruction *orig)
void Optimizer::insertLifetime(Value *ptr, Constant *sz, Instruction *orig)
{
CallInst::Create(pass.lifetime_start, {sz, ptr}, "", orig);
BasicBlock *def_bb = orig->getParent();
@@ -915,7 +925,7 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID,

// This function should not erase any safepoint so that the lifetime marker can find and cache
// all the original safepoints.
void Optimizer::moveToStack(CallInst *orig_inst, size_t sz)
void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
{
auto tag = orig_inst->getArgOperand(2);
removed.push_back(orig_inst);
@@ -935,6 +945,16 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz)
buff = prolog_builder.CreateAlloca(pass.T_int8, ConstantInt::get(pass.T_int64, 0));
ptr = buff;
}
else if (has_ref) {
// Allocate with the correct type so that the GC frame lowering pass will
// treat this as a non-mem2reg'd alloca
// The ccall root and GC preserve handling below is a bit conservative since they
// won't be needed if the alloca isn't optimized out.
// In the future, we should teach the lowering pass about this refinement relation.
buff = prolog_builder.CreateAlloca(pass.T_prjlvalue);
buff->setAlignment(align);
ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, pass.T_pint8));
}
else {
buff = prolog_builder.CreateAlloca(Type::getIntNTy(*pass.ctx, sz * 8));
buff->setAlignment(align);
@@ -998,6 +1018,11 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz)
}
// Also remove the preserve intrinsics so that it can be better optimized.
if (pass.gc_preserve_begin == callee) {
if (has_ref) {
IRBuilder<> builder(call);
call->replaceUsesOfWith(orig_i, builder.CreateLoad(pass.T_prjlvalue, buff));
return;
}
removeGCPreserve(call, orig_i);
return;
}
@@ -1013,7 +1038,15 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz)
}
// remove from operand bundle
Type *orig_t = orig_i->getType();
user->replaceUsesOfWith(orig_i, ConstantPointerNull::get(cast<PointerType>(orig_t)));
Value *replace;
if (has_ref) {
IRBuilder<> builder(user);
replace = builder.CreateLoad(pass.T_prjlvalue, buff);
}
else {
replace = ConstantPointerNull::get(cast<PointerType>(orig_t));
}
user->replaceUsesOfWith(orig_i, replace);
}
else if (isa<AddrSpaceCastInst>(user) || isa<BitCastInst>(user)) {
auto cast_t = PointerType::get(cast<PointerType>(user->getType())->getElementType(),
@@ -1161,10 +1194,21 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
// need to preserve at some point, there's no need to allocate the field.
if (!field.hasload && (!field.hasobjref || !use_info.haspreserve))
continue;
auto allocty = field.hasobjref ? pass.T_prjlvalue : Type::getIntNTy(*pass.ctx,
field.size * 8);
slots.push_back(SplitSlot{prolog_builder.CreateAlloca(allocty), field.hasobjref,
offset, field.size});
SplitSlot slot{nullptr, field.hasobjref, offset, field.size};
Type *allocty;
if (field.hasobjref) {
allocty = pass.T_prjlvalue;
}
else if (field.elty && !field.multiloc) {
allocty = field.elty;
}
else {
allocty = Type::getIntNTy(*pass.ctx, field.size * 8);
}
slot.slot = prolog_builder.CreateAlloca(allocty);
insertLifetime(prolog_builder.CreateBitCast(slot.slot, pass.T_pint8),
ConstantInt::get(pass.T_int64, field.size), orig_inst);
slots.push_back(std::move(slot));
}
const auto nslots = slots.size();
auto find_slot = [&] (uint32_t offset) {
2 changes: 1 addition & 1 deletion src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
@@ -769,7 +769,7 @@ void RecursivelyVisit(callback f, Value *V) {
f(VU);
if (isa<CallInst>(TheUser) || isa<LoadInst>(TheUser) ||
isa<SelectInst>(TheUser) || isa<PHINode>(TheUser) ||
isa<StoreInst>(TheUser))
isa<StoreInst>(TheUser) || isa<PtrToIntInst>(TheUser))
continue;
if (isa<GetElementPtrInst>(TheUser) || isa<BitCastInst>(TheUser) || isa<AddrSpaceCastInst>(TheUser)) {
RecursivelyVisit<VisitInst, callback>(f, TheUser);