Skip to content

Commit 68d04ba

Browse files
authored
codegen: make boundscheck GEP not be inbounds while the load GEP is inbounds (#55681)
Avoids undefined behavior on the boundschecking arithmetic, which is correct only assuming overflow follows unsigned arithmetic wrap around rules. Also add names to the Memory related LLVM instructions to aid debugging Closes: #55674
1 parent 351727f commit 68d04ba

File tree

2 files changed

+38
-6
lines changed

2 files changed

+38
-6
lines changed

src/cgutils.cpp

+30-6
Original file line numberDiff line numberDiff line change
@@ -3026,12 +3026,15 @@ static Value *emit_genericmemoryelsize(jl_codectx_t &ctx, Value *v, jl_value_t *
30263026
size_t sz = sty->layout->size;
30273027
if (sty->layout->flags.arrayelem_isunion)
30283028
sz++;
3029-
return ConstantInt::get(ctx.types().T_size, sz);
3029+
auto elsize = ConstantInt::get(ctx.types().T_size, sz);
3030+
return elsize;
30303031
}
30313032
else {
30323033
Value *t = emit_typeof(ctx, v, false, false, true);
30333034
Value *elsize = emit_datatype_size(ctx, t, add_isunion);
3034-
return ctx.builder.CreateZExt(elsize, ctx.types().T_size);
3035+
elsize = ctx.builder.CreateZExt(elsize, ctx.types().T_size);
3036+
setName(ctx.emission_context, elsize, "elsize");
3037+
return elsize;
30353038
}
30363039
}
30373040

@@ -3066,6 +3069,7 @@ static Value *emit_genericmemorylen(jl_codectx_t &ctx, Value *addr, jl_value_t *
30663069
MDBuilder MDB(ctx.builder.getContext());
30673070
auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, genericmemoryype_maxsize(typ)));
30683071
LI->setMetadata(LLVMContext::MD_range, rng);
3072+
setName(ctx.emission_context, LI, "memory_len");
30693073
return LI;
30703074
}
30713075

@@ -3075,7 +3079,7 @@ static Value *emit_genericmemoryptr(jl_codectx_t &ctx, Value *mem, const jl_data
30753079
Value *addr = mem;
30763080
addr = decay_derived(ctx, addr);
30773081
addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, addr, 1);
3078-
setName(ctx.emission_context, addr, ".data_ptr");
3082+
setName(ctx.emission_context, addr, "memory_data_ptr");
30793083
PointerType *PPT = cast<PointerType>(ctx.types().T_jlgenericmemory->getElementType(1));
30803084
LoadInst *LI = ctx.builder.CreateAlignedLoad(PPT, addr, Align(sizeof(char*)));
30813085
LI->setOrdering(AtomicOrdering::NotAtomic);
@@ -3087,6 +3091,7 @@ static Value *emit_genericmemoryptr(jl_codectx_t &ctx, Value *mem, const jl_data
30873091
assert(AS == AddressSpace::Loaded);
30883092
ptr = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, ptr });
30893093
}
3094+
setName(ctx.emission_context, ptr, "memory_data");
30903095
return ptr;
30913096
}
30923097

@@ -4195,6 +4200,7 @@ static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, Value *mem, Value *data, co
41954200
Value *ref = Constant::getNullValue(get_memoryref_type(ctx.builder.getContext(), ctx.types().T_size, layout, 0));
41964201
ref = ctx.builder.CreateInsertValue(ref, data, 0);
41974202
ref = ctx.builder.CreateInsertValue(ref, mem, 1);
4203+
setName(ctx.emission_context, ref, "memory_ref");
41984204
return mark_julia_type(ctx, ref, false, typ);
41994205
}
42004206

@@ -4215,6 +4221,7 @@ static Value *emit_memoryref_FCA(jl_codectx_t &ctx, const jl_cgval_t &ref, const
42154221
LoadInst *load = ctx.builder.CreateLoad(type, data_pointer(ctx, ref));
42164222
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ref.tbaa);
42174223
ai.decorateInst(load);
4224+
setName(ctx.emission_context, load, "memory_ref_FCA");
42184225
return load;
42194226
}
42204227
else {
@@ -4231,9 +4238,12 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
42314238
return jl_cgval_t();
42324239
Value *V = emit_memoryref_FCA(ctx, ref, layout);
42334240
Value *data = CreateSimplifiedExtractValue(ctx, V, 0);
4241+
maybeSetName(ctx.emission_context, data, "memoryref_data");
42344242
Value *mem = CreateSimplifiedExtractValue(ctx, V, 1);
4243+
maybeSetName(ctx.emission_context, mem, "memoryref_mem");
42354244
Value *i = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type);
42364245
Value *offset = ctx.builder.CreateSub(i, ConstantInt::get(ctx.types().T_size, 1));
4246+
setName(ctx.emission_context, offset, "memoryref_offset");
42374247
Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false);
42384248
bool bc = bounds_check_enabled(ctx, inbounds);
42394249
#if 1
@@ -4245,12 +4255,14 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
42454255
bool isghost = layout->size == 0;
42464256
if ((!isboxed && isunion) || isghost) {
42474257
newdata = ctx.builder.CreateAdd(data, offset);
4258+
setName(ctx.emission_context, newdata, "memoryref_data+offset");
42484259
if (bc) {
42494260
BasicBlock *failBB, *endBB;
42504261
failBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
42514262
endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend");
42524263
Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
42534264
Value *inbound = ctx.builder.CreateICmpULT(newdata, mlen);
4265+
setName(ctx.emission_context, offset, "memoryref_isinbounds");
42544266
ctx.builder.CreateCondBr(inbound, endBB, failBB);
42554267
failBB->insertInto(ctx.f);
42564268
ctx.builder.SetInsertPoint(failBB);
@@ -4278,10 +4290,13 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
42784290
// and we can further rearrange that as ovflw = !( offset+len < len+len ) as unsigned math
42794291
Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
42804292
ovflw = ctx.builder.CreateICmpUGE(ctx.builder.CreateAdd(offset, mlen), ctx.builder.CreateNUWAdd(mlen, mlen));
4293+
setName(ctx.emission_context, ovflw, "memoryref_ovflw");
42814294
}
42824295
#endif
42834296
boffset = ctx.builder.CreateMul(offset, elsz);
4284-
newdata = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), data, boffset);
4297+
setName(ctx.emission_context, boffset, "memoryref_byteoffset");
4298+
newdata = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), data, boffset);
4299+
setName(ctx.emission_context, newdata, "memoryref_data_byteoffset");
42854300
(void)boffset; // LLVM is very bad at handling GEP with types different from the load
42864301
if (bc) {
42874302
BasicBlock *failBB, *endBB;
@@ -4304,8 +4319,11 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
43044319
ctx.builder.CreatePtrToInt(newdata, ctx.types().T_size),
43054320
ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size));
43064321
Value *blen = ctx.builder.CreateMul(mlen, elsz, "", true, true);
4322+
setName(ctx.emission_context, blen, "memoryref_bytelen");
43074323
Value *inbound = ctx.builder.CreateICmpULT(bidx0, blen);
4324+
setName(ctx.emission_context, inbound, "memoryref_isinbounds");
43084325
inbound = ctx.builder.CreateAnd(ctx.builder.CreateNot(ovflw), inbound);
4326+
setName(ctx.emission_context, inbound, "memoryref_isinbounds&notovflw");
43094327
#else
43104328
Value *idx0; // (newdata - mptr) / elsz
43114329
idx0 = ctx.builder.CreateSub(
@@ -4342,8 +4360,10 @@ static jl_cgval_t emit_memoryref_offset(jl_codectx_t &ctx, const jl_cgval_t &ref
43424360
offset = ctx.builder.CreateSub(
43434361
ctx.builder.CreatePtrToInt(data, ctx.types().T_size),
43444362
ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size));
4363+
setName(ctx.emission_context, offset, "memoryref_offset");
43454364
Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false);
43464365
offset = ctx.builder.CreateExactUDiv(offset, elsz);
4366+
setName(ctx.emission_context, offset, "memoryref_offsetidx");
43474367
}
43484368
offset = ctx.builder.CreateAdd(offset, ConstantInt::get(ctx.types().T_size, 1));
43494369
return mark_julia_type(ctx, offset, false, jl_long_type);
@@ -4352,7 +4372,9 @@ static jl_cgval_t emit_memoryref_offset(jl_codectx_t &ctx, const jl_cgval_t &ref
43524372
static Value *emit_memoryref_mem(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
43534373
{
43544374
Value *V = emit_memoryref_FCA(ctx, ref, layout);
4355-
return CreateSimplifiedExtractValue(ctx, V, 1);
4375+
V = CreateSimplifiedExtractValue(ctx, V, 1);
4376+
maybeSetName(ctx.emission_context, V, "memoryref_mem");
4377+
return V;
43564378
}
43574379

43584380
static Value *emit_memoryref_ptr(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
@@ -4374,13 +4396,15 @@ static Value *emit_memoryref_ptr(jl_codectx_t &ctx, const jl_cgval_t &ref, const
43744396
data = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, data });
43754397
if (!GEPlist.empty()) {
43764398
for (auto &GEP : make_range(GEPlist.rbegin(), GEPlist.rend())) {
4377-
Instruction *GEP2 = GEP->clone();
4399+
GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GEP->clone());
43784400
GEP2->mutateType(PointerType::get(GEP->getResultElementType(), AS));
43794401
GEP2->setOperand(GetElementPtrInst::getPointerOperandIndex(), data);
4402+
GEP2->setIsInBounds(true);
43804403
ctx.builder.Insert(GEP2);
43814404
data = GEP2;
43824405
}
43834406
}
4407+
setName(ctx.emission_context, data, "memoryref_data");
43844408
return data;
43854409
}
43864410

src/codegen.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,14 @@ void setName(jl_codegen_params_t &params, Value *V, const Twine &Name)
171171
}
172172
}
173173

174+
void maybeSetName(jl_codegen_params_t &params, Value *V, const Twine &Name)
175+
{
176+
// To be used when we may get an Instruction or something that is not an instruction i.e Constants/Arguments
177+
if (params.debug_level >= 2 && isa<Instruction>(V)) {
178+
V->setName(Name);
179+
}
180+
}
181+
174182
void setName(jl_codegen_params_t &params, Value *V, std::function<std::string()> GetName)
175183
{
176184
assert((isa<Constant>(V) || isa<Instruction>(V)) && "Should only set names on instructions!");

0 commit comments

Comments
 (0)