Skip to content

Commit 4a50693

Browse files
committed
threading: update codegen to use atomic annotations also
And add load/store alignment annotations, because LLVM now prefers that we try to specify those explicitly, even though it's not required. This does not yet include correct load/store behaviors for objects with inlined references (the recent #34126 PR).
1 parent 6e387a9 commit 4a50693

12 files changed

+242
-178
lines changed

src/atomics.h

+2-1
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@
7575
// TODO: Maybe add jl_atomic_compare_exchange_weak for spin lock
7676
# define jl_atomic_store(obj, val) \
7777
__atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
78-
# define jl_atomic_store_relaxed(obj, val) \
78+
# define jl_atomic_store_relaxed(obj, val) \
7979
__atomic_store_n(obj, val, __ATOMIC_RELAXED)
8080
# if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
8181
!(defined(_CPU_X86_) || defined(_CPU_X86_64_))
@@ -271,6 +271,7 @@ static inline void jl_atomic_store_release(volatile T *obj, T2 val)
271271
jl_signal_fence();
272272
*obj = (T)val;
273273
}
274+
template<typename T, typename T2>
274275
static inline void jl_atomic_store_relaxed(volatile T *obj, T2 val)
275276
{
276277
*obj = (T)val;

src/ccall.cpp

+20-17
Original file line number | Diff line number | Diff line change
@@ -89,15 +89,15 @@ static Value *runtime_sym_lookup(
8989
BasicBlock *dlsym_lookup = BasicBlock::Create(jl_LLVMContext, "dlsym");
9090
BasicBlock *ccall_bb = BasicBlock::Create(jl_LLVMContext, "ccall");
9191
Constant *initnul = ConstantPointerNull::get((PointerType*)T_pvoidfunc);
92-
LoadInst *llvmf_orig = irbuilder.CreateAlignedLoad(llvmgv, sizeof(void*));
92+
LoadInst *llvmf_orig = irbuilder.CreateAlignedLoad(T_pvoidfunc, llvmgv, sizeof(void*));
9393
// This in principle needs a consume ordering so that load from
9494
// this pointer sees a valid value. However, this is not supported by
9595
// LLVM (or agreed on in the C/C++ standard FWIW) and should be
9696
// almost impossible to happen on every platform we support since this
9797
// ordering is enforced by the hardware and LLVM has to speculate an
9898
// invalid load from the `cglobal` but doesn't depend on the `cglobal`
9999
// value for this to happen.
100-
// llvmf_orig->setAtomic(AtomicOrdering::Consume);
100+
llvmf_orig->setAtomic(AtomicOrdering::Unordered);
101101
irbuilder.CreateCondBr(
102102
irbuilder.CreateICmpNE(llvmf_orig, initnul),
103103
ccall_bb,
@@ -116,7 +116,7 @@ static Value *runtime_sym_lookup(
116116
}
117117
Value *llvmf = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func),
118118
{ libname, stringConstPtr(emission_context, irbuilder, f_name), libptrgv });
119-
auto store = irbuilder.CreateAlignedStore(llvmf, llvmgv, sizeof(void*));
119+
StoreInst *store = irbuilder.CreateAlignedStore(llvmf, llvmgv, sizeof(void*));
120120
store->setAtomic(AtomicOrdering::Release);
121121
irbuilder.CreateBr(ccall_bb);
122122

@@ -172,7 +172,7 @@ static GlobalVariable *emit_plt_thunk(
172172
IRBuilder<> irbuilder(b0);
173173
Value *ptr = runtime_sym_lookup(emission_context, irbuilder, funcptype, f_lib, f_name, plt, libptrgv,
174174
llvmgv, runtime_lib);
175-
auto store = irbuilder.CreateAlignedStore(irbuilder.CreateBitCast(ptr, T_pvoidfunc), got, sizeof(void*));
175+
StoreInst *store = irbuilder.CreateAlignedStore(irbuilder.CreateBitCast(ptr, T_pvoidfunc), got, sizeof(void*));
176176
store->setAtomic(AtomicOrdering::Release);
177177
SmallVector<Value*, 16> args;
178178
for (Function::arg_iterator arg = plt->arg_begin(), arg_e = plt->arg_end(); arg != arg_e; ++arg)
@@ -237,7 +237,7 @@ static Value *emit_plt(
237237
// consume ordering too. This is even less likely to cause issues though
238238
// since the only thing we do to this loaded pointer is to call it
239239
// immediately.
240-
// got_val->setAtomic(AtomicOrdering::Consume);
240+
got_val->setAtomic(AtomicOrdering::Unordered);
241241
return ctx.builder.CreateBitCast(got_val, funcptype);
242242
}
243243

@@ -352,17 +352,19 @@ static Value *llvm_type_rewrite(
352352
Value *from;
353353
Value *to;
354354
const DataLayout &DL = jl_data_layout;
355+
unsigned align = std::max(DL.getPrefTypeAlignment(target_type), DL.getPrefTypeAlignment(from_type));
355356
if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) {
356357
to = emit_static_alloca(ctx, target_type);
358+
cast<AllocaInst>(to)->setAlignment(Align(align));
357359
from = emit_bitcast(ctx, to, from_type->getPointerTo());
358360
}
359361
else {
360362
from = emit_static_alloca(ctx, from_type);
363+
cast<AllocaInst>(from)->setAlignment(Align(align));
361364
to = emit_bitcast(ctx, from, target_type->getPointerTo());
362365
}
363-
// XXX: deal with possible alignment issues
364-
ctx.builder.CreateStore(v, from);
365-
return ctx.builder.CreateLoad(to);
366+
ctx.builder.CreateAlignedStore(v, from, align);
367+
return ctx.builder.CreateAlignedLoad(to, align);
366368
}
367369

368370
// --- argument passing and scratch space utilities ---
@@ -1580,9 +1582,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
15801582
Value *ptls_i16 = emit_bitcast(ctx, ctx.ptlsStates, T_pint16);
15811583
const int tid_offset = offsetof(jl_tls_states_t, tid);
15821584
Value *ptid = ctx.builder.CreateGEP(ptls_i16, ConstantInt::get(T_size, tid_offset / 2));
1583-
return mark_or_box_ccall_result(ctx,
1584-
tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(ptid)),
1585-
retboxed, rt, unionall, static_rt);
1585+
LoadInst *tid = ctx.builder.CreateAlignedLoad(ptid, sizeof(int16_t));
1586+
tbaa_decorate(tbaa_const, tid);
1587+
return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt);
15861588
}
15871589
else if (is_libjulia_func(jl_get_current_task)) {
15881590
assert(lrt == T_prjlvalue);
@@ -1591,9 +1593,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
15911593
Value *ptls_pv = emit_bitcast(ctx, ctx.ptlsStates, T_pprjlvalue);
15921594
const int ct_offset = offsetof(jl_tls_states_t, current_task);
15931595
Value *pct = ctx.builder.CreateGEP(ptls_pv, ConstantInt::get(T_size, ct_offset / sizeof(void*)));
1594-
return mark_or_box_ccall_result(ctx,
1595-
tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(pct)),
1596-
retboxed, rt, unionall, static_rt);
1596+
LoadInst *ct = ctx.builder.CreateAlignedLoad(pct, sizeof(void*));
1597+
tbaa_decorate(tbaa_const, ct);
1598+
return mark_or_box_ccall_result(ctx, ct, retboxed, rt, unionall, static_rt);
15971599
}
15981600
else if (is_libjulia_func(jl_set_next_task)) {
15991601
assert(lrt == T_void);
@@ -1612,8 +1614,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
16121614
ctx.builder.CreateCall(prepare_call(gcroot_flush_func));
16131615
Value *pdefer_sig = emit_defer_signal(ctx);
16141616
Value *defer_sig = ctx.builder.CreateLoad(pdefer_sig);
1615-
defer_sig = ctx.builder.CreateAdd(defer_sig,
1616-
ConstantInt::get(T_sigatomic, 1));
1617+
defer_sig = ctx.builder.CreateAdd(defer_sig, ConstantInt::get(T_sigatomic, 1));
16171618
ctx.builder.CreateStore(defer_sig, pdefer_sig);
16181619
emit_signal_fence(ctx);
16191620
return ghostValue(jl_nothing_type);
@@ -1675,7 +1676,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
16751676
idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, ((jl_datatype_t*)ety)->layout->first_ptr));
16761677
}
16771678
Value *slot_addr = ctx.builder.CreateInBoundsGEP(T_prjlvalue, arrayptr, idx);
1678-
Value *load = tbaa_decorate(tbaa_ptrarraybuf, ctx.builder.CreateLoad(T_prjlvalue, slot_addr));
1679+
LoadInst *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, slot_addr, sizeof(void*));
1680+
load->setAtomic(AtomicOrdering::Unordered);
1681+
tbaa_decorate(tbaa_ptrarraybuf, load);
16791682
Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, Constant::getNullValue(T_prjlvalue)), T_int32);
16801683
JL_GC_POP();
16811684
return mark_or_box_ccall_result(ctx, res, retboxed, rt, unionall, static_rt);

0 commit comments

Comments (0)