Skip to content

Commit 6e387a9

Browse files
committed
annotate pointer loads and stores atomically
This lets us say that any time you observe the store of a pointer, it is valid to assume consume ordering on that read (on all processors except the Alpha). This is already the default behavior on x86 hardware; this change just enforces the ordering on the compiler too.
1 parent d7ea190 commit 6e387a9

File tree

8 files changed

+105
-55
lines changed

8 files changed

+105
-55
lines changed

src/array.c

+57-25
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,32 @@ extern "C" {
2020

2121
#define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
2222

23+
// this is a version of memmove that preserves atomic memory ordering
24+
// which makes it safe to use for objects that can contain memory references
25+
// without risk of creating pointers out of thin air
26+
// Copy `n` pointer-sized slots from `srcp` to `dstp`, using one relaxed atomic
// load and one relaxed atomic store per slot. The two ranges may overlap: the
// copy direction is chosen so that no source slot is overwritten before it
// has been read.
void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT
{
    int copy_ascending = (dstp < srcp) || (dstp > srcp + n);
    if (copy_ascending) {
        // dst starts before src (or past its end): walking upward is safe
        size_t k = 0;
        while (k < n) {
            jl_atomic_store_relaxed(&dstp[k], jl_atomic_load_relaxed(&srcp[k]));
            k++;
        }
    }
    else {
        // dst starts inside [src, src + n]: walk downward from the last slot
        size_t k = n;
        while (k > 0) {
            k--;
            jl_atomic_store_relaxed(&dstp[k], jl_atomic_load_relaxed(&srcp[k]));
        }
    }
}
40+
41+
// Move `nb` bytes from `src` to `dst`; the ranges may overlap.
//   hasptr != 0: the buffers are treated as arrays of pointer-sized slots and
//                moved slot-by-slot with memmove_refs (atomic per slot).
//   hasptr == 0: the bytes are moved with plain memmove.
void memmove_safe(int hasptr, char *dst, const char *src, size_t nb) JL_NOTSAFEPOINT
{
    if (hasptr) {
        // memmove_refs only moves whole slots; a byte count that is not a
        // multiple of the slot size would silently drop the trailing bytes.
        assert(nb % sizeof(void*) == 0);
        // keep `src` read-only: memmove_refs takes `void *const *`
        memmove_refs((void**)dst, (void *const *)src, nb / sizeof(void*));
    }
    else {
        memmove(dst, src, nb);
    }
}
48+
2349
// array constructors ---------------------------------------------------------
2450
char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT
2551
{
@@ -542,10 +568,9 @@ JL_DLLEXPORT jl_value_t *jl_ptrarrayref(jl_array_t *a JL_PROPAGATES_ROOT, size_t
542568
{
543569
assert(i < jl_array_len(a));
544570
assert(a->flags.ptrarray);
545-
jl_value_t *elt = ((jl_value_t**)a->data)[i];
546-
if (elt == NULL) {
571+
jl_value_t *elt = jl_atomic_load_relaxed(((jl_value_t**)a->data) + i);
572+
if (elt == NULL)
547573
jl_throw(jl_undefref_exception);
548-
}
549574
return elt;
550575
}
551576

@@ -569,7 +594,7 @@ JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i)
569594
JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i)
570595
{
571596
if (a->flags.ptrarray) {
572-
return ((jl_value_t**)jl_array_data(a))[i] != NULL;
597+
return jl_atomic_load_relaxed(((jl_value_t**)jl_array_data(a)) + i) != NULL;
573598
}
574599
else if (a->flags.hasptr) {
575600
jl_datatype_t *eltype = (jl_datatype_t*)jl_tparam0(jl_typeof(a));
@@ -600,12 +625,18 @@ JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs
600625
if (jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(rhs)))
601626
return;
602627
}
603-
jl_assign_bits(&((char*)a->data)[i * a->elsize], rhs);
628+
if (a->flags.hasptr) {
629+
jl_fence_release();
630+
memmove_refs((void**)&((char*)a->data)[i * a->elsize], (void**)rhs, a->elsize / sizeof(void*));
631+
}
632+
else {
633+
jl_assign_bits(&((char*)a->data)[i * a->elsize], rhs);
634+
}
604635
if (a->flags.hasptr)
605636
jl_gc_multi_wb(jl_array_owner(a), rhs);
606637
}
607638
else {
608-
((jl_value_t**)a->data)[i] = rhs;
639+
jl_atomic_store_release(((jl_value_t**)a->data) + i, rhs);
609640
jl_gc_wb(jl_array_owner(a), rhs);
610641
}
611642
}
@@ -615,7 +646,7 @@ JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i)
615646
if (i >= jl_array_len(a))
616647
jl_bounds_error_int((jl_value_t*)a, i + 1);
617648
if (a->flags.ptrarray)
618-
((jl_value_t**)a->data)[i] = NULL;
649+
jl_atomic_store_release(((jl_value_t**)a->data) + i, NULL);
619650
else if (a->flags.hasptr) {
620651
size_t elsize = a->elsize;
621652
jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0);
@@ -762,7 +793,7 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
762793
if (isbitsunion) newtypetagdata = typetagdata - inc;
763794
if (idx > 0) {
764795
// inserting new elements after 1st element
765-
memmove(newdata, data, idx * elsz);
796+
memmove_safe(a->flags.hasptr, newdata, data, idx * elsz);
766797
if (isbitsunion) {
767798
memmove(newtypetagdata, typetagdata, idx);
768799
memset(newtypetagdata + idx, 0, inc);
@@ -796,11 +827,11 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
796827
// We could use memcpy if resizing allocates a new buffer,
797828
// hopefully it's not a particularly important optimization.
798829
if (idx > 0 && newdata < data) {
799-
memmove(newdata, data, nb1);
830+
memmove_safe(a->flags.hasptr, newdata, data, nb1);
800831
}
801-
memmove(newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
832+
memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
802833
if (idx > 0 && newdata > data) {
803-
memmove(newdata, data, nb1);
834+
memmove_safe(a->flags.hasptr, newdata, data, nb1);
804835
}
805836
a->offset = newoffset;
806837
}
@@ -810,16 +841,16 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
810841
newdata = data - oldoffsnb + a->offset * elsz;
811842
if (isbitsunion) newtypetagdata = newdata + (a->maxsize - a->offset) * elsz + a->offset;
812843
if (idx > 0 && newdata < data) {
813-
memmove(newdata, data, nb1);
844+
memmove_safe(a->flags.hasptr, newdata, data, nb1);
814845
if (isbitsunion) {
815846
memmove(newtypetagdata, typetagdata, idx);
816847
memset(newtypetagdata + idx, 0, inc);
817848
}
818849
}
819-
memmove(newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
850+
memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
820851
if (isbitsunion) memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx);
821852
if (idx > 0 && newdata > data) {
822-
memmove(newdata, data, nb1);
853+
memmove_safe(a->flags.hasptr, newdata, data, nb1);
823854
if (isbitsunion) {
824855
memmove(newtypetagdata, typetagdata, idx);
825856
memset(newtypetagdata + idx, 0, inc);
@@ -891,7 +922,7 @@ STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx,
891922
memmove(newtypetagdata, typetagdata, idx);
892923
memset(newtypetagdata + idx, 0, inc);
893924
}
894-
if (has_gap) memmove(newdata + nb1 + nbinc, newdata + nb1, n * elsz - nb1);
925+
if (has_gap) memmove_safe(a->flags.hasptr, newdata + nb1 + nbinc, newdata + nb1, n * elsz - nb1);
895926
}
896927
a->data = data = newdata;
897928
}
@@ -901,7 +932,7 @@ STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx,
901932
memset(typetagdata + idx, 0, inc);
902933
}
903934
size_t nb1 = idx * elsz;
904-
memmove(data + nb1 + inc * elsz, data + nb1, n * elsz - nb1);
935+
memmove_safe(a->flags.hasptr, data + nb1 + inc * elsz, data + nb1, n * elsz - nb1);
905936
}
906937
else {
907938
// there was enough room for requested growth already in a->maxsize
@@ -1036,12 +1067,12 @@ STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec,
10361067
if (elsz == 1 && !isbitsunion)
10371068
nbtotal++;
10381069
if (idx > 0) {
1039-
memmove(newdata, olddata, nb1);
1070+
memmove_safe(a->flags.hasptr, newdata, olddata, nb1);
10401071
if (isbitsunion) memmove(newtypetagdata, typetagdata, idx);
10411072
}
10421073
// Move the rest of the data if the offset changed
10431074
if (newoffs != offset) {
1044-
memmove(newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1);
1075+
memmove_safe(a->flags.hasptr, newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1);
10451076
if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, n - idx);
10461077
}
10471078
a->data = newdata;
@@ -1063,7 +1094,7 @@ STATIC_INLINE void jl_array_del_at_end(jl_array_t *a, size_t idx, size_t dec,
10631094
int isbitsunion = jl_array_isbitsunion(a);
10641095
size_t last = idx + dec;
10651096
if (n > last) {
1066-
memmove(data + idx * elsz, data + last * elsz, (n - last) * elsz);
1097+
memmove_safe(a->flags.hasptr, data + idx * elsz, data + last * elsz, (n - last) * elsz);
10671098
if (isbitsunion) {
10681099
char *typetagdata = jl_array_typetagdata(a);
10691100
memmove(typetagdata + idx, typetagdata + last, n - last);
@@ -1161,14 +1192,14 @@ JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary)
11611192
}
11621193

11631194
// Copy element by element until we hit a young object, at which point
1164-
// we can continue using `memmove`.
1195+
// we can finish by using `memmove`.
11651196
static NOINLINE ssize_t jl_array_ptr_copy_forward(jl_value_t *owner,
11661197
void **src_p, void **dest_p,
11671198
ssize_t n)
11681199
{
11691200
for (ssize_t i = 0; i < n; i++) {
1170-
void *val = src_p[i];
1171-
dest_p[i] = val;
1201+
void *val = jl_atomic_load_relaxed(src_p + i);
1202+
jl_atomic_store_relaxed(dest_p + i, val);
11721203
// `val` is young or old-unmarked
11731204
if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
11741205
jl_gc_queue_root(owner);
@@ -1183,8 +1214,8 @@ static NOINLINE ssize_t jl_array_ptr_copy_backward(jl_value_t *owner,
11831214
ssize_t n)
11841215
{
11851216
for (ssize_t i = 0; i < n; i++) {
1186-
void *val = src_p[n - i - 1];
1187-
dest_p[n - i - 1] = val;
1217+
void *val = jl_atomic_load_relaxed(src_p + n - i - 1);
1218+
jl_atomic_store_relaxed(dest_p + n - i - 1, val);
11881219
// `val` is young or old-unmarked
11891220
if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
11901221
jl_gc_queue_root(owner);
@@ -1200,6 +1231,7 @@ JL_DLLEXPORT void jl_array_ptr_copy(jl_array_t *dest, void **dest_p,
12001231
{
12011232
assert(dest->flags.ptrarray && src->flags.ptrarray);
12021233
jl_value_t *owner = jl_array_owner(dest);
1234+
jl_fence_release(); // ensure contents of src are visible on other processors
12031235
// Destination is old and doesn't refer to any young object
12041236
if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
12051237
jl_value_t *src_owner = jl_array_owner(src);
@@ -1218,7 +1250,7 @@ JL_DLLEXPORT void jl_array_ptr_copy(jl_array_t *dest, void **dest_p,
12181250
n -= done;
12191251
}
12201252
}
1221-
memmove(dest_p, src_p, n * sizeof(void*));
1253+
memmove_refs(dest_p, src_p, n);
12221254
}
12231255

12241256
JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item)

src/atomics.h

+5
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
* specified.
4545
*/
4646
#if defined(__GNUC__)
47+
# define jl_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
48+
# define jl_fence_release() __atomic_thread_fence(__ATOMIC_RELEASE)
4749
# define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
4850
# define jl_atomic_fetch_add_relaxed(obj, arg) \
4951
__atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
@@ -96,6 +98,9 @@
9698
# define jl_atomic_load_relaxed(obj) \
9799
__atomic_load_n(obj, __ATOMIC_RELAXED)
98100
#elif defined(_COMPILER_MICROSOFT_)
101+
// TODO: these only define compiler barriers, and aren't correct outside of x86
102+
# define jl_fence() _ReadWriteBarrier()
103+
# define jl_fence_release() _WriteBarrier()
99104
# define jl_signal_fence() _ReadWriteBarrier()
100105

101106
// add

src/cgutils.cpp

+24-12
Original file line numberDiff line numberDiff line change
@@ -1383,26 +1383,34 @@ static void typed_store(jl_codectx_t &ctx,
13831383
if (type_is_ghost(elty))
13841384
return;
13851385
Value *r;
1386-
if (!isboxed) {
1386+
if (!isboxed)
13871387
r = emit_unbox(ctx, elty, rhs, jltype);
1388-
if (parent != NULL)
1389-
emit_write_multibarrier(ctx, parent, r);
1390-
}
1391-
else {
1388+
else
13921389
r = maybe_decay_untracked(boxed(ctx, rhs));
1393-
if (parent != NULL)
1394-
emit_write_barrier(ctx, parent, r);
1395-
}
13961390
Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
13971391
if (ptr->getType() != ptrty)
13981392
ptr = ctx.builder.CreateBitCast(ptr, ptrty);
13991393
if (idx_0based)
14001394
ptr = ctx.builder.CreateInBoundsGEP(r->getType(), ptr, idx_0based);
1401-
Instruction *store = ctx.builder.CreateAlignedStore(r, ptr, isboxed || alignment ? alignment : julia_alignment(jltype));
1395+
if (!isboxed && CountTrackedPointers(elty).count)
1396+
ctx.builder.CreateFence(AtomicOrdering::Release);
1397+
if (isboxed)
1398+
alignment = sizeof(void*);
1399+
else if (!alignment)
1400+
alignment = julia_alignment(jltype);
1401+
StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, alignment);
1402+
if (isboxed)
1403+
store->setOrdering(AtomicOrdering::Release);
14021404
if (aliasscope)
14031405
store->setMetadata("noalias", aliasscope);
14041406
if (tbaa)
14051407
tbaa_decorate(tbaa, store);
1408+
if (parent != NULL) {
1409+
if (!isboxed)
1410+
emit_write_multibarrier(ctx, parent, r);
1411+
else
1412+
emit_write_barrier(ctx, parent, r);
1413+
}
14061414
}
14071415

14081416
// --- convert boolean value to julia ---
@@ -2617,8 +2625,10 @@ static void emit_setfield(jl_codectx_t &ctx,
26172625
jl_value_t *jfty = jl_svecref(sty->types, idx0);
26182626
if (jl_field_isptr(sty, idx0)) {
26192627
Value *r = maybe_decay_untracked(boxed(ctx, rhs)); // don't need a temporary gcroot since it'll be rooted by strct
2620-
tbaa_decorate(strct.tbaa, ctx.builder.CreateStore(r,
2621-
emit_bitcast(ctx, addr, T_pprjlvalue)));
2628+
cast<StoreInst>(tbaa_decorate(strct.tbaa, ctx.builder.CreateAlignedStore(r,
2629+
emit_bitcast(ctx, addr, T_pprjlvalue),
2630+
sizeof(jl_value_t*))))
2631+
->setOrdering(AtomicOrdering::Release);
26222632
if (wb && strct.isboxed)
26232633
emit_write_barrier(ctx, boxed(ctx, strct), r);
26242634
}
@@ -2712,7 +2722,9 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
27122722
if (jl_field_isptr(sty, i)) {
27132723
fval = boxed(ctx, fval_info);
27142724
if (!init_as_value)
2715-
tbaa_decorate(tbaa_stack, ctx.builder.CreateAlignedStore(fval, dest, jl_field_align(sty, i)));
2725+
cast<StoreInst>(tbaa_decorate(tbaa_stack,
2726+
ctx.builder.CreateAlignedStore(fval, dest, jl_field_align(sty, i))))
2727+
->setOrdering(AtomicOrdering::Release);
27162728
}
27172729
else if (jl_is_uniontype(jtype)) {
27182730
// compute tindex from rhs

src/datatype.c

+8-7
Original file line numberDiff line numberDiff line change
@@ -1011,7 +1011,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i)
10111011
assert(i < jl_datatype_nfields(st));
10121012
size_t offs = jl_field_offset(st, i);
10131013
if (jl_field_isptr(st, i)) {
1014-
return *(jl_value_t**)((char*)v + offs);
1014+
return jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
10151015
}
10161016
jl_value_t *ty = jl_field_type(st, i);
10171017
if (jl_is_uniontype(ty)) {
@@ -1029,7 +1029,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_noalloc(jl_value_t *v JL_PROPAGATES_RO
10291029
assert(i < jl_datatype_nfields(st));
10301030
size_t offs = jl_field_offset(st,i);
10311031
assert(jl_field_isptr(st,i));
1032-
return *(jl_value_t**)((char*)v + offs);
1032+
return jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
10331033
}
10341034

10351035
JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i)
@@ -1039,7 +1039,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i)
10391039
jl_bounds_error_int(v, i + 1);
10401040
size_t offs = jl_field_offset(st, i);
10411041
if (jl_field_isptr(st, i)) {
1042-
jl_value_t *fval = *(jl_value_t**)((char*)v + offs);
1042+
jl_value_t *fval = jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
10431043
if (__unlikely(fval == NULL))
10441044
jl_throw(jl_undefref_exception);
10451045
return fval;
@@ -1057,11 +1057,11 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i)
10571057

10581058
void set_nth_field(jl_datatype_t *st, void *v, size_t i, jl_value_t *rhs) JL_NOTSAFEPOINT
10591059
{
1060+
assert(rhs != NULL);
10601061
size_t offs = jl_field_offset(st, i);
10611062
if (jl_field_isptr(st, i)) {
1062-
*(jl_value_t**)((char*)v + offs) = rhs;
1063-
if (rhs != NULL)
1064-
jl_gc_wb(v, rhs);
1063+
jl_atomic_store_release((jl_value_t**)((char*)v + offs), rhs);
1064+
jl_gc_wb(v, rhs);
10651065
}
10661066
else {
10671067
jl_value_t *ty = jl_field_type_concrete(st, i);
@@ -1085,7 +1085,8 @@ JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i)
10851085
size_t offs = jl_field_offset(st, i);
10861086
char *fld = (char*)v + offs;
10871087
if (jl_field_isptr(st, i)) {
1088-
return *(jl_value_t**)fld != NULL;
1088+
jl_value_t *fval = jl_atomic_load_relaxed((jl_value_t**)fld);
1089+
return fval != NULL;
10891090
}
10901091
jl_datatype_t *ft = (jl_datatype_t*)jl_field_type(st, i);
10911092
if (jl_is_datatype(ft) && ft->layout->first_ptr >= 0) {

src/dump.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1923,7 +1923,7 @@ static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
19231923
while (codeinst) {
19241924
if (codeinst->min_world > 0)
19251925
codeinst->max_world = ~(size_t)0;
1926-
codeinst = codeinst->next;
1926+
codeinst = jl_atomic_load_relaxed(&codeinst->next);
19271927
}
19281928
}
19291929
else {

0 commit comments

Comments
 (0)