Skip to content

Commit cc8f544

Browse files
aclementsrobpike
authored andcommitted
runtime: don't free large spans until heapBitsSweepSpan returns
This fixes a race between 1) sweeping and freeing an unmarked large span and 2) reusing that span and allocating from it. This race arises because mSpan_Sweep returns spans for large objects to the heap *before* heapBitsSweepSpan clears the mark bit on the object in the span. Specifically, the following sequence of events can lead to an incorrectly zeroed bitmap byte, which causes the garbage collector to not trace any pointers in that object (the pointer bits for the first four words are cleared, and the scan bits are also cleared, so it looks like a no-scan object). 1) P0 calls mSpan_Sweep on a large span S0 with an unmarked object on it. 2) mSpan_Sweep calls heapBitsSweepSpan, which invokes the callback for the one (unmarked) object on the span. 3) The callback calls mHeap_Free, which makes span S0 available for allocation, but this is too early. 4) P1 grabs this S0 from the heap to use for allocation. 5) P1 allocates an object on this span and writes that object's type bits to the bitmap. 6) P0 returns from the callback to heapBitsSweepSpan. heapBitsSweepSpan clears the byte containing the mark, even though this span is now owned by P1 and this byte contains important bitmap information. This fixes this problem by simply delaying the mHeap_Free until after the heapBitsSweepSpan. I think the overall logic of mSpan_Sweep could be simplified now, but this seems like the minimal change. Fixes #11617. Change-Id: I6b1382c7e7cc35f81984467c0772fe9848b7522a Reviewed-on: https://go-review.googlesource.com/12320 Run-TryBot: Austin Clements <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Matthew Dempsky <[email protected]> Reviewed-by: Rob Pike <[email protected]>
1 parent 5c62e5f commit cc8f544

File tree

1 file changed

+36
-28
lines changed

1 file changed

+36
-28
lines changed

src/runtime/mgcsweep.go

+36-28
Original file line numberDiff line numberDiff line change
@@ -170,12 +170,12 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
170170
cl := s.sizeclass
171171
size := s.elemsize
172172
res := false
173-
nfree := 0
173+
nfree := 0 // Set to -1 for large span
174174

175175
var head, end gclinkptr
176176

177177
c := _g_.m.mcache
178-
sweepgenset := false
178+
freeToHeap := false
179179

180180
// Mark any free objects in this span so we don't collect them.
181181
sstart := uintptr(s.start << _PageShift)
@@ -237,31 +237,10 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
237237

238238
// important to set sweepgen before returning it to heap
239239
atomicstore(&s.sweepgen, sweepgen)
240-
sweepgenset = true
241-
242-
// NOTE(rsc,dvyukov): The original implementation of efence
243-
// in CL 22060046 used SysFree instead of SysFault, so that
244-
// the operating system would eventually give the memory
245-
// back to us again, so that an efence program could run
246-
// longer without running out of memory. Unfortunately,
247-
// calling SysFree here without any kind of adjustment of the
248-
// heap data structures means that when the memory does
249-
// come back to us, we have the wrong metadata for it, either in
250-
// the MSpan structures or in the garbage collection bitmap.
251-
// Using SysFault here means that the program will run out of
252-
// memory fairly quickly in efence mode, but at least it won't
253-
// have mysterious crashes due to confused memory reuse.
254-
// It should be possible to switch back to SysFree if we also
255-
// implement and then call some kind of MHeap_DeleteSpan.
256-
if debug.efence > 0 {
257-
s.limit = 0 // prevent mlookup from finding this span
258-
sysFault(unsafe.Pointer(p), size)
259-
} else {
260-
mHeap_Free(&mheap_, s, 1)
261-
}
262-
c.local_nlargefree++
263-
c.local_largefree += size
264-
res = true
240+
241+
// Free the span after heapBitsSweepSpan
242+
// returns, since it's not done with the span.
243+
freeToHeap = true
265244
} else {
266245
// Free small object.
267246
if size > 2*ptrSize {
@@ -285,7 +264,10 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
285264
// But we need to set it before we make the span available for allocation
286265
// (return it to heap or mcentral), because allocation code assumes that a
287266
// span is already swept if available for allocation.
288-
if !sweepgenset && nfree == 0 {
267+
//
268+
// TODO(austin): Clean this up by consolidating atomicstore in
269+
// large span path above with this.
270+
if !freeToHeap && nfree == 0 {
289271
// The span must be in our exclusive ownership until we update sweepgen,
290272
// check for potential races.
291273
if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
@@ -298,6 +280,32 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
298280
c.local_nsmallfree[cl] += uintptr(nfree)
299281
res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head, end, preserve)
300282
// MCentral_FreeSpan updates sweepgen
283+
} else if freeToHeap {
284+
// Free large span to heap
285+
286+
// NOTE(rsc,dvyukov): The original implementation of efence
287+
// in CL 22060046 used SysFree instead of SysFault, so that
288+
// the operating system would eventually give the memory
289+
// back to us again, so that an efence program could run
290+
// longer without running out of memory. Unfortunately,
291+
// calling SysFree here without any kind of adjustment of the
292+
// heap data structures means that when the memory does
293+
// come back to us, we have the wrong metadata for it, either in
294+
// the MSpan structures or in the garbage collection bitmap.
295+
// Using SysFault here means that the program will run out of
296+
// memory fairly quickly in efence mode, but at least it won't
297+
// have mysterious crashes due to confused memory reuse.
298+
// It should be possible to switch back to SysFree if we also
299+
// implement and then call some kind of MHeap_DeleteSpan.
300+
if debug.efence > 0 {
301+
s.limit = 0 // prevent mlookup from finding this span
302+
sysFault(unsafe.Pointer(uintptr(s.start<<_PageShift)), size)
303+
} else {
304+
mHeap_Free(&mheap_, s, 1)
305+
}
306+
c.local_nlargefree++
307+
c.local_largefree += size
308+
res = true
301309
}
302310
if trace.enabled {
303311
traceGCSweepDone()

0 commit comments

Comments
 (0)