Skip to content

Commit e72f5f6

Browse files
committed
runtime: fix tracing of syscallexit
There were two issues. 1. Delayed EvGoSysExit could have been emitted during TraceStart, while it had not yet emitted EvGoInSyscall. 2. Delayed EvGoSysExit could have been emitted during next tracing session. Fixes #10476 Fixes #11262 Change-Id: Iab68eb31cf38eb6eb6eee427f49c5ca0865a8c64 Reviewed-on: https://go-review.googlesource.com/9132 Reviewed-by: Russ Cox <[email protected]>
1 parent 62f169c commit e72f5f6

File tree

4 files changed

+64
-51
lines changed

4 files changed

+64
-51
lines changed

src/runtime/pprof/trace_test.go

-1
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,6 @@ func TestTraceStress(t *testing.T) {
231231
// And concurrently with all that start/stop trace 3 times.
232232
func TestTraceStressStartStop(t *testing.T) {
233233
skipTraceTestsIfNeeded(t)
234-
t.Skip("test is unreliable; issue #10476")
235234

236235
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8))
237236
outerDone := make(chan bool)

src/runtime/proc1.go

+10-15
Original file line numberDiff line numberDiff line change
@@ -1339,6 +1339,11 @@ func execute(gp *g, inheritTime bool) {
13391339
}
13401340

13411341
if trace.enabled {
1342+
// GoSysExit has to happen when we have a P, but before GoStart.
1343+
// So we emit it here.
1344+
if gp.syscallsp != 0 && gp.sysblocktraced {
1345+
traceGoSysExit(gp.sysexitticks)
1346+
}
13421347
traceGoStart()
13431348
}
13441349

@@ -1824,6 +1829,7 @@ func reentersyscall(pc, sp uintptr) {
18241829
}
18251830

18261831
_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
1832+
_g_.sysblocktraced = true
18271833
_g_.m.mcache = nil
18281834
_g_.m.p.ptr().m = 0
18291835
atomicstore(&_g_.m.p.ptr().status, _Psyscall)
@@ -1885,6 +1891,7 @@ func entersyscallblock(dummy int32) {
18851891
_g_.throwsplit = true
18861892
_g_.stackguard0 = stackPreempt // see comment in entersyscall
18871893
_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
1894+
_g_.sysblocktraced = true
18881895
_g_.m.p.ptr().syscalltick++
18891896

18901897
// Leave SP around for GC and traceback.
@@ -1970,7 +1977,7 @@ func exitsyscall(dummy int32) {
19701977
return
19711978
}
19721979

1973-
var exitTicks int64
1980+
_g_.sysexitticks = 0
19741981
if trace.enabled {
19751982
// Wait till traceGoSysBlock event is emitted.
19761983
// This ensures consistency of the trace (the goroutine is started after it is blocked).
@@ -1980,20 +1987,15 @@ func exitsyscall(dummy int32) {
19801987
// We can't trace syscall exit right now because we don't have a P.
19811988
// Tracing code can invoke write barriers that cannot run without a P.
19821989
// So instead we remember the syscall exit time and emit the event
1983-
// below when we have a P.
1984-
exitTicks = cputicks()
1990+
// in execute when we have a P.
1991+
_g_.sysexitticks = cputicks()
19851992
}
19861993

19871994
_g_.m.locks--
19881995

19891996
// Call the scheduler.
19901997
mcall(exitsyscall0)
19911998

1992-
// The goroutine must not be re-scheduled up to traceGoSysExit.
1993-
// Otherwise we can emit GoStart but not GoSysExit, that would lead
1994-
// no an inconsistent trace.
1995-
_g_.m.locks++
1996-
19971999
if _g_.m.mcache == nil {
19982000
throw("lost mcache")
19992001
}
@@ -2007,13 +2009,6 @@ func exitsyscall(dummy int32) {
20072009
_g_.syscallsp = 0
20082010
_g_.m.p.ptr().syscalltick++
20092011
_g_.throwsplit = false
2010-
2011-
if exitTicks != 0 {
2012-
systemstack(func() {
2013-
traceGoSysExit(exitTicks)
2014-
})
2015-
}
2016-
_g_.m.locks--
20172012
}
20182013

20192014
//go:nosplit

src/runtime/runtime2.go

+36-33
Original file line numberDiff line numberDiff line change
@@ -220,39 +220,41 @@ type g struct {
220220
stackguard0 uintptr // offset known to liblink
221221
stackguard1 uintptr // offset known to liblink
222222

223-
_panic *_panic // innermost panic - offset known to liblink
224-
_defer *_defer // innermost defer
225-
stackAlloc uintptr // stack allocation is [stack.lo,stack.lo+stackAlloc)
226-
sched gobuf
227-
syscallsp uintptr // if status==Gsyscall, syscallsp = sched.sp to use during gc
228-
syscallpc uintptr // if status==Gsyscall, syscallpc = sched.pc to use during gc
229-
stkbar []stkbar // stack barriers, from low to high
230-
stkbarPos uintptr // index of lowest stack barrier not hit
231-
param unsafe.Pointer // passed parameter on wakeup
232-
atomicstatus uint32
233-
goid int64
234-
waitsince int64 // approx time when the g become blocked
235-
waitreason string // if status==Gwaiting
236-
schedlink guintptr
237-
preempt bool // preemption signal, duplicates stackguard0 = stackpreempt
238-
paniconfault bool // panic (instead of crash) on unexpected fault address
239-
preemptscan bool // preempted g does scan for gc
240-
gcscandone bool // g has scanned stack; protected by _Gscan bit in status
241-
gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan
242-
throwsplit bool // must not split stack
243-
raceignore int8 // ignore race detection events
244-
m *m // for debuggers, but offset not hard-coded
245-
lockedm *m
246-
sig uint32
247-
writebuf []byte
248-
sigcode0 uintptr
249-
sigcode1 uintptr
250-
sigpc uintptr
251-
gopc uintptr // pc of go statement that created this goroutine
252-
startpc uintptr // pc of goroutine function
253-
racectx uintptr
254-
waiting *sudog // sudog structures this g is waiting on (that have a valid elem ptr)
255-
readyg *g // scratch for readyExecute
223+
_panic *_panic // innermost panic - offset known to liblink
224+
_defer *_defer // innermost defer
225+
stackAlloc uintptr // stack allocation is [stack.lo,stack.lo+stackAlloc)
226+
sched gobuf
227+
syscallsp uintptr // if status==Gsyscall, syscallsp = sched.sp to use during gc
228+
syscallpc uintptr // if status==Gsyscall, syscallpc = sched.pc to use during gc
229+
stkbar []stkbar // stack barriers, from low to high
230+
stkbarPos uintptr // index of lowest stack barrier not hit
231+
param unsafe.Pointer // passed parameter on wakeup
232+
atomicstatus uint32
233+
goid int64
234+
waitsince int64 // approx time when the g become blocked
235+
waitreason string // if status==Gwaiting
236+
schedlink guintptr
237+
preempt bool // preemption signal, duplicates stackguard0 = stackpreempt
238+
paniconfault bool // panic (instead of crash) on unexpected fault address
239+
preemptscan bool // preempted g does scan for gc
240+
gcscandone bool // g has scanned stack; protected by _Gscan bit in status
241+
gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan
242+
throwsplit bool // must not split stack
243+
raceignore int8 // ignore race detection events
244+
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
245+
sysexitticks int64 // cputicks when syscall has returned (for tracing)
246+
m *m // for debuggers, but offset not hard-coded
247+
lockedm *m
248+
sig uint32
249+
writebuf []byte
250+
sigcode0 uintptr
251+
sigcode1 uintptr
252+
sigpc uintptr
253+
gopc uintptr // pc of go statement that created this goroutine
254+
startpc uintptr // pc of goroutine function
255+
racectx uintptr
256+
waiting *sudog // sudog structures this g is waiting on (that have a valid elem ptr)
257+
readyg *g // scratch for readyExecute
256258

257259
// Per-G gcController state
258260
gcalloc uintptr // bytes allocated during this GC cycle
@@ -320,6 +322,7 @@ type m struct {
320322
waitlock unsafe.Pointer
321323
waittraceev byte
322324
waittraceskip int
325+
startingtrace bool
323326
syscalltick uint32
324327
//#ifdef GOOS_windows
325328
thread uintptr // thread handle

src/runtime/trace.go

+18-2
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,18 @@ func StartTrace() error {
149149

150150
trace.ticksStart = cputicks()
151151
trace.timeStart = nanotime()
152-
trace.enabled = true
153152
trace.headerWritten = false
154153
trace.footerWritten = false
155154

155+
// Can't set trace.enabled yet. While the world is stopped, exitsyscall could
156+
// already emit a delayed event (see exitTicks in exitsyscall) if we set trace.enabled here.
157+
// That would lead to an inconsistent trace:
158+
// - either GoSysExit appears before EvGoInSyscall,
159+
// - or GoSysExit appears for a goroutine for which we don't emit EvGoInSyscall below.
160+
// To instruct traceEvent that it must not ignore events below, we set startingtrace.
161+
// trace.enabled is set afterwards once we have emitted all preliminary events.
162+
_g_ := getg()
163+
_g_.m.startingtrace = true
156164
for _, gp := range allgs {
157165
status := readgstatus(gp)
158166
if status != _Gdead {
@@ -163,10 +171,14 @@ func StartTrace() error {
163171
}
164172
if status == _Gsyscall {
165173
traceEvent(traceEvGoInSyscall, -1, uint64(gp.goid))
174+
} else {
175+
gp.sysblocktraced = false
166176
}
167177
}
168178
traceProcStart()
169179
traceGoStart()
180+
_g_.m.startingtrace = false
181+
trace.enabled = true
170182

171183
unlock(&trace.bufLock)
172184

@@ -418,7 +430,7 @@ func traceEvent(ev byte, skip int, args ...uint64) {
418430
// so if we see trace.enabled == true now, we know it's true for the rest of the function.
419431
// Exitsyscall can run even during stopTheWorld. The race with StartTrace/StopTrace
420432
// during tracing in exitsyscall is resolved by locking trace.bufLock in traceLockBuffer.
421-
if !trace.enabled {
433+
if !trace.enabled && !mp.startingtrace {
422434
traceReleaseBuffer(pid)
423435
return
424436
}
@@ -789,6 +801,10 @@ func traceGoSysCall() {
789801
}
790802

791803
func traceGoSysExit(ts int64) {
804+
if ts != 0 && ts < trace.ticksStart {
805+
// The timestamp was obtained during a previous tracing session, ignore.
806+
return
807+
}
792808
traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), uint64(ts)/traceTickDiv)
793809
}
794810

0 commit comments

Comments
 (0)