Skip to content

Commit 80c98fa

Browse files
committed
runtime/trace: record event sequence numbers explicitly
Nearly all the flaky failures we've seen in trace tests have been due to the use of time stamps to determine relative event ordering. This is tricky for many reasons, including: - different cores might not have exactly synchronized clocks - VMs are worse than real hardware - non-x86 chips have different timer resolution than x86 chips - on fast systems two events can end up with the same time stamp Stop trying to make time reliable. It's clearly not going to be for Go 1.5. Instead, record an explicit event sequence number for ordering. Using our own counter solves all of the above problems. The trace still contains time stamps, of course. The sequence number is just used for ordering. Should alleviate #10554 somewhat. Then tickDiv can be chosen to be a useful time unit instead of having to be exact for ordering. Separating ordering and time stamps lets the trace parser diagnose systems where the time stamp order and actual order do not match for one reason or another. This CL adds that check to the end of trace.Parse, after all other sequence order-based checking. If that error is found, we skip the test instead of failing it. Putting the check in trace.Parse means that cmd/trace will pick up the same check, refusing to display a trace where the time stamps do not match actual ordering. Using net/http's BenchmarkClientServerParallel4 on various CPU counts, not tracing vs tracing: name old time/op new time/op delta ClientServerParallel4 50.4µs ± 4% 80.2µs ± 4% +59.06% (p=0.000 n=10+10) ClientServerParallel4-2 33.1µs ± 7% 57.8µs ± 5% +74.53% (p=0.000 n=10+10) ClientServerParallel4-4 18.5µs ± 4% 32.6µs ± 3% +75.77% (p=0.000 n=10+10) ClientServerParallel4-6 12.9µs ± 5% 24.4µs ± 2% +89.33% (p=0.000 n=10+10) ClientServerParallel4-8 11.4µs ± 6% 21.0µs ± 3% +83.40% (p=0.000 n=10+10) ClientServerParallel4-12 14.4µs ± 4% 23.8µs ± 4% +65.67% (p=0.000 n=10+10) Fixes #10512. Change-Id: I173eecf8191e86feefd728a5aad25bf1bc094b12 Reviewed-on: https://go-review.googlesource.com/12579 Reviewed-by: Austin Clements <[email protected]>
1 parent fde3926 commit 80c98fa

File tree

7 files changed

+116
-53
lines changed

7 files changed

+116
-53
lines changed

src/internal/trace/parser.go

+47-27
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@ import (
2020
type Event struct {
2121
Off int // offset in input file (for debugging and error reporting)
2222
Type byte // one of Ev*
23+
Seq int64 // sequence number
2324
Ts int64 // timestamp in nanoseconds
2425
P int // P on which the event happened (can be one of TimerP, NetpollP, SyscallP)
2526
G uint64 // G on which the event happened
2627
StkID uint64 // unique stack ID
2728
Stk []*Frame // stack trace (can be empty)
28-
Args [2]uint64 // event-type-specific arguments
29+
Args [3]uint64 // event-type-specific arguments
2930
// linked event (can be nil), depends on event type:
3031
// for GCStart: the GCStop
3132
// for GCScanStart: the GCScanDone
@@ -88,12 +89,12 @@ type rawEvent struct {
8889
// It does not care about specific event types and argument meaning.
8990
func readTrace(r io.Reader) ([]rawEvent, error) {
9091
// Read and validate trace header.
91-
var buf [8]byte
92+
var buf [16]byte
9293
off, err := r.Read(buf[:])
93-
if off != 8 || err != nil {
94+
if off != 16 || err != nil {
9495
return nil, fmt.Errorf("failed to read header: read %v, err %v", off, err)
9596
}
96-
if bytes.Compare(buf[:], []byte("gotrace\x00")) != 0 {
97+
if bytes.Compare(buf[:], []byte("go 1.5 trace\x00\x00\x00\x00")) != 0 {
9798
return nil, fmt.Errorf("not a trace file")
9899
}
99100

@@ -111,10 +112,10 @@ func readTrace(r io.Reader) ([]rawEvent, error) {
111112
}
112113
off += n
113114
typ := buf[0] << 2 >> 2
114-
narg := buf[0]>>6 + 1
115+
narg := buf[0] >> 6
115116
ev := rawEvent{typ: typ, off: off0}
116-
if narg <= 3 {
117-
for i := 0; i < int(narg); i++ {
117+
if narg < 3 {
118+
for i := 0; i < int(narg)+2; i++ { // sequence number and time stamp are present but not counted in narg
118119
var v uint64
119120
v, off, err = readVal(r, off)
120121
if err != nil {
@@ -123,7 +124,7 @@ func readTrace(r io.Reader) ([]rawEvent, error) {
123124
ev.args = append(ev.args, v)
124125
}
125126
} else {
126-
// If narg == 4, the first value is length of the event in bytes.
127+
// If narg == 3, the first value is length of the event in bytes.
127128
var v uint64
128129
v, off, err = readVal(r, off)
129130
if err != nil {
@@ -150,7 +151,7 @@ func readTrace(r io.Reader) ([]rawEvent, error) {
150151
// Parse events transforms raw events into events.
151152
// It does analyze and verify per-event-type arguments.
152153
func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
153-
var ticksPerSec, lastTs int64
154+
var ticksPerSec, lastSeq, lastTs int64
154155
var lastG, timerGoid uint64
155156
var lastP int
156157
lastGs := make(map[int]uint64) // last goroutine running on P
@@ -171,6 +172,7 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
171172
narg++
172173
}
173174
if raw.typ != EvBatch && raw.typ != EvFrequency && raw.typ != EvTimerGoroutine {
175+
narg++ // sequence number
174176
narg++ // timestamp
175177
}
176178
if len(raw.args) != narg {
@@ -184,7 +186,8 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
184186
lastGs[lastP] = lastG
185187
lastP = int(raw.args[0])
186188
lastG = lastGs[lastP]
187-
lastTs = int64(raw.args[1])
189+
lastSeq = int64(raw.args[1])
190+
lastTs = int64(raw.args[2])
188191
case EvFrequency:
189192
ticksPerSec = int64(raw.args[0])
190193
if ticksPerSec <= 0 {
@@ -221,13 +224,15 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
221224
}
222225
default:
223226
e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG}
224-
e.Ts = lastTs + int64(raw.args[0])
227+
e.Seq = lastSeq + int64(raw.args[0])
228+
e.Ts = lastTs + int64(raw.args[1])
229+
lastSeq = e.Seq
225230
lastTs = e.Ts
226231
for i := range desc.Args {
227-
e.Args[i] = raw.args[i+1]
232+
e.Args[i] = raw.args[i+2]
228233
}
229234
if desc.Stack {
230-
e.StkID = raw.args[len(desc.Args)+1]
235+
e.StkID = raw.args[len(desc.Args)+2]
231236
}
232237
switch raw.typ {
233238
case EvGoStart:
@@ -241,10 +246,11 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
241246
EvGoSysBlock:
242247
lastG = 0
243248
case EvGoSysExit:
244-
if e.Args[1] != 0 {
245-
// EvGoSysExit emission is delayed until the thread has a P.
246-
// Give it the real timestamp.
247-
e.Ts = int64(e.Args[1])
249+
// EvGoSysExit emission is delayed until the thread has a P.
250+
// Give it the real sequence number and time stamp.
251+
e.Seq = int64(e.Args[1])
252+
if e.Args[2] != 0 {
253+
e.Ts = int64(e.Args[2])
248254
}
249255
}
250256
events = append(events, e)
@@ -262,10 +268,8 @@ func parseEvents(rawEvents []rawEvent) (events []*Event, err error) {
262268
}
263269
}
264270

265-
// Sort by time and translate cpu ticks to real time.
266-
// Use stable sort because adjacent events in a batch can have the same timestamp
267-
// (this was observed on some VMs).
268-
sort.Stable(eventList(events))
271+
// Sort by sequence number and translate cpu ticks to real time.
272+
sort.Sort(eventList(events))
269273
if ticksPerSec == 0 {
270274
err = fmt.Errorf("no EvFrequency event")
271275
return
@@ -341,6 +345,10 @@ func removeFutile(events []*Event) ([]*Event, error) {
341345
return newEvents, nil
342346
}
343347

348+
// ErrTimeOrder is returned by Parse when the trace contains
349+
// time stamps that do not respect actual event ordering.
350+
var ErrTimeOrder = fmt.Errorf("time stamps out of order")
351+
344352
// postProcessTrace does inter-event verification and information restoration.
345353
// The resulting trace is guaranteed to be consistent
346354
// (for example, a P does not run two Gs at the same time, or a G is indeed
@@ -555,6 +563,18 @@ func postProcessTrace(events []*Event) error {
555563
// TODO(dvyukov): restore stacks for EvGoStart events.
556564
// TODO(dvyukov): test that all EvGoStart events has non-nil Link.
557565

566+
// Last, after all the other consistency checks,
567+
// make sure time stamps respect sequence numbers.
568+
// The tests will skip (not fail) the test case if they see this error,
569+
// so check everything else that could possibly be wrong first.
570+
lastTs := int64(0)
571+
for _, ev := range events {
572+
if ev.Ts < lastTs {
573+
return ErrTimeOrder
574+
}
575+
lastTs = ev.Ts
576+
}
577+
558578
return nil
559579
}
560580

@@ -639,7 +659,7 @@ func readVal(r io.Reader, off0 int) (v uint64, off int, err error) {
639659
var n int
640660
n, err = r.Read(buf[:])
641661
if err != nil || n != 1 {
642-
return 0, 0, fmt.Errorf("failed to read trace at offset: read %v, error %v", off0, n, err)
662+
return 0, 0, fmt.Errorf("failed to read trace at offset %d: read %v, error %v", off0, n, err)
643663
}
644664
off++
645665
v |= uint64(buf[0]&0x7f) << (uint(i) * 7)
@@ -657,7 +677,7 @@ func (l eventList) Len() int {
657677
}
658678

659679
func (l eventList) Less(i, j int) bool {
660-
return l[i].Ts < l[j].Ts
680+
return l[i].Seq < l[j].Seq
661681
}
662682

663683
func (l eventList) Swap(i, j int) {
@@ -725,8 +745,8 @@ var EventDescriptions = [EvCount]struct {
725745
Args []string
726746
}{
727747
EvNone: {"None", false, []string{}},
728-
EvBatch: {"Batch", false, []string{"p", "ticks"}},
729-
EvFrequency: {"Frequency", false, []string{"freq"}},
748+
EvBatch: {"Batch", false, []string{"p", "seq", "ticks"}},
749+
EvFrequency: {"Frequency", false, []string{"freq", "unused"}},
730750
EvStack: {"Stack", false, []string{"id", "siz"}},
731751
EvGomaxprocs: {"Gomaxprocs", true, []string{"procs"}},
732752
EvProcStart: {"ProcStart", false, []string{"thread"}},
@@ -753,12 +773,12 @@ var EventDescriptions = [EvCount]struct {
753773
EvGoBlockCond: {"GoBlockCond", true, []string{}},
754774
EvGoBlockNet: {"GoBlockNet", true, []string{}},
755775
EvGoSysCall: {"GoSysCall", true, []string{}},
756-
EvGoSysExit: {"GoSysExit", false, []string{"g", "ts"}},
776+
EvGoSysExit: {"GoSysExit", false, []string{"g", "seq", "ts"}},
757777
EvGoSysBlock: {"GoSysBlock", false, []string{}},
758778
EvGoWaiting: {"GoWaiting", false, []string{"g"}},
759779
EvGoInSyscall: {"GoInSyscall", false, []string{"g"}},
760780
EvHeapAlloc: {"HeapAlloc", false, []string{"mem"}},
761781
EvNextGC: {"NextGC", false, []string{"mem"}},
762-
EvTimerGoroutine: {"TimerGoroutine", false, []string{"g"}},
782+
EvTimerGoroutine: {"TimerGoroutine", false, []string{"g", "unused"}},
763783
EvFutileWakeup: {"FutileWakeup", false, []string{}},
764784
}

src/internal/trace/parser_test.go

+4
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ func TestCorruptedInputs(t *testing.T) {
1616
"gotrace\x00Q00\x020",
1717
"gotrace\x00T00\x020",
1818
"gotrace\x00\xc3\x0200",
19+
"go 1.5 trace\x00\x00\x00\x00\x020",
20+
"go 1.5 trace\x00\x00\x00\x00Q00\x020",
21+
"go 1.5 trace\x00\x00\x00\x00T00\x020",
22+
"go 1.5 trace\x00\x00\x00\x00\xc3\x0200",
1923
}
2024
for _, data := range tests {
2125
events, err := Parse(strings.NewReader(data))

src/runtime/proc1.go

+5-4
Original file line numberDiff line numberDiff line change
@@ -1348,7 +1348,7 @@ func execute(gp *g, inheritTime bool) {
13481348
// GoSysExit has to happen when we have a P, but before GoStart.
13491349
// So we emit it here.
13501350
if gp.syscallsp != 0 && gp.sysblocktraced {
1351-
traceGoSysExit(gp.sysexitticks)
1351+
traceGoSysExit(gp.sysexitseq, gp.sysexitticks)
13521352
}
13531353
traceGoStart()
13541354
}
@@ -1986,6 +1986,7 @@ func exitsyscall(dummy int32) {
19861986
}
19871987

19881988
_g_.sysexitticks = 0
1989+
_g_.sysexitseq = 0
19891990
if trace.enabled {
19901991
// Wait till traceGoSysBlock event is emitted.
19911992
// This ensures consistency of the trace (the goroutine is started after it is blocked).
@@ -1996,7 +1997,7 @@ func exitsyscall(dummy int32) {
19961997
// Tracing code can invoke write barriers that cannot run without a P.
19971998
// So instead we remember the syscall exit time and emit the event
19981999
// in execute when we have a P.
1999-
_g_.sysexitticks = cputicks()
2000+
_g_.sysexitseq, _g_.sysexitticks = tracestamp()
20002001
}
20012002

20022003
_g_.m.locks--
@@ -2044,7 +2045,7 @@ func exitsyscallfast() bool {
20442045
// Denote blocking of the new syscall.
20452046
traceGoSysBlock(_g_.m.p.ptr())
20462047
// Denote completion of the current syscall.
2047-
traceGoSysExit(0)
2048+
traceGoSysExit(tracestamp())
20482049
})
20492050
}
20502051
_g_.m.p.ptr().syscalltick++
@@ -2068,7 +2069,7 @@ func exitsyscallfast() bool {
20682069
osyield()
20692070
}
20702071
}
2071-
traceGoSysExit(0)
2072+
traceGoSysExit(tracestamp())
20722073
}
20732074
})
20742075
if ok {

src/runtime/runtime2.go

+11-10
Original file line numberDiff line numberDiff line change
@@ -235,16 +235,17 @@ type g struct {
235235
waitsince int64 // approx time when the g become blocked
236236
waitreason string // if status==Gwaiting
237237
schedlink guintptr
238-
preempt bool // preemption signal, duplicates stackguard0 = stackpreempt
239-
paniconfault bool // panic (instead of crash) on unexpected fault address
240-
preemptscan bool // preempted g does scan for gc
241-
gcscandone bool // g has scanned stack; protected by _Gscan bit in status
242-
gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan
243-
throwsplit bool // must not split stack
244-
raceignore int8 // ignore race detection events
245-
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
246-
sysexitticks int64 // cputicks when syscall has returned (for tracing)
247-
m *m // for debuggers, but offset not hard-coded
238+
preempt bool // preemption signal, duplicates stackguard0 = stackpreempt
239+
paniconfault bool // panic (instead of crash) on unexpected fault address
240+
preemptscan bool // preempted g does scan for gc
241+
gcscandone bool // g has scanned stack; protected by _Gscan bit in status
242+
gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan
243+
throwsplit bool // must not split stack
244+
raceignore int8 // ignore race detection events
245+
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
246+
sysexitticks int64 // cputicks when syscall has returned (for tracing)
247+
sysexitseq uint64 // trace seq when syscall has returned (for tracing)
248+
m *m // for debuggers, but offset not hard-coded
248249
lockedm *m
249250
sig uint32
250251
writebuf []byte

0 commit comments

Comments
 (0)