runtime: add as many extra M's as needed

ianlancetaylor · ianlancetaylor · commit 50048a4e8ee1 · 2016-07-20T13:31:55.000Z
When a non-Go thread calls into Go, the runtime needs an M to run the Go code. The runtime keeps a list of extra M's available. When the last extra M is allocated, the needextram field is set to tell it to allocate a new extra M as soon as it is running in Go. This ensures that an extra M will always be available for the next thread. However, if many threads need an extra M at the same time, this serializes them all. One thread will get an extra M with the needextram field set. All the other threads will see that there is no M available and will go to sleep. The one thread that succeeded will create a new extra M. One lucky thread will get it. All the other threads will see that there is no M available and will go to sleep. The effect is thundering herd, as all the threads looking for an extra M go through the process one by one. This seems to have a particularly bad effect on the FreeBSD scheduler for some reason. With this change, we track the number of threads waiting for an M, and create all of them as soon as one thread gets through. This still means that all the threads will fight for the lock to pick up the next M. But at least each thread that gets the lock will succeed, instead of going to sleep only to fight again. This smooths out the performance greatly on FreeBSD, reducing the average wall time of `testprogcgo CgoCallbackGC` by 74%. On GNU/Linux the average wall time goes down by 9%. Fixes #13926 Fixes #16396 Change-Id: I6dc42a4156085a7ed4e5334c60b39db8f8ef8fea Reviewed-on: https://go-review.googlesource.com/25047 Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
@@ -80,6 +80,7 @@
 package runtime
 
 import (
+	"runtime/internal/atomic"
 	"runtime/internal/sys"
 	"unsafe"
 )
@@ -176,7 +177,7 @@ func cgocallbackg(ctxt uintptr) {
 
 func cgocallbackg1(ctxt uintptr) {
 	gp := getg()
-	if gp.m.needextram {
+	if gp.m.needextram || atomic.Load(&extraMWaiters) > 0 {
 		gp.m.needextram = false
 		systemstack(newextram)
 	}
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
@@ -1389,10 +1389,27 @@ func needm(x byte) {
 
 var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n")
 
-// newextram allocates an m and puts it on the extra list.
+// newextram allocates m's and puts them on the extra list.
 // It is called with a working local m, so that it can do things
 // like call schedlock and allocate.
 func newextram() {
+	c := atomic.Xchg(&extraMWaiters, 0)
+	if c > 0 {
+		for i := uint32(0); i < c; i++ {
+			oneNewExtraM()
+		}
+	} else {
+		// Make sure there is at least one extra M.
+		mp := lockextra(true)
+		unlockextra(mp)
+		if mp == nil {
+			oneNewExtraM()
+		}
+	}
+}
+
+// oneNewExtraM allocates an m and puts it on the extra list.
+func oneNewExtraM() {
 	// Create extra goroutine locked to extra m.
 	// The goroutine is the context in which the cgo callback will run.
 	// The sched.pc will never be returned to, but setting it to
@@ -1485,6 +1502,7 @@ func getm() uintptr {
 }
 
 var extram uintptr
+var extraMWaiters uint32
 
 // lockextra locks the extra list and returns the list head.
 // The caller must unlock the list by storing a new list head
@@ -1495,6 +1513,7 @@ var extram uintptr
 func lockextra(nilokay bool) *m {
 	const locked = 1
 
+	incr := false
 	for {
 		old := atomic.Loaduintptr(&extram)
 		if old == locked {
@@ -1503,6 +1522,13 @@ func lockextra(nilokay bool) *m {
 			continue
 		}
 		if old == 0 && !nilokay {
+			if !incr {
+				// Add 1 to the number of threads
+				// waiting for an M.
+				// This is cleared by newextram.
+				atomic.Xadd(&extraMWaiters, 1)
+				incr = true
+			}
 			usleep(1)
 			continue
 		}

Original file line number	Diff line number	Diff line change
`@@ -80,6 +80,7 @@`
`80`	`80`	`package runtime`
`81`	`81`
`82`	`82`	`import (`
	`83`	`+ "runtime/internal/atomic"`
`83`	`84`	`"runtime/internal/sys"`
`84`	`85`	`"unsafe"`
`85`	`86`	`)`
`@@ -176,7 +177,7 @@ func cgocallbackg(ctxt uintptr) {`
`176`	`177`
`177`	`178`	`func cgocallbackg1(ctxt uintptr) {`
`178`	`179`	`gp := getg()`
`179`		`- if gp.m.needextram {`
	`180`	`+ if gp.m.needextram \|\| atomic.Load(&extraMWaiters) > 0 {`
`180`	`181`	`gp.m.needextram = false`
`181`	`182`	`systemstack(newextram)`
`182`	`183`	`}`