Skip to content

Commit 7b87631

Browse files
committed
cmd/go: detect when package or binary is stale due to removed source file
The go command uses file modification times to decide when a package is out of date: if the .a file is older than a source file, the .a file needs to be rebuilt. This scheme breaks down when multiple source files compile into a single .a file: if one source file is removed but no other changes are made, there is no indication that the .a file is out of date.

The fix is to store a value called a build ID in the package archive itself. The build ID is a hash of the names of all source files compiled into the package. A later go command can read the build ID out of the package archive and compare it to the build ID derived from the list of source files it now sees in the directory. If the build IDs differ, the file list has changed, and the package must be rebuilt.

There is a cost here: when scanning a package directory, in addition to reading the beginning of every source file for build tags and imports, the go command now also reads the beginning of the associated package archive, for the build ID. This is at most a doubling in the number of files read. On my 2012 MacBook Pro, the time for 'go list std' increases from about 0.215 seconds to about 0.23 seconds.

For executable binaries, the approach is the same except that the build ID information is stored in a trailer at the end of the executable file. It remains to be seen if anything objects to the trailer. I don't expect problems except maybe on Plan 9.

Fixes #3895.

Change-Id: I21b4ebf5890c1a39e4a013eabe1ddbb5f3510c04
Reviewed-on: https://go-review.googlesource.com/9154
Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent 81d5810 commit 7b87631

File tree

4 files changed

+306
-59
lines changed

4 files changed

+306
-59
lines changed

src/cmd/dist/build.go

+4-57
Original file line numberDiff line numberDiff line change
@@ -888,72 +888,19 @@ var buildorder = []string{
888888
"text/template",
889889
"go/doc",
890890
"go/build",
891+
"hash",
892+
"crypto",
893+
"crypto/sha1",
891894
"cmd/go",
892895
}
893896

894-
// cleantab records the directories to clean in 'go clean'.
895-
// It is bigger than the buildorder because we clean all the
896-
// compilers but build only the $GOARCH ones.
897-
var cleantab = []string{
898-
// Commands and C libraries.
899-
"cmd/compile",
900-
"cmd/go",
901-
"cmd/link",
902-
"cmd/old5a",
903-
"cmd/old6a",
904-
"cmd/old8a",
905-
"cmd/old9a",
906-
907-
// Go packages.
908-
"bufio",
909-
"bytes",
910-
"container/heap",
911-
"encoding",
912-
"encoding/base64",
913-
"encoding/json",
914-
"errors",
915-
"flag",
916-
"fmt",
917-
"go/ast",
918-
"go/build",
919-
"go/doc",
920-
"go/parser",
921-
"go/scanner",
922-
"go/token",
923-
"io",
924-
"io/ioutil",
925-
"log",
926-
"math",
927-
"net/url",
928-
"os",
929-
"os/exec",
930-
"path",
931-
"path/filepath",
932-
"reflect",
933-
"regexp",
934-
"regexp/syntax",
935-
"runtime",
936-
"sort",
937-
"strconv",
938-
"strings",
939-
"sync",
940-
"sync/atomic",
941-
"syscall",
942-
"text/template",
943-
"text/template/parse",
944-
"time",
945-
"unicode",
946-
"unicode/utf16",
947-
"unicode/utf8",
948-
}
949-
950897
var runtimegen = []string{
951898
"zaexperiment.h",
952899
"zversion.go",
953900
}
954901

955902
func clean() {
956-
for _, name := range cleantab {
903+
for _, name := range buildorder {
957904
path := pathf("%s/src/%s", goroot, name)
958905
// Remove generated files.
959906
for _, elem := range xreaddir(path) {

src/cmd/go/build.go

+23
Original file line numberDiff line numberDiff line change
@@ -1406,6 +1406,26 @@ func (b *builder) build(a *action) (err error) {
14061406
if err := buildToolchain.ld(b, a.p, a.target, all, a.objpkg, objects); err != nil {
14071407
return err
14081408
}
1409+
1410+
// Write build ID to end of binary.
1411+
// We could try to put it in a custom section or some such,
1412+
// but then we'd need different code for ELF, Mach-O, PE, and Plan 9.
1413+
// Instead, just append to the binary. No one should care.
1414+
// Issue #11048 is to fix this for ELF and Mach-O at least.
1415+
if buildToolchain == (gcToolchain{}) && a.p.buildID != "" {
1416+
f, err := os.OpenFile(a.target, os.O_WRONLY|os.O_APPEND, 0)
1417+
if err != nil {
1418+
return err
1419+
}
1420+
defer f.Close()
1421+
// Note: This string must match readBuildIDFromBinary in pkg.go.
1422+
if _, err := fmt.Fprintf(f, "\x00\n\ngo binary\nbuild id %q\nend go binary\n", a.p.buildID); err != nil {
1423+
return err
1424+
}
1425+
if err := f.Close(); err != nil {
1426+
return err
1427+
}
1428+
}
14091429
}
14101430

14111431
return nil
@@ -2131,6 +2151,9 @@ func (gcToolchain) gc(b *builder, p *Package, archive, obj string, asmhdr bool,
21312151
if buildContext.InstallSuffix != "" {
21322152
gcargs = append(gcargs, "-installsuffix", buildContext.InstallSuffix)
21332153
}
2154+
if p.buildID != "" {
2155+
gcargs = append(gcargs, "-buildid", p.buildID)
2156+
}
21342157

21352158
args := []interface{}{buildToolExec, tool("compile"), "-o", ofile, "-trimpath", b.work, buildGcflags, gcargs, "-D", p.localPrefix, importArgs}
21362159
if ofile == archive {

src/cmd/go/pkg.go

+211-2
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,20 @@ package main
66

77
import (
88
"bytes"
9+
"crypto/sha1"
910
"errors"
1011
"fmt"
1112
"go/build"
1213
"go/scanner"
1314
"go/token"
15+
"io"
1416
"io/ioutil"
1517
"os"
1618
pathpkg "path"
1719
"path/filepath"
1820
"runtime"
1921
"sort"
22+
"strconv"
2023
"strings"
2124
"time"
2225
"unicode"
@@ -95,6 +98,7 @@ type Package struct {
9598
coverMode string // preprocess Go source files with the coverage tool in this mode
9699
coverVars map[string]*CoverVar // variables created by coverage analysis
97100
omitDWARF bool // tell linker not to write DWARF information
101+
buildID string // expected build ID for generated package
98102
}
99103

100104
// CoverVar holds the name of the generated coverage variables targeting the named file.
@@ -687,6 +691,36 @@ func (p *Package) load(stk *importStack, bp *build.Package, err error) *Package
687691
}
688692
}
689693

694+
// Compute build ID for this package.
695+
// Build ID is hash of information we want to detect changes in.
696+
// The mtime-based checks in computeStale take care of most
697+
// of that information, but they cannot detect the removal of a
698+
// source file from a directory (with no changes to files that remain
699+
// and no new files in that directory). We hash the list of source
700+
// files (without full path, to allow moving the entire tree)
701+
// so that if one is removed, we detect it via the build IDs.
702+
// In the future we might include other relevant information,
703+
// like build tags or whether we're using the race detector or
704+
// (if it becomes cheap enough) file contents.
705+
h := sha1.New()
706+
inputFiles := stringList(
707+
p.GoFiles,
708+
p.CgoFiles,
709+
p.CFiles,
710+
p.CXXFiles,
711+
p.MFiles,
712+
p.HFiles,
713+
p.SFiles,
714+
p.SysoFiles,
715+
p.SwigFiles,
716+
p.SwigCXXFiles,
717+
)
718+
fmt.Fprintf(h, "%d files\n", len(inputFiles))
719+
for _, file := range inputFiles {
720+
fmt.Fprintf(h, "%s\n", file)
721+
}
722+
p.buildID = fmt.Sprintf("%x", h.Sum(nil))
723+
690724
return p
691725
}
692726

@@ -795,6 +829,14 @@ func isStale(p *Package, topRoot map[string]bool) bool {
795829
}
796830
}
797831

832+
// Package is stale if the expected build ID differs from the
833+
// recorded build ID. This catches changes like a source file
834+
// being removed from a package directory. See issue 3895.
835+
targetBuildID, err := readBuildID(p)
836+
if err == nil && targetBuildID != p.buildID {
837+
return true
838+
}
839+
798840
// As a courtesy to developers installing new versions of the compiler
799841
// frequently, define that packages are stale if they are
800842
// older than the compiler, and commands if they are older than
@@ -814,9 +856,10 @@ func isStale(p *Package, topRoot map[string]bool) bool {
814856
}
815857

816858
// Have installed copy, probably built using current compilers,
817-
// and built after its imported packages. The only reason now
859+
// built with the right set of source files,
860+
// and built after its imported packages. The only reason now
818861
// that we'd have to rebuild it is if the sources were newer than
819-
// the package. If a package p is not in the same tree as any
862+
// the package. If a package p is not in the same tree as any
820863
// package named on the command-line, assume it is up-to-date
821864
// no matter what the modification times on the source files indicate.
822865
// This avoids rebuilding $GOROOT packages when people are
@@ -994,3 +1037,169 @@ func hasSubdir(root, dir string) (rel string, ok bool) {
9941037
}
9951038
return filepath.ToSlash(dir[len(root):]), true
9961039
}
1040+
1041+
// Sentinel errors reported while reading a build ID.
var (
	// errBuildIDToolchain is returned by readBuildID when the active
	// toolchain is not the gc toolchain.
	errBuildIDToolchain = errors.New("build ID only supported in gc toolchain")

	// errBuildIDMalformed is wrapped in an *os.PathError when the target
	// does not have the expected layout or its build ID cannot be unquoted.
	errBuildIDMalformed = errors.New("malformed object file")

	// errBuildIDUnknown is wrapped in an *os.PathError when there is no
	// usable target to read: a package target that is not a .a archive,
	// or a command with an empty target path.
	errBuildIDUnknown = errors.New("lost build ID")
)
1046+
1047+
// Markers that readBuildID matches, in this order, against the first
// four lines of a package archive.
var (
	// bangArch is the archive magic; line 1 must equal it exactly.
	bangArch = []byte("!<arch>")
	// pkgdef must be a prefix of line 2.
	pkgdef = []byte("__.PKGDEF")
	// goobject must be a prefix of line 3.
	goobject = []byte("go object ")
	// buildid prefixes line 4 when a build ID was recorded; the rest of
	// that line is the quoted ID.
	buildid = []byte("build id ")
)
1053+
1054+
// readBuildID reads the build ID from an archive or binary.
1055+
// It only supports the gc toolchain.
1056+
// Other toolchain maintainers should adjust this function.
1057+
func readBuildID(p *Package) (id string, err error) {
1058+
if buildToolchain != (gcToolchain{}) {
1059+
return "", errBuildIDToolchain
1060+
}
1061+
1062+
// For commands, read build ID directly from binary.
1063+
if p.Name == "main" {
1064+
return readBuildIDFromBinary(p)
1065+
}
1066+
1067+
// Otherwise, we expect to have an archive (.a) file,
1068+
// and we can read the build ID from the Go export data.
1069+
if !strings.HasSuffix(p.Target, ".a") {
1070+
return "", &os.PathError{Op: "parse", Path: p.Target, Err: errBuildIDUnknown}
1071+
}
1072+
1073+
// Read just enough of the target to fetch the build ID.
1074+
// The archive is expected to look like:
1075+
//
1076+
// !<arch>
1077+
// __.PKGDEF 0 0 0 644 7955 `
1078+
// go object darwin amd64 devel X:none
1079+
// build id "b41e5c45250e25c9fd5e9f9a1de7857ea0d41224"
1080+
//
1081+
// The variable-sized strings are GOOS, GOARCH, and the experiment list (X:none).
1082+
// Reading the first 1024 bytes should be plenty.
1083+
f, err := os.Open(p.Target)
1084+
if err != nil {
1085+
return "", err
1086+
}
1087+
data := make([]byte, 1024)
1088+
n, err := io.ReadFull(f, data)
1089+
f.Close()
1090+
1091+
if err != nil && n == 0 {
1092+
return "", err
1093+
}
1094+
1095+
bad := func() (string, error) {
1096+
return "", &os.PathError{Op: "parse", Path: p.Target, Err: errBuildIDMalformed}
1097+
}
1098+
1099+
// Archive header.
1100+
for i := 0; ; i++ { // returns during i==3
1101+
j := bytes.IndexByte(data, '\n')
1102+
if j < 0 {
1103+
return bad()
1104+
}
1105+
line := data[:j]
1106+
data = data[j+1:]
1107+
switch i {
1108+
case 0:
1109+
if !bytes.Equal(line, bangArch) {
1110+
return bad()
1111+
}
1112+
case 1:
1113+
if !bytes.HasPrefix(line, pkgdef) {
1114+
return bad()
1115+
}
1116+
case 2:
1117+
if !bytes.HasPrefix(line, goobject) {
1118+
return bad()
1119+
}
1120+
case 3:
1121+
if !bytes.HasPrefix(line, buildid) {
1122+
// Found the object header, just doesn't have a build id line.
1123+
// Treat as successful, with empty build id.
1124+
return "", nil
1125+
}
1126+
id, err := strconv.Unquote(string(line[len(buildid):]))
1127+
if err != nil {
1128+
return bad()
1129+
}
1130+
return id, nil
1131+
}
1132+
}
1133+
}
1134+
1135+
// Markers delimiting the build ID trailer that readBuildIDFromBinary
// looks for at the end of a binary.
var (
	// goBinary opens the trailer: a NUL byte, a blank line, then "go binary".
	goBinary = []byte("\x00\n\ngo binary\n")
	// endGoBinary closes the trailer and must be the last bytes of the file.
	endGoBinary = []byte("\nend go binary\n")
	// newlineAndBuildid introduces the quoted build ID inside the trailer.
	newlineAndBuildid = []byte("\nbuild id ")
)
1140+
1141+
// readBuildIDFromBinary reads the build ID from a binary.
1142+
// Instead of trying to be good citizens and store the build ID in a
1143+
// custom section of the binary, which would be different for each
1144+
// of the four binary types we support (ELF, Mach-O, Plan 9, PE),
1145+
// we write a few lines to the end of the binary.
1146+
//
1147+
// At the very end of the binary we expect to find:
1148+
//
1149+
// <NUL>
1150+
//
1151+
// go binary
1152+
// build id "XXX"
1153+
// end go binary
1154+
//
1155+
func readBuildIDFromBinary(p *Package) (id string, err error) {
1156+
if p.Target == "" {
1157+
return "", &os.PathError{Op: "parse", Path: p.Target, Err: errBuildIDUnknown}
1158+
}
1159+
1160+
f, err := os.Open(p.Target)
1161+
if err != nil {
1162+
return "", err
1163+
}
1164+
defer f.Close()
1165+
1166+
off, err := f.Seek(0, 2)
1167+
if err != nil {
1168+
return "", err
1169+
}
1170+
n := 1024
1171+
if off < int64(n) {
1172+
n = int(off)
1173+
}
1174+
if _, err := f.Seek(off-int64(n), 0); err != nil {
1175+
return "", err
1176+
}
1177+
data := make([]byte, n)
1178+
if _, err := io.ReadFull(f, data); err != nil {
1179+
return "", err
1180+
}
1181+
if !bytes.HasSuffix(data, endGoBinary) {
1182+
// Trailer missing. Treat as successful but build ID empty.
1183+
return "", nil
1184+
}
1185+
i := bytes.LastIndex(data, goBinary)
1186+
if i < 0 {
1187+
// Trailer missing. Treat as successful but build ID empty.
1188+
return "", nil
1189+
}
1190+
1191+
// Have trailer. Find build id line.
1192+
data = data[i:]
1193+
i = bytes.Index(data, newlineAndBuildid)
1194+
if i < 0 {
1195+
// Trailer present; build ID missing. Treat as successful but empty.
1196+
return "", nil
1197+
}
1198+
line := data[i+len(newlineAndBuildid):]
1199+
j := bytes.IndexByte(line, '\n') // must succeed - endGoBinary is at end and has newlines
1200+
id, err = strconv.Unquote(string(line[:j]))
1201+
if err != nil {
1202+
return "", &os.PathError{Op: "parse", Path: p.Target, Err: errBuildIDMalformed}
1203+
}
1204+
return id, nil
1205+
}

0 commit comments

Comments
 (0)