Skip to content

Commit 799b253

Browse files
committed
cmd/go/internal/test: add opt-in file hashing instead of modtime for test caching (w/ git)
Updates golang#58571 Updates #150
1 parent 0c028ef commit 799b253

3 files changed

Lines changed: 188 additions & 4 deletions

File tree

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
// Copyright 2026 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package githash
6+
7+
import (
8+
"bytes"
9+
"io/fs"
10+
"os"
11+
"os/exec"
12+
"path"
13+
"strconv"
14+
"strings"
15+
"sync"
16+
)
17+
18+
// GitHash is a git hash in hex form.
19+
//
20+
// It's usually a SHA-1 hash, but could be SHA-256 depending on the git
21+
// configuration.
22+
type GitHash string // filled from the object-name column of "git ls-tree" output in buildGitHashMap
23+
24+
// Enabled is whether git hash lookups are enabled via the CMD_GO_USE_GIT_HASH
25+
// environment variable.
26+
var Enabled bool // assigned by init; stays false when the variable is unset or does not parse as a bool
27+
28+
func init() {
29+
s := os.Getenv("CMD_GO_USE_GIT_HASH")
30+
if s != "" {
31+
Enabled, _ = strconv.ParseBool(s)
32+
}
33+
}
34+
35+
// gitHashKey names one bucket of candidate files in a git repo: all files
// that share the same base name and the same size.
//
// Bucketing on these two values avoids statting every file in the repo when
// looking up the git hash for a file; only the files that agree on name and
// size need to be statted.
type gitHashKey struct {
	// baseName is the base name of the file, because that is all that
	// fs.FileInfo.Name gives us.
	baseName string

	// size is the file size in bytes.
	size int64
}
45+
46+
type gitHashMap struct {
47+
gitRoot string // absolute path to git repo root
48+
49+
// cands is a list of files in the git repo, bucketed by their (base name,
50+
// size) bucket key. This makes looking for a file faster later, without
51+
// statting the whole world, yet still permitting lookup only from a
52+
// fs.FileInfo that only has a base name & size & Sys info.
53+
cands map[gitHashKey][]*gitHashCand
54+
}
55+
56+
type gitHashCand struct {
57+
rel string // the relative git path from "git ls-files -r"
58+
hash GitHash
59+
60+
statOnce sync.Once
61+
stat fs.FileInfo
62+
}
63+
64+
func (c *gitHashCand) getStat(m *gitHashMap) fs.FileInfo {
65+
c.statOnce.Do(func() {
66+
fullPath := path.Join(m.gitRoot, c.rel)
67+
info, err := os.Lstat(fullPath)
68+
if err == nil {
69+
c.stat = info
70+
}
71+
})
72+
return c.stat
73+
}
74+
75+
// getGitHashMap runs buildGitHashMap on its first call and returns that same
// result, which may be nil, on every later call.
var getGitHashMap = sync.OnceValue(buildGitHashMap)
76+
77+
func buildGitHashMap() *gitHashMap {
78+
m := &gitHashMap{
79+
cands: make(map[gitHashKey][]*gitHashCand),
80+
}
81+
gitRoot, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
82+
if err != nil {
83+
return nil
84+
}
85+
m.gitRoot = strings.TrimSpace(string(gitRoot))
86+
87+
cmd := exec.Command("git", "ls-tree",
88+
"-r", // recursive
89+
"--long", // include file sizes
90+
"-z", // null-separated entries; don't have to deal with C quoting of some filenames
91+
"HEAD",
92+
)
93+
cmd.Dir = m.gitRoot // effectively git -C <dir>; either way.
94+
out, err := cmd.Output()
95+
if err != nil {
96+
return nil
97+
}
98+
// Parse lines of the form:
99+
//
100+
// 100644 blob cabbb1732c418125f9c773ce7a28ba34f2708554 639 .gitattributes
101+
// 100644 blob 2b4a5fccdaf12f98cf8e255affa28cfd7e6a784d 95 .github/CODE_OF_CONDUCT.md
102+
//
103+
// .... but null-terminated instead of newline-terminated, so we don't have to deal
104+
// with C quoting of filenames with certain characters.
105+
//
106+
// We don't care about the permissions.
107+
remain := out
108+
for len(remain) > 0 {
109+
line, rest, ok := bytes.Cut(remain, []byte{0})
110+
if !ok {
111+
break
112+
}
113+
remain = rest
114+
meta, nameB, ok := bytes.Cut(line, []byte("\t"))
115+
116+
_, hashAndSize, ok := bytes.Cut(meta, []byte(" blob "))
117+
if !ok {
118+
continue
119+
}
120+
hashB, sizeB, ok := bytes.Cut(hashAndSize, []byte(" "))
121+
if !ok {
122+
continue
123+
}
124+
size, err := strconv.ParseInt(strings.TrimSpace(string(sizeB)), 10, 64)
125+
if err != nil {
126+
continue
127+
}
128+
name := strings.TrimSpace(string(nameB))
129+
hash := strings.TrimSpace(string(hashB))
130+
k := gitHashKey{
131+
baseName: path.Base(name),
132+
size: size,
133+
}
134+
m.cands[k] = append(m.cands[k], &gitHashCand{
135+
rel: name,
136+
hash: GitHash(hash),
137+
})
138+
}
139+
return m
140+
}
141+
142+
// Hash returns the git hash for the given file info, if available.
143+
func Hash(info fs.FileInfo) (GitHash, bool) {
144+
if !Enabled || info == nil || !info.Mode().IsRegular() {
145+
return "", false
146+
}
147+
k := gitHashKey{
148+
baseName: info.Name(),
149+
size: info.Size(),
150+
}
151+
m := getGitHashMap()
152+
if m == nil {
153+
return "", false
154+
}
155+
for _, cand := range m.cands[k] {
156+
if os.SameFile(info, cand.getStat(m)) {
157+
return cand.hash, true
158+
}
159+
}
160+
return "", false
161+
}
162+
163+
// ModTimeOrHash returns either the git hash (if enabled and available) or the
164+
// mod time of the given file info.
165+
//
166+
// For non-regular files (notably directories), it returns nil if git hash is
167+
// enabled.
168+
//
169+
// It always returns one of nil, time.Time, or GitHash (a string), all suitable
170+
// for use in Sprintf verb %v.
171+
func ModTimeOrHash(info fs.FileInfo) any {
172+
if !Enabled {
173+
return info.ModTime()
174+
}
175+
if h, ok := Hash(info); ok {
176+
return h
177+
}
178+
if info.Mode().IsRegular() {
179+
return info.ModTime()
180+
}
181+
return nil
182+
}

src/cmd/go/internal/modindex/read.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"cmd/go/internal/cache"
2929
"cmd/go/internal/cfg"
3030
"cmd/go/internal/fsys"
31+
"cmd/go/internal/githash"
3132
"cmd/go/internal/imports"
3233
"cmd/go/internal/str"
3334
"cmd/internal/par"
@@ -109,11 +110,11 @@ func dirHash(modroot, pkgdir string) (cache.ActionID, error) {
109110
if err != nil {
110111
return cache.ActionID{}, ErrNotIndexed
111112
}
112-
if info.ModTime().After(cutoff) {
113+
if !githash.Enabled && info.ModTime().After(cutoff) {
113114
return cache.ActionID{}, ErrNotIndexed
114115
}
115116

116-
fmt.Fprintf(h, "file %v %v %v\n", info.Name(), info.ModTime(), info.Size())
117+
fmt.Fprintf(h, "file %v %v %v\n", info.Name(), githash.ModTimeOrHash(info), info.Size())
117118
}
118119
return h.Sum(), nil
119120
}

src/cmd/go/internal/test/test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"cmd/go/internal/base"
2828
"cmd/go/internal/cache"
2929
"cmd/go/internal/cfg"
30+
"cmd/go/internal/githash"
3031
"cmd/go/internal/load"
3132
"cmd/go/internal/lockedfile"
3233
"cmd/go/internal/modload"
@@ -2028,7 +2029,7 @@ func hashOpen(name string) (cache.ActionID, error) {
20282029
hashWriteStat(h, finfo)
20292030
}
20302031
}
2031-
} else if info.Mode().IsRegular() {
2032+
} else if info.Mode().IsRegular() && !githash.Enabled {
20322033
// Because files might be very large, do not attempt
20332034
// to hash the entirety of their content. Instead assume
20342035
// the mtime and size recorded in hashWriteStat above
@@ -2061,7 +2062,7 @@ func hashStat(name string) cache.ActionID {
20612062
}
20622063

20632064
func hashWriteStat(h io.Writer, info fs.FileInfo) {
2064-
fmt.Fprintf(h, "stat %d %x %v %v\n", info.Size(), uint64(info.Mode()), info.ModTime(), info.IsDir())
2065+
fmt.Fprintf(h, "stat %d %x %v %v\n", info.Size(), uint64(info.Mode()), githash.ModTimeOrHash(info), info.IsDir())
20652066
}
20662067

20672068
// testAndInputKey returns the actual cache key for the pair (testID, testInputsID).

0 commit comments

Comments
 (0)