Skip to content

Commit 0fbf2cd

Browse files
authored
feat(cache): delta-based incremental SQLite analysis cache for Go (shivasurya#649)
## Summary

Adds `AnalysisCache` backed by SQLite to speed up repeated Go call graph builds.

**Cold → warm speedup**: ~2m 39s → ~24s (**6.6×**) on 392 Go files / 39,743 call sites.

Enable with `--enable-db-cache` on `resolution-report` (scan/ci in PR-10).

## Design

**Which passes are cached:**

- Passes 2a/2b/3 (file-local): cached by content hash — always safe
- Pass 4 (cross-file): delta-based — files are replayed from cache unless dirty
- Pass 1: never cached — must run to detect what changed

**Pass 4 delta invalidation** (`NeedsPass4Rerun`): a file is dirty if:

1. No cache entry exists
2. Content hash changed
3. A resolved callee FQN was removed from the index
4. An unresolved call name matches a newly-added FQN

Warm files skip resolution entirely; cached edges replay directly into the call graph.

**Per-table schema versioning** — three constants (`fileCacheVersion`, `functionIndexVersion`, `pass4Version`) stored in the `meta` table. On version mismatch only the affected table is wiped; other tables stay warm. Project-root change wipes all.

## Test coverage

- `analysis_cache_test.go`: per-table version wipe, file-cache wipe, project-root change
- `go_builder_test.go`: cold→warm round-trip, nil cache regression, dirty-file re-resolution

🤖 Generated with [Claude Code](https://claude.ai/claude-code)
1 parent 00edcd8 commit 0fbf2cd

26 files changed

Lines changed: 2677 additions & 111 deletions
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Runs the Go third-party registry upload script against the R2 bucket, lists
# what landed there, and then checks that the manifest is reachable through
# the public CDN URL.
name: Upload Go Third-Party Registries to R2

on:
  release:
    types: [published]
  workflow_dispatch:
    inputs:
      # NOTE(review): no step in this workflow reads `force_upload`; confirm
      # whether the upload script is meant to receive it.
      force_upload:
        description: 'Force upload even if not a release'
        required: false
        default: 'false'

permissions:
  contents: read

jobs:
  upload-go-thirdparty-registries:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Setup Go
        uses: actions/setup-go@v6
        with:
          go-version: '1.26'
          cache: false

      - name: Verify Go installation
        run: |
          echo "Go compiler: $(go version)"
          echo "GOPATH: $(go env GOPATH)"
          echo "GOMODCACHE: $(go env GOMODCACHE)"

      - name: Install AWS CLI
        run: |
          # --fail makes an HTTP error abort here instead of saving an error
          # page as awscliv2.zip and failing later in unzip.
          curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
          unzip -q awscliv2.zip
          sudo ./aws/install --update
          aws --version

      - name: Generate and upload Go third-party registries
        env:
          R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }}
          R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
          R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
        run: |
          cd sast-engine/tools
          chmod +x upload_go_thirdparty_to_r2.sh
          ./upload_go_thirdparty_to_r2.sh

      - name: Verify uploads in R2
        env:
          R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }}
          R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
          R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
        run: |
          # The AWS CLI is pointed at R2 through its S3-compatible endpoint.
          export AWS_ACCESS_KEY_ID="${R2_ACCESS_KEY_ID}"
          export AWS_SECRET_ACCESS_KEY="${R2_SECRET_ACCESS_KEY}"
          R2_ENDPOINT="https://${R2_ACCOUNT_ID}.r2.cloudflarestorage.com"

          echo "Verifying uploaded files in R2..."
          # A missing manifest only warns here; the CDN step below is the
          # check that fails the job.
          aws s3 ls "s3://code-pathfinder-assets/registries/go-thirdparty/v1/manifest.json" \
            --endpoint-url "$R2_ENDPOINT" \
            || echo "Warning: manifest.json not found"

          echo ""
          echo "Listing uploaded files..."
          aws s3 ls "s3://code-pathfinder-assets/registries/go-thirdparty/v1/" \
            --endpoint-url "$R2_ENDPOINT" \
            --summarize --human-readable

      - name: Test public CDN accessibility
        run: |
          echo "Testing public CDN URL..."
          URL="https://assets.codepathfinder.dev/registries/go-thirdparty/v1/manifest.json"
          echo "Testing: $URL"
          # `|| true` keeps a connection failure or timeout from aborting the
          # step before the diagnostic below is printed; curl still writes
          # "000" as the status code in that case.
          STATUS=$(curl -s -o /dev/null --max-time 30 -w "%{http_code}" "$URL" || true)
          if [ "$STATUS" = "200" ]; then
            echo " Go third-party manifest is publicly accessible"
            echo ""
            echo "Manifest contents:"
            curl -fsS --max-time 30 "$URL" | python3 -c "import json,sys; m=json.load(sys.stdin); pkgs=m.get('packages',[]); print(' Schema version: ', m.get('schema_version','unknown')); print(' Registry version:', m.get('registry_version','unknown')); print(' Generated at: ', m.get('generated_at','unknown')); print(' Packages: ', len(pkgs)); [print(f' - {p[\"import_path\"]} ({p.get(\"type_count\",0)} types, {p.get(\"function_count\",0)} functions)') for p in pkgs[:5]]"
          else
            echo " Go third-party manifest returned HTTP $STATUS"
            exit 1
          fi

      - name: Summary
        run: |
          echo "========================================="
          echo "Go Third-Party Registry Upload Complete"
          echo "========================================="
          echo ""
          echo "Registry available at:"
          echo " https://assets.codepathfinder.dev/registries/go-thirdparty/v1/manifest.json"
          echo ""
          echo "The code-pathfinder engine will use this CDN registry for Go third-party"
          echo "type inference. Popular packages (gin, gorm, sqlx, etc.) will resolve"
          echo "via CDN first; project-specific deps fall back to local vendor/ parsing."

sast-engine/cmd/ci.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,10 +281,25 @@ Examples:
281281
builder.InitGoStdlibLoader(goRegistry, projectPath, logger)
282282
goTypeEngine := resolution.NewGoTypeInferenceEngine(goRegistry)
283283

284-
goCG, err := builder.BuildGoCallGraph(codeGraph, goRegistry, goTypeEngine, logger)
284+
enableDBCache, _ := cmd.Flags().GetBool("enable-db-cache")
285+
var analysisCache *builder.AnalysisCache
286+
if enableDBCache {
287+
var cacheErr error
288+
analysisCache, cacheErr = builder.OpenAnalysisCache(projectPath)
289+
if cacheErr != nil {
290+
logger.Warning("Could not open analysis cache: %v — running full analysis", cacheErr)
291+
} else {
292+
defer analysisCache.Close()
293+
}
294+
}
295+
296+
goCG, err := builder.BuildGoCallGraph(codeGraph, goRegistry, goTypeEngine, logger, analysisCache)
285297
if err != nil {
286298
logger.Warning("Failed to build Go call graph: %v", err)
287299
} else {
300+
if analysisCache != nil {
301+
logger.Progress("Cache: incremental analysis cache updated")
302+
}
288303
builder.MergeCallGraphs(cg, goCG)
289304
logger.Statistic("Go call graph merged: %d functions, %d call sites",
290305
len(goCG.Functions), countTotalCallSites(goCG))
@@ -507,5 +522,6 @@ func init() {
507522
ciCmd.Flags().Int("github-pr", 0, "Pull request number for posting comments")
508523
ciCmd.Flags().Bool("pr-comment", false, "Post summary comment on the pull request")
509524
ciCmd.Flags().Bool("pr-inline", false, "Post inline review comments for critical/high findings")
525+
ciCmd.Flags().Bool("enable-db-cache", false, "Enable SQLite-backed incremental analysis cache (experimental)")
510526
ciCmd.MarkFlagRequired("project")
511527
}

sast-engine/cmd/ci_test.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,3 +370,81 @@ func TestCICmdPrepareRulesLocalOnly(t *testing.T) {
370370
_, err = os.Stat(outputFile)
371371
require.NoError(t, err)
372372
}
373+
374+
// TestCICmdEnableDBCacheFlag verifies that the --enable-db-cache flag is
375+
// registered on the ci command with the correct default value.
376+
func TestCICmdEnableDBCacheFlag(t *testing.T) {
377+
flag := ciCmd.Flags().Lookup("enable-db-cache")
378+
require.NotNil(t, flag, "enable-db-cache flag should be registered on ci command")
379+
assert.Equal(t, "false", flag.DefValue)
380+
}
381+
382+
// TestCICmdEnableDBCacheWithGoProject verifies the --enable-db-cache code path
383+
// is exercised when running the ci command against a Go project.
384+
func TestCICmdEnableDBCacheWithGoProject(t *testing.T) {
385+
projectDir, rulesFile := setupCIIntegrationTest(t)
386+
387+
// Add a go.mod so the ci command enters the Go analysis branch.
388+
require.NoError(t, os.WriteFile(
389+
filepath.Join(projectDir, "go.mod"),
390+
[]byte("module example.com/test\n\ngo 1.21\n"),
391+
0o644,
392+
))
393+
require.NoError(t, os.WriteFile(
394+
filepath.Join(projectDir, "main.go"),
395+
[]byte("package main\n\nfunc main() {}\n"),
396+
0o644,
397+
))
398+
399+
outputFile := filepath.Join(t.TempDir(), "results.sarif")
400+
401+
resetCIFlags()
402+
ciCmd.Flags().Set("rules", rulesFile)
403+
ciCmd.Flags().Set("project", projectDir)
404+
ciCmd.Flags().Set("output-file", outputFile)
405+
require.NoError(t, ciCmd.Flags().Set("enable-db-cache", "true"))
406+
defer ciCmd.Flags().Set("enable-db-cache", "false")
407+
408+
// Should complete without error — the cache is created and closed.
409+
err := ciCmd.RunE(ciCmd, []string{})
410+
require.NoError(t, err)
411+
}
412+
413+
// TestCICmdEnableDBCacheOpenError verifies that when OpenAnalysisCache fails
414+
// (e.g. the cache directory is blocked), the ci command logs a warning and
415+
// continues without error, covering ci.go lines 289-291.
416+
func TestCICmdEnableDBCacheOpenError(t *testing.T) {
417+
projectDir, rulesFile := setupCIIntegrationTest(t)
418+
419+
// Add a go.mod so the ci command enters the Go analysis branch.
420+
require.NoError(t, os.WriteFile(
421+
filepath.Join(projectDir, "go.mod"),
422+
[]byte("module example.com/test\n\ngo 1.21\n"),
423+
0o644,
424+
))
425+
require.NoError(t, os.WriteFile(
426+
filepath.Join(projectDir, "main.go"),
427+
[]byte("package main\n\nfunc main() {}\n"),
428+
0o644,
429+
))
430+
431+
// Block the pathfinder cache directory by placing a regular file where the
432+
// cache directory would be created, forcing MkdirAll to fail.
433+
fakeCache := t.TempDir()
434+
blockFile := filepath.Join(fakeCache, "pathfinder")
435+
require.NoError(t, os.WriteFile(blockFile, []byte("block"), 0o444))
436+
t.Setenv("XDG_CACHE_HOME", fakeCache)
437+
438+
outputFile := filepath.Join(t.TempDir(), "results.sarif")
439+
440+
resetCIFlags()
441+
ciCmd.Flags().Set("rules", rulesFile)
442+
ciCmd.Flags().Set("project", projectDir)
443+
ciCmd.Flags().Set("output-file", outputFile)
444+
require.NoError(t, ciCmd.Flags().Set("enable-db-cache", "true"))
445+
defer ciCmd.Flags().Set("enable-db-cache", "false")
446+
447+
// Should warn about cache failure but continue without error.
448+
err := ciCmd.RunE(ciCmd, []string{})
449+
require.NoError(t, err)
450+
}

sast-engine/cmd/resolution_report.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Use --csv to export unresolved calls with file, line, target, and reason.`,
3535
projectInput := cmd.Flag("project").Value.String()
3636
csvOutput := cmd.Flag("csv").Value.String()
3737
dumpJSON := cmd.Flag("dump-callsites-json").Value.String()
38+
enableDBCache, _ := cmd.Flags().GetBool("enable-db-cache")
3839

3940
if projectInput == "" {
4041
fmt.Println("Error: --project flag is required")
@@ -60,10 +61,30 @@ Use --csv to export unresolved calls with file, line, target, and reason.`,
6061
builder.InitGoStdlibLoader(goRegistry, projectInput, logger)
6162
builder.InitGoThirdPartyLoader(goRegistry, projectInput, false, logger)
6263
goTypeEngine := resolution.NewGoTypeInferenceEngine(goRegistry)
63-
goCG, goErr := builder.BuildGoCallGraph(codeGraph, goRegistry, goTypeEngine, logger)
64+
65+
// Open SQLite analysis cache only when --enable-db-cache is set.
66+
// Disabled by default to preserve current behaviour.
67+
var analysisCache *builder.AnalysisCache
68+
if enableDBCache {
69+
var cacheErr error
70+
analysisCache, cacheErr = builder.OpenAnalysisCache(projectInput)
71+
if cacheErr != nil {
72+
fmt.Fprintf(os.Stderr, " [cache] warn: could not open cache: %v — running full analysis\n", cacheErr)
73+
analysisCache = nil
74+
}
75+
if analysisCache != nil {
76+
defer analysisCache.Close()
77+
}
78+
}
79+
80+
goCG, goErr := builder.BuildGoCallGraph(codeGraph, goRegistry, goTypeEngine, logger, analysisCache)
6481
if goErr == nil && goCG != nil {
6582
builder.MergeCallGraphs(cg, goCG)
6683
}
84+
85+
if enableDBCache && analysisCache != nil {
86+
fmt.Println("Cache: incremental analysis cache updated")
87+
}
6788
}
6889
}
6990

@@ -910,4 +931,5 @@ func init() {
910931
resolutionReportCmd.MarkFlagRequired("project")
911932
resolutionReportCmd.Flags().String("csv", "", "Export unresolved calls to CSV file (e.g., --csv unresolved.csv)")
912933
resolutionReportCmd.Flags().String("dump-callsites-json", "", "Export all Go call sites as JSONL for accuracy validation (e.g., --dump-callsites-json callsites.jsonl)")
934+
resolutionReportCmd.Flags().Bool("enable-db-cache", false, "Enable SQLite-backed incremental analysis cache (experimental). Caches Pass 2b scopes and Pass 3 call sites per file keyed by content hash; only changed files are re-analysed on subsequent runs.")
913935
}

sast-engine/cmd/resolution_report_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,3 +866,49 @@ func main() {
866866
resolutionReportCmd.Run(resolutionReportCmd, []string{})
867867
})
868868
}
869+
870+
// TestDumpCallSitesJSON verifies that dumpCallSitesJSON writes Go call site
871+
// records to a JSONL file and skips non-Go callers.
872+
func TestDumpCallSitesJSON(t *testing.T) {
873+
goFunc := &graph.Node{ID: "fn", Language: "go", Name: "Fn"}
874+
pyFunc := &graph.Node{ID: "pyfn", Language: "python", Name: "PyFn"}
875+
876+
cg := &core.CallGraph{
877+
Functions: map[string]*graph.Node{
878+
"example.com/pkg.Fn": goFunc,
879+
"pymod.PyFn": pyFunc,
880+
},
881+
CallSites: map[string][]core.CallSite{
882+
"example.com/pkg.Fn": {
883+
{Target: "fmt.Println", TargetFQN: "fmt.Println", Resolved: true, IsStdlib: true,
884+
Location: core.Location{File: "main.go", Line: 5}},
885+
},
886+
"pymod.PyFn": {
887+
{Target: "os.exit", Resolved: false, Location: core.Location{File: "script.py", Line: 2}},
888+
},
889+
},
890+
}
891+
892+
out := filepath.Join(t.TempDir(), "callsites.jsonl")
893+
err := dumpCallSitesJSON(cg, out)
894+
assert.NoError(t, err)
895+
896+
data, err := os.ReadFile(out)
897+
assert.NoError(t, err)
898+
content := string(data)
899+
// Only the Go call site should be written
900+
assert.Contains(t, content, "fmt.Println")
901+
assert.NotContains(t, content, "os.exit")
902+
}
903+
904+
// TestDumpCallSitesJSON_WriteError verifies that an unwritable output path
905+
// returns an error from dumpCallSitesJSON.
906+
func TestDumpCallSitesJSON_WriteError(t *testing.T) {
907+
cg := &core.CallGraph{
908+
Functions: map[string]*graph.Node{},
909+
CallSites: map[string][]core.CallSite{},
910+
}
911+
err := dumpCallSitesJSON(cg, "/nonexistent/dir/out.jsonl")
912+
assert.Error(t, err)
913+
assert.Contains(t, err.Error(), "failed to create JSON file")
914+
}

sast-engine/cmd/scan.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,10 +262,25 @@ Examples:
262262

263263
goTypeEngine := resolution.NewGoTypeInferenceEngine(goRegistry)
264264

265-
goCG, err := builder.BuildGoCallGraph(codeGraph, goRegistry, goTypeEngine, logger)
265+
enableDBCache, _ := cmd.Flags().GetBool("enable-db-cache")
266+
var analysisCache *builder.AnalysisCache
267+
if enableDBCache {
268+
var cacheErr error
269+
analysisCache, cacheErr = builder.OpenAnalysisCache(projectPath)
270+
if cacheErr != nil {
271+
logger.Warning("Could not open analysis cache: %v — running full analysis", cacheErr)
272+
} else {
273+
defer analysisCache.Close()
274+
}
275+
}
276+
277+
goCG, err := builder.BuildGoCallGraph(codeGraph, goRegistry, goTypeEngine, logger, analysisCache)
266278
if err != nil {
267279
logger.Warning("Failed to build Go call graph: %v", err)
268280
} else {
281+
if analysisCache != nil {
282+
logger.Progress("Cache: incremental analysis cache updated")
283+
}
269284
builder.MergeCallGraphs(cg, goCG)
270285
logger.Statistic("Go call graph merged: %d functions, %d call sites",
271286
len(goCG.Functions), countTotalCallSites(goCG))
@@ -1052,5 +1067,6 @@ func init() {
10521067
scanCmd.Flags().Bool("diff-aware", false, "Enable diff-aware scanning (only report findings in changed files)")
10531068
scanCmd.Flags().String("base", "", "Base git ref for diff-aware scanning (required with --diff-aware)")
10541069
scanCmd.Flags().String("head", "HEAD", "Head git ref for diff-aware scanning")
1070+
scanCmd.Flags().Bool("enable-db-cache", false, "Enable SQLite-backed incremental analysis cache (experimental)")
10551071
scanCmd.MarkFlagRequired("project")
10561072
}

0 commit comments

Comments
 (0)