describe('Dagger 2 — @Provides / @Binds binding synthesis', () => {
  // Scratch project directory and the graph opened on it for the current test.
  let tmpDir: string | undefined;
  let openGraph: CodeGraph | undefined;

  afterEach(() => {
    // Close the graph here rather than at the end of each test body, so the
    // handle is released even when an assertion throws, and always before the
    // directory underneath it is deleted.
    openGraph?.close();
    openGraph = undefined;
    if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true });
    tmpDir = undefined;
  });

  /**
   * Write `files` (file name → source text) into a fresh temp directory,
   * index it, and return the graph. The directory and the graph are tracked
   * so `afterEach` can dispose of both.
   */
  const indexProject = async (files: Record<string, string>): Promise<CodeGraph> => {
    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-dagger-'));
    tmpDir = dir;
    for (const [fileName, source] of Object.entries(files)) {
      fs.writeFileSync(path.join(dir, fileName), source);
    }
    const graph = CodeGraph.initSync(dir);
    openGraph = graph;
    await graph.indexAll();
    return graph;
  };

  /** Read the `synthesizedBy` tag off an edge's (loosely typed) metadata. */
  const synthesizedBy = (metadata: unknown): string | undefined =>
    (metadata as { synthesizedBy?: string } | undefined)?.synthesizedBy;

  // Helper: find the synthesized binding edge between two named nodes.
  // The bound type may be an interface or a class; the impl must be a class.
  const findBindingEdge = (cg: CodeGraph, ifaceName: string, implName: string) => {
    const iface =
      cg.getNodesByKind('interface').find((n) => n.name === ifaceName) ??
      cg.getNodesByKind('class').find((n) => n.name === ifaceName);
    const impl = cg.getNodesByKind('class').find((n) => n.name === implName);
    if (!iface || !impl) return undefined;
    return cg
      .getOutgoingEdges(iface.id)
      .find(
        (e) =>
          e.target === impl.id &&
          e.kind === 'references' &&
          synthesizedBy(e.metadata) === 'dagger-provides'
      );
  };

  it('links interface to impl through @Provides Interface(Impl impl)', async () => {
    const cg = await indexProject({
      'DataManager.java':
        'package com.example.di;\npublic interface DataManager { void load(); }\n',
      'AppDataManager.java':
        'package com.example.di;\npublic class AppDataManager implements DataManager {\n' +
        ' public void load() {}\n' +
        '}\n',
      'ApplicationModule.java':
        'package com.example.di;\nimport dagger.Module;\nimport dagger.Provides;\n' +
        '@Module\npublic class ApplicationModule {\n' +
        ' @Provides DataManager provideDataManager(AppDataManager impl) { return impl; }\n' +
        '}\n',
    });

    expect(findBindingEdge(cg, 'DataManager', 'AppDataManager')).toBeDefined();
  });

  it('links interface to impl through @Binds abstract method', async () => {
    const cg = await indexProject({
      'Foo.java': 'package com.example;\npublic interface Foo {}\n',
      'FooImpl.java': 'package com.example;\npublic class FooImpl implements Foo {}\n',
      'FooModule.java':
        'package com.example;\nimport dagger.Module;\nimport dagger.Binds;\n' +
        '@Module\npublic abstract class FooModule {\n' +
        ' @Binds abstract Foo bindFoo(FooImpl impl);\n' +
        '}\n',
    });

    expect(findBindingEdge(cg, 'Foo', 'FooImpl')).toBeDefined();
  });

  it('works for Kotlin @Module object with @Provides', async () => {
    const cg = await indexProject({
      'Repo.kt': 'package com.example\n\ninterface Repo { fun load() }\n',
      'RepoImpl.kt': 'package com.example\n\nclass RepoImpl : Repo { override fun load() {} }\n',
      'RepoModule.kt':
        'package com.example\n\nimport dagger.Module\nimport dagger.Provides\n\n' +
        '@Module\nclass RepoModule {\n' +
        ' @Provides fun provideRepo(impl: RepoImpl): Repo = impl\n' +
        '}\n',
    });

    expect(findBindingEdge(cg, 'Repo', 'RepoImpl')).toBeDefined();
  });

  it('disambiguates two impls of the same interface across modules', async () => {
    // Both ImplA and ImplB implement Foo. The generic interface-impl pass would
    // pair Foo with both; the Dagger pass should emit edges only to the impl
    // each module actually binds (binding-precise).
    const cg = await indexProject({
      'Foo.java': 'package com.example;\npublic interface Foo {}\n',
      'ImplA.java': 'package com.example;\npublic class ImplA implements Foo {}\n',
      'ImplB.java': 'package com.example;\npublic class ImplB implements Foo {}\n',
      'ModuleA.java':
        'package com.example;\nimport dagger.Module;\nimport dagger.Provides;\n' +
        '@Module public class ModuleA { @Provides Foo a(ImplA impl) { return impl; } }\n',
      'ModuleB.java':
        'package com.example;\nimport dagger.Module;\nimport dagger.Provides;\n' +
        '@Module public class ModuleB { @Provides Foo b(ImplB impl) { return impl; } }\n',
    });

    // Both binding edges should exist with binding-precise provenance.
    // (Resolving which one wins in any particular call site is downstream;
    // here we just check the graph carries both binding facts.)
    expect(findBindingEdge(cg, 'Foo', 'ImplA')).toBeDefined();
    expect(findBindingEdge(cg, 'Foo', 'ImplB')).toBeDefined();
  });

  it('does not emit a binding for a factory @Provides whose body builds the return value', async () => {
    // Real-world failure mode (seen in Plaid): `@Provides Interface m(Impl impl)`
    // signature, but body calls a factory instead of returning impl. The
    // signature *looks* like a binding; the body proves it's a factory.
    const cg = await indexProject({
      'ViewModel.java': 'package com.example;\npublic class ViewModel {}\n',
      'ViewModelFactory.java': 'package com.example;\npublic class ViewModelFactory {}\n',
      'AppModule.java':
        'package com.example;\nimport dagger.Module;\nimport dagger.Provides;\n' +
        '@Module public class AppModule {\n' +
        ' @Provides ViewModel provideViewModel(ViewModelFactory factory) {\n' +
        ' return factory.create();\n' +
        ' }\n' +
        '}\n',
    });

    expect(findBindingEdge(cg, 'ViewModel', 'ViewModelFactory')).toBeUndefined();
  });

  it('does not emit a binding for a factory method (no impl parameter)', async () => {
    // `@Provides Foo provideFoo() { return new Foo(); }` takes no parameter,
    // so there is no impl type to bind Foo to; no Dagger edge may leave Foo.
    const cg = await indexProject({
      'Foo.java': 'package com.example;\npublic class Foo {}\n',
      'ConfigModule.java':
        'package com.example;\nimport dagger.Module;\nimport dagger.Provides;\n' +
        '@Module public class ConfigModule {\n' +
        ' @Provides Foo provideFoo() { return new Foo(); }\n' +
        '}\n',
    });

    const foo = cg.getNodesByKind('class').find((n) => n.name === 'Foo');
    expect(foo).toBeDefined();
    const daggerOut = cg
      .getOutgoingEdges(foo!.id)
      .filter((e) => synthesizedBy(e.metadata) === 'dagger-provides');
    expect(daggerOut.length).toBe(0);
  });

  it('does not emit bindings from a non-@Module class', async () => {
    // The same method shape outside a @Module class — must be ignored.
    const cg = await indexProject({
      'Foo.java': 'package com.example;\npublic interface Foo {}\n',
      'FooImpl.java': 'package com.example;\npublic class FooImpl implements Foo {}\n',
      'NotAModule.java':
        'package com.example;\npublic class NotAModule {\n' +
        ' Foo provide(FooImpl impl) { return impl; }\n' +
        '}\n',
    });

    expect(findBindingEdge(cg, 'Foo', 'FooImpl')).toBeUndefined();
  });
});
/**
 * Phase 5.6: Dagger 2 / Hilt `@Provides` / `@Binds` bindings. A Dagger module
 *
 *   @Module class Foo { @Provides DataManager m(AppDataManager impl) { return impl; } }
 *
 * declares "anywhere you ask for DataManager, you get AppDataManager."
 * Statically, the only link is the method's signature — there is no
 * `implements` declaration to mine. This pass is *binding-precise*: only the
 * impl named in the `@Provides`/`@Binds` method gets the edge.
 *
 * Emits `interface → impl` `references` edges with
 * `synthesizedBy: 'dagger-provides'`, de-duplicated per (interface, impl)
 * pair so the same binding declared in several modules yields one edge.
 */
const DAGGER_LANGS = new Set(['java', 'kotlin']);
const MODULE_ANNOTATION_RE = /@Module\b/;
const BINDS_ANNOTATION_RE = /@Binds\b/;
const ANY_BINDING_ANNOTATION_RE = /@(?:Provides|Binds)\b/;
// Require an actual Dagger import in the file to avoid claiming `@Module`
// declarations from unrelated libraries (Guice, NestJS shims, custom test
// helpers) as Dagger modules.
const DAGGER_IMPORT_PRESENT_RE = /^import\s+dagger\./m;
// Kotlin method head: fun name(p: ParamType[, …]): ReturnType.
// Groups: 1 = first param name, 2 = first param type, 3 = return type.
const KOTLIN_BINDING_HEAD_RE =
  /\bfun\s+\w+\s*\(\s*(\w+)\s*:\s*([\w.<>?]+)[\s\S]*?\)\s*:\s*([\w.<>?]+)/;
// Java method head: [modifiers] ReturnType name([@Anno ]ParamType paramName[, …]).
// Groups: 1 = return type, 2 = first param type, 3 = first param name.
const JAVA_BINDING_HEAD_RE =
  /(?:^|\s)(?:public|private|protected|abstract|static|final|default|\s)*\b([\w.<>]+)\s+\w+\s*\(\s*(?:@\w+(?:\([^)]*\))?\s+)?([\w.<>]+)\s+(\w+)\s*[,)]/;
// A source line made up of nothing but annotations, e.g. `@Provides @Singleton`.
const ANNOTATION_ONLY_LINE_RE = /^\s*(?:@[\w.]+(?:\([^)]*\))?\s*)+$/;

/** Escape `text` so it can be embedded literally inside a `RegExp` source. */
function escapeForDaggerRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/** Strip generic parameters and dotted qualifiers down to the bare type name. */
function bareTypeName(t: string): string {
  const noGeneric = t.replace(/<.*$/, '');
  const noNullable = noGeneric.replace(/\?$/, '');
  const noDot = noNullable.split('.').pop() ?? noNullable;
  return noDot;
}

/**
 * Extract (return type, first param type, first param name) from a method head.
 *
 * @param headSrc    Source text that contains the method head.
 * @param language   `'kotlin'` selects the Kotlin grammar; anything else is
 *                   parsed as Java.
 * @param methodName When given, only a head declaring this exact method name
 *                   matches. Without it the first head-shaped match in
 *                   `headSrc` wins, which is wrong when `headSrc` also
 *                   contains neighbouring methods.
 * @returns The parsed pieces plus `index`, the offset in `headSrc` where the
 *          match begins, or `null` when no head is found.
 */
function parseBindingHead(
  headSrc: string,
  language: string,
  methodName?: string
): { returnType: string; paramType: string; paramName: string; index: number } | null {
  const name = methodName === undefined ? undefined : escapeForDaggerRegExp(methodName);

  if (language === 'kotlin') {
    const kotlinRe =
      name === undefined
        ? KOTLIN_BINDING_HEAD_RE
        : new RegExp(
            `\\bfun\\s+${name}\\s*\\(\\s*(\\w+)\\s*:\\s*([\\w.<>?]+)[\\s\\S]*?\\)\\s*:\\s*([\\w.<>?]+)`
          );
    const km = kotlinRe.exec(headSrc);
    if (!km) return null;
    return { paramName: km[1]!, paramType: km[2]!, returnType: km[3]!, index: km.index };
  }

  // With a known name the modifier prefix is unnecessary: the match starts
  // directly at the return type, so `index` marks where the annotations and
  // modifiers in front of the head end.
  const javaRe =
    name === undefined
      ? JAVA_BINDING_HEAD_RE
      : new RegExp(
          `([\\w.<>]+)\\s+${name}\\s*\\(\\s*(?:@\\w+(?:\\([^)]*\\))?\\s+)?([\\w.<>]+)\\s+(\\w+)\\s*[,)]`
        );
  const jm = javaRe.exec(headSrc);
  if (!jm) return null;
  return { returnType: jm[1]!, paramType: jm[2]!, paramName: jm[3]!, index: jm.index };
}

/**
 * Return the annotation/modifier text attached to the method head that starts
 * at `headIndex` in `windowSrc`: whatever precedes the head on its own line
 * (cut after the previous `;`, `{` or `}` so a same-line neighbour is
 * excluded), plus any directly preceding annotation-only lines.
 */
function bindingAnnotationsFor(windowSrc: string, headIndex: number): string {
  const earlier = windowSrc.slice(0, headIndex).split('\n');
  const sameLine = earlier.pop() ?? '';
  const boundary = Math.max(
    sameLine.lastIndexOf(';'),
    sameLine.lastIndexOf('{'),
    sameLine.lastIndexOf('}')
  );
  // Another member or the class brace ends on this line: annotations on
  // earlier lines cannot belong to this head.
  if (boundary >= 0) return sameLine.slice(boundary + 1);

  const attached = [sameLine];
  while (earlier.length > 0 && ANNOTATION_ONLY_LINE_RE.test(earlier[earlier.length - 1]!)) {
    attached.unshift(earlier.pop()!);
  }
  return attached.join('\n');
}

/**
 * Does the method body look like a pure identity (`return paramName;` in
 * Java, `= paramName` or `return paramName` in Kotlin)? Pure-identity
 * `@Provides` methods are equivalent to `@Binds` — they declare a real
 * interface-to-impl binding. Methods that do anything else (factory calls,
 * ViewModelProviders, configuration) are NOT bindings, even if their
 * parameter happens to be a subtype of their return type.
 *
 * The `= paramName` form is honoured for Kotlin only: in Java it can only be
 * an assignment, never a method body. Comparison operators (`==`, `!=`,
 * `<=`, `>=`) never count as the `=` form.
 */
function isPureIdentityBody(content: string, m: Node, paramName: string): boolean {
  const span = sliceLines(content, m.startLine, m.endLine);
  if (!span) return false;

  // Start at the method's own head so a neighbour sharing its first line
  // cannot satisfy the check on this method's behalf.
  const headAt = span.search(new RegExp(`\\b${escapeForDaggerRegExp(m.name)}\\s*\\(`));
  const body = headAt >= 0 ? span.slice(headAt) : span;

  const lead =
    m.language === 'kotlin' ? '(?:\\breturn\\s+|(?<![=!<>])=\\s*)' : '\\breturn\\s+';
  const re = new RegExp(`${lead}${escapeForDaggerRegExp(paramName)}(?=\\s*[;\\n}]|\\s*$)`);
  return re.test(body);
}

/**
 * Synthesize `interface → impl` `references` edges from Dagger module methods.
 *
 * For every Java/Kotlin class or interface whose file imports `dagger.*` and
 * whose source carries `@Module`, each contained method is inspected: its own
 * head is parsed (anchored to the method's name), its own annotations must
 * include `@Provides` or `@Binds`, and `@Provides` methods must additionally
 * have a pure-identity body. The return type and first parameter type are
 * then resolved by bare name to graph nodes.
 *
 * @param queries Graph accessor used to enumerate module candidates and their
 *                contained methods.
 * @param ctx     Resolution context used to read source text and look nodes
 *                up by name.
 * @returns One edge per distinct (interface, impl) pair.
 */
function daggerProvidesBindingEdges(queries: QueryBuilder, ctx: ResolutionContext): Edge[] {
  const edges: Edge[] = [];
  const seen = new Set<string>();

  // Dagger modules are commonly interfaces as well as (abstract) classes.
  const moduleCandidates = [
    ...queries.getNodesByKind('class'),
    ...queries.getNodesByKind('interface'),
  ];

  for (const cls of moduleCandidates) {
    if (!DAGGER_LANGS.has(cls.language)) continue;
    const content = ctx.readFile(cls.filePath);
    if (!content || !DAGGER_IMPORT_PRESENT_RE.test(content)) continue;
    const classSrc = sliceLines(content, cls.startLine, cls.endLine);
    if (!classSrc || !MODULE_ANNOTATION_RE.test(classSrc)) continue;

    const methods = queries
      .getOutgoingEdges(cls.id, ['contains'])
      .map((e) => queries.getNodeById(e.target))
      .filter((n): n is Node => !!n && n.kind === 'method');

    for (const m of methods) {
      // A single window covers both the annotations and the signature head:
      // the reported start line may sit on the first annotation or on the
      // return-type line below it, and parameter lists may wrap. The window
      // can also contain neighbouring methods, so everything read from it
      // below is anchored to this method's own name.
      const window = sliceLines(content, Math.max(1, m.startLine - 3), m.startLine + 4);
      if (!window) continue;

      const parsed = parseBindingHead(window, m.language, m.name);
      if (!parsed) continue;

      const annotations = bindingAnnotationsFor(window, parsed.index);
      if (!ANY_BINDING_ANNOTATION_RE.test(annotations)) continue;

      const returnType = bareTypeName(parsed.returnType);
      const paramType = bareTypeName(parsed.paramType);
      if (!returnType || !paramType || returnType === paramType) continue;

      // `@Binds` is abstract — declaration alone IS the binding. `@Provides`
      // needs body-level verification: only count it as a binding when the
      // body literally returns the parameter (`return impl;` / `= impl`).
      const isBinds = BINDS_ANNOTATION_RE.test(annotations);
      if (!isBinds && !isPureIdentityBody(content, m, parsed.paramName)) continue;

      const ifaceCandidates = ctx
        .getNodesByName(returnType)
        .filter((n) => (n.kind === 'interface' || n.kind === 'class') && DAGGER_LANGS.has(n.language));
      const implCandidates = ctx
        .getNodesByName(paramType)
        .filter((n) => n.kind === 'class' && DAGGER_LANGS.has(n.language));
      if (ifaceCandidates.length === 0 || implCandidates.length === 0) continue;

      // NOTE(review): when several nodes share a bare name the first one
      // wins; nothing here disambiguates by package — confirm acceptable.
      const iface = ifaceCandidates[0]!;
      const impl = implCandidates[0]!;
      if (iface.id === impl.id) continue;

      const key = `${iface.id}>${impl.id}`;
      if (seen.has(key)) continue;
      seen.add(key);

      edges.push({
        source: iface.id,
        target: impl.id,
        kind: 'references',
        line: m.startLine,
        provenance: 'heuristic',
        metadata: { synthesizedBy: 'dagger-provides', via: m.name, registeredAt: `${m.filePath}:${m.startLine}` },
      });
    }
  }

  return edges;
}
/**
 * Dagger 2 / Hilt resolver
 *
 * Detection-only counterpart to the Dagger binding pass. The actual
 * `interface → impl` binding edges are produced by the whole-graph
 * synthesizer in `callback-synthesizer.ts`, which needs to see every
 * `@Module` class and `@Provides`/`@Binds` method at once — awkward to do
 * from a per-file `extract`. What this resolver contributes is a stable
 * registry name and a `detect` hook so `detectFrameworks` can report that a
 * project uses Dagger.
 */

import { FrameworkResolver, ResolutionContext } from '../types';

const DAGGER_IMPORT_RE = /^import\s+dagger\./m;
const DAGGER_FILE_EXT_RE = /\.(?:java|kt)$/;

export const daggerResolver: FrameworkResolver = {
  name: 'dagger',
  languages: ['java', 'kotlin'],

  /**
   * Report Dagger usage: true as soon as any `.java` / `.kt` file in the
   * project contains a line starting with `import dagger.`.
   */
  detect(context: ResolutionContext): boolean {
    for (const candidate of context.getAllFiles()) {
      if (DAGGER_FILE_EXT_RE.test(candidate)) {
        const source = context.readFile(candidate);
        if (source && DAGGER_IMPORT_RE.test(source)) return true;
      }
    }
    return false;
  },

  /**
   * Never resolves anything itself: binding edges come from the synthesizer,
   * and ordinary symbol references fall through to the standard
   * import / name-matcher chain.
   */
  resolve(): null {
    return null;
  },
};