Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 140 additions & 8 deletions cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class BazelQueryService(
private val noBazelrc: Boolean,
) : KoinComponent {
private val logger: Logger by inject()
private val modService: BazelModService by inject()
private val version: Triple<Int, Int, Int> by lazy { runBlocking { determineBazelVersion() } }

@OptIn(ExperimentalCoroutinesApi::class)
Expand Down Expand Up @@ -307,16 +308,74 @@ class BazelQueryService(
}
}

// Discover the bzlmod module-graph edges so we can encode the dep relationships between
// synthetic //external:* targets. Without this, a target that depends on @outer//... only
// sees //external:outer's *metadata* hash and never picks up content changes in @outer's
// own bzlmod deps (e.g. @inner). With these edges in place, RuleHasher follows the chain
// //:consumer -> //external:outer -> //external:inner during digest computation, so a
// change inside @inner propagates all the way to the main-repo consumer without the user
// having to enumerate every wrapping repo in --fineGrainedHashExternalRepos. See
// https://github.com/Tinder/bazel-diff/issues/184 (transitive build-time chain) and
// https://github.com/Tinder/bazel-diff/issues/197 (alias-wrap chain).
val moduleGraphJson = modService.getModuleGraphJson()
val moduleDepEdges =
if (moduleGraphJson != null) {
val parser = ModuleGraphParser()
// `bazel mod graph` can return cycles (e.g. rules_go <-> gazelle via the latter's
// dev_dependency). Emitting both directions as rule_inputs on the synthetic
// //external:* targets triggers RuleHasher.CircularDependencyException, so break
// cycles into a deterministic DAG before deriving dep edges.
parser.breakCycles(parser.parseModuleGraphDepEdges(moduleGraphJson))
} else {
emptyMap()
}
// `bazel mod show_repo` does not populate Repository.module_key in current Bazel, so
// bridge from a module's `name` (always present in `bazel mod graph` output) to that
// repo's `canonical_name` by stripping any trailing `+<version>` suffix produced by
// bzlmod's canonical-name scheme. This is best-effort: it works for the no-version-conflict
// case (canonical = "<name>+" or "<name>+<version>"). Module-extension repos do not appear
// in `bazel mod graph` at all, so they get no synthetic dep edges -- their contents are
// captured via repo metadata + the per-repo content hash below.
val moduleNameToCanonical = mutableMapOf<String, String>()
for (repo in repos) {
val canonical = repo.canonicalName
val moduleName = canonical.substringBefore('+').ifEmpty { canonical }
// Only register a name -> canonical edge if the canonical "looks like a module repo"
// (single `+`, no extension separator). Skip extension-generated repos like
// "rules_jvm_external++maven+maven".
if (canonical.count { it == '+' } == 1) {
moduleNameToCanonical[moduleName] = canonical
}
}
val canonicalToRootApparent: Map<String, List<String>> =
canonicalToApparent.mapValues { it.value.toList() }

val targets = mutableListOf<BazelTarget.Rule>()
for (repo in repos) {
// Derive this repo's bzlmod module name from its canonical name and look up its direct
// deps in the module graph. Translate each dep's module name -> its canonical name ->
// root-visible apparent name; that's what `BazelRule.transformRuleInput` collapses
// non-fine-grained `@<apparent>//...` rule_inputs to, so adding `//external:<apparent>`
// as a rule_input here is what wires up the dep chain.
val moduleName =
repo.canonicalName.takeIf { it.count { c -> c == '+' } == 1 }?.substringBefore('+')
val depApparentNames =
if (moduleName != null) {
moduleDepEdges[moduleName]
.orEmpty()
.mapNotNull { moduleNameToCanonical[it] }
.flatMap { canonicalToRootApparent[it].orEmpty() }
} else {
emptyList()
}
val apparentNames = canonicalToApparent[repo.canonicalName]
if (apparentNames != null) {
for (apparentName in apparentNames) {
targets.add(repositoryToTarget(repo, apparentName))
targets.add(repositoryToTarget(repo, apparentName, depApparentNames))
}
} else {
// Fallback: use canonical name if no apparent name mapping exists
targets.add(repositoryToTarget(repo, repo.canonicalName))
targets.add(repositoryToTarget(repo, repo.canonicalName, depApparentNames))
}
}

Expand All @@ -328,22 +387,95 @@ class BazelQueryService(
* Converts a Build.Repository proto into a synthetic BazelTarget.Rule named
* `//external:<targetName>`. This mirrors how WORKSPACE repos appear as `//external:*`
* targets, and matches the names produced by `transformRuleInput` in BazelRule.kt.
*
* For each bzlmod dep of this repo (as discovered from `bazel mod graph`) a corresponding
* `//external:<dep_apparent_name>` is added to the rule's `rule_input` list, so
* [RuleHasher] follows the dep chain when computing the digest. For repos backed by a
* `local_repository` rule (which is what `local_path_override` lowers to), the contents
* of the local directory are also rolled into a synthetic `_bazel_diff_content_hash`
* attribute so file content changes inside the repo flip the synthetic target's hash.
*/
private fun repositoryToTarget(repo: Build.Repository, targetName: String): BazelTarget.Rule {
private fun repositoryToTarget(
repo: Build.Repository,
targetName: String,
depApparentNames: List<String>
): BazelTarget.Rule {
// Repos whose proto carries no rule name fall back to the generic "bzlmod_repo" rule class.
val ruleClass = repo.repoRuleName.ifEmpty { "bzlmod_repo" }

// Start from the repo's own attributes; the synthetic content-hash attribute is appended
// only when computeLocalRepoContentHash returned a non-null value for this repo.
val attributes = repo.attributeList.toMutableList()
val contentHash = computeLocalRepoContentHash(repo)
if (contentHash != null) {
attributes.add(
Build.Attribute.newBuilder()
.setName("_bazel_diff_content_hash")
.setType(Build.Attribute.Discriminator.STRING)
.setStringValue(contentHash)
.build())
}

val ruleBuilder =
Build.Rule.newBuilder()
.setName("//external:$targetName")
.setRuleClass(ruleClass)
.addAllAttribute(attributes)
// toSortedSet() de-duplicates the dep names and fixes their order, so the emitted
// rule_input list does not depend on the order of depApparentNames. A dep equal to
// targetName is skipped so the synthetic target never lists itself as an input.
for (dep in depApparentNames.toSortedSet()) {
if (dep != targetName) ruleBuilder.addRuleInput("//external:$dep")
}

// Wrap the rule in a RULE-typed Build.Target and expose it as a BazelTarget.Rule.
val target =
Build.Target.newBuilder()
.setType(Build.Target.Discriminator.RULE)
.setRule(
Build.Rule.newBuilder()
.setName("//external:$targetName")
.setRuleClass(ruleClass)
.addAllAttribute(repo.attributeList))
.setRule(ruleBuilder)
.build()
return BazelTarget.Rule(target)
}

/**
 * Returns a stable lowercase-hex SHA-256 over the files inside a `local_repository`-backed
 * repo on disk, or null when no such hash can be produced.
 *
 * `local_path_override(module_name = "X", path = "...")` in MODULE.bazel lowers to a
 * `local_repository` rule. Hashing the directory named by its `path` attribute makes file
 * content edits surface in the synthetic //external:X target's digest, which fixes the
 * "external repo file change is invisible" half of
 * [#184](https://github.com/Tinder/bazel-diff/issues/184) /
 * [#197](https://github.com/Tinder/bazel-diff/issues/197).
 *
 * The digest covers, for every regular file in relative-path order: the UTF-8 relative path
 * (with `/` separators), a 0x00 byte, the file's bytes, and another 0x00 byte.
 *
 * Null is returned when:
 * - the repo rule is not `local_repository`;
 * - there is no STRING-typed `path` attribute, or it is empty;
 * - the resolved path is not a directory (relative paths resolve against `workingDirectory`);
 * - walking or reading the directory throws, in which case a warning is logged.
 *
 * @param repo the repository proto whose on-disk contents should be hashed.
 * @return the hex digest, or null as described above.
 */
private fun computeLocalRepoContentHash(repo: Build.Repository): String? {
  if (repo.repoRuleName != "local_repository") return null
  val pathAttr =
      repo.attributeList.find {
        it.name == "path" && it.type == Build.Attribute.Discriminator.STRING
      } ?: return null
  val pathStr = pathAttr.stringValue.ifEmpty { return null }
  val rawPath = java.nio.file.Paths.get(pathStr)
  val repoDir =
      if (rawPath.isAbsolute) rawPath.toFile() else workingDirectory.resolve(rawPath).toFile()
  // isDirectory is already false for a path that does not exist.
  if (!repoDir.isDirectory) return null

  return try {
    val digest = java.security.MessageDigest.getInstance("SHA-256")
    // One reusable buffer: files are streamed into the digest instead of being loaded whole,
    // so a very large file cannot fail with an OutOfMemoryError (which the catch below, being
    // limited to Exception, would not intercept). The resulting digest bytes are unchanged.
    val buffer = ByteArray(DEFAULT_BUFFER_SIZE)
    // NOTE(review): every regular file under repoDir other than MODULE.bazel.lock is hashed,
    // including any VCS metadata or build outputs that happen to live there -- confirm that
    // this is intended.
    repoDir
        .walkTopDown()
        .filter { it.isFile }
        // Skip MODULE.bazel.lock: bazel auto-regenerates it on every invocation in ways
        // that don't reflect a real source change (it depends on resolution state). Letting
        // it flip the content hash makes generate-hashes non-deterministic across runs.
        .filter { it.name != "MODULE.bazel.lock" }
        .map { Pair(it.relativeTo(repoDir).invariantSeparatorsPath, it) }
        // Sort by relative path so the digest does not depend on directory listing order.
        .sortedBy { it.first }
        .forEach { (relPath, file) ->
          digest.update(relPath.toByteArray(Charsets.UTF_8))
          digest.update(0x00)
          file.inputStream().use { stream ->
            var read = stream.read(buffer)
            while (read >= 0) {
              digest.update(buffer, 0, read)
              read = stream.read(buffer)
            }
          }
          digest.update(0x00)
        }
    digest.digest().joinToString("") { "%02x".format(it) }
  } catch (e: Exception) {
    // Best-effort: an unreadable repo simply contributes no content hash.
    logger.w { "Failed to content-hash local repo at $repoDir: ${e.message}" }
    null
  }
}

/**
* Discovers the root module's apparent→canonical repo name mapping by running
* `bazel mod dump_repo_mapping ""`. Returns a map of apparent name → canonical name.
Expand Down
95 changes: 95 additions & 0 deletions cli/src/main/kotlin/com/bazel_diff/bazel/ModuleGraphParser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,101 @@ class ModuleGraphParser {
}
}

/**
 * Extracts direct `bazel_dep` edges from the JSON printed by `bazel mod graph --output=json`.
 *
 * The result maps a module's `name` to the names of the modules listed in its
 * `dependencies` arrays. Names are used as keys rather than `module_key` because the latter
 * is not always populated on the `Build.Repository` protos returned by `bazel mod show_repo`,
 * which is what consumers look up against; names are sufficient to identify a module in the
 * common no-multi-version case.
 *
 * A module can occur several times in the JSON tree (`bazel mod graph` inlines it once and
 * refers to it as `unexpanded` afterwards). Every occurrence that carries a `dependencies`
 * array contributes, so each entry is the union of the dep names seen for that module name
 * anywhere in the tree.
 *
 * If the text does not parse as a JSON object, parsing is retried from the first `{` in the
 * input. Any remaining failure yields an empty map (same tolerance as [parseModuleGraph]).
 *
 * @param json raw output of `bazel mod graph --output=json`.
 * @return `module_name -> [dep_module_name, ...]`, or an empty map on parse failure.
 */
fun parseModuleGraphDepEdges(json: String): Map<String, List<String>> {
  val collected = mutableMapOf<String, MutableSet<String>>()
  return try {
    val rootObject =
        try {
          JsonParser.parseString(json).asJsonObject
        } catch (_: Exception) {
          // Second attempt: drop everything before the first opening brace.
          val firstBrace = json.indexOf('{')
          if (firstBrace < 0) return emptyMap()
          JsonParser.parseString(json.substring(firstBrace)).asJsonObject
        }
    extractDepEdges(rootObject, collected)
    collected.mapValues { (_, depNames) -> depNames.toList() }
  } catch (_: Exception) {
    emptyMap()
  }
}

/**
 * Recursively records, for [obj] and every object nested under its `dependencies` arrays,
 * the names of the direct dependencies into [edges] (keyed by the parent's `name`).
 *
 * An object without a `name` or without a `dependencies` array contributes nothing and is
 * not descended into. Array elements that are not JSON objects, or that have no `name`,
 * are ignored.
 */
private fun extractDepEdges(obj: JsonObject, edges: MutableMap<String, MutableSet<String>>) {
  val parentName = obj.get("name")?.asString
  if (parentName == null) return
  val children = obj.get("dependencies")?.asJsonArray
  if (children == null) return

  val directDeps = edges.getOrPut(parentName) { mutableSetOf() }
  children
      .asSequence()
      .filter { it.isJsonObject }
      .map { it.asJsonObject }
      .forEach { child ->
        val childName = child.get("name")?.asString
        if (childName != null) {
          directDeps.add(childName)
          // Descend even into `unexpanded` children: if they carry their own
          // `dependencies` here, those edges are merged in as well.
          extractDepEdges(child, edges)
        }
      }
}

/**
 * Produces an acyclic version of [edges] by discarding back-edges.
 *
 * `bazel mod graph` can legitimately contain cycles: `rules_go` declares
 * `bazel_dep(name = "gazelle", dev_dependency = True)` while `gazelle` declares
 * `bazel_dep(name = "rules_go")`. Emitting both directions as `rule_input`s on the synthetic
 * `//external:*` targets built by [BazelQueryService.queryBzlmodRepos] makes `RuleHasher`
 * fail with `CircularDependencyException`, so the edges must form a DAG first.
 *
 * The traversal is one depth-first search that visits start nodes in sorted order and
 * follows each node's out-edges in sorted order. An edge pointing at a node that is
 * currently on the DFS path would close a cycle and is dropped; all other edges are kept.
 * The output is therefore acyclic and identical from run to run. Every node reached,
 * including nodes that only appear as a target, gets an entry in the result, and each
 * entry's targets are in sorted order.
 *
 * Dropping a back-edge is conservative: the target of the dropped edge still has its own
 * synthetic `//external:*` target, so a content change there continues to surface through
 * that target's hash. Only propagation through the cycle itself is lost.
 *
 * @param edges adjacency lists keyed by node name.
 * @return a cycle-free copy of [edges].
 */
fun breakCycles(edges: Map<String, List<String>>): Map<String, List<String>> {
  val acyclic = mutableMapOf<String, List<String>>()
  val finished = mutableSetOf<String>()
  val activePath = mutableSetOf<String>()

  fun visit(current: String) {
    if (current in finished) return
    activePath += current
    // The active path is the same before and after each recursive call returns, so the
    // back-edge filter can be evaluated for all out-edges up front.
    val survivors = edges[current].orEmpty().sorted().filterNot { it in activePath }
    survivors.forEach { visit(it) }
    acyclic[current] = survivors
    activePath -= current
    finished += current
  }

  edges.keys.sorted().forEach { visit(it) }
  return acyclic
}

/**
* Compares two module graphs and returns the keys of modules that changed.
*
Expand Down
64 changes: 64 additions & 0 deletions cli/src/test/kotlin/com/bazel_diff/bazel/ModuleGraphParserTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,70 @@ class ModuleGraphParserTest {
assertThat(result).containsExactlyInAnyOrder("root", "abseil-cpp@20240116.2")
}

// ---------------------------------------------------------------------------------------
// breakCycles
// ---------------------------------------------------------------------------------------

@Test
fun breakCycles_acyclicInput_returnsEdgesUnchanged() {
  // a -> {b, c}, b -> c: already a DAG, so no edge may be removed.
  val dag = mapOf("a" to listOf("b", "c"), "b" to listOf("c"), "c" to emptyList())

  val pruned = parser.breakCycles(dag)

  assertThat(pruned["a"]!!).containsExactlyInAnyOrder("b", "c")
  assertThat(pruned["b"]!!).containsExactlyInAnyOrder("c")
  assertThat(pruned["c"]!!).isEmpty()
}

@Test
fun breakCycles_twoNodeCycle_dropsOneEdge() {
  // Mirrors the real rules_go <-> gazelle cycle: emitting both rule_inputs would trip
  // RuleHasher, so exactly one direction may remain.
  val cyclic = mapOf("gazelle" to listOf("rules_go"), "rules_go" to listOf("gazelle"))

  val pruned = parser.breakCycles(cyclic)

  val survivingEdgeCount = pruned.values.flatten().size
  assertThat(survivingEdgeCount).isEqualTo(1)
  // The sorted DFS enters "gazelle" first, so gazelle -> rules_go is kept and the
  // rules_go -> gazelle back-edge is the one removed.
  assertThat(pruned["gazelle"]!!).containsExactlyInAnyOrder("rules_go")
  assertThat(pruned["rules_go"]!!).isEmpty()
}

@Test
fun breakCycles_threeNodeCycle_breaksCycleDeterministically() {
  val ring = mapOf("a" to listOf("b"), "b" to listOf("c"), "c" to listOf("a"))

  val pruned = parser.breakCycles(ring)

  // The DFS walks a -> b -> c, so both forward edges are kept and c -> a, which would
  // close the ring, is the dropped back-edge. That leaves nodes - 1 edges, i.e. a DAG.
  assertThat(pruned["a"]!!).containsExactlyInAnyOrder("b")
  assertThat(pruned["b"]!!).containsExactlyInAnyOrder("c")
  assertThat(pruned["c"]!!).isEmpty()
}

@Test
fun breakCycles_selfLoop_dropsSelfEdge() {
  // "a" lists itself as a dep; only the edge to "b" should remain.
  val withSelfLoop = mapOf("a" to listOf("a", "b"), "b" to emptyList())

  val pruned = parser.breakCycles(withSelfLoop)

  assertThat(pruned["a"]!!).containsExactlyInAnyOrder("b")
}

@Test
fun breakCycles_isDeterministic() {
  // Two runs over the same cyclic input must agree on which edge is dropped.
  val cyclic = mapOf("gazelle" to listOf("rules_go"), "rules_go" to listOf("gazelle"))

  val firstRun = parser.breakCycles(cyclic)
  val secondRun = parser.breakCycles(cyclic)

  assertThat(firstRun).isEqualTo(secondRun)
}

@Test
fun findChangedModules_withNewGraphEmpty_returnsAllOldModuleKeys() {
val oldGraph =
Expand Down
Loading
Loading