Tinder · tinder-maxwellelliott · May 18, 2026 · May 13, 2026 · May 13, 2026 · May 17, 2026
diff --git a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt
@@ -26,6 +26,7 @@ class BazelQueryService(
     private val noBazelrc: Boolean,
 ) : KoinComponent {
   private val logger: Logger by inject()
+  private val modService: BazelModService by inject()
   private val version: Triple<Int, Int, Int> by lazy { runBlocking { determineBazelVersion() } }
 
   @OptIn(ExperimentalCoroutinesApi::class)
@@ -307,16 +308,74 @@ class BazelQueryService(
           }
         }
 
+    // Discover the bzlmod module-graph edges so we can encode the dep relationships between
+    // synthetic //external:* targets. Without this, a target that depends on @outer//... only
+    // sees //external:outer's *metadata* hash and never picks up content changes in @outer's
+    // own bzlmod deps (e.g. @inner). With these edges in place, RuleHasher follows the chain
+    // //:consumer -> //external:outer -> //external:inner during digest computation, so a
+    // change inside @inner propagates all the way to the main-repo consumer without the user
+    // having to enumerate every wrapping repo in --fineGrainedHashExternalRepos. See
+    // https://github.com/Tinder/bazel-diff/issues/184 (transitive build-time chain) and
+    // https://github.com/Tinder/bazel-diff/issues/197 (alias-wrap chain).
+    val moduleGraphJson = modService.getModuleGraphJson()
+    val moduleDepEdges =
+        if (moduleGraphJson != null) {
+          val parser = ModuleGraphParser()
+          // `bazel mod graph` can return cycles (e.g. rules_go <-> gazelle via the latter's
+          // dev_dependency). Emitting both directions as rule_inputs on the synthetic
+          // //external:* targets triggers RuleHasher.CircularDependencyException, so break
+          // cycles into a deterministic DAG before deriving dep edges.
+          parser.breakCycles(parser.parseModuleGraphDepEdges(moduleGraphJson))
+        } else {
+          emptyMap()
+        }
+    // `bazel mod show_repo` does not populate Repository.module_key in current Bazel, so
+    // bridge from a module's `name` (always present in `bazel mod graph` output) to that
+    // repo's `canonical_name` by stripping any trailing `+<version>` suffix produced by
+    // bzlmod's canonical-name scheme. This is best-effort: it works for the no-version-conflict
+    // case (canonical = "<name>+" or "<name>+<version>"). Module-extension repos do not appear
+    // in `bazel mod graph` at all, so they get no synthetic dep edges -- their contents are
+    // captured via repo metadata + the per-repo content hash below.
+    val moduleNameToCanonical = mutableMapOf<String, String>()
+    for (repo in repos) {
+      val canonical = repo.canonicalName
+      val moduleName = canonical.substringBefore('+').ifEmpty { canonical }
+      // Only register a name -> canonical edge if the canonical "looks like a module repo"
+      // (single `+`, no extension separator). Skip extension-generated repos like
+      // "rules_jvm_external++maven+maven".
+      if (canonical.count { it == '+' } == 1) {
+        moduleNameToCanonical[moduleName] = canonical
+      }
+    }
+    val canonicalToRootApparent: Map<String, List<String>> =
+        canonicalToApparent.mapValues { it.value.toList() }
+
     val targets = mutableListOf<BazelTarget.Rule>()
     for (repo in repos) {
+      // Derive this repo's bzlmod module name from its canonical name and look up its direct
+      // deps in the module graph. Translate each dep's module name -> its canonical name ->
+      // root-visible apparent name; that's what `BazelRule.transformRuleInput` collapses
+      // non-fine-grained `@<apparent>//...` rule_inputs to, so adding `//external:<apparent>`
+      // as a rule_input here is what wires up the dep chain.
+      val moduleName =
+          repo.canonicalName.takeIf { it.count { c -> c == '+' } == 1 }?.substringBefore('+')
+      val depApparentNames =
+          if (moduleName != null) {
+            moduleDepEdges[moduleName]
+                .orEmpty()
+                .mapNotNull { moduleNameToCanonical[it] }
+                .flatMap { canonicalToRootApparent[it].orEmpty() }
+          } else {
+            emptyList()
+          }
       val apparentNames = canonicalToApparent[repo.canonicalName]
       if (apparentNames != null) {
         for (apparentName in apparentNames) {
-          targets.add(repositoryToTarget(repo, apparentName))
+          targets.add(repositoryToTarget(repo, apparentName, depApparentNames))
         }
       } else {
         // Fallback: use canonical name if no apparent name mapping exists
-        targets.add(repositoryToTarget(repo, repo.canonicalName))
+        targets.add(repositoryToTarget(repo, repo.canonicalName, depApparentNames))
       }
     }
 
@@ -328,22 +387,95 @@ class BazelQueryService(
    * Converts a Build.Repository proto into a synthetic BazelTarget.Rule named
    * `//external:<targetName>`. This mirrors how WORKSPACE repos appear as `//external:*`
    * targets, and matches the names produced by `transformRuleInput` in BazelRule.kt.
+   *
+   * For each bzlmod dep of this repo (as discovered from `bazel mod graph`) a corresponding
+   * `//external:<dep_apparent_name>` is added to the rule's `rule_input` list, so
+   * [RuleHasher] follows the dep chain when computing the digest. For repos backed by a
+   * `local_repository` rule (which is what `local_path_override` lowers to), the contents
+   * of the local directory are also rolled into a synthetic `_bazel_diff_content_hash`
+   * attribute so file content changes inside the repo flip the synthetic target's hash.
    */
-  private fun repositoryToTarget(repo: Build.Repository, targetName: String): BazelTarget.Rule {
+  private fun repositoryToTarget(
+      repo: Build.Repository,
+      targetName: String,
+      depApparentNames: List<String>
+  ): BazelTarget.Rule {
     val ruleClass = repo.repoRuleName.ifEmpty { "bzlmod_repo" }
 
+    // Append the on-disk content hash (when one is available) as a synthetic string attribute.
+    val attributes = repo.attributeList.toMutableList()
+    computeLocalRepoContentHash(repo)?.let { contentHash ->
+      attributes +=
+          Build.Attribute.newBuilder()
+              .setName("_bazel_diff_content_hash")
+              .setType(Build.Attribute.Discriminator.STRING)
+              .setStringValue(contentHash)
+              .build()
+    }
+
+    // Sorted, de-duplicated dep labels keep rule_input order stable; a self-edge is skipped.
+    val depLabels = depApparentNames.toSortedSet().filter { it != targetName }
+    val ruleBuilder =
+        Build.Rule.newBuilder()
+            .setName("//external:$targetName")
+            .setRuleClass(ruleClass)
+            .addAllAttribute(attributes)
+            .addAllRuleInput(depLabels.map { "//external:$it" })
+
     val target =
         Build.Target.newBuilder()
             .setType(Build.Target.Discriminator.RULE)
-            .setRule(
-                Build.Rule.newBuilder()
-                    .setName("//external:$targetName")
-                    .setRuleClass(ruleClass)
-                    .addAllAttribute(repo.attributeList))
+            .setRule(ruleBuilder)
             .build()
     return BazelTarget.Rule(target)
   }
 
+  /**
+   * Returns a stable hex sha256 over the files inside a `local_repository`-backed repo on
+   * disk, or null if the repo is not local-backed or the directory cannot be read.
+   *
+   * `local_path_override(module_name = "X", path = "...")` in MODULE.bazel lowers to a
+   * `local_repository` rule, whose `path` attribute is relative to the workspace root. Hashing
+   * that directory makes file content edits surface in the synthetic //external:X target's
+   * digest, which fixes the "external repo file change is invisible" half of
+   * [#184](https://github.com/Tinder/bazel-diff/issues/184) /
+   * [#197](https://github.com/Tinder/bazel-diff/issues/197).
+   */
+  private fun computeLocalRepoContentHash(repo: Build.Repository): String? {
+    if (repo.repoRuleName != "local_repository") return null
+    val pathAttr =
+        repo.attributeList.find { it.name == "path" && it.type == Build.Attribute.Discriminator.STRING }
+            ?: return null
+    val pathStr = pathAttr.stringValue.ifEmpty { return null }
+    // Path.resolve returns its argument unchanged when that argument is already absolute, so
+    // this single call covers both workspace-relative and absolute `path` values.
+    val repoDir = workingDirectory.resolve(java.nio.file.Paths.get(pathStr)).toFile()
+    if (!repoDir.exists() || !repoDir.isDirectory) return null
+
+    return try {
+      val digest = java.security.MessageDigest.getInstance("SHA-256")
+      repoDir
+          .walkTopDown()
+          // Skip MODULE.bazel.lock: bazel auto-regenerates it on every invocation in ways
+          // that don't reflect a real source change (it depends on resolution state). Letting
+          // it flip the content hash makes generate-hashes non-deterministic across runs.
+          .filter { it.isFile && it.name != "MODULE.bazel.lock" }
+          .map { Pair(it.relativeTo(repoDir).invariantSeparatorsPath, it) }
+          .sortedBy { it.first }
+          .forEach { (relPath, file) ->
+            digest.update(relPath.toByteArray(Charsets.UTF_8))
+            digest.update(0x00)
+            // Stream block-by-block; readBytes() would load each whole file into memory.
+            file.forEachBlock { chunk, bytesRead -> digest.update(chunk, 0, bytesRead) }
+            digest.update(0x00)
+          }
+      digest.digest().joinToString("") { "%02x".format(it) }
+    } catch (e: Exception) {
+      logger.w { "Failed to content-hash local repo at $repoDir: ${e.message}" }
+      null
+    }
+  }
+
   /**
    * Discovers the root module's apparent→canonical repo name mapping by running
    * `bazel mod dump_repo_mapping ""`. Returns a map of apparent name → canonical name.

diff --git a/cli/src/main/kotlin/com/bazel_diff/bazel/ModuleGraphParser.kt b/cli/src/main/kotlin/com/bazel_diff/bazel/ModuleGraphParser.kt
@@ -69,6 +69,101 @@ class ModuleGraphParser {
     }
   }
 
+  /**
+   * Parses the JSON from `bazel mod graph --output=json` and returns each module's direct
+   * `bazel_dep` neighbours as a `module_name -> [dep_module_name, ...]` map.
+   *
+   * Module names (the `name` field of the `module(name = ...)` declaration) are used as the
+   * key here because the alternative -- `module_key` -- is not always populated on the
+   * `Build.Repository` protos returned by `bazel mod show_repo`, which is what consumers want
+   * to look up against. Module names are universally present and sufficient to find a unique
+   * row in the graph for the common no-multi-version case.
+   *
+   * The same module may appear in multiple places in the JSON tree (`bazel mod graph` inlines
+   * each module once and references it via `unexpanded` afterwards). This method walks every
+   * `dependencies` array it sees, so even the `unexpanded` references contribute an edge. The
+   * resulting map is keyed by the parent's `module_name` and contains the union of all direct
+   * dep names observed across the tree.
+   *
+   * Returns an empty map on parse failure (same tolerance as [parseModuleGraph]).
+   */
+  fun parseModuleGraphDepEdges(json: String): Map<String, List<String>> {
+    val depSets = mutableMapOf<String, MutableSet<String>>()
+    try {
+      val root = try {
+        JsonParser.parseString(json).asJsonObject
+      } catch (_: Exception) {
+        // Not a bare JSON object: retry from the first '{', or give up if there is none.
+        val brace = json.indexOf('{').takeIf { it >= 0 } ?: return emptyMap()
+        JsonParser.parseString(json.substring(brace)).asJsonObject
+      }
+      extractDepEdges(root, depSets)
+    } catch (_: Exception) {
+      return emptyMap()
+    }
+    return depSets.mapValues { (_, deps) -> deps.toList() }
+  }
+
+  private fun extractDepEdges(obj: JsonObject, edges: MutableMap<String, MutableSet<String>>) {
+    val parentName = obj.get("name")?.asString ?: return
+    val children = obj.get("dependencies")?.asJsonArray ?: return
+    val parentDeps = edges.getOrPut(parentName) { mutableSetOf() }
+    // Only JSON-object entries that carry a `name` count as deps; anything else is skipped.
+    children
+        .filter { it.isJsonObject }
+        .map { it.asJsonObject }
+        .forEach { child ->
+          val childName = child.get("name")?.asString ?: return@forEach
+          parentDeps.add(childName)
+          extractDepEdges(child, edges) // returns at once if the child has no `dependencies`
+        }
+  }
+
+  /**
+   * Returns a copy of [edges] with back-edges removed so the result is acyclic.
+   *
+   * `bazel mod graph` legitimately contains cycles: for example `rules_go` declares
+   * `bazel_dep(name = "gazelle", dev_dependency = True)` while `gazelle` declares
+   * `bazel_dep(name = "rules_go")`, so the dep graph has `rules_go <-> gazelle`. Feeding both
+   * edges into [BazelQueryService.queryBzlmodRepos] as `rule_input`s on the synthetic
+   * `//external:*` targets makes `RuleHasher` recurse infinitely and throw
+   * `CircularDependencyException`. We need a cycle-free dep DAG before emitting edges.
+   *
+   * The algorithm is a single DFS, visiting nodes in lexicographic order with their out-edges
+   * also sorted. An edge to a node currently on the DFS path is a back-edge (it would close
+   * a cycle) and is dropped; every other edge is kept. The result is therefore (a) acyclic
+   * and (b) deterministic across runs.
+   *
+   * Dropping the back-edge is conservative: a content change in the dropped-edge target still
+   * surfaces via its own synthetic `//external:*` target's hash (each repo gets one), so
+   * main-repo consumers that depend on either side of the cycle still see the change. We
+   * only lose the ability to propagate through the cycle itself, which is fine because all
+   * SCC members are co-dependent and a change in any of them already invalidates their own
+   * hashes directly.
+   */
+  fun breakCycles(edges: Map<String, List<String>>): Map<String, List<String>> {
+    val acyclic = mutableMapOf<String, List<String>>()
+    val finished = mutableSetOf<String>()
+    val activePath = mutableSetOf<String>()
+
+    // Depth-first walk. `activePath` holds the nodes between the DFS root and `current`;
+    // `finished` holds nodes whose kept out-edges have already been recorded.
+    fun visit(current: String) {
+      if (current in finished) return
+      activePath += current
+      // An out-edge into the active path would close a cycle, so it is dropped. The path is
+      // the same before and after each recursive call, so filtering up front is safe.
+      val kept = edges[current].orEmpty().sorted().filterNot { it in activePath }
+      kept.forEach { visit(it) }
+      acyclic[current] = kept
+      activePath -= current
+      finished += current
+    }
+
+    edges.keys.sorted().forEach { visit(it) }
+    return acyclic
+  }
+
   /**
    * Compares two module graphs and returns the keys of modules that changed.
    *

diff --git a/cli/src/test/kotlin/com/bazel_diff/bazel/ModuleGraphParserTest.kt b/cli/src/test/kotlin/com/bazel_diff/bazel/ModuleGraphParserTest.kt
@@ -274,6 +274,70 @@ class ModuleGraphParserTest {
     assertThat(result).containsExactlyInAnyOrder("root", "abseil-cpp@20240116.2")
   }
 
+  // ---------------------------------------------------------------------------------------
+  // breakCycles
+  // ---------------------------------------------------------------------------------------
+
+  @Test
+  fun breakCycles_acyclicInput_returnsEdgesUnchanged() {
+    val edges = mapOf("a" to listOf("b", "c"), "b" to listOf("c"), "c" to emptyList())
+
+    val result = parser.breakCycles(edges)
+
+    // Out-edges come back sorted, so this already-sorted acyclic input must round-trip
+    // exactly; comparing whole maps also fails if the result gains or loses a node.
+    assertThat(result).isEqualTo(edges)
+  }
+
+  @Test
+  fun breakCycles_twoNodeCycle_dropsOneEdge() {
+    // Mirrors the real rules_go <-> gazelle cycle: emitting both directions as rule_inputs
+    // sends RuleHasher into a circular dependency, so only one direction may survive.
+    val cyclic = mapOf("gazelle" to listOf("rules_go"), "rules_go" to listOf("gazelle"))
+
+    val acyclic = parser.breakCycles(cyclic)
+
+    assertThat(acyclic.values.flatten().size).isEqualTo(1)
+    // The sorted DFS reaches "gazelle" first, so gazelle -> rules_go is kept and the
+    // rules_go -> gazelle back-edge is the one removed.
+    val (gazelleDeps, rulesGoDeps) = listOf("gazelle", "rules_go").map { acyclic[it]!! }
+    assertThat(gazelleDeps).containsExactlyInAnyOrder("rules_go")
+    assertThat(rulesGoDeps).isEmpty()
+  }
+
+  @Test
+  fun breakCycles_threeNodeCycle_breaksCycleDeterministically() {
+    val edges = mapOf("a" to listOf("b"), "b" to listOf("c"), "c" to listOf("a"))
+
+    val result = parser.breakCycles(edges)
+
+    // The sorted DFS starts at "a" and walks a -> b -> c, so c -> a is the only edge that
+    // points back into the active path. Exactly that edge is dropped: two of the three
+    // edges survive and no node is added or lost.
+    assertThat(result.values.sumOf { it.size }).isEqualTo(2)
+    assertThat(result)
+        .isEqualTo(mapOf("a" to listOf("b"), "b" to listOf("c"), "c" to emptyList<String>()))
+  }
+
+  @Test
+  fun breakCycles_selfLoop_dropsSelfEdge() {
+    val selfLooping = mapOf("a" to listOf("a", "b"), "b" to emptyList())
+
+    // "a" is on the DFS path while its own out-edges are examined, so a -> a is a back-edge.
+    val keptForA = parser.breakCycles(selfLooping)["a"]!!
+    assertThat(keptForA).containsExactlyInAnyOrder("b")
+  }
+
+  @Test
+  fun breakCycles_isDeterministic() {
+    val edges = mapOf("gazelle" to listOf("rules_go"), "rules_go" to listOf("gazelle"))
+    // Same graph with the opposite key insertion order: the kept edge must not depend on it.
+    val reordered = edges.toList().asReversed().toMap()
+
+    assertThat(parser.breakCycles(reordered)).isEqualTo(parser.breakCycles(edges))
+    assertThat(parser.breakCycles(edges)).isEqualTo(parser.breakCycles(edges))
+  }
+
   @Test
   fun findChangedModules_withNewGraphEmpty_returnsAllOldModuleKeys() {
     val oldGraph =