Tinder · rdark · Apr 28, 2026 · Apr 28, 2026
diff --git a/cli/BUILD b/cli/BUILD
@@ -72,6 +72,12 @@ kt_jvm_test(
     runtime_deps = [":cli-test-lib"],
 )
 
+kt_jvm_test(
+    name = "CalculateImpactedTargetsInteractorModuleQueryTest",
+    test_class = "com.bazel_diff.interactor.CalculateImpactedTargetsInteractorModuleQueryTest",
+    runtime_deps = [":cli-test-lib"],
+)
+
 kt_jvm_test(
     name = "NormalisingPathConverterTest",
     test_class = "com.bazel_diff.cli.converter.NormalisingPathConverterTest",

diff --git a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt
@@ -349,9 +349,13 @@ class BazelQueryService(
    * `bazel mod dump_repo_mapping ""`. Returns a map of apparent name → canonical name.
    * Filters out internal repos (bazel_tools, _builtins, local_config_*) that aren't
    * relevant for dependency hashing.
+   *
+   * Used by both `queryBzlmodRepos` (for synthetic //external:* target generation) and
+   * `CalculateImpactedTargetsInteractor.queryTargetsDependingOnModules` (to resolve a
+   * changed module's canonical repo prefix without substring matching `allTargets.keys`).
    */
   @OptIn(ExperimentalCoroutinesApi::class)
-  private suspend fun discoverRepoMapping(): Map<String, String> {
+  suspend fun discoverRepoMapping(): Map<String, String> {
     val cmd: MutableList<String> =
         ArrayList<String>().apply {
           add(bazelPath.toString())

diff --git a/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt b/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt
@@ -1,6 +1,7 @@
 package com.bazel_diff.interactor
 
 import com.bazel_diff.bazel.BazelQueryService
+import com.bazel_diff.bazel.Module
 import com.bazel_diff.bazel.ModuleGraphParser
 import com.bazel_diff.hash.TargetHash
 import com.bazel_diff.log.Logger
@@ -53,7 +54,7 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
 
     val impactedTargets = if (changedModules.isNotEmpty()) {
       logger.i { "Module changes detected - querying for targets that depend on changed modules" }
-      queryTargetsDependingOnModules(changedModules, to)
+      queryTargetsDependingOnModules(changedModules, from, to)
     } else {
       computeSimpleImpactedTargets(from, to)
     }
@@ -106,7 +107,7 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
 
     val impactedTargets = if (changedModules.isNotEmpty()) {
       logger.i { "Module changes detected - querying for targets that depend on changed modules" }
-      val moduleImpactedTargets = queryTargetsDependingOnModules(changedModules, to)
+      val moduleImpactedTargets = queryTargetsDependingOnModules(changedModules, from, to)
       // Mark module-impacted targets with distance 0, then compute distances from there
       val moduleImpactedHashes = from.filterKeys { !moduleImpactedTargets.contains(it) }
       computeAllDistances(moduleImpactedHashes, to, depEdges)
@@ -239,61 +240,76 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
   }
 
   /**
-   * Detects module changes by comparing module graphs and returns changed module keys.
+   * Detects module changes by comparing module graphs and returns the changed Modules.
    *
-   * This method:
-   * 1. Parses the from and to module graphs
-   * 2. Identifies which modules changed (added, removed, or version changed)
-   * 3. Logs the changes for visibility
-   * 4. Returns the set of changed module keys
+   * Resolves each changed key against the "to" graph first (the state we will query
+   * against), falling back to the "from" graph for modules that were removed.
    *
-   * @param fromModuleGraphJson JSON from `bazel mod graph --output=json` for starting revision
-   * @param toModuleGraphJson JSON from `bazel mod graph --output=json` for final revision
-   * @return Set of changed module keys, empty if no changes
+   * @param fromModuleGraphJson JSON from `bazel mod graph --output=json` for the starting revision
+   * @param toModuleGraphJson JSON from `bazel mod graph --output=json` for the final revision
+   * @return Set of changed Modules, empty if no changes
    */
   private fun detectChangedModules(
       fromModuleGraphJson: String?,
       toModuleGraphJson: String?
-  ): Set<String> {
-    // If either module graph is missing, assume no changes
+  ): Set<Module> {
     if (fromModuleGraphJson == null || toModuleGraphJson == null) {
       return emptySet()
     }
 
-    // Parse module graphs
     val fromGraph = moduleGraphParser.parseModuleGraph(fromModuleGraphJson)
     val toGraph = moduleGraphParser.parseModuleGraph(toModuleGraphJson)
+    val changedKeys = moduleGraphParser.findChangedModules(fromGraph, toGraph)
 
-    // Find changed modules
-    val changedModules = moduleGraphParser.findChangedModules(fromGraph, toGraph)
-
-    if (changedModules.isEmpty()) {
+    if (changedKeys.isEmpty()) {
       logger.i { "No module changes detected" }
-    } else {
-      logger.i { "Detected ${changedModules.size} module changes: ${changedModules.joinToString(", ")}" }
+      return emptySet()
     }
 
+    val changedModules = changedKeys.mapNotNull { key -> toGraph[key] ?: fromGraph[key] }.toSet()
+    logger.i { "Detected ${changedModules.size} module changes: ${changedModules.joinToString(", ") { it.key }}" }
     return changedModules
   }
 
   /**
    * Queries Bazel to find all targets that depend on the changed modules.
    *
-   * This uses an efficient module-level query approach:
-   * 1. Identifies which external repository each changed module corresponds to
-   * 2. Uses `rdeps(//..., @@module~version//...)` to find workspace targets depending on each module
-   * 3. Returns the union of all impacted targets
+   * For each changed module M we compute the set of canonical repos in `allTargets`
+   * that belong to M, then run `rdeps(//..., @@<repo>//...)` once per canonical repo.
+   *
+   * Repo ownership is decided by two ordered predicates:
+   *
+   * 1. Tier A — root repo mapping. If `bazel mod dump_repo_mapping ""` maps
+   *    `M.apparentName` to canonical C, any repo R with R == C or R starts with
+   *    "C+" / "C~" belongs to M. Covers extension-created children (`C++ext+repo`,
+   *    `C~~ext~repo`) whose parent canonical lives in M.
+   * 2. Tier B — name-prefix fallback. R belongs to M if R starts with
+   *    "{M.name}+" or "{M.name}~". Extension-created forms (`name++ext+repo`,
+   *    `name~~ext~repo`) are already covered by these prefixes. Handles
+   *    transitive modules absent from root's mapping and the case where
+   *    `discoverRepoMapping` failed.
+   *
+   * Modules that match nothing (Tier C) are logged and skipped — a module with no
+   * materialised repos in `allTargets.keys` cannot impact any hashed target, and
+   * `computeSimpleImpactedTargets` still runs below to catch direct source changes.
+   *
+   * The key invariant vs. the previous implementation: we match on the parsed
+   * canonical repo name (`label.substring(2).substringBefore("//")`), not on a
+   * `contains` substring of the full label, so a module named "cpp" no longer
+   * matches canonical `abseil-cpp+`.
    *
-   * @param changedModuleKeys Set of changed module keys (e.g., "abseil-cpp@20240722.0")
-   * @param allTargets Map of all targets from the final revision
+   * @param changedModules Modules identified as changed between the two graphs
+   * @param from Starting-revision target hashes; used only to pick up labels whose
+   *     content changed independently of any module bump
+   * @param allTargets Final-revision target hashes (the set we can query against)
    * @return Set of target labels that are impacted by module changes
    */
   private fun queryTargetsDependingOnModules(
-      changedModuleKeys: Set<String>,
+      changedModules: Set<Module>,
+      from: Map<String, TargetHash>,
       allTargets: Map<String, TargetHash>
   ): Set<String> {
     return try {
-      // Inject BazelQueryService if available
       val queryService: BazelQueryService? = try {
         inject<BazelQueryService>().value
       } catch (e: Exception) {
@@ -305,59 +321,123 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
         return allTargets.keys
       }
 
-      val impactedTargets = mutableSetOf<String>()
-
-      for (moduleKey in changedModuleKeys) {
-        // Extract module name from key (e.g., "abseil-cpp" from "abseil-cpp@20240722.0")
-        val moduleName = moduleKey.substringBefore("@")
-        logger.i { "Querying targets depending on module: $moduleName (key: $moduleKey)" }
-
-        // Find the canonical repository name for this module from allTargets
-        // Bzlmod repos look like: @@abseil-cpp~20240116.2//... or @@rules_jvm_external~~maven~maven//...
-        val moduleRepos = allTargets.keys
-            .filter { it.startsWith("@@") && it.contains(moduleName) }
-            .map { it.substring(2).substringBefore("//") } // Extract repo name
-            .toSet()
-
+      val repoMapping: Map<String, String> =
+          try {
+            runBlocking { queryService.discoverRepoMapping() }
+          } catch (e: Exception) {
+            logger.w { "discoverRepoMapping failed, falling back to module-name matching: ${e.message}" }
+            emptyMap()
+          }
+      // Log size so operators can distinguish "Tier A had nothing to match
+      // against" from "Tier A matched but the module wasn't in root mapping".
+      // `discoverRepoMapping` returns an empty map on subprocess exit != 0
+      // without throwing, so we cannot rely on the catch block above to
+      // surface that case.
+      logger.i { "Discovered ${repoMapping.size} root repo mapping entries" }
+
+      // Parse `allTargets` into the set of canonical repo names once, instead of
+      // rescanning every label per changed module.
+      val canonicalRepos: Set<String> = allTargets.keys.asSequence()
+          .filter { it.startsWith("@@") }
+          .map { it.substring(2).substringBefore("//") }
+          .filter { it.isNotEmpty() }
+          .toSet()
+
+      // Collect the canonical repos to query once — multiple "changed modules"
+      // often collapse to the same canonical name (e.g. `findChangedModules`
+      // reports both `foo@1.0` removed and `foo@2.0` added, or two modules have
+      // overlapping extension-created children). Running `rdeps` per distinct
+      // canonical name — not per module iteration — avoids the redundant work.
+      val reposToQuery = mutableSetOf<String>()
+      for (module in changedModules) {
+        logger.i { "Resolving repos for changed module: ${module.name} (key: ${module.key})" }
+        val moduleRepos = reposOwnedBy(module, canonicalRepos, repoMapping)
         if (moduleRepos.isEmpty()) {
-          logger.w { "No external repository found for module $moduleName" }
+          logger.w { "No external repository found for module ${module.name}" }
           continue
         }
+        logger.i { "Found ${moduleRepos.size} repositories for module ${module.name}: ${moduleRepos.joinToString(", ")}" }
+        reposToQuery.addAll(moduleRepos)
+      }
 
-        logger.i { "Found ${moduleRepos.size} repositories for module $moduleName: ${moduleRepos.joinToString(", ")}" }
-
-        // Query workspace targets that depend on any target in the changed module repo
-        for (repoName in moduleRepos) {
-          try {
-            // Use rdeps to find all workspace targets depending on this module
-            // rdeps(universe, target_set) finds all targets in universe that depend on target_set
-            val queryExpression = "rdeps(//..., @@$repoName//...)"
-            logger.i { "Executing query: $queryExpression" }
-
-            val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false) }
-            val rdepLabels = rdeps.map { it.name }.filter { !it.startsWith("@@") } // Filter to workspace targets only
-
-            logger.i { "Found ${rdepLabels.size} workspace targets depending on @@$repoName" }
-            impactedTargets.addAll(rdepLabels)
-          } catch (e: Exception) {
-            logger.w { "Failed to query rdeps for @@$repoName: ${e.message}" }
-            logger.w { "Conservatively marking all targets as impacted for this module" }
-            // On error for this module, add all workspace targets
-            impactedTargets.addAll(allTargets.keys.filter { !it.startsWith("@@") })
+      val impactedTargets = mutableSetOf<String>()
+      for (repoName in reposToQuery) {
+        try {
+          val queryExpression = "rdeps(//..., @@$repoName//...)"
+          logger.i { "Executing query: $queryExpression" }
+
+          val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false) }
+          val rdepLabels = rdeps.map { it.name }.filter { !it.startsWith("@@") }
+
+          logger.i { "Found ${rdepLabels.size} workspace targets depending on @@$repoName" }
+          impactedTargets.addAll(rdepLabels)
+        } catch (e: Exception) {
+          logger.w { "Failed to query rdeps for @@$repoName: ${e.message}" }
+          logger.w { "Conservatively marking all targets as impacted for this module" }
+          // Emit the buildable workspace subset when it exists; otherwise
+          // (bzlmod-only shape) fall through to every hashed label so the
+          // downstream `excludeExternalTargets` strip does not reduce the
+          // fallback to empty.
+          val buildableWorkspaceTargets = allTargets.keys.filter {
+            !it.startsWith("@@") && !it.startsWith("//external:")
           }
+          impactedTargets.addAll(
+              if (buildableWorkspaceTargets.isEmpty()) allTargets.keys
+              else buildableWorkspaceTargets
+          )
         }
       }
 
-      // Add directly changed targets from hash comparison (e.g., code changes)
-      val directlyChanged = computeSimpleImpactedTargets(emptyMap(), allTargets)
+      // Union with hash-diff results so we still surface labels whose content changed
+      // independently of any module version bump (e.g. a source file in `//app:app`
+      // edited in the same commit as a MODULE.bazel update). The earlier
+      // `computeSimpleImpactedTargets(emptyMap(), allTargets)` form returned every
+      // key in `allTargets`, which silently defeated the rdeps filtering above.
+      val directlyChanged = computeSimpleImpactedTargets(from, allTargets)
       impactedTargets.addAll(directlyChanged)
 
       logger.i { "Total targets impacted by module changes: ${impactedTargets.size}" }
       impactedTargets
     } catch (e: Exception) {
       logger.e(e) { "Error querying targets depending on modules" }
-      // On error, conservatively mark all targets as impacted
       allTargets.keys
     }
   }
+
+  /**
+   * Canonical repos in `canonicalRepos` that belong to `module`, using Tier A
+   * (root repo mapping) plus Tier B (name-prefix fallback). See
+   * [queryTargetsDependingOnModules] for the full contract.
+   *
+   * @param module Changed module whose owned repos we want to resolve
+   * @param canonicalRepos Set of canonical repo names parsed from `allTargets.keys`
+   * @param repoMapping Root module's apparent→canonical repo mapping (may be empty)
+   * @return Canonical repos belonging to `module`, or empty if none matched (Tier C)
+   */
+  private fun reposOwnedBy(
+      module: Module,
+      canonicalRepos: Set<String>,
+      repoMapping: Map<String, String>
+  ): Set<String> {
+    val prefixes = mutableListOf<String>()
+    val exactMatches = mutableSetOf<String>()
+
+    val mappedCanonical = repoMapping[module.apparentName]
+    if (!mappedCanonical.isNullOrEmpty()) {
+      exactMatches.add(mappedCanonical)
+      prefixes.add("$mappedCanonical+")
+      prefixes.add("$mappedCanonical~")
+    }
+
+    // Tier B prefixes always apply — they cover transitive modules that root's
+    // `dump_repo_mapping` does not include, and also act as the sole source when
+    // `discoverRepoMapping` returned empty. `++`/`~~` are subsumed by `+`/`~`
+    // (extension-created repos start with `name+` or `name~` by definition).
+    prefixes.add("${module.name}+")
+    prefixes.add("${module.name}~")
+
+    return canonicalRepos.filter { repo ->
+      repo in exactMatches || prefixes.any { repo.startsWith(it) }
+    }.toSet()
+  }
 }