Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cli/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ kt_jvm_test(
runtime_deps = [":cli-test-lib"],
)

kt_jvm_test(
name = "CalculateImpactedTargetsInteractorModuleQueryTest",
test_class = "com.bazel_diff.interactor.CalculateImpactedTargetsInteractorModuleQueryTest",
runtime_deps = [":cli-test-lib"],
)

kt_jvm_test(
name = "NormalisingPathConverterTest",
test_class = "com.bazel_diff.cli.converter.NormalisingPathConverterTest",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,9 +349,13 @@ class BazelQueryService(
* `bazel mod dump_repo_mapping ""`. Returns a map of apparent name → canonical name.
* Filters out internal repos (bazel_tools, _builtins, local_config_*) that aren't
* relevant for dependency hashing.
*
* Used by both `queryBzlmodRepos` (for synthetic //external:* target generation) and
* `CalculateImpactedTargetsInteractor.queryTargetsDependingOnModules` (to resolve a
* changed module's canonical repo prefix without substring matching `allTargets.keys`).
*/
@OptIn(ExperimentalCoroutinesApi::class)
private suspend fun discoverRepoMapping(): Map<String, String> {
suspend fun discoverRepoMapping(): Map<String, String> {
val cmd: MutableList<String> =
ArrayList<String>().apply {
add(bazelPath.toString())
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.bazel_diff.interactor

import com.bazel_diff.bazel.BazelQueryService
import com.bazel_diff.bazel.Module
import com.bazel_diff.bazel.ModuleGraphParser
import com.bazel_diff.hash.TargetHash
import com.bazel_diff.log.Logger
Expand Down Expand Up @@ -53,7 +54,7 @@ class CalculateImpactedTargetsInteractor : KoinComponent {

val impactedTargets = if (changedModules.isNotEmpty()) {
logger.i { "Module changes detected - querying for targets that depend on changed modules" }
queryTargetsDependingOnModules(changedModules, to)
queryTargetsDependingOnModules(changedModules, from, to)
} else {
computeSimpleImpactedTargets(from, to)
}
Expand Down Expand Up @@ -106,7 +107,7 @@ class CalculateImpactedTargetsInteractor : KoinComponent {

val impactedTargets = if (changedModules.isNotEmpty()) {
logger.i { "Module changes detected - querying for targets that depend on changed modules" }
val moduleImpactedTargets = queryTargetsDependingOnModules(changedModules, to)
val moduleImpactedTargets = queryTargetsDependingOnModules(changedModules, from, to)
// Mark module-impacted targets with distance 0, then compute distances from there
val moduleImpactedHashes = from.filterKeys { !moduleImpactedTargets.contains(it) }
computeAllDistances(moduleImpactedHashes, to, depEdges)
Expand Down Expand Up @@ -239,61 +240,76 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
}

/**
* Detects module changes by comparing module graphs and returns changed module keys.
* Detects module changes by comparing module graphs and returns the changed Modules.
*
* This method:
* 1. Parses the from and to module graphs
* 2. Identifies which modules changed (added, removed, or version changed)
* 3. Logs the changes for visibility
* 4. Returns the set of changed module keys
* Resolves each changed key against the "to" graph first (the state we will query
* against), falling back to the "from" graph for modules that were removed.
*
* @param fromModuleGraphJson JSON from `bazel mod graph --output=json` for starting revision
* @param toModuleGraphJson JSON from `bazel mod graph --output=json` for final revision
* @return Set of changed module keys, empty if no changes
* @param fromModuleGraphJson JSON from `bazel mod graph --output=json` for the starting revision
* @param toModuleGraphJson JSON from `bazel mod graph --output=json` for the final revision
* @return Set of changed Modules, empty if no changes
*/
private fun detectChangedModules(
fromModuleGraphJson: String?,
toModuleGraphJson: String?
): Set<String> {
// If either module graph is missing, assume no changes
): Set<Module> {
if (fromModuleGraphJson == null || toModuleGraphJson == null) {
return emptySet()
}

// Parse module graphs
val fromGraph = moduleGraphParser.parseModuleGraph(fromModuleGraphJson)
val toGraph = moduleGraphParser.parseModuleGraph(toModuleGraphJson)
val changedKeys = moduleGraphParser.findChangedModules(fromGraph, toGraph)

// Find changed modules
val changedModules = moduleGraphParser.findChangedModules(fromGraph, toGraph)

if (changedModules.isEmpty()) {
if (changedKeys.isEmpty()) {
logger.i { "No module changes detected" }
} else {
logger.i { "Detected ${changedModules.size} module changes: ${changedModules.joinToString(", ")}" }
return emptySet()
}

val changedModules = changedKeys.mapNotNull { key -> toGraph[key] ?: fromGraph[key] }.toSet()
logger.i { "Detected ${changedModules.size} module changes: ${changedModules.joinToString(", ") { it.key }}" }
return changedModules
}

/**
* Queries Bazel to find all targets that depend on the changed modules.
*
* This uses an efficient module-level query approach:
* 1. Identifies which external repository each changed module corresponds to
* 2. Uses `rdeps(//..., @@module~version//...)` to find workspace targets depending on each module
* 3. Returns the union of all impacted targets
* For each changed module M we compute the set of canonical repos in `allTargets`
* that belong to M, then run `rdeps(//..., @@<repo>//...)` once per canonical repo.
*
* Repo ownership is decided by two ordered predicates:
*
* 1. Tier A — root repo mapping. If `bazel mod dump_repo_mapping ""` maps
* `M.apparentName` to canonical C, any repo R with R == C or R starts with
* "C+" / "C~" belongs to M. Covers extension-created children (`C++ext+repo`,
* `C~~ext~repo`) whose parent canonical lives in M.
* 2. Tier B — name-prefix fallback. R belongs to M if R starts with
* "{M.name}+" or "{M.name}~". Extension-created forms (`name++ext+repo`,
* `name~~ext~repo`) are already covered by these prefixes. Handles
* transitive modules absent from root's mapping and the case where
* `discoverRepoMapping` failed.
*
* Modules that match nothing (Tier C) are logged and skipped — a module with no
* materialised repos in `allTargets.keys` cannot impact any hashed target, and
* `computeSimpleImpactedTargets` still runs below to catch direct source changes.
*
* The key invariant vs. the previous implementation: we match on the parsed
* canonical repo name (`label.substring(2).substringBefore("//")`), not on a
* `contains` substring of the full label, so a module named "cpp" no longer
* matches canonical `abseil-cpp+`.
*
* @param changedModuleKeys Set of changed module keys (e.g., "abseil-cpp@20240722.0")
* @param allTargets Map of all targets from the final revision
* @param changedModules Modules identified as changed between the two graphs
* @param from Starting-revision target hashes; used only to pick up labels whose
* content changed independently of any module bump
* @param allTargets Final-revision target hashes (the set we can query against)
* @return Set of target labels that are impacted by module changes
*/
private fun queryTargetsDependingOnModules(
changedModuleKeys: Set<String>,
changedModules: Set<Module>,
from: Map<String, TargetHash>,
allTargets: Map<String, TargetHash>
): Set<String> {
return try {
// Inject BazelQueryService if available
val queryService: BazelQueryService? = try {
inject<BazelQueryService>().value
} catch (e: Exception) {
Expand All @@ -305,59 +321,123 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
return allTargets.keys
}

val impactedTargets = mutableSetOf<String>()

for (moduleKey in changedModuleKeys) {
// Extract module name from key (e.g., "abseil-cpp" from "abseil-cpp@20240722.0")
val moduleName = moduleKey.substringBefore("@")
logger.i { "Querying targets depending on module: $moduleName (key: $moduleKey)" }

// Find the canonical repository name for this module from allTargets
// Bzlmod repos look like: @@abseil-cpp~20240116.2//... or @@rules_jvm_external~~maven~maven//...
val moduleRepos = allTargets.keys
.filter { it.startsWith("@@") && it.contains(moduleName) }
.map { it.substring(2).substringBefore("//") } // Extract repo name
.toSet()

val repoMapping: Map<String, String> =
try {
runBlocking { queryService.discoverRepoMapping() }
} catch (e: Exception) {
logger.w { "discoverRepoMapping failed, falling back to module-name matching: ${e.message}" }
emptyMap()
}
// Log size so operators can distinguish "Tier A had nothing to match
// against" from "Tier A matched but the module wasn't in root mapping".
// `discoverRepoMapping` returns an empty map on subprocess exit != 0
// without throwing, so we cannot rely on the catch block above to
// surface that case.
logger.i { "Discovered ${repoMapping.size} root repo mapping entries" }

// Parse `allTargets` into the set of canonical repo names once, instead of
// rescanning every label per changed module.
val canonicalRepos: Set<String> = allTargets.keys.asSequence()
.filter { it.startsWith("@@") }
.map { it.substring(2).substringBefore("//") }
.filter { it.isNotEmpty() }
.toSet()

// Collect the canonical repos to query once — multiple "changed modules"
// often collapse to the same canonical name (e.g. `findChangedModules`
// reports both `foo@1.0` removed and `foo@2.0` added, or two modules have
// overlapping extension-created children). Running `rdeps` per distinct
// canonical name — not per module iteration — avoids the redundant work.
val reposToQuery = mutableSetOf<String>()
for (module in changedModules) {
logger.i { "Resolving repos for changed module: ${module.name} (key: ${module.key})" }
val moduleRepos = reposOwnedBy(module, canonicalRepos, repoMapping)
if (moduleRepos.isEmpty()) {
logger.w { "No external repository found for module $moduleName" }
logger.w { "No external repository found for module ${module.name}" }
continue
}
logger.i { "Found ${moduleRepos.size} repositories for module ${module.name}: ${moduleRepos.joinToString(", ")}" }
reposToQuery.addAll(moduleRepos)
}

logger.i { "Found ${moduleRepos.size} repositories for module $moduleName: ${moduleRepos.joinToString(", ")}" }

// Query workspace targets that depend on any target in the changed module repo
for (repoName in moduleRepos) {
try {
// Use rdeps to find all workspace targets depending on this module
// rdeps(universe, target_set) finds all targets in universe that depend on target_set
val queryExpression = "rdeps(//..., @@$repoName//...)"
logger.i { "Executing query: $queryExpression" }

val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false) }
val rdepLabels = rdeps.map { it.name }.filter { !it.startsWith("@@") } // Filter to workspace targets only

logger.i { "Found ${rdepLabels.size} workspace targets depending on @@$repoName" }
impactedTargets.addAll(rdepLabels)
} catch (e: Exception) {
logger.w { "Failed to query rdeps for @@$repoName: ${e.message}" }
logger.w { "Conservatively marking all targets as impacted for this module" }
// On error for this module, add all workspace targets
impactedTargets.addAll(allTargets.keys.filter { !it.startsWith("@@") })
val impactedTargets = mutableSetOf<String>()
for (repoName in reposToQuery) {
try {
val queryExpression = "rdeps(//..., @@$repoName//...)"
logger.i { "Executing query: $queryExpression" }

val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false) }
val rdepLabels = rdeps.map { it.name }.filter { !it.startsWith("@@") }

logger.i { "Found ${rdepLabels.size} workspace targets depending on @@$repoName" }
impactedTargets.addAll(rdepLabels)
} catch (e: Exception) {
logger.w { "Failed to query rdeps for @@$repoName: ${e.message}" }
logger.w { "Conservatively marking all targets as impacted for this module" }
// Emit the buildable workspace subset when it exists; otherwise
// (bzlmod-only shape) fall through to every hashed label so the
// downstream `excludeExternalTargets` strip does not reduce the
// fallback to empty.
val buildableWorkspaceTargets = allTargets.keys.filter {
!it.startsWith("@@") && !it.startsWith("//external:")
}
impactedTargets.addAll(
if (buildableWorkspaceTargets.isEmpty()) allTargets.keys
else buildableWorkspaceTargets
)
}
}

// Add directly changed targets from hash comparison (e.g., code changes)
val directlyChanged = computeSimpleImpactedTargets(emptyMap(), allTargets)
// Union with hash-diff results so we still surface labels whose content changed
// independently of any module version bump (e.g. a source file in `//app:app`
// edited in the same commit as a MODULE.bazel update). The earlier
// `computeSimpleImpactedTargets(emptyMap(), allTargets)` form returned every
// key in `allTargets`, which silently defeated the rdeps filtering above.
val directlyChanged = computeSimpleImpactedTargets(from, allTargets)
impactedTargets.addAll(directlyChanged)

logger.i { "Total targets impacted by module changes: ${impactedTargets.size}" }
impactedTargets
} catch (e: Exception) {
logger.e(e) { "Error querying targets depending on modules" }
// On error, conservatively mark all targets as impacted
allTargets.keys
}
}

/**
* Canonical repos in `canonicalRepos` that belong to `module`, using Tier A
* (root repo mapping) plus Tier B (name-prefix fallback). See
* [queryTargetsDependingOnModules] for the full contract.
*
* @param module Changed module whose owned repos we want to resolve
* @param canonicalRepos Set of canonical repo names parsed from `allTargets.keys`
* @param repoMapping Root module's apparent→canonical repo mapping (may be empty)
* @return Canonical repos belonging to `module`, or empty if none matched (Tier C)
*/
private fun reposOwnedBy(
module: Module,
canonicalRepos: Set<String>,
repoMapping: Map<String, String>
): Set<String> {
val prefixes = mutableListOf<String>()
val exactMatches = mutableSetOf<String>()

val mappedCanonical = repoMapping[module.apparentName]
if (!mappedCanonical.isNullOrEmpty()) {
exactMatches.add(mappedCanonical)
prefixes.add("$mappedCanonical+")
prefixes.add("$mappedCanonical~")
}

// Tier B prefixes always apply — they cover transitive modules that root's
// `dump_repo_mapping` does not include, and also act as the sole source when
// `discoverRepoMapping` returned empty. `++`/`~~` are subsumed by `+`/`~`
// (extension-created repos start with `name+` or `name~` by definition).
prefixes.add("${module.name}+")
prefixes.add("${module.name}~")

return canonicalRepos.filter { repo ->
repo in exactMatches || prefixes.any { repo.startsWith(it) }
}.toSet()
}
}
Loading