From a3cb64ff12033815dc6087e6ed6ddc04b3065cdd Mon Sep 17 00:00:00 2001 From: Richard Clark Date: Tue, 28 Apr 2026 00:27:04 +0100 Subject: [PATCH] fix: tolerate stderr-polluted moduleGraphJson in ModuleGraphParser bazel-diff 17.0.1..18.0.5 configured BazelModService.getModuleGraphJson() with stdout=CAPTURE, stderr=CAPTURE. In Process.kt, captureAll triggers ProcessBuilder.redirectErrorStream(true), physically merging stderr into stdout. The captured moduleGraphJson therefore contained bazel's INFO lines ("INFO: Invocation ID: ...", "Loading: 0 packages loaded", etc.) prepended to the actual JSON output. PR #330 (shipped in v18.1.0) correctly switched stderr to SILENT but broke format compatibility: any CI pipeline re-using a base graph from 17.0.1..18.0.5 and a head graph from 18.1.0+ hits parseModuleGraph's try/catch on the polluted side, gets emptyMap(), and cascades into findChangedModules reporting every head-side module as "added". The downstream queryTargetsDependingOnModules then spawns thousands of bazel query rdeps(...) subprocesses. Make parseModuleGraph tolerant: attempt the whole-string parse first, and on failure retry from the first '{' to end-of-string. On second failure fall through to the existing emptyMap() behaviour. Clean input continues to parse in one attempt with no extra allocation. Tests: - ModuleGraphParserTest: positive case for stderr-prefixed input. - StderrPollutionRegressionTest: end-to-end regression guard covering parse, findChangedModules, and the CalculateImpactedTargetsInteractor dispatch path. Confirms the cross-version comparison now short-circuits to computeSimpleImpactedTargets instead of the rdeps fan-out. --- cli/BUILD | 6 + .../com/bazel_diff/bazel/ModuleGraphParser.kt | 12 +- .../bazel_diff/bazel/ModuleGraphParserTest.kt | 24 +++ .../bazel/StderrPollutionRegressionTest.kt | 157 ++++++++++++++++++ 4 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 cli/src/test/kotlin/com/bazel_diff/bazel/StderrPollutionRegressionTest.kt diff --git a/cli/BUILD b/cli/BUILD index 000783b..ee701a5 100644 --- a/cli/BUILD +++ b/cli/BUILD @@ -114,6 +114,12 @@ kt_jvm_test( runtime_deps = [":cli-test-lib"], ) +kt_jvm_test( + name = "StderrPollutionRegressionTest", + test_class = "com.bazel_diff.bazel.StderrPollutionRegressionTest", + runtime_deps = [":cli-test-lib"], +) + kt_jvm_test( name = "E2ETest", timeout = "long", diff --git a/cli/src/main/kotlin/com/bazel_diff/bazel/ModuleGraphParser.kt b/cli/src/main/kotlin/com/bazel_diff/bazel/ModuleGraphParser.kt index 360a472..20dee6c 100644 --- a/cli/src/main/kotlin/com/bazel_diff/bazel/ModuleGraphParser.kt +++ b/cli/src/main/kotlin/com/bazel_diff/bazel/ModuleGraphParser.kt @@ -24,6 +24,10 @@ class ModuleGraphParser { /** * Parses the JSON output from `bazel mod graph --output=json`. * + * Tolerates a non-JSON prefix (e.g. leaked stderr from bazel-diff + * 17.0.1..18.0.5, which captured stderr into moduleGraphJson via + * Process.kt's captureAll -> ProcessBuilder.redirectErrorStream(true)). + * * @param json The JSON string from bazel mod graph * @return A map of module keys to Module objects */ @@ -31,7 +35,13 @@ class ModuleGraphParser { val modules = mutableMapOf() try { - val root = JsonParser.parseString(json).asJsonObject + val root = try { + JsonParser.parseString(json).asJsonObject + } catch (_: Exception) { + val start = json.indexOf('{') + if (start < 0) return emptyMap() + JsonParser.parseString(json.substring(start)).asJsonObject + } extractModules(root, modules) } catch (e: Exception) { // If parsing fails, return empty map diff --git a/cli/src/test/kotlin/com/bazel_diff/bazel/ModuleGraphParserTest.kt b/cli/src/test/kotlin/com/bazel_diff/bazel/ModuleGraphParserTest.kt index 689b3c2..192847b 100644 --- a/cli/src/test/kotlin/com/bazel_diff/bazel/ModuleGraphParserTest.kt +++ b/cli/src/test/kotlin/com/bazel_diff/bazel/ModuleGraphParserTest.kt @@ -90,6 +90,30 @@ class ModuleGraphParserTest { assertThat(googletest!!.version).isEqualTo("1.14.0") } + @Test + fun parseModuleGraph_withStderrPrefix_extractsModules() { + val cleanJson = + """ + { + "key": "", + "name": "ws", + "version": "", + "apparentName": "ws", + "dependencies": [ + {"key": "a@1", "name": "a", "version": "1", "apparentName": "a"} + ] + } + """ + .trimIndent() + val polluted = "INFO: Invocation ID: abc\nLoading: 0 packages loaded\n$cleanJson" + + val clean = parser.parseModuleGraph(cleanJson) + val actual = parser.parseModuleGraph(polluted) + + assertThat(actual).hasSize(2) + assertThat(actual).isEqualTo(clean) + } + @Test fun parseModuleGraph_withInvalidJson_returnsEmptyMap() { val json = "{ invalid json" diff --git a/cli/src/test/kotlin/com/bazel_diff/bazel/StderrPollutionRegressionTest.kt b/cli/src/test/kotlin/com/bazel_diff/bazel/StderrPollutionRegressionTest.kt new file mode 100644 index 0000000..4fba567 --- /dev/null +++ b/cli/src/test/kotlin/com/bazel_diff/bazel/StderrPollutionRegressionTest.kt @@ -0,0 +1,157 @@ +package com.bazel_diff.bazel + +import assertk.assertThat +import assertk.assertions.hasSize +import assertk.assertions.isEmpty +import assertk.assertions.isEqualTo +import assertk.assertions.isNotEqualTo +import com.bazel_diff.hash.TargetHash +import com.bazel_diff.interactor.CalculateImpactedTargetsInteractor +import com.bazel_diff.testModule +import java.io.StringWriter +import org.junit.Rule +import org.junit.Test +import org.koin.test.KoinTest +import org.koin.test.KoinTestRule + +/** + * Regression guard for the 17.0.1..18.0.5 -> 18.1.0+ stderr-pollution compatibility break + * introduced by PR #330. See `ModuleGraphParser.parseModuleGraph` for the fix. + */ +class StderrPollutionRegressionTest : KoinTest { + @get:Rule val koinTestRule = KoinTestRule.create { modules(testModule()) } + + private val parser = ModuleGraphParser() + + // Shape of the stderr lines 17.0.1..18.0.5 captured into `moduleGraphJson`. + private val stderrPrefix = + """ + Computing main repo mapping: + Loading: + Loading: 0 packages loaded + Analyzing: 0 targets (0 packages loaded, 0 targets configured) + INFO: Invocation ID: 4d8d5c62-1f1c-4f72-9a3e-5fbd5e6ac3d2 + INFO: Current date is 2026-04-20 + """.trimIndent() + + private val cleanGraphJson = + """ + { + "key": "", + "name": "my-workspace", + "version": "", + "apparentName": "my-workspace", + "dependencies": [ + {"key": "bazel_tools@_", "name": "bazel_tools", "version": "_", "apparentName": "bazel_tools"}, + {"key": "abseil-cpp@20240116.2", "name": "abseil-cpp", "version": "20240116.2", "apparentName": "com_google_absl"}, + {"key": "aspect_bazel_lib@2.22.5", "name": "aspect_bazel_lib", "version": "2.22.5", "apparentName": "aspect_bazel_lib"}, + {"key": "rules_jvm_external@6.10", "name": "rules_jvm_external", "version": "6.10", "apparentName": "rules_jvm_external"}, + {"key": "rules_python@1.8.4", "name": "rules_python", "version": "1.8.4", "apparentName": "rules_python"}, + {"key": "googletest@1.14.0", "name": "googletest", "version": "1.14.0", "apparentName": "com_google_googletest"} + ] + } + """.trimIndent() + + private val pollutedGraphJson = "$stderrPrefix\n$cleanGraphJson" + + @Test + fun `18_0_x polluted moduleGraphJson now parses correctly`() { + val clean = parser.parseModuleGraph(cleanGraphJson) + val result = parser.parseModuleGraph(pollutedGraphJson) + + assertThat(result).hasSize(7) + assertThat(result).isEqualTo(clean) + } + + @Test + fun `18_1_0 clean moduleGraphJson parses successfully`() { + val result = parser.parseModuleGraph(cleanGraphJson) + + assertThat(result).hasSize(7) + } + + @Test + fun `semantically identical graph across bazel-diff versions reports no module changes`() { + val fromGraph = parser.parseModuleGraph(pollutedGraphJson) + val toGraph = parser.parseModuleGraph(cleanGraphJson) + + val changed = parser.findChangedModules(fromGraph, toGraph) + + assertThat(changed).isEmpty() + } + + @Test + fun `string compare at line 44 fires because polluted != clean even when modules are identical`() { + // Documents that the naive byte compare in CalculateImpactedTargetsInteractor.execute + // still fires cross-version, so downstream dispatch must keep handling it gracefully. + assertThat(pollutedGraphJson).isNotEqualTo(cleanGraphJson) + } + + @Test + fun `large head graph produces no spurious fan-out`() { + val deps = (1..100).joinToString(",\n") { i -> + """ {"key": "mod$i@1.0", "name": "mod$i", "version": "1.0", "apparentName": "mod$i"}""" + } + val bigHeadGraph = """ + { + "key": "", + "name": "my-workspace", + "version": "", + "apparentName": "my-workspace", + "dependencies": [ +$deps + ] + } + """.trimIndent() + val bigPolluted = "$stderrPrefix\n$bigHeadGraph" + + val fromGraph = parser.parseModuleGraph(bigPolluted) + val toGraph = parser.parseModuleGraph(bigHeadGraph) + val changed = parser.findChangedModules(fromGraph, toGraph) + + assertThat(changed).isEmpty() + } + + @Test + fun `end-to-end - semantically identical graph compared across versions reports no impacted targets`() { + val hashes = mapOf( + "//:target1" to TargetHash("", "unchanged1", "unchanged1"), + "//:target2" to TargetHash("", "unchanged2", "unchanged2"), + "//:target3" to TargetHash("", "unchanged3", "unchanged3"), + ) + + val outputWriter = StringWriter() + CalculateImpactedTargetsInteractor().execute( + from = hashes, + to = hashes, + outputWriter = outputWriter, + targetTypes = null, + fromModuleGraphJson = pollutedGraphJson, + toModuleGraphJson = cleanGraphJson, + ) + + val impacted = outputWriter.toString().trim().split("\n").filter { it.isNotEmpty() } + assertThat(impacted).isEmpty() + } + + @Test + fun `end-to-end - same bazel-diff version on both sides is fast path`() { + val hashes = mapOf( + "//:target1" to TargetHash("", "unchanged1", "unchanged1"), + "//:target2" to TargetHash("", "unchanged2", "unchanged2"), + ) + + val outputWriter = StringWriter() + CalculateImpactedTargetsInteractor().execute( + from = hashes, + to = hashes, + outputWriter = outputWriter, + targetTypes = null, + fromModuleGraphJson = cleanGraphJson, + toModuleGraphJson = cleanGraphJson, + ) + + val impacted = outputWriter.toString().trim().split("\n").filter { it.isNotEmpty() } + assertThat(impacted).isEmpty() + } +}