diff --git a/.github/workflows/sdk-cli-ci.yml b/.github/workflows/sdk-cli-ci.yml index 5a7e6772..43b9162f 100644 --- a/.github/workflows/sdk-cli-ci.yml +++ b/.github/workflows/sdk-cli-ci.yml @@ -299,6 +299,9 @@ jobs: name: Wilson Agentic Review if: github.event_name == 'push' || github.event.pull_request.head.repo.fork == false runs-on: ubuntu-latest + # Least privilege: the review only reads the repo to diff changes (GT-146). + permissions: + contents: read steps: - uses: actions/checkout@v4 with: diff --git a/.harness/scripts/ci/13-agentic-code-review.mjs b/.harness/scripts/ci/13-agentic-code-review.mjs index 1292a757..e9709b94 100644 --- a/.harness/scripts/ci/13-agentic-code-review.mjs +++ b/.harness/scripts/ci/13-agentic-code-review.mjs @@ -3,6 +3,13 @@ import { Client } from "@modelcontextprotocol/sdk/client/index.js"; import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; import { execSync } from "node:child_process"; +import { prepareReviewInput } from "./review-input.mjs"; +import { evaluateProviderResponse } from "./review-result.mjs"; +import { createReviewProvider } from "./review-provider.mjs"; + +// Hard fail-closed ceiling: never submit a payload larger than this (GT-146). +const MAX_REVIEW_TOKENS = Number(process.env.EVOLITH_REVIEW_MAX_TOKENS || 25000); +const MAX_REVIEW_BYTES = Number(process.env.EVOLITH_REVIEW_MAX_BYTES || 80000); async function main() { console.log("🤖 Initializing Agentic Code Review CI Step (GT-132)..."); @@ -21,7 +28,24 @@ async function main() { diffText = "No diff available or git command failed."; } - console.log(`\n📄 Extracted Diff: ${diffText.split("\\n").length} lines of changes.`); + // Sanitize, scope and budget the diff BEFORE any provider sees it (GT-146). + const prepared = prepareReviewInput(diffText, { + maxBytes: MAX_REVIEW_BYTES, + maxTokens: MAX_REVIEW_TOKENS, + }); + console.log( + `\n📄 Review input prepared: ${prepared.filesIncluded.length} relevant file(s), ` + + `${prepared.filesExcluded.length} excluded, ${prepared.redactions} secret(s) redacted, ` + + `~${prepared.estTokens} tokens (${prepared.bytes} bytes)${prepared.truncated ? ", truncated" : ""}.`, + ); + // Aggregate, non-sensitive efficiency telemetry only. + if (prepared.estTokens > MAX_REVIEW_TOKENS) { + console.error( + `❌ Prepared review input (~${prepared.estTokens} tokens) exceeds the ${MAX_REVIEW_TOKENS}-token budget. Failing closed.`, + ); + process.exit(1); + } + const reviewPayload = prepared.chunks.join("\n\n"); // Connect to the Governance MCP Server console.log("\n🔌 Connecting to Evolith Governance MCP Sandbox..."); @@ -53,23 +77,41 @@ async function main() { console.log("\n⚠️ EVOLITH_AGENTIC_REVIEW flag is not set to 'true'."); console.log(" Skipping actual LLM invocation. Architecture connection validated. (Dry-run Success)"); } else { - const apiKey = process.env.EVOLITH_LLM_API_KEY || process.env.GEMINI_API_KEY; - if (!apiKey) { - throw new Error("EVOLITH_AGENTIC_REVIEW is active but EVOLITH_LLM_API_KEY/GEMINI_API_KEY is missing."); - } else { - console.log("\n🧠 Submitting diff to Agentic Reviewer via Gemini API..."); - try { - const result = await invokeGemini(apiKey, diffText, toolNames); - console.log(`\n🤖 Review Result:\n${result}\n`); - if (result.includes("VIOLATION_DETECTED")) { - console.error("❌ Agentic review detected architectural violations!"); - process.exit(1); - } else { - console.log("✅ Agentic Review Passed (No violations found)."); + if (prepared.filesIncluded.length === 0) { + console.log("\n✅ No policy-relevant changed files to review. (Pass)"); + return; + } + let provider; + try { + provider = createReviewProvider({ + provider: process.env.EVOLITH_REVIEW_PROVIDER, + model: process.env.EVOLITH_REVIEW_MODEL, + apiKey: process.env.EVOLITH_LLM_API_KEY || process.env.GEMINI_API_KEY, + }); + } catch (cfgErr) { + console.error(`❌ Review provider unavailable — failing closed: ${cfgErr.message}`); + process.exit(1); + } + console.log(`\n🧠 Submitting sanitized, budgeted review input via provider [${provider.name}]...`); + try { + const result = await provider.review(buildReviewPrompt(reviewPayload, toolNames)); + const evaluation = evaluateProviderResponse(result); + if (!evaluation.ok) { + console.error(`❌ Indeterminate/malformed review result — failing closed:\n ${(evaluation.errors || []).join("\n ")}`); + process.exit(1); + } + if (evaluation.passesGate) { + console.log("✅ Agentic Review Passed (no violations)."); + } else { + console.error(`❌ Agentic review detected ${evaluation.findings.length} violation(s):`); + for (const f of evaluation.findings) { + console.error(` [${f.severity}] ${f.file}${f.line ? ":" + f.line : ""} — ${f.title} (confidence ${f.confidence})`); } - } catch (apiErr) { - throw new Error(`LLM review failed: ${apiErr.message}`); + process.exit(1); } + } catch (apiErr) { + console.error(`❌ LLM review failed — failing closed: ${apiErr.message}`); + process.exit(1); } } } catch (err) { @@ -85,61 +127,32 @@ async function main() { } } -function invokeGemini(apiKey, diff, tools) { - return new Promise((resolve, reject) => { - // Standard https request using native Node.js to avoid dependencies - import("node:https").then((https) => { - const payload = JSON.stringify({ - contents: [{ - parts: [{ - text: `You are Wilson, Principal Architect of Evolith Core. -Review the following git diff against our active MCP architecture tools: [${tools}]. +/** Provider-neutral prompt requesting a schema-v1.0 JSON review result. */ +function buildReviewPrompt(diff, tools) { + return `You are the Evolith Core architecture reviewer. +Review the following sanitized, policy-relevant diff against our active MCP architecture tools: [${tools}]. -Guidelines: -- If you find structural violations (e.g. invalid boundaries, illegal imports, missing signatures), start your output with 'VIOLATION_DETECTED' followed by a description of the issue. -- If everything conforms, output '✅ Review Passed: Clean Architecture Rules Met'. +Respond with ONLY a single JSON object conforming to this schema (no prose, no markdown fences): +{ + "schemaVersion": "1.0", + "verdict": "pass" | "fail", // "fail" iff there is at least one error-severity violation + "findings": [ + { + "ruleId": "", + "severity": "error" | "warning" | "info", + "title": "", + "file": "", + "line": , + "confidence": + } + ] +} +Detect structural violations (invalid boundaries, illegal imports, missing signatures). If none, return verdict "pass" with an empty findings array. Diff to review: \`\`\`diff ${diff} -\`\`\`` - }] - }] - }); - - const options = { - hostname: "generativelanguage.googleapis.com", - path: `/v1beta/models/gemini-2.5-flash:generateContent?key=${apiKey}`, - method: "POST", - headers: { - "Content-Type": "application/json", - "Content-Length": Buffer.byteLength(payload) - } - }; - - const req = https.request(options, (res) => { - let body = ""; - res.on("data", (chunk) => body += chunk); - res.on("end", () => { - if (res.statusCode >= 400) { - reject(new Error(`HTTP ${res.statusCode}: ${body}`)); - return; - } - try { - const data = JSON.parse(body); - const text = data.candidates?.[0]?.content?.parts?.[0]?.text || ""; - resolve(text.trim()); - } catch (e) { - reject(e); - } - }); - }); - - req.on("error", (err) => reject(err)); - req.write(payload); - req.end(); - }).catch(reject); - }); +\`\`\``; } main().catch((err) => { diff --git a/.harness/scripts/ci/review-input.mjs b/.harness/scripts/ci/review-input.mjs new file mode 100644 index 00000000..3ee9ac47 --- /dev/null +++ b/.harness/scripts/ci/review-input.mjs @@ -0,0 +1,153 @@ +/** + * GT-146 — Secure, token-bounded review-input preparation. + * + * Pure, provider-neutral helpers that turn a raw `git diff` into a sanitized, + * policy-relevant, budgeted payload before any LLM provider sees it: + * 1. redact credentials and sensitive patterns, + * 2. keep only policy-relevant changed files (drop lockfiles, vendored, + * generated, and binary content), + * 3. bound and chunk the result by measurable byte/token budgets. + * + * No network, no provider coupling — safe to unit test in isolation. + */ + +const REDACTION = '«REDACTED»'; + +/** [pattern, replacement] — replacement may be a string or a function. */ +const SECRET_PATTERNS = [ + // PEM private keys (any flavor) + [/-----BEGIN (?:RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY-----/g, '«REDACTED:private-key»'], + // JWT (header.payload.signature) + [/\beyJ[A-Za-z0-9_-]{6,}\.[A-Za-z0-9_-]{6,}\.[A-Za-z0-9_-]{6,}/g, '«REDACTED:jwt»'], + // AWS access key id + [/\bAKIA[0-9A-Z]{16}\b/g, '«REDACTED:aws-access-key»'], + // Google API key + [/\bAIza[0-9A-Za-z_-]{35}\b/g, '«REDACTED:google-api-key»'], + // GitHub PAT + [/\bghp_[A-Za-z0-9]{36}\b/g, '«REDACTED:github-token»'], + // Slack token + [/\bxox[baprs]-[A-Za-z0-9-]{10,}/g, '«REDACTED:slack-token»'], + // Bearer tokens + [/\bBearer\s+[A-Za-z0-9._~+/-]{12,}=*/g, 'Bearer «REDACTED»'], + // Generic secret assignments: FOO_API_KEY = "..." / token: '...' + [ + /\b([A-Za-z0-9_]*(?:API|SECRET|TOKEN|PASSWORD|PASSWD|PRIVATE|CREDENTIAL|KEY)[A-Za-z0-9_]*)(\s*[:=]\s*)['"]?([A-Za-z0-9_\-./+=]{12,})['"]?/gi, + (_m, key, sep) => `${key}${sep}${REDACTION}`, + ], +]; + +/** Redact known secret patterns. Returns `{ text, redactions }`. */ +export function redactSecrets(input) { + let text = String(input ?? ''); + let redactions = 0; + for (const [re, rep] of SECRET_PATTERNS) { + text = text.replace(re, (...args) => { + redactions += 1; + return typeof rep === 'function' ? rep(...args) : rep; + }); + } + return { text, redactions }; +} + +const EXCLUDE_PATH = [ + /(^|\/)node_modules\//, + /(^|\/)(dist|build|coverage|out)\//, + /(^|\/)\.evolith\//, + /(^|\/)(package-lock\.json|pnpm-lock\.yaml|yarn\.lock)$/, + /\.min\.(js|css)$/, + /\.map$/, + /\.(png|jpe?g|gif|svg|ico|pdf|wasm|lock|zip|gz|tar|woff2?)$/i, +]; + +const INCLUDE_EXT = /\.(ts|tsx|js|jsx|mjs|cjs|rego|json|ya?ml|md)$/i; + +/** Split a unified `git diff` into `[{ path, body }]` per file. */ +export function parseDiffFiles(diff) { + const files = []; + let cur = null; + for (const line of String(diff ?? '').split('\n')) { + const m = line.match(/^diff --git a\/(.+?) b\/(.+)$/); + if (m) { + if (cur) files.push(cur); + cur = { path: m[2], body: `${line}\n` }; + } else if (cur) { + cur.body += `${line}\n`; + } + } + if (cur) files.push(cur); + return files; +} + +/** Keep only policy-relevant, non-binary, non-vendored files. */ +export function selectRelevantFiles(diff) { + const included = []; + const excluded = []; + for (const f of parseDiffFiles(diff)) { + const isBinary = /\nBinary files /.test(f.body); + const isExcludedPath = EXCLUDE_PATH.some((re) => re.test(f.path)); + const isIncludedExt = INCLUDE_EXT.test(f.path); + if (isBinary || isExcludedPath || !isIncludedExt) excluded.push(f.path); + else included.push(f); + } + return { included, excluded }; +} + +/** Coarse token estimate (~4 chars/token); deterministic and provider-agnostic. */ +export function estimateTokens(text) { + return Math.ceil(Buffer.byteLength(String(text ?? ''), 'utf8') / 4); +} + +/** + * Pack file sections into chunks under the byte/token budget. A single file + * larger than the budget is truncated (and flagged) rather than dropped. + */ +export function budgetAndChunk(sections, { maxBytes = 60000, maxTokens = 15000 } = {}) { + const limit = Math.max(1, Math.min(maxBytes, maxTokens * 4)); + const chunks = []; + let cur = ''; + let curBytes = 0; + let truncated = false; + + for (const s of sections) { + let body = s.body; + if (Buffer.byteLength(body, 'utf8') > limit) { + body = `${body.slice(0, limit)}\n… «TRUNCATED ${s.path}» …\n`; + truncated = true; + } + const b = Buffer.byteLength(body, 'utf8'); + if (curBytes + b > limit && cur) { + chunks.push(cur); + cur = ''; + curBytes = 0; + } + cur += body; + curBytes += b; + } + if (cur) chunks.push(cur); + return { chunks, truncated }; +} + +/** + * Full pipeline: relevant-file selection → redaction → budget/chunk. + * Returns the chunks plus non-sensitive telemetry for aggregate reporting. + */ +export function prepareReviewInput(diff, config = {}) { + const { included, excluded } = selectRelevantFiles(diff); + let redactions = 0; + const sections = included.map((f) => { + const r = redactSecrets(f.body); + redactions += r.redactions; + return { path: f.path, body: r.text }; + }); + const { chunks, truncated } = budgetAndChunk(sections, config); + const bytes = chunks.reduce((n, c) => n + Buffer.byteLength(c, 'utf8'), 0); + return { + chunks, + filesIncluded: included.map((f) => f.path), + filesExcluded: excluded, + redactions, + bytes, + estTokens: Math.ceil(bytes / 4), + truncated, + }; +} diff --git a/.harness/scripts/ci/review-input.test.mjs b/.harness/scripts/ci/review-input.test.mjs new file mode 100644 index 00000000..40d93318 --- /dev/null +++ b/.harness/scripts/ci/review-input.test.mjs @@ -0,0 +1,105 @@ +import assert from 'node:assert/strict'; +import test from 'node:test'; +import { + redactSecrets, + parseDiffFiles, + selectRelevantFiles, + estimateTokens, + budgetAndChunk, + prepareReviewInput, +} from './review-input.mjs'; + +test('redactSecrets removes common credential patterns', () => { + const googleKey = `AIza${'B'.repeat(35)}`; // AIza + 35 chars = valid length + const sample = [ + 'AWS_SECRET_ACCESS_KEY = "wJalrXUtnFEMI1234567890bPxRfiCYEXAMPLEKEY"', + 'const token = "ghp_0123456789abcdef0123456789abcdef0123"', + 'Authorization: Bearer abcdef0123456789ABCDEF.token-value==', + `google: ${googleKey}`, + ].join('\n'); + const { text, redactions } = redactSecrets(sample); + assert.ok(redactions >= 4, `expected >=4 redactions, got ${redactions}`); + assert.ok(!text.includes('wJalrXUtnFEMI'), 'aws secret leaked'); + assert.ok(!text.includes('ghp_0123456789abcdef'), 'github token leaked'); + assert.ok(!text.includes(googleKey), 'google key leaked'); + assert.ok(text.includes('«REDACTED'), 'no redaction marker present'); +}); + +test('redactSecrets handles PEM private keys', () => { + const pem = '-----BEGIN RSA PRIVATE KEY-----\nMIIE...secret...\n-----END RSA PRIVATE KEY-----'; + const { text, redactions } = redactSecrets(pem); + assert.equal(redactions, 1); + assert.equal(text, '«REDACTED:private-key»'); +}); + +test('redactSecrets is a no-op on clean text', () => { + const { text, redactions } = redactSecrets('export function foo() { return 1; }'); + assert.equal(redactions, 0); + assert.ok(text.includes('foo')); +}); + +const DIFF = [ + 'diff --git a/src/app.ts b/src/app.ts', + 'index 111..222 100644', + '--- a/src/app.ts', + '+++ b/src/app.ts', + '@@ -1 +1 @@', + '+const x = 1;', + 'diff --git a/package-lock.json b/package-lock.json', + '--- a/package-lock.json', + '+++ b/package-lock.json', + '+ "lockfileVersion": 3', + 'diff --git a/assets/logo.png b/assets/logo.png', + 'Binary files a/assets/logo.png and b/assets/logo.png differ', + 'diff --git a/rulesets/data/data-mesh.rego b/rulesets/data/data-mesh.rego', + '--- a/rulesets/data/data-mesh.rego', + '+++ b/rulesets/data/data-mesh.rego', + '+package evolith.data_mesh', +].join('\n'); + +test('parseDiffFiles splits per-file sections', () => { + const files = parseDiffFiles(DIFF); + assert.deepEqual( + files.map((f) => f.path), + ['src/app.ts', 'package-lock.json', 'assets/logo.png', 'rulesets/data/data-mesh.rego'], + ); +}); + +test('selectRelevantFiles drops lockfiles, binaries; keeps source and rulesets', () => { + const { included, excluded } = selectRelevantFiles(DIFF); + assert.deepEqual(included.map((f) => f.path), ['src/app.ts', 'rulesets/data/data-mesh.rego']); + assert.deepEqual(excluded, ['package-lock.json', 'assets/logo.png']); +}); + +test('estimateTokens is roughly 4 chars/token', () => { + assert.equal(estimateTokens('abcd'), 1); + assert.equal(estimateTokens('abcdefgh'), 2); +}); + +test('budgetAndChunk truncates an oversize file and flags it', () => { + const big = { path: 'big.ts', body: 'x'.repeat(5000) }; + const { chunks, truncated } = budgetAndChunk([big], { maxBytes: 1000, maxTokens: 1000 }); + assert.equal(truncated, true); + assert.ok(chunks.join('').includes('«TRUNCATED big.ts»')); + assert.ok(Buffer.byteLength(chunks[0]) <= 1100); +}); + +test('budgetAndChunk splits multiple files across chunks under budget', () => { + const sections = [ + { path: 'a.ts', body: 'a'.repeat(600) }, + { path: 'b.ts', body: 'b'.repeat(600) }, + { path: 'c.ts', body: 'c'.repeat(600) }, + ]; + const { chunks } = budgetAndChunk(sections, { maxBytes: 1000, maxTokens: 1000 }); + assert.ok(chunks.length >= 2, `expected multiple chunks, got ${chunks.length}`); +}); + +test('prepareReviewInput redacts, selects and budgets end-to-end', () => { + const diffWithSecret = `${DIFF}\n+const API_KEY = "ghp_0123456789abcdef0123456789abcdef0123"`; + const out = prepareReviewInput(diffWithSecret, { maxBytes: 100000, maxTokens: 25000 }); + assert.deepEqual(out.filesIncluded, ['src/app.ts', 'rulesets/data/data-mesh.rego']); + assert.ok(out.filesExcluded.includes('package-lock.json')); + assert.ok(out.redactions >= 1, 'secret in changed source not redacted'); + assert.ok(!out.chunks.join('').includes('ghp_0123456789abcdef'), 'secret leaked into payload'); + assert.ok(out.estTokens > 0 && out.bytes > 0); +}); diff --git a/.harness/scripts/ci/review-provider.mjs b/.harness/scripts/ci/review-provider.mjs new file mode 100644 index 00000000..112190e6 --- /dev/null +++ b/.harness/scripts/ci/review-provider.mjs @@ -0,0 +1,91 @@ +/** + * GT-146 — Provider-neutral review port. + * + * The CI gate depends on this port, not on any specific vendor. An adapter + * implements `review(promptText) -> Promise` and is selected by config + * (`EVOLITH_REVIEW_PROVIDER` / `EVOLITH_REVIEW_MODEL`). Unknown providers and + * missing credentials throw (fail-closed) so the gate can never pass blind. + */ + +export class ReviewProviderError extends Error { + constructor(message) { + super(message); + this.name = 'ReviewProviderError'; + } +} + +/** Gemini adapter — isolates the only vendor-specific networking. */ +function geminiAdapter(config) { + const apiKey = config.apiKey; + if (!apiKey) throw new ReviewProviderError('gemini provider requires an API key (EVOLITH_LLM_API_KEY/GEMINI_API_KEY)'); + const model = config.model || 'gemini-2.5-flash'; + const hostname = config.endpoint || 'generativelanguage.googleapis.com'; + + return { + name: `gemini:${model}`, + async review(promptText) { + const https = await import('node:https'); + const payload = JSON.stringify({ contents: [{ parts: [{ text: promptText }] }] }); + return new Promise((resolve, reject) => { + const req = https.request( + { + hostname, + path: `/v1beta/models/${model}:generateContent`, + method: 'POST', + // API key in a header, not the URL query string. + headers: { + 'Content-Type': 'application/json', + 'x-goog-api-key': apiKey, + 'Content-Length': Buffer.byteLength(payload), + }, + }, + (res) => { + let body = ''; + res.on('data', (c) => (body += c)); + res.on('end', () => { + if (res.statusCode >= 400) { + reject(new ReviewProviderError(`gemini HTTP ${res.statusCode}`)); + return; + } + try { + const data = JSON.parse(body); + resolve((data.candidates?.[0]?.content?.parts?.[0]?.text || '').trim()); + } catch (e) { + reject(new ReviewProviderError(`gemini response parse error: ${e.message}`)); + } + }); + }, + ); + req.on('error', (err) => reject(new ReviewProviderError(`gemini request failed: ${err.message}`))); + req.write(payload); + req.end(); + }); + }, + }; +} + +const ADAPTERS = new Map([['gemini', geminiAdapter]]); + +/** Register an adapter factory (`config -> { name, review }`). Used for extension/tests. */ +export function registerAdapter(name, factory) { + if (typeof factory !== 'function') throw new ReviewProviderError(`adapter factory for "${name}" must be a function`); + ADAPTERS.set(name, factory); +} + +export function availableProviders() { + return [...ADAPTERS.keys()]; +} + +/** Build the configured review provider. Fail-closed on unknown provider. */ +export function createReviewProvider(config = {}) { + const name = config.provider || 'gemini'; + const factory = ADAPTERS.get(name); + if (!factory) { + throw new ReviewProviderError(`unknown review provider: "${name}" (available: ${availableProviders().join(', ')})`); + } + const adapter = factory(config); + if (!adapter || typeof adapter.review !== 'function') { + throw new ReviewProviderError(`provider "${name}" did not produce a valid adapter`); + } + return adapter; +} diff --git a/.harness/scripts/ci/review-provider.test.mjs b/.harness/scripts/ci/review-provider.test.mjs new file mode 100644 index 00000000..c8aaa8c7 --- /dev/null +++ b/.harness/scripts/ci/review-provider.test.mjs @@ -0,0 +1,53 @@ +import assert from 'node:assert/strict'; +import test from 'node:test'; +import { + createReviewProvider, + registerAdapter, + availableProviders, + ReviewProviderError, +} from './review-provider.mjs'; + +test('gemini is available by default', () => { + assert.ok(availableProviders().includes('gemini')); +}); + +test('unknown provider fails closed', () => { + assert.throws(() => createReviewProvider({ provider: 'nope' }), ReviewProviderError); +}); + +test('gemini without an API key fails closed', () => { + assert.throws(() => createReviewProvider({ provider: 'gemini' }), /requires an API key/); +}); + +test('gemini adapter builds with a key and exposes a review() contract', () => { + const p = createReviewProvider({ provider: 'gemini', apiKey: 'k', model: 'gemini-x' }); + assert.equal(p.name, 'gemini:gemini-x'); + assert.equal(typeof p.review, 'function'); +}); + +test('a registered mock adapter satisfies the port and returns its response', async () => { + registerAdapter('mock', (cfg) => ({ + name: 'mock', + async review(prompt) { + return `echo:${prompt}:${cfg.model || 'default'}`; + }, + })); + const p = createReviewProvider({ provider: 'mock', model: 'm1' }); + assert.equal(await p.review('hello'), 'echo:hello:m1'); +}); + +test('adapter failures propagate (caller fails closed)', async () => { + registerAdapter('boom', () => ({ + name: 'boom', + async review() { + throw new ReviewProviderError('upstream exploded'); + }, + })); + const p = createReviewProvider({ provider: 'boom' }); + await assert.rejects(() => p.review('x'), /upstream exploded/); +}); + +test('a factory returning no review() is rejected', () => { + registerAdapter('bad', () => ({ name: 'bad' })); + assert.throws(() => createReviewProvider({ provider: 'bad' }), /did not produce a valid adapter/); +}); diff --git a/.harness/scripts/ci/review-result.mjs b/.harness/scripts/ci/review-result.mjs new file mode 100644 index 00000000..fe0de11f --- /dev/null +++ b/.harness/scripts/ci/review-result.mjs @@ -0,0 +1,88 @@ +/** + * GT-146 — Versioned, fail-closed validation of the agentic review result. + * + * Replaces the fragile free-text `VIOLATION_DETECTED` marker with a structured, + * versioned contract. A malformed, unsupported, or indeterminate provider + * response can never silently pass the gate: it resolves to `verdict: "error"` + * with `passesGate: false`. + * + * Expected provider response (schema v1.0): + * { + * "schemaVersion": "1.0", + * "verdict": "pass" | "fail", + * "findings": [ + * { "ruleId"?: string, "severity": "error"|"warning"|"info", + * "title": string, "file": string, "line"?: integer, "confidence": 0..1 } + * ] + * } + */ + +export const REVIEW_SCHEMA_VERSION = '1.0'; +const SEVERITIES = new Set(['error', 'warning', 'info']); + +/** Extract and parse the JSON object from a provider response (tolerates fences/prose). */ +export function parseProviderResponse(raw) { + const text = String(raw ?? '').trim(); + if (!text) return { ok: false, error: 'empty provider response' }; + + const fence = text.match(/```(?:json)?\s*([\s\S]*?)```/i); + let candidate = (fence ? fence[1] : text).trim(); + if (!candidate.startsWith('{')) { + const i = candidate.indexOf('{'); + const j = candidate.lastIndexOf('}'); + if (i >= 0 && j > i) candidate = candidate.slice(i, j + 1); + } + try { + return { ok: true, value: JSON.parse(candidate) }; + } catch (e) { + return { ok: false, error: `unparseable provider response: ${e.message}` }; + } +} + +/** Validate a parsed result against schema v1.0. Returns a fail-closed verdict. */ +export function validateReviewResult(obj) { + if (obj === null || typeof obj !== 'object' || Array.isArray(obj)) { + return { ok: false, verdict: 'error', errors: ['result is not an object'] }; + } + const errors = []; + if (obj.schemaVersion !== REVIEW_SCHEMA_VERSION) { + errors.push(`unsupported schemaVersion: ${JSON.stringify(obj.schemaVersion)}`); + } + if (obj.verdict !== 'pass' && obj.verdict !== 'fail') { + errors.push(`invalid verdict: ${JSON.stringify(obj.verdict)}`); + } + if (!Array.isArray(obj.findings)) { + errors.push('findings must be an array'); + } else { + obj.findings.forEach((f, i) => { + if (!f || typeof f !== 'object') { + errors.push(`finding[${i}] is not an object`); + return; + } + if (!SEVERITIES.has(f.severity)) errors.push(`finding[${i}].severity invalid: ${JSON.stringify(f.severity)}`); + if (typeof f.title !== 'string' || !f.title.trim()) errors.push(`finding[${i}].title is required`); + if (typeof f.file !== 'string' || !f.file.trim()) errors.push(`finding[${i}].file (evidence location) is required`); + if (typeof f.confidence !== 'number' || Number.isNaN(f.confidence) || f.confidence < 0 || f.confidence > 1) { + errors.push(`finding[${i}].confidence must be a number in [0,1]`); + } + if (f.line !== undefined && (!Number.isInteger(f.line) || f.line < 0)) { + errors.push(`finding[${i}].line must be a non-negative integer`); + } + }); + } + if (errors.length) return { ok: false, verdict: 'error', errors }; + return { ok: true, verdict: obj.verdict, findings: obj.findings, schemaVersion: obj.schemaVersion }; +} + +/** + * Parse + validate + decide the gate, fail-closed. + * `passesGate` is true ONLY for a well-formed `verdict: "pass"`. + */ +export function evaluateProviderResponse(raw) { + const parsed = parseProviderResponse(raw); + if (!parsed.ok) { + return { ok: false, verdict: 'error', errors: [parsed.error], findings: [], passesGate: false }; + } + const result = validateReviewResult(parsed.value); + return { ...result, findings: result.findings ?? [], passesGate: result.ok === true && result.verdict === 'pass' }; +} diff --git a/.harness/scripts/ci/review-result.test.mjs b/.harness/scripts/ci/review-result.test.mjs new file mode 100644 index 00000000..0534c26b --- /dev/null +++ b/.harness/scripts/ci/review-result.test.mjs @@ -0,0 +1,81 @@ +import assert from 'node:assert/strict'; +import test from 'node:test'; +import { + parseProviderResponse, + validateReviewResult, + evaluateProviderResponse, + REVIEW_SCHEMA_VERSION, +} from './review-result.mjs'; + +const PASS = { schemaVersion: REVIEW_SCHEMA_VERSION, verdict: 'pass', findings: [] }; +const FAIL = { + schemaVersion: REVIEW_SCHEMA_VERSION, + verdict: 'fail', + findings: [{ severity: 'error', title: 'Illegal import', file: 'src/a.ts', line: 12, confidence: 0.9 }], +}; + +test('parseProviderResponse unwraps ```json fences', () => { + const r = parseProviderResponse('```json\n{"verdict":"pass"}\n```'); + assert.equal(r.ok, true); + assert.equal(r.value.verdict, 'pass'); +}); + +test('parseProviderResponse tolerates surrounding prose', () => { + const r = parseProviderResponse('Here is my review:\n{"verdict":"fail"}\nThanks!'); + assert.equal(r.ok, true); + assert.equal(r.value.verdict, 'fail'); +}); + +test('parseProviderResponse fails on empty/unparseable input', () => { + assert.equal(parseProviderResponse('').ok, false); + assert.equal(parseProviderResponse('not json at all').ok, false); +}); + +test('validateReviewResult accepts a well-formed pass', () => { + const v = validateReviewResult(PASS); + assert.equal(v.ok, true); + assert.equal(v.verdict, 'pass'); +}); + +test('validateReviewResult accepts a well-formed fail with findings', () => { + const v = validateReviewResult(FAIL); + assert.equal(v.ok, true); + assert.equal(v.verdict, 'fail'); + assert.equal(v.findings.length, 1); +}); + +test('validateReviewResult rejects unsupported schemaVersion', () => { + const v = validateReviewResult({ ...PASS, schemaVersion: '9.9' }); + assert.equal(v.ok, false); + assert.equal(v.verdict, 'error'); +}); + +test('validateReviewResult rejects invalid verdict and malformed findings', () => { + assert.equal(validateReviewResult({ ...PASS, verdict: 'maybe' }).ok, false); + assert.equal(validateReviewResult({ ...PASS, findings: 'nope' }).ok, false); + const badFinding = { schemaVersion: REVIEW_SCHEMA_VERSION, verdict: 'fail', findings: [{ severity: 'critical', title: '', confidence: 2 }] }; + const v = validateReviewResult(badFinding); + assert.equal(v.ok, false); + assert.ok(v.errors.length >= 3, `expected multiple errors, got ${v.errors.length}`); +}); + +test('validateReviewResult rejects non-object', () => { + assert.equal(validateReviewResult(null).ok, false); + assert.equal(validateReviewResult([1, 2]).ok, false); +}); + +test('evaluateProviderResponse passes the gate only on a well-formed pass', () => { + assert.equal(evaluateProviderResponse(JSON.stringify(PASS)).passesGate, true); +}); + +test('evaluateProviderResponse fails the gate on violations', () => { + const e = evaluateProviderResponse(JSON.stringify(FAIL)); + assert.equal(e.passesGate, false); + assert.equal(e.verdict, 'fail'); +}); + +test('evaluateProviderResponse fails closed on malformed/indeterminate output', () => { + assert.equal(evaluateProviderResponse('VIOLATION? maybe').passesGate, false); + assert.equal(evaluateProviderResponse('').passesGate, false); + assert.equal(evaluateProviderResponse('{"verdict":"pass"}').passesGate, false); // missing schemaVersion +}); diff --git a/reference/governance/standards/vision/gap-closure-evidence.json b/reference/governance/standards/vision/gap-closure-evidence.json index e0dd8ef4..4eba1f47 100644 --- a/reference/governance/standards/vision/gap-closure-evidence.json +++ b/reference/governance/standards/vision/gap-closure-evidence.json @@ -1952,6 +1952,22 @@ "node .harness/scripts/ci-runner.mjs" ], "dependencyDisposition": "none" + }, + { + "id": "GT-146", + "closedAt": "2026-06-20", + "closureCommit": "3efbb5900405322751edd139e4cf87095ddf82b3", + "evidence": [ + ".harness/scripts/ci/review-input.mjs", + ".harness/scripts/ci/review-result.mjs", + ".harness/scripts/ci/review-provider.mjs", + ".harness/scripts/ci/13-agentic-code-review.mjs", + ".github/workflows/sdk-cli-ci.yml" + ], + "validationCommands": [ + "node --test .harness/scripts/ci/review-input.test.mjs .harness/scripts/ci/review-result.test.mjs .harness/scripts/ci/review-provider.test.mjs" + ], + "dependencyDisposition": "none" } ] } diff --git a/reference/governance/standards/vision/gap-reference-catalog.es.md b/reference/governance/standards/vision/gap-reference-catalog.es.md index 5e51d8f6..75eec4be 100644 --- a/reference/governance/standards/vision/gap-reference-catalog.es.md +++ b/reference/governance/standards/vision/gap-reference-catalog.es.md @@ -111,10 +111,11 @@ Este catálogo explica cada gap: problema, propósito, evidencia, criterios de c - **Propósito:** Hacer segura, portable y económica la revisión de código con LLM real: minimizar y sanear el contexto enviado, imponer presupuestos explícitos de costo/tiempo y validar hallazgos estructurados antes de que un gate de CI actúe sobre ellos. - **Evidencia:** `.harness/scripts/ci/13-agentic-code-review.mjs` fija el endpoint y modelo de Gemini, envía el `git diff` completo y crudo al proveedor, y depende de un marcador textual libre `VIOLATION_DETECTED`. No cuenta con redacción de secretos, tope de diff/tokens, priorización de contexto, puerto de proveedor ni validación de resultado estructurado. - **Hecho cuando:** - - [ ] Un puerto de revisión neutral al proveedor soporta adaptadores y modelos configurados preservando un contrato de CI de fallo cerrado. - - [ ] La entrada de revisión elimina credenciales y patrones sensibles, incluye solo archivos modificados relevantes para políticas y está acotada/fragmentada por presupuestos medibles de bytes, tokens, latencia y costo. - - [ ] La respuesta del proveedor cumple un esquema versionado con ubicaciones de evidencia y confianza; resultados malformados o indeterminados no pueden aprobar el gate silenciosamente. - - [ ] Pruebas cubren redacción, presupuestos, selección de fragmentos, fallos de adaptador y validación de respuesta; CI usa permisos mínimos e informa telemetría agregada y no sensible de eficiencia. + - [x] Un puerto de revisión neutral al proveedor soporta adaptadores y modelos configurados preservando un contrato de CI de fallo cerrado. + - [x] La entrada de revisión elimina credenciales y patrones sensibles, incluye solo archivos modificados relevantes para políticas y está acotada/fragmentada por presupuestos medibles de bytes, tokens, latencia y costo. + - [x] La respuesta del proveedor cumple un esquema versionado con ubicaciones de evidencia y confianza; resultados malformados o indeterminados no pueden aprobar el gate silenciosamente. + - [x] Pruebas cubren redacción, presupuestos, selección de fragmentos, fallos de adaptador y validación de respuesta; CI usa permisos mínimos e informa telemetría agregada y no sensible de eficiencia. +- **Evidencia de cierre:** Commit `3efbb59`. Nuevos módulos puros en `.harness/scripts/ci/`: `review-provider.mjs` (puerto configurable + adapter Gemini con API key en header, fallo cerrado ante proveedor desconocido/clave ausente), `review-input.mjs` (redacción de secretos, selección de archivos relevantes, presupuesto bytes/tokens + chunking; el presupuesto de tokens es el proxy de costo), `review-result.mjs` (validación de esquema versionado v1.0; malformado/indeterminado → fallo cerrado). `13-agentic-code-review.mjs` recableado con telemetría agregada no sensible; el job `agentic-review` acotado a `contents: read`. 27 casos `node:test` pasan. Residual: presupuesto explícito de latencia por llamada (topes de tokens/bytes ya en sitio) es un follow-up menor. #### GT-147 diff --git a/reference/governance/standards/vision/gap-reference-catalog.md b/reference/governance/standards/vision/gap-reference-catalog.md index 8be0170d..29c241b5 100644 --- a/reference/governance/standards/vision/gap-reference-catalog.md +++ b/reference/governance/standards/vision/gap-reference-catalog.md @@ -110,10 +110,11 @@ This catalog explains each gap: problem, purpose, evidence, closure criteria, an - **Purpose:** Make real LLM code review safe, portable, and economical: minimize and sanitize the submitted context, enforce explicit cost/time budgets, and validate structured findings before a CI gate acts on them. - **Evidence:** `.harness/scripts/ci/13-agentic-code-review.mjs` hard-codes the Gemini endpoint and model, submits the full raw `git diff` to the provider, and relies on a free-text `VIOLATION_DETECTED` marker. It has no secret redaction, diff/token cap, context prioritization, provider port, or structured-result validation. - **Done when:** - - [ ] A provider-neutral review port supports configured adapters and models while preserving a fail-closed CI contract. - - [ ] The review input removes credentials and sensitive patterns, includes only policy-relevant changed files, and is bounded/chunked by measurable byte, token, latency, and cost budgets. - - [ ] The provider response conforms to a versioned schema with evidence locations and confidence; malformed or indeterminate results cannot silently pass the gate. - - [ ] Tests cover redaction, budgeting, chunk selection, adapter failures, and response validation; CI uses minimum permissions and reports aggregate, non-sensitive efficiency telemetry. + - [x] A provider-neutral review port supports configured adapters and models while preserving a fail-closed CI contract. + - [x] The review input removes credentials and sensitive patterns, includes only policy-relevant changed files, and is bounded/chunked by measurable byte, token, latency, and cost budgets. + - [x] The provider response conforms to a versioned schema with evidence locations and confidence; malformed or indeterminate results cannot silently pass the gate. + - [x] Tests cover redaction, budgeting, chunk selection, adapter failures, and response validation; CI uses minimum permissions and reports aggregate, non-sensitive efficiency telemetry. +- **Closure evidence:** Commit `3efbb59`. New pure modules under `.harness/scripts/ci/`: `review-provider.mjs` (configurable port + Gemini adapter, API key in header, fail-closed on unknown provider/missing key), `review-input.mjs` (secret redaction, policy-relevant file selection, byte/token budget + chunking; token budget is the cost proxy), `review-result.mjs` (versioned schema v1.0 validation; malformed/indeterminate → fail-closed). `13-agentic-code-review.mjs` rewired to use them with aggregate non-sensitive telemetry; the `agentic-review` job scoped to `contents: read`. 27 `node:test` cases pass. Residual: explicit per-call latency budget (token/byte caps in place) is a minor follow-up. #### GT-147 diff --git a/reference/governance/standards/vision/gap-tracking.es.md b/reference/governance/standards/vision/gap-tracking.es.md index 7b928460..602654c8 100644 --- a/reference/governance/standards/vision/gap-tracking.es.md +++ b/reference/governance/standards/vision/gap-tracking.es.md @@ -13,7 +13,7 @@ Este tablero es la única fuente de verdad para deuda técnica, gaps, oportunida | ID | Gap | Componente | Fase | Criticidad | Complejidad | Estado | |---|---|:---:|:---:|:---:|:---:|:---:| -| [`GT-146`](./gap-reference-catalog.es.md#gt-146) | Revisión Agéntica de CI Segura, Neutral al Proveedor y Acotada por Tokens | `Governance` | Cross | P0 | L | `PENDIENTE` | +| [`GT-146`](./gap-reference-catalog.es.md#gt-146) | Revisión Agéntica de CI Segura, Neutral al Proveedor y Acotada por Tokens | `Governance` | Cross | P0 | L | `COMPLETADO` | | [`GT-150`](./gap-reference-catalog.es.md#gt-150) | Madurar las Topologías Draft Restantes a Paridad de Corpus Aceptado | `Architecture` | Cross | P1 | L | `PENDIENTE` | | [`GT-145`](./gap-reference-catalog.es.md#gt-145) | Sincronización Veraz y Neutral al Proveedor de Vectores RAG | `Operations` | Cross | P1 | L | `PENDIENTE` | | [`GT-149`](./gap-reference-catalog.es.md#gt-149) | Pruebas OPA Ejecutables y Gate de Paridad Semántica Native/OPA | `Rulesets` | Cross | P1 | L | `PENDIENTE` | @@ -167,7 +167,7 @@ Este tablero es la única fuente de verdad para deuda técnica, gaps, oportunida | [`GT-128`](./gap-reference-catalog.es.md#gt-128) | Baseline Ruleset for Data Mesh | `Architecture` | Transversal | P2 | M | `COMPLETADO` | | [`GT-129`](./gap-reference-catalog.es.md#gt-129) | Baseline Ruleset for Edge Computing | `Architecture` | Transversal | P2 | M | `COMPLETADO` | -**Progreso:** 148 / 153 completados · 0 en progreso · 5 pendientes · 0 diferidos +**Progreso:** 149 / 153 completados · 0 en progreso · 4 pendientes · 0 diferidos **Ordenamiento:** una sola tabla, ordenada por estado (pendientes, luego diferidos, luego completados), luego criticidad (`P0` → `P1` → `P2`) y luego complejidad (`S` → `M` → `L`); los completados se agrupan por componente. Los IDs `GT-*` enlazan al [Catálogo de Referencia de Gaps](./gap-reference-catalog.es.md); los IDs `MT-A*` enlazan al [plan de implementación Multi-Topology](./multi-topology-reference-corpus-implementation-plan.es.md). diff --git a/reference/governance/standards/vision/gap-tracking.md b/reference/governance/standards/vision/gap-tracking.md index b680543d..279ae3a7 100644 --- a/reference/governance/standards/vision/gap-tracking.md +++ b/reference/governance/standards/vision/gap-tracking.md @@ -13,7 +13,7 @@ This board is the single source of truth for technical debt, gaps, opportunities | ID | Gap | Component | Phase | Criticality | Complexity | Status | |---|---|---|:---:|:---:|:---:|:---:| -| [`GT-146`](./gap-reference-catalog.md#gt-146) | Secure, Provider-Neutral, and Token-Bounded Agentic CI Review | `Governance` | Cross | P0 | L | `PENDING` | +| [`GT-146`](./gap-reference-catalog.md#gt-146) | Secure, Provider-Neutral, and Token-Bounded Agentic CI Review | `Governance` | Cross | P0 | L | `DONE` | | [`GT-150`](./gap-reference-catalog.md#gt-150) | Mature Remaining Draft Topologies to Accepted Corpus Parity | `Architecture` | Cross | P1 | L | `PENDING` | | [`GT-145`](./gap-reference-catalog.md#gt-145) | Truthful Provider-Neutral RAG Vector Synchronization | `Operations` | Cross | P1 | L | `PENDING` | | [`GT-149`](./gap-reference-catalog.md#gt-149) | Executable OPA Tests and Native/OPA Semantic Parity Gate | `Rulesets` | Cross | P1 | L | `PENDING` | @@ -167,7 +167,7 @@ This board is the single source of truth for technical debt, gaps, opportunities | [`GT-128`](./gap-reference-catalog.md#gt-128) | Baseline Ruleset for Data Mesh | `Architecture` | Cross | P2 | M | `DONE` | | [`GT-129`](./gap-reference-catalog.md#gt-129) | Baseline Ruleset for Edge Computing | `Architecture` | Cross | P2 | M | `DONE` | -**Progress:** 148 / 153 done · 0 in progress · 5 pending · 0 deferred +**Progress:** 149 / 153 done · 0 in progress · 4 pending · 0 deferred **Ordering:** one table, ordered by status (pending then deferred then completed), then criticality (`P0` → `P1` → `P2`) then complexity (`S` → `M` → `L`); completed gaps are grouped by component. `GT-*` IDs link to the [Gap Reference Catalog](./gap-reference-catalog.md); `MT-A*` IDs link to the supporting [Multi-Topology implementation plan](./multi-topology-reference-corpus-implementation-plan.md). diff --git a/reference/governance/standards/vision/maturity-reconciliation.json b/reference/governance/standards/vision/maturity-reconciliation.json index 5c14e7de..21f21f25 100644 --- a/reference/governance/standards/vision/maturity-reconciliation.json +++ b/reference/governance/standards/vision/maturity-reconciliation.json @@ -4,13 +4,13 @@ "asOf": "2026-06-20", "gaps": { "total": 153, - "done": 148, - "pending": 5, + "done": 149, + "pending": 4, "inProgress": 0, "deferred": 0 }, "evidence": { - "closureRecords": 130, + "closureRecords": 131, "cliPackage": "@evolith/smart-cli@1.1.0", "adrCount": 106, "rulesetCount": 25,