Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/sdk-cli-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,9 @@ jobs:
name: Wilson Agentic Review
if: github.event_name == 'push' || github.event.pull_request.head.repo.fork == false
runs-on: ubuntu-latest
# Least privilege: the review only reads the repo to diff changes (GT-146).
permissions:
contents: read
steps:
- uses: actions/checkout@v4
with:
Expand Down
145 changes: 79 additions & 66 deletions .harness/scripts/ci/13-agentic-code-review.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
import { execSync } from "node:child_process";
import { prepareReviewInput } from "./review-input.mjs";
import { evaluateProviderResponse } from "./review-result.mjs";
import { createReviewProvider } from "./review-provider.mjs";

/**
 * Read a numeric review limit from the environment.
 *
 * An unset or empty variable yields `fallback`. Any other value must parse to
 * a positive, finite number; otherwise this throws. A bare `Number(raw)` would
 * turn a typo such as "25k" into NaN, and every later `x > NaN` budget check
 * would be false, silently disabling the ceiling instead of failing closed.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or empty.
 * @returns {number} The validated limit.
 * @throws {Error} When the variable is set but is not a positive finite number.
 */
function readReviewLimit(name, fallback) {
  const raw = process.env[name];
  if (raw === undefined || raw === "") {
    return fallback;
  }
  const value = Number(raw);
  if (!Number.isFinite(value) || value <= 0) {
    throw new Error(`${name} must be a positive number, got "${raw}".`);
  }
  return value;
}

// Hard fail-closed ceiling: never submit a payload larger than this (GT-146).
const MAX_REVIEW_TOKENS = readReviewLimit("EVOLITH_REVIEW_MAX_TOKENS", 25000);
const MAX_REVIEW_BYTES = readReviewLimit("EVOLITH_REVIEW_MAX_BYTES", 80000);

async function main() {
console.log("🤖 Initializing Agentic Code Review CI Step (GT-132)...");
Expand All @@ -21,7 +28,24 @@ async function main() {
diffText = "No diff available or git command failed.";
}

console.log(`\n📄 Extracted Diff: ${diffText.split("\\n").length} lines of changes.`);
// Sanitize, scope and budget the diff BEFORE any provider sees it (GT-146).
const prepared = prepareReviewInput(diffText, {
maxBytes: MAX_REVIEW_BYTES,
maxTokens: MAX_REVIEW_TOKENS,
});
console.log(
`\n📄 Review input prepared: ${prepared.filesIncluded.length} relevant file(s), ` +
`${prepared.filesExcluded.length} excluded, ${prepared.redactions} secret(s) redacted, ` +
`~${prepared.estTokens} tokens (${prepared.bytes} bytes)${prepared.truncated ? ", truncated" : ""}.`,
);
// Aggregate, non-sensitive efficiency telemetry only.
if (prepared.estTokens > MAX_REVIEW_TOKENS) {
console.error(
`❌ Prepared review input (~${prepared.estTokens} tokens) exceeds the ${MAX_REVIEW_TOKENS}-token budget. Failing closed.`,
);
process.exit(1);
}
const reviewPayload = prepared.chunks.join("\n\n");

// Connect to the Governance MCP Server
console.log("\n🔌 Connecting to Evolith Governance MCP Sandbox...");
Expand Down Expand Up @@ -53,23 +77,41 @@ async function main() {
console.log("\n⚠️ EVOLITH_AGENTIC_REVIEW flag is not set to 'true'.");
console.log(" Skipping actual LLM invocation. Architecture connection validated. (Dry-run Success)");
} else {
const apiKey = process.env.EVOLITH_LLM_API_KEY || process.env.GEMINI_API_KEY;
if (!apiKey) {
throw new Error("EVOLITH_AGENTIC_REVIEW is active but EVOLITH_LLM_API_KEY/GEMINI_API_KEY is missing.");
} else {
console.log("\n🧠 Submitting diff to Agentic Reviewer via Gemini API...");
try {
const result = await invokeGemini(apiKey, diffText, toolNames);
console.log(`\n🤖 Review Result:\n${result}\n`);
if (result.includes("VIOLATION_DETECTED")) {
console.error("❌ Agentic review detected architectural violations!");
process.exit(1);
} else {
console.log("✅ Agentic Review Passed (No violations found).");
if (prepared.filesIncluded.length === 0) {
console.log("\n✅ No policy-relevant changed files to review. (Pass)");
return;
}
let provider;
try {
provider = createReviewProvider({
provider: process.env.EVOLITH_REVIEW_PROVIDER,
model: process.env.EVOLITH_REVIEW_MODEL,
apiKey: process.env.EVOLITH_LLM_API_KEY || process.env.GEMINI_API_KEY,
});
} catch (cfgErr) {
console.error(`❌ Review provider unavailable — failing closed: ${cfgErr.message}`);
process.exit(1);
}
console.log(`\n🧠 Submitting sanitized, budgeted review input via provider [${provider.name}]...`);
try {
const result = await provider.review(buildReviewPrompt(reviewPayload, toolNames));
const evaluation = evaluateProviderResponse(result);
if (!evaluation.ok) {
console.error(`❌ Indeterminate/malformed review result — failing closed:\n ${(evaluation.errors || []).join("\n ")}`);
process.exit(1);
}
if (evaluation.passesGate) {
console.log("✅ Agentic Review Passed (no violations).");
} else {
console.error(`❌ Agentic review detected ${evaluation.findings.length} violation(s):`);
for (const f of evaluation.findings) {
console.error(` [${f.severity}] ${f.file}${f.line ? ":" + f.line : ""} — ${f.title} (confidence ${f.confidence})`);
}
} catch (apiErr) {
throw new Error(`LLM review failed: ${apiErr.message}`);
process.exit(1);
}
} catch (apiErr) {
console.error(`❌ LLM review failed — failing closed: ${apiErr.message}`);
process.exit(1);
}
}
} catch (err) {
Expand All @@ -85,61 +127,32 @@ async function main() {
}
}

function invokeGemini(apiKey, diff, tools) {
return new Promise((resolve, reject) => {
// Standard https request using native Node.js to avoid dependencies
import("node:https").then((https) => {
const payload = JSON.stringify({
contents: [{
parts: [{
text: `You are Wilson, Principal Architect of Evolith Core.
Review the following git diff against our active MCP architecture tools: [${tools}].
/** Provider-neutral prompt requesting a schema-v1.0 JSON review result. */
function buildReviewPrompt(diff, tools) {
return `You are the Evolith Core architecture reviewer.
Review the following sanitized, policy-relevant diff against our active MCP architecture tools: [${tools}].

Guidelines:
- If you find structural violations (e.g. invalid boundaries, illegal imports, missing signatures), start your output with 'VIOLATION_DETECTED' followed by a description of the issue.
- If everything conforms, output '✅ Review Passed: Clean Architecture Rules Met'.
Respond with ONLY a single JSON object conforming to this schema (no prose, no markdown fences):
{
"schemaVersion": "1.0",
"verdict": "pass" | "fail", // "fail" iff there is at least one error-severity violation
"findings": [
{
"ruleId": "<optional rule id>",
"severity": "error" | "warning" | "info",
"title": "<short description>",
"file": "<changed file path — evidence location>",
"line": <integer, optional>,
"confidence": <number 0..1>
}
]
}
Detect structural violations (invalid boundaries, illegal imports, missing signatures). If none, return verdict "pass" with an empty findings array.

Diff to review:
\`\`\`diff
${diff}
\`\`\``
}]
}]
});

const options = {
hostname: "generativelanguage.googleapis.com",
path: `/v1beta/models/gemini-2.5-flash:generateContent?key=${apiKey}`,
method: "POST",
headers: {
"Content-Type": "application/json",
"Content-Length": Buffer.byteLength(payload)
}
};

const req = https.request(options, (res) => {
let body = "";
res.on("data", (chunk) => body += chunk);
res.on("end", () => {
if (res.statusCode >= 400) {
reject(new Error(`HTTP ${res.statusCode}: ${body}`));
return;
}
try {
const data = JSON.parse(body);
const text = data.candidates?.[0]?.content?.parts?.[0]?.text || "";
resolve(text.trim());
} catch (e) {
reject(e);
}
});
});

req.on("error", (err) => reject(err));
req.write(payload);
req.end();
}).catch(reject);
});
\`\`\``;
}

main().catch((err) => {
Expand Down
153 changes: 153 additions & 0 deletions .harness/scripts/ci/review-input.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/**
* GT-146 — Secure, token-bounded review-input preparation.
*
* Pure, provider-neutral helpers that turn a raw `git diff` into a sanitized,
* policy-relevant, budgeted payload before any LLM provider sees it:
* 1. redact credentials and sensitive patterns,
* 2. keep only policy-relevant changed files (drop lockfiles, vendored,
* generated, and binary content),
* 3. bound and chunk the result by measurable byte/token budgets.
*
* No network, no provider coupling — safe to unit test in isolation.
*/

/** Placeholder substituted for values matched by the generic assignment rule. */
const REDACTION = '«REDACTED»';

/**
 * Ordered `[pattern, replacement]` pairs; replacement may be a string or a
 * function receiving the usual `String.prototype.replace` callback arguments.
 * Specific token formats come first so the generic assignment rule only sees
 * what they left behind (their placeholders start with `«`, which the generic
 * value class cannot match, so nothing is redacted or counted twice).
 */
const SECRET_PATTERNS = [
  // PEM / ASCII-armored private keys. PGP armor reads "PRIVATE KEY BLOCK" and
  // PKCS#8 may read "ENCRYPTED PRIVATE KEY", so both variants are accepted.
  [/-----BEGIN (?:RSA |EC |OPENSSH |DSA |PGP |ENCRYPTED )?PRIVATE KEY(?: BLOCK)?-----[\s\S]*?-----END (?:RSA |EC |OPENSSH |DSA |PGP |ENCRYPTED )?PRIVATE KEY(?: BLOCK)?-----/g, '«REDACTED:private-key»'],
  // JWT (header.payload.signature)
  [/\beyJ[A-Za-z0-9_-]{6,}\.[A-Za-z0-9_-]{6,}\.[A-Za-z0-9_-]{6,}/g, '«REDACTED:jwt»'],
  // AWS access key id (AKIA = long-term, ASIA = temporary STS credentials)
  [/\b(?:AKIA|ASIA)[0-9A-Z]{16}\b/g, '«REDACTED:aws-access-key»'],
  // Google API key
  [/\bAIza[0-9A-Za-z_-]{35}\b/g, '«REDACTED:google-api-key»'],
  // GitHub tokens: classic/OAuth/user/server/refresh prefixes and fine-grained PATs
  [/\b(?:gh[pousr]_[A-Za-z0-9]{36,}|github_pat_[A-Za-z0-9_]{22,})\b/g, '«REDACTED:github-token»'],
  // Slack token
  [/\bxox[baprs]-[A-Za-z0-9-]{10,}/g, '«REDACTED:slack-token»'],
  // Bearer tokens
  [/\bBearer\s+[A-Za-z0-9._~+/-]{12,}=*/g, 'Bearer «REDACTED»'],
  // Generic secret assignments: FOO_API_KEY = "..." / token: '...' and also
  // quoted keys as found in JSON or object literals: "api_key": "...".
  // The key's optional closing quote is captured with the separator so it is
  // preserved in the output.
  [
    /\b([A-Za-z0-9_]*(?:API|SECRET|TOKEN|PASSWORD|PASSWD|PRIVATE|CREDENTIAL|KEY)[A-Za-z0-9_]*)(['"]?\s*[:=]\s*)['"]?([A-Za-z0-9_\-./+=]{12,})['"]?/gi,
    (_m, key, sep) => `${key}${sep}${REDACTION}`,
  ],
];

/**
 * Redact known secret patterns from a piece of text.
 *
 * Every entry of `SECRET_PATTERNS` is applied in order over the whole input;
 * each individual replacement increments the counter.
 *
 * @param {unknown} input - Text to scrub; `null`/`undefined` are treated as
 *   an empty string and other values are stringified.
 * @returns {{ text: string, redactions: number }} The scrubbed text and the
 *   number of replacements performed.
 */
export function redactSecrets(input) {
  let text = String(input ?? '');
  let redactions = 0;
  for (const [re, rep] of SECRET_PATTERNS) {
    text = text.replace(re, (...args) => {
      redactions += 1;
      return typeof rep === 'function' ? rep(...args) : rep;
    });
  }
  return { text, redactions };
}

/**
 * Path patterns that remove a changed file from review. A file is excluded as
 * soon as any one of these matches its path.
 */
const EXCLUDE_PATH = [
// a `node_modules/` directory at any depth
/(^|\/)node_modules\//,
// a `dist/`, `build/`, `coverage/` or `out/` directory at any depth
/(^|\/)(dist|build|coverage|out)\//,
// a `.evolith/` directory at any depth
/(^|\/)\.evolith\//,
// npm / pnpm / yarn lockfiles, matched by exact file name
/(^|\/)(package-lock\.json|pnpm-lock\.yaml|yarn\.lock)$/,
// minified JS/CSS
/\.min\.(js|css)$/,
// files ending in `.map`
/\.map$/,
// image, document, archive, font, wasm and `.lock` extensions (case-insensitive)
/\.(png|jpe?g|gif|svg|ico|pdf|wasm|lock|zip|gz|tar|woff2?)$/i,
];

/**
 * Allowlist of file extensions (case-insensitive). A path that does not end in
 * one of these is excluded even when no EXCLUDE_PATH pattern matches it.
 */
const INCLUDE_EXT = /\.(ts|tsx|js|jsx|mjs|cjs|rego|json|ya?ml|md)$/i;

/**
 * Break a unified `git diff` into one section per changed file.
 *
 * A section opens at each `diff --git a/<old> b/<new>` header and runs until
 * the next header (or the end of input). Lines before the first header are
 * ignored. Every line, header included, is appended to the section body
 * followed by a newline.
 *
 * @param {unknown} diff - Raw diff text; `null`/`undefined` yield no sections.
 * @returns {Array<{ path: string, body: string }>} Sections in input order,
 *   where `path` is the `b/` side of the header.
 */
export function parseDiffFiles(diff) {
  const headerPattern = /^diff --git a\/(.+?) b\/(.+)$/;
  const sections = [];
  const rows = String(diff ?? '').split('\n');

  for (let i = 0; i < rows.length; i += 1) {
    const row = rows[i];
    const header = headerPattern.exec(row);
    if (header !== null) {
      // Open a new section; the header line itself is appended just below.
      sections.push({ path: header[2], body: '' });
    }
    if (sections.length > 0) {
      sections[sections.length - 1].body += `${row}\n`;
    }
  }

  return sections;
}

/**
 * Partition the files of a diff into those worth reviewing and those skipped.
 *
 * A file is kept only when its section carries no `Binary files ` line, its
 * path matches none of `EXCLUDE_PATH`, and its extension matches `INCLUDE_EXT`.
 *
 * @param {unknown} diff - Raw unified diff text.
 * @returns {{ included: Array<{ path: string, body: string }>, excluded: string[] }}
 *   Kept sections (full objects) and the paths of everything else.
 */
export function selectRelevantFiles(diff) {
  const result = { included: [], excluded: [] };

  for (const file of parseDiffFiles(diff)) {
    const relevant =
      !/\nBinary files /.test(file.body) &&
      !EXCLUDE_PATH.some((pattern) => pattern.test(file.path)) &&
      INCLUDE_EXT.test(file.path);

    if (relevant) {
      result.included.push(file);
    } else {
      result.excluded.push(file.path);
    }
  }

  return result;
}

/**
 * Rough, deterministic token estimate that assumes about four UTF-8 bytes per
 * token. It is independent of any provider's tokenizer.
 *
 * @param {unknown} text - Value to measure; `null`/`undefined` count as empty.
 * @returns {number} Byte length divided by four, rounded up.
 */
export function estimateTokens(text) {
  const bytesPerToken = 4;
  const source = text === null || text === undefined ? '' : String(text);
  const byteCount = Buffer.byteLength(source, 'utf8');
  return Math.ceil(byteCount / bytesPerToken);
}

/**
 * Cut `text` down to at most `maxBytes` bytes of UTF-8 without splitting a
 * multi-byte character.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxBytes - Byte ceiling for the result.
 * @returns {string} `text` unchanged when it already fits, else a prefix of it.
 */
function truncateUtf8(text, maxBytes) {
  const encoded = Buffer.from(text, 'utf8');
  if (encoded.length <= maxBytes) {
    return text;
  }
  let end = Math.max(0, maxBytes);
  // If the first dropped byte is a continuation byte (10xxxxxx) the cut falls
  // inside a character; back up to that character's lead byte.
  while (end > 0 && (encoded[end] & 0xc0) === 0x80) {
    end -= 1;
  }
  return encoded.toString('utf8', 0, end);
}

/**
 * Pack file sections into chunks under the byte/token budget. A single file
 * larger than the budget is truncated (and flagged) rather than dropped.
 *
 * The effective per-chunk limit is `min(maxBytes, maxTokens * 4)` bytes, with
 * a floor of 1. Truncation is measured in UTF-8 bytes and reserves room for
 * the truncation marker, so a truncated section never exceeds the limit. If
 * the limit is too small to hold the marker, the section is cut to the limit
 * and the marker is omitted. A NaN limit falls back to the default, because
 * comparisons against NaN are always false and would disable the budget.
 *
 * @param {Array<{ path: string, body: string }>} sections - Sections to pack,
 *   kept in order; a section is never split across chunks.
 * @param {{ maxBytes?: number, maxTokens?: number }} [options] - Budgets.
 * @returns {{ chunks: string[], truncated: boolean }} Packed chunks and
 *   whether any section had to be truncated.
 */
export function budgetAndChunk(sections, { maxBytes = 60000, maxTokens = 15000 } = {}) {
  const byteCap = Number.isNaN(Number(maxBytes)) ? 60000 : Number(maxBytes);
  const tokenCap = Number.isNaN(Number(maxTokens)) ? 15000 : Number(maxTokens);
  const limit = Math.max(1, Math.floor(Math.min(byteCap, tokenCap * 4)));
  const chunks = [];
  let cur = '';
  let curBytes = 0;
  let truncated = false;

  for (const s of sections) {
    let body = s.body;
    if (Buffer.byteLength(body, 'utf8') > limit) {
      const marker = `\n… «TRUNCATED ${s.path}» …\n`;
      const markerBytes = Buffer.byteLength(marker, 'utf8');
      body =
        markerBytes < limit
          ? `${truncateUtf8(body, limit - markerBytes)}${marker}`
          : truncateUtf8(body, limit);
      truncated = true;
    }
    const b = Buffer.byteLength(body, 'utf8');
    // Start a new chunk when this section would overflow the current one.
    if (curBytes + b > limit && cur) {
      chunks.push(cur);
      cur = '';
      curBytes = 0;
    }
    cur += body;
    curBytes += b;
  }
  if (cur) chunks.push(cur);
  return { chunks, truncated };
}

/**
 * End-to-end preparation of a raw diff for review: pick the relevant files,
 * scrub secrets from each one, then pack the result into budgeted chunks.
 *
 * Redaction runs before budgeting, so truncation operates on scrubbed text.
 *
 * @param {unknown} diff - Raw unified diff text.
 * @param {{ maxBytes?: number, maxTokens?: number }} [config] - Budgets,
 *   forwarded unchanged to `budgetAndChunk`.
 * @returns {{
 *   chunks: string[],
 *   filesIncluded: string[],
 *   filesExcluded: string[],
 *   redactions: number,
 *   bytes: number,
 *   estTokens: number,
 *   truncated: boolean,
 * }} The chunks plus aggregate telemetry (paths, counts and sizes only).
 */
export function prepareReviewInput(diff, config = {}) {
  const selection = selectRelevantFiles(diff);

  const sections = [];
  let redactions = 0;
  for (const file of selection.included) {
    const scrubbed = redactSecrets(file.body);
    redactions += scrubbed.redactions;
    sections.push({ path: file.path, body: scrubbed.text });
  }

  const packed = budgetAndChunk(sections, config);

  // Total size across all chunks, in UTF-8 bytes.
  let bytes = 0;
  for (const chunk of packed.chunks) {
    bytes += Buffer.byteLength(chunk, 'utf8');
  }

  const filesIncluded = [];
  for (const file of selection.included) {
    filesIncluded.push(file.path);
  }

  return {
    chunks: packed.chunks,
    filesIncluded,
    filesExcluded: selection.excluded,
    redactions,
    bytes,
    estTokens: Math.ceil(bytes / 4),
    truncated: packed.truncated,
  };
}
Loading
Loading