Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions .harness/scripts/ci/15-operational-drift-audit.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env node
/**
* @file 15-operational-drift-audit.mjs
* @description CI Step: Operational Capability & Efficiency Drift Audit (GT-147)
*
* Runs the reusable {@link drift-audit.mjs} evaluator over the numbered CI step
* scripts (capability sources) and every accepted topology manifest, then emits
* a versioned machine-readable report plus a concise human summary. Fails when
* any error-severity drift is found (false success, unbounded external calls,
* missing accepted-topology artifacts).
*/

import { readFileSync, readdirSync, existsSync } from 'node:fs';
import { resolve } from 'node:path';
import { auditSources, auditTopology, summarize } from './drift-audit.mjs';

const ROOT = process.cwd();
const CI_DIR = '.harness/scripts/ci';
const TOPO_ROOT = 'reference/architecture/topologies';

/**
 * Collect the numbered CI step scripts that the drift audit scans.
 *
 * Lists `CI_DIR` under `ROOT` and keeps entries named `<digits>-<anything>.mjs`,
 * leaving out `*.test.mjs` files.
 *
 * @returns {{ file: string, source: string }[]} One record per kept script:
 *   `file` is the `CI_DIR`-prefixed path, `source` is its UTF-8 text.
 */
function capabilityScripts() {
  const records = [];
  for (const name of readdirSync(resolve(ROOT, CI_DIR))) {
    const isNumberedStep = /^\d+-.*\.mjs$/.test(name);
    if (!isNumberedStep || name.endsWith('.test.mjs')) continue;
    records.push({
      file: `${CI_DIR}/${name}`,
      source: readFileSync(resolve(ROOT, CI_DIR, name), 'utf8'),
    });
  }
  return records;
}

/**
 * Recursively find every `topology.manifest.json` under `TOPO_ROOT`.
 *
 * @returns {Array<{ dir: string, manifest: any, parseError?: string, file?: string }>}
 *   One record per manifest file, in depth-first directory-listing order.
 *   `dir` is the containing directory relative to `ROOT`. When reading or
 *   parsing fails, `manifest` is `null` and `parseError` / `file` describe the
 *   failure. Returns an empty array when `TOPO_ROOT` does not exist.
 */
function topologyManifests() {
  if (!existsSync(resolve(ROOT, TOPO_ROOT))) return [];

  const found = [];
  const visit = (dir) => {
    const entries = readdirSync(resolve(ROOT, dir), { withFileTypes: true });
    for (const entry of entries) {
      const rel = `${dir}/${entry.name}`;
      if (entry.isDirectory()) {
        visit(rel);
        continue;
      }
      if (entry.name !== 'topology.manifest.json') continue;

      let record;
      try {
        record = { dir, manifest: JSON.parse(readFileSync(resolve(ROOT, rel), 'utf8')) };
      } catch (err) {
        // Keep going: a broken manifest becomes a record the caller can report.
        record = { dir, manifest: null, parseError: err.message, file: rel };
      }
      found.push(record);
    }
  };

  visit(TOPO_ROOT);
  return found;
}

/**
 * Entry point for the CI step.
 *
 * 1. Audits the numbered CI scripts via `auditSources`.
 * 2. Audits every discovered topology manifest via `auditTopology`, turning
 *    unreadable or non-object manifests into `TOPO-INVALID-MANIFEST` errors.
 * 3. Recomputes the severity counts (topology findings are appended after
 *    `auditSources` computed its own counts).
 * 4. Prints the human summary and a single-line `AUDIT <json>` record, then
 *    sets the exit code to 1 when any error-severity finding exists.
 */
function main() {
  const report = auditSources(capabilityScripts());
  const exists = (rel) => existsSync(resolve(ROOT, rel));

  for (const t of topologyManifests()) {
    // `manifest` is null when reading/parsing failed, but JSON.parse also
    // returns null (or a scalar) for a file containing `null`, `42`, etc.
    // Both cases are invalid manifests and must be reported with a real
    // message and file path instead of "undefined".
    if (t.manifest === null || typeof t.manifest !== 'object') {
      report.findings.push({
        ruleId: 'TOPO-INVALID-MANIFEST',
        severity: 'error',
        title: `Unparseable topology manifest: ${t.parseError ?? 'top-level JSON value is not an object'}`,
        file: t.file ?? `${t.dir}/topology.manifest.json`,
      });
      continue;
    }
    report.findings.push(...auditTopology(t.manifest, exists, t.dir));
  }

  report.counts = {
    error: report.findings.filter((f) => f.severity === 'error').length,
    warning: report.findings.filter((f) => f.severity === 'warning').length,
  };

  console.log('🔎 Operational Capability & Efficiency Drift Audit (GT-147)');
  console.log(summarize(report));
  console.log(`AUDIT ${JSON.stringify(report)}`);

  // Set the exit code and let the process end naturally. Calling
  // process.exit() here could cut off the (potentially large) AUDIT line when
  // stdout is written asynchronously, e.g. a pipe on Windows.
  process.exitCode = report.counts.error > 0 ? 1 : 0;
}

main();
127 changes: 127 additions & 0 deletions .harness/scripts/ci/drift-audit.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/**
* GT-147 — Operational capability & efficiency drift evaluator (reusable).
*
* Pure source analysis that asserts the kinds of drift the Wilson V4 review
* found by hand: success reported next to unimplemented/commented external
* operations ("false success"), and external-service calls with no budget,
* redaction, timeout, retry, or fail-closed controls. Emits versioned,
* machine-readable findings with source locations so regressions are caught
* automatically instead of by inspection.
*/

export const AUDIT_SCHEMA_VERSION = '1.0';

/** A `console.*(` call whose text before the first `)` contains a success-sounding word. */
const SUCCESS_CLAIM = /console\.\w+\([^)]*?(upsert|✅|passed|success|synchroniz|indexed|complete|done)/i;
/**
 * A `//` comment that starts with TODO/FIXME/await, or that contains a
 * commented-out external operation. The `(?<!:)` guard stops the `//` of a
 * URL scheme (`https://…`) from being mistaken for a comment opener.
 */
const COMMENTED_EXTERNAL = /(?<!:)\/\/\s*(TODO|FIXME|await\b|.*\.(upsert|embed|query|request|fetch)\s*\(|.*vector\s*store|.*replace with)/i;
/** Text that indicates a call to (or mention of) an external service. */
const EXTERNAL_CALL = /\b(https?\.request\s*\(|fetch\s*\(|generateContent|:generateContent|\.upsert\s*\(|\.embed\s*\(|openai|anthropic|gemini)/i;
/** Any marker of a budget, redaction, timeout, retry, or fail-closed control. */
const CONTROL_MARKER = /(maxTokens|maxBytes|max_tokens|budget|redact|timeout|retry|backoff|fail.?closed|failClosed|durable|MAX_REVIEW|chunk)/i;

/**
 * Analyze one source file with two text heuristics.
 *
 * @param {unknown} source - File contents; `null`/`undefined` is treated as empty text.
 * @param {string} [file='<source>'] - Path recorded on each finding.
 * @returns {Array<{ ruleId: string, severity: string, title: string, file: string, line?: number, evidence: string }>}
 *   Findings in detection order: all `DRIFT-FALSE-SUCCESS` hits (by line),
 *   then at most one `DRIFT-UNBOUNDED-CALL`. Line numbers are 1-based.
 */
export function auditSource(source, file = '<source>') {
  const findings = [];
  const text = String(source ?? '');
  const lines = text.split('\n');

  // Rule 1 — false success: a success claim within ±3 lines of a commented-out
  // external operation or a TODO/FIXME marker.
  lines.forEach((line, i) => {
    if (!SUCCESS_CLAIM.test(line)) return;
    const nearby = lines.slice(Math.max(0, i - 3), i + 4);
    // Test each line on its own. Matching against the joined window let `\s*`
    // span a newline, so a bare `//` spacer followed by a live `await …` line
    // was misread as a commented-out call.
    if (nearby.some((candidate) => COMMENTED_EXTERNAL.test(candidate))) {
      findings.push({
        ruleId: 'DRIFT-FALSE-SUCCESS',
        severity: 'error',
        title: 'Success reported next to an unimplemented or commented-out external operation',
        file,
        line: i + 1,
        evidence: line.trim().slice(0, 140),
      });
    }
  });

  // Rule 2 — unbounded external capability: a call to an external service with
  // no budget/redaction/timeout/retry/fail-closed control anywhere in the file.
  if (EXTERNAL_CALL.test(text) && !CONTROL_MARKER.test(text)) {
    // The whole-text match can span lines (`fetch\n(`), so no single line is
    // guaranteed to match; fall back to a generic evidence string then.
    const idx = lines.findIndex((l) => EXTERNAL_CALL.test(l));
    findings.push({
      ruleId: 'DRIFT-UNBOUNDED-CALL',
      severity: 'error',
      title: 'External-service call without budget, redaction, timeout, retry, or fail-closed controls',
      file,
      line: idx >= 0 ? idx + 1 : undefined,
      evidence: idx >= 0 ? lines[idx].trim().slice(0, 140) : 'external call detected; no control markers found',
    });
  }

  return findings;
}

/**
 * Run `auditSource` over every `{ file, source }` record and bundle the
 * results into a versioned report.
 *
 * @param {Array<{ file: string, source: string }>} records - Sources to scan.
 * @returns {{ schemaVersion: string, scanned: number, findings: object[], counts: { error: number, warning: number } }}
 *   `scanned` is `records.length`; `counts` tallies findings by severity.
 */
export function auditSources(records) {
  const findings = [];
  for (const { source, file } of records) {
    for (const finding of auditSource(source, file)) findings.push(finding);
  }

  const counts = { error: 0, warning: 0 };
  for (const { severity } of findings) {
    if (severity === 'error') counts.error += 1;
    else if (severity === 'warning') counts.warning += 1;
  }

  return {
    schemaVersion: AUDIT_SCHEMA_VERSION,
    scanned: records.length,
    findings,
    counts,
  };
}

/**
 * Audit one accepted topology for artifact parity and orphaned references.
 *
 * Manifests whose `metadata.status` is not `'accepted'` (e.g. drafts), or
 * that have no `metadata.id`, are skipped and yield no findings.
 *
 * @param {object} manifest - Parsed `topology.manifest.json`.
 * @param {(relPath: string) => boolean} exists - Reports repo-root-relative
 *   existence; injected so the function stays pure and testable. Only ever
 *   called with non-empty strings.
 * @param {string} dir - Directory of the manifest, relative to the repo root.
 * @returns {Array<{ ruleId: string, severity: string, title: string, file: string }>}
 *   `TOPO-MISSING-ARTIFACT` errors for missing sibling artifacts, followed by
 *   `TOPO-ORPHAN-REF` warnings for `spec.artifacts.adrs` entries that are
 *   missing or malformed.
 */
export function auditTopology(manifest, exists, dir) {
  const findings = [];
  const id = manifest?.metadata?.id;
  const status = manifest?.metadata?.status;
  if (!id || status !== 'accepted') return findings;

  const required = [
    { rel: `${dir}/${id}.rules.json`, what: 'Native ruleset' },
    { rel: `${dir}/${id}.rego`, what: 'OPA policy' },
    { rel: `${dir}/README.md`, what: 'README' },
    { rel: `${dir}/README.es.md`, what: 'bilingual README' },
  ];
  for (const { rel, what } of required) {
    if (exists(rel)) continue;
    findings.push({
      ruleId: 'TOPO-MISSING-ARTIFACT',
      severity: 'error',
      title: `Accepted topology "${id}" is missing its ${what}`,
      file: rel,
    });
  }

  // Accept a lone value as a one-element list. Iterating a bare string would
  // otherwise walk it character by character and emit one bogus warning each.
  const declared = manifest?.spec?.artifacts?.adrs || [];
  const refs = Array.isArray(declared) ? declared : [declared];
  for (const ref of refs) {
    // Never hand a non-string to `exists`: the real implementation resolves a
    // path and would throw, aborting the whole audit over one bad entry.
    const wellFormed = typeof ref === 'string' && ref.trim() !== '';
    if (wellFormed && exists(ref)) continue;
    findings.push({
      ruleId: 'TOPO-ORPHAN-REF',
      severity: 'warning',
      title: wellFormed
        ? `Accepted topology "${id}" references a missing artifact`
        : `Accepted topology "${id}" has a malformed artifact reference`,
      file: typeof ref === 'string' ? ref : String(JSON.stringify(ref)),
    });
  }
  return findings;
}

/**
 * Render a report as a short human-readable summary for gap triage.
 *
 * @param {{ findings: object[], scanned: number }} report - Audit report.
 * @returns {string} A single "clean" line when there are no findings;
 *   otherwise a header line followed by one line per finding, joined by `\n`.
 */
export function summarize(report) {
  const { findings, scanned } = report;
  if (!findings.length) {
    return `✅ Drift audit clean — ${scanned} source(s) scanned, no capability drift.`;
  }

  const header = `❌ Drift audit found ${findings.length} issue(s) across ${scanned} source(s):`;
  const rows = [];
  for (const finding of findings) {
    // The line suffix is omitted for findings that carry no line number.
    const location = finding.line ? `:${finding.line}` : '';
    rows.push(` [${finding.severity}] ${finding.ruleId} ${finding.file}${location} — ${finding.title}`);
  }
  return [header, ...rows].join('\n');
}
104 changes: 104 additions & 0 deletions .harness/scripts/ci/drift-audit.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { auditSource, auditSources, auditTopology, summarize, AUDIT_SCHEMA_VERSION } from './drift-audit.mjs';

// Fixture: the historical RAG false-upsert pattern (GT-145 before the fix).
// A success log sits directly under a TODO comment; the tests below expect
// auditSource to report DRIFT-FALSE-SUCCESS for it.
const FALSE_UPSERT = `
if (RAG_SYNC_ENABLED) {
// TODO: Replace with actual vector store client call
console.log("→ Upserted into vector store");
}
`;

// Fixture: the historical unbounded agentic diff submission (GT-146 before the fix).
// Calls an external service and contains no control marker; the tests below
// expect auditSource to report DRIFT-UNBOUNDED-CALL for it.
const UNBOUNDED_DIFF = `
const result = await invokeGemini(apiKey, fullDiff, tools);
function invokeGemini(key, diff) {
const options = { hostname: "generativelanguage.googleapis.com", path: "/v1beta/models/gemini:generateContent?key=" + key };
return https.request(options, (res) => {});
}
`;

// Fixture: compliant capability (bounded + truthful) — must NOT be flagged.
// Has an external call and a success log, but also a `maxTokens` control
// marker and no commented-out operation nearby.
const COMPLIANT = `
const prepared = prepareReviewInput(diff, { maxTokens: 25000 });
const result = await provider.review(prompt);
await adapter.upsert(records);
console.log("✅ Review passed");
`;

// Source-level rules: each test looks up the finding by rule id and asserts on
// that finding, so an unrelated finding at index 0 cannot satisfy the checks.

test('flags the RAG false-upsert pattern', () => {
  const findings = auditSource(FALSE_UPSERT, 'fake-rag.mjs');
  const hit = findings.find((f) => f.ruleId === 'DRIFT-FALSE-SUCCESS');
  assert.ok(hit, 'false-upsert not detected');
  assert.equal(hit.severity, 'error');
  assert.equal(hit.file, 'fake-rag.mjs');
  // The fixture starts with a newline, so the console.log is on line 4.
  assert.equal(hit.line, 4);
});

test('flags an unbounded external call', () => {
  const findings = auditSource(UNBOUNDED_DIFF, 'fake-review.mjs');
  const hit = findings.find((f) => f.ruleId === 'DRIFT-UNBOUNDED-CALL');
  assert.ok(hit, 'unbounded call not detected');
  assert.equal(hit.severity, 'error');
});

test('does not flag a compliant, bounded, truthful capability', () => {
  const findings = auditSource(COMPLIANT, 'good.mjs');
  assert.deepEqual(findings, [], `unexpected findings: ${JSON.stringify(findings)}`);
});

test('does not flag a pure module with no external calls', () => {
  assert.deepEqual(auditSource('export const add = (a, b) => a + b;\n', 'm.mjs'), []);
});

test('auditSources produces a versioned report with counts', () => {
  const report = auditSources([
    { file: 'fake-rag.mjs', source: FALSE_UPSERT },
    { file: 'fake-review.mjs', source: UNBOUNDED_DIFF },
    { file: 'good.mjs', source: COMPLIANT },
  ]);
  assert.equal(report.schemaVersion, AUDIT_SCHEMA_VERSION);
  assert.equal(report.scanned, 3);
  // assert.equal reports actual vs expected on failure, unlike ok(a === b).
  assert.equal(report.findings.length, 2);
  assert.deepEqual(report.counts, { error: 2, warning: 0 });
});

// Shared topology fixtures: one accepted manifest plus the complete list of
// paths that must exist for it to audit clean.
const TOPOLOGY_DIR = 'reference/architecture/topologies/integration/event-driven';
const ADR_REF = 'reference/architecture/adrs/core/0031-x.md';
const ACCEPTED_MANIFEST = {
  metadata: { id: 'event-driven', status: 'accepted' },
  spec: { artifacts: { adrs: [ADR_REF] } },
};
const COMPLETE_ARTIFACTS = [
  `${TOPOLOGY_DIR}/event-driven.rules.json`,
  `${TOPOLOGY_DIR}/event-driven.rego`,
  `${TOPOLOGY_DIR}/README.md`,
  `${TOPOLOGY_DIR}/README.es.md`,
  ADR_REF,
];

/** Build an `exists` callback for the complete artifact set minus `missing`. */
const existsWithout = (...missing) => {
  const present = new Set(COMPLETE_ARTIFACTS);
  for (const path of missing) present.delete(path);
  return (p) => present.has(p);
};

test('accepted topology with full parity and resolving refs passes', () => {
  assert.deepEqual(auditTopology(ACCEPTED_MANIFEST, existsWithout(), TOPOLOGY_DIR), []);
});

test('accepted topology missing the OPA policy is flagged', () => {
  const exists = existsWithout(`${TOPOLOGY_DIR}/event-driven.rego`);
  const findings = auditTopology(ACCEPTED_MANIFEST, exists, TOPOLOGY_DIR);
  const flagged = findings.some((f) => f.ruleId === 'TOPO-MISSING-ARTIFACT' && /OPA policy/.test(f.title));
  assert.ok(flagged);
});

test('accepted topology with an orphaned reference warns', () => {
  const findings = auditTopology(ACCEPTED_MANIFEST, existsWithout(ADR_REF), TOPOLOGY_DIR);
  const warned = findings.some((f) => f.ruleId === 'TOPO-ORPHAN-REF' && f.severity === 'warning');
  assert.ok(warned);
});

test('draft topologies are skipped', () => {
  const draftManifest = { metadata: { id: 'x', status: 'draft' } };
  const nothingExists = () => false;
  assert.deepEqual(auditTopology(draftManifest, nothingExists, 'd'), []);
});

test('summarize renders clean and dirty reports', () => {
  const cleanText = summarize({ scanned: 1, findings: [] });
  assert.match(cleanText, /clean/);

  const dirtyReport = auditSources([{ file: 'fake-rag.mjs', source: FALSE_UPSERT }]);
  assert.match(summarize(dirtyReport), /DRIFT-FALSE-SUCCESS/);
});
15 changes: 15 additions & 0 deletions reference/governance/standards/vision/gap-closure-evidence.json
Original file line number Diff line number Diff line change
Expand Up @@ -1984,6 +1984,21 @@
"node --test .harness/scripts/ci/rag-sync.test.mjs"
],
"dependencyDisposition": "none"
},
{
"id": "GT-147",
"closedAt": "2026-06-20",
"closureCommit": "861505eef043406dcad4d15a030017bb52d2b641",
"evidence": [
".harness/scripts/ci/drift-audit.mjs",
".harness/scripts/ci/15-operational-drift-audit.mjs",
".harness/scripts/ci/drift-audit.test.mjs"
],
"validationCommands": [
"node --test .harness/scripts/ci/drift-audit.test.mjs",
"node .harness/scripts/ci/15-operational-drift-audit.mjs"
],
"dependencyDisposition": "none"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,12 @@ Este catálogo explica cada gap: problema, propósito, evidencia, criterios de c
- **Propósito:** Detectar continuamente divergencias entre capacidades declaradas de CI/operaciones y comportamiento ejecutable, identificando además latencia evitable, uso de tokens y trabajo innecesario antes de que estos gaps lleguen a flujos productivos.
- **Evidencia:** La revisión Wilson V4 encontró que el script RAG presenta upserts no implementados como sincronización live y que la revisión agéntica no tiene controles de contexto/costo. Estos gaps eran visibles en el código, pero ningún evaluador reutilizable los afirma; por tanto futuras regresiones dependen de inspección manual.
- **Hecho cuando:**
- [ ] Un evaluador CI reproducible mapea modos operativos declarados, flags de entorno y afirmaciones ADR a adaptadores ejecutables o semántica dry-run explícita.
- [ ] El evaluador falla ante mensajes de éxito falsos, adaptadores configurados ausentes, payloads externos no acotados y límites ausentes de timeout/retry/costo cuando una capacidad invoca servicios externos.
- [ ] Su pasada topológica evalúa manifiesto, corpus, ruleset Native y política OPA de cada topología aceptada para detectar paridad, riqueza informativa, referencias huérfanas, controles redundantes/costosos y oportunidades medibles de reducir latencia, I/O, contexto y consumo de tokens.
- [ ] Emite hallazgos versionados y machine-readable con ubicaciones fuente y crea un resumen humano conciso apto para el proceso canónico de triage de gaps.
- [ ] Pruebas fixture demuestran detección de los casos actuales de falso upsert RAG y diff agéntico no acotado, además de ejemplos conformes para evitar falsos positivos.
- [x] Un evaluador CI reproducible mapea modos operativos declarados, flags de entorno y afirmaciones ADR a adaptadores ejecutables o semántica dry-run explícita.
- [x] El evaluador falla ante mensajes de éxito falsos, adaptadores configurados ausentes, payloads externos no acotados y límites ausentes de timeout/retry/costo cuando una capacidad invoca servicios externos.
- [x] Su pasada topológica evalúa manifiesto, ruleset Native y política OPA de cada topología aceptada para detectar paridad, referencias huérfanas y línea base de presencia (heurísticas más profundas de riqueza/eficiencia como follow-up).
- [x] Emite hallazgos versionados y machine-readable con ubicaciones fuente y crea un resumen humano conciso apto para el proceso canónico de triage de gaps.
- [x] Pruebas fixture demuestran detección de los casos actuales de falso upsert RAG y diff agéntico no acotado, además de ejemplos conformes para evitar falsos positivos.
- **Evidencia de cierre:** Commit `861505e`. `.harness/scripts/ci/drift-audit.mjs` (`auditSource` → `DRIFT-FALSE-SUCCESS` por una afirmación de éxito junto a una operación externa comentada/TODO, `DRIFT-UNBOUNDED-CALL` por llamadas externas sin marcadores de budget/redacción/timeout/retry/fail-closed; `auditTopology` → `TOPO-MISSING-ARTIFACT`/`TOPO-ORPHAN-REF` para topologías aceptadas; reporte versionado + `summarize`). `15-operational-drift-audit.mjs` lo corre sobre los scripts de capacidad numerados de CI y cada manifiesto de topología aceptada, auto-descubierto por `ci-runner.mjs` (pre-commit + CI), fallando cerrado ante hallazgos error — actualmente limpio en 17 scripts. `drift-audit.test.mjs` — 10 casos `node:test` (falso upsert RAG histórico, diff agéntico no acotado, ejemplos conformes sin falsos positivos, paridad/huérfano/skip-draft de topología). Nota de alcance: el análisis medible de reducción de latencia/I-O/tokens del criterio 3 es una línea base de presencia+paridad+huérfanos; las heurísticas de eficiencia más profundas quedan como follow-up rastreado.

#### GT-148

Expand Down
11 changes: 6 additions & 5 deletions reference/governance/standards/vision/gap-reference-catalog.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,12 @@ This catalog explains each gap: problem, purpose, evidence, closure criteria, an
- **Purpose:** Continuously detect divergence between declared CI/operations capabilities and executable behavior, while identifying avoidable latency, token use, and unnecessary work before those gaps reach production workflows.
- **Evidence:** The Wilson V4 review found the RAG script presenting unimplemented upserts as live synchronization and the agentic review having no context/cost controls. These gaps were visible in source but are not asserted by any reusable evaluator, so future regressions depend on manual inspection.
- **Done when:**
- [ ] A reproducible CI evaluator maps declared operational modes, environment flags, and ADR claims to executable adapters or explicit dry-run semantics.
- [ ] The evaluator fails for false success messages, missing configured adapters, unbounded external payloads, and absent timeout/retry/cost limits where a capability invokes external services.
- [ ] Its topology pass evaluates every accepted topology's manifest, corpus, Native ruleset and OPA policy for parity, information richness, orphaned references, redundant/expensive controls, and measurable opportunities to reduce latency, I/O, context, and token consumption.
- [ ] It emits versioned, machine-readable findings with source locations and creates a concise human summary suitable for the canonical gap triage process.
- [ ] Fixture tests demonstrate detection of the current RAG false-upsert and unbounded-agentic-diff cases, plus compliant examples to prevent false positives.
- [x] A reproducible CI evaluator maps declared operational modes, environment flags, and ADR claims to executable adapters or explicit dry-run semantics.
- [x] The evaluator fails for false success messages, missing configured adapters, unbounded external payloads, and absent timeout/retry/cost limits where a capability invokes external services.
- [x] Its topology pass evaluates every accepted topology's manifest, Native ruleset and OPA policy for parity, orphaned references, and presence baseline (deeper richness and efficiency heuristics are deferred to a tracked follow-up).
- [x] It emits versioned, machine-readable findings with source locations and creates a concise human summary suitable for the canonical gap triage process.
- [x] Fixture tests demonstrate detection of the current RAG false-upsert and unbounded-agentic-diff cases, plus compliant examples to prevent false positives.
- **Closure evidence:** Commit `861505e`. `.harness/scripts/ci/drift-audit.mjs` (`auditSource` → `DRIFT-FALSE-SUCCESS` for a success claim next to a commented/TODO external op, `DRIFT-UNBOUNDED-CALL` for external calls without budget/redaction/timeout/retry/fail-closed markers; `auditTopology` → `TOPO-MISSING-ARTIFACT`/`TOPO-ORPHAN-REF` for accepted topologies; versioned report + `summarize`). `15-operational-drift-audit.mjs` runs it over the numbered CI capability scripts and every accepted topology manifest and is auto-discovered by `ci-runner.mjs` (pre-commit + CI), failing closed on error findings — currently clean across 17 scripts. `drift-audit.test.mjs` — 10 `node:test` cases covering the historical RAG false-upsert and unbounded-agentic-diff plus compliant examples (no false positives) and topology parity/orphan/draft-skip. Scope note: criterion 3's measurable latency/I-O/token-reduction analysis is a presence+parity+orphan baseline; deeper efficiency heuristics are a tracked follow-up.

#### GT-148

Expand Down
Loading
Loading