Skip to content

Commit 3123565

Browse files
committed
fix: stop lying to users when analysis parsing fails
When the analyzer LLM returns truncated JSON, parseAnalysisResponse was catching the error, printing to stderr where nobody reads it, and returning a zeroed-out result. Every caller then printed "Analysis complete" with a green checkmark. And then every load site that read the result back from disk would render all-zero scores as if the model genuinely scored zero on everything. Added a parseFailed flag to AnalysisResult so callers can distinguish "the model scored you low" from "we couldn't even read the response." All three analysis call sites now show a warning with retry instructions. Added a shared loadAnalysisResult helper that returns null for parseFailed results — all five disk load sites (results list, compare, challenge view, report, interactive browser) now use it instead of inline JSON.parse. Failed analyses show as "-" in tables and are excluded from reports, same as if no analysis was run. Closes #56
1 parent e03d64d commit 3123565

9 files changed

Lines changed: 45 additions & 40 deletions

File tree

src/commands/analyze.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,12 @@ export const analyzeCommand = new Command('analyze')
153153
});
154154

155155
saveAnalysisResult(id, analysis, getResultsDir());
156-
spinner.succeed(`Analysis complete for ${id}`);
156+
157+
if (analysis.parseFailed) {
158+
spinner.warn(`Analysis failed for ${id} — could not parse LLM response (truncated or malformed JSON)`);
159+
} else {
160+
spinner.succeed(`Analysis complete for ${id}`);
161+
}
157162

158163
// Print summary for single runs
159164
if (runIds.length === 1) {

src/commands/report.ts

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { colors, status } from '../lib/display.js';
55
import { getResultsDir } from '../lib/config.js';
66
import { calculateKSM, calculateEfficacyFromResults, getTokenEfficiency } from '../lib/scoring.js';
77
import { resolveAnalysisPath, resolveResultPath, InvalidRunIdError, ResultPathEscapeError } from '../lib/results-path.js';
8+
import { loadAnalysisResult } from '../lib/runner.js';
89
import {
910
copyToClipboard,
1011
printColorReport,
@@ -74,13 +75,8 @@ export const reportCommand = new Command('report')
7475
process.exit(1);
7576
}
7677

77-
// Load analysis if available
78-
let analysis: AnalysisResult | undefined;
79-
if (existsSync(analysisPath)) {
80-
try {
81-
analysis = JSON.parse(readFileSync(analysisPath, 'utf-8'));
82-
} catch {}
83-
}
78+
// Load analysis if available (null if parse failed)
79+
const analysis = loadAnalysisResult(analysisPath) ?? undefined;
8480

8581
const format = options.format.toLowerCase();
8682
let output = '';

src/commands/results.ts

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { colors, status, formatScore, formatTime, printBox, sectionHeader } from
66
import { calculateKSM, calculateEfficacyFromResults, getTokenEfficiency } from '../lib/scoring.js';
77
import { getResultsDir, getChallengesDir } from '../lib/config.js';
88
import { resolveAnalysisPath, resolveResultPath, InvalidRunIdError, ResultPathEscapeError } from '../lib/results-path.js';
9+
import { loadAnalysisResult } from '../lib/runner.js';
910
import type { RunResult, AnalysisResult, ChallengeConfig } from '../lib/types.js';
1011

1112
export const resultsCommand = new Command('results')
@@ -55,12 +56,7 @@ resultsCommand
5556
if (options.challenge && result.challenge !== options.challenge) continue;
5657

5758
const analysisPath = pathResolve(getResultsDir(), `${file.id}.analysis.json`);
58-
let analysis: AnalysisResult | null = null;
59-
if (existsSync(analysisPath)) {
60-
try {
61-
analysis = JSON.parse(readFileSync(analysisPath, 'utf-8'));
62-
} catch {}
63-
}
59+
const analysis = loadAnalysisResult(analysisPath);
6460

6561
loaded.push({ id: file.id, result, analysis });
6662
} catch {
@@ -314,13 +310,7 @@ resultsCommand
314310
const filePath = pathResolve(resultsDir, file);
315311
const result: RunResult = JSON.parse(readFileSync(filePath, 'utf-8'));
316312
const analysisPath = pathResolve(resultsDir, file.replace('.json', '.analysis.json'));
317-
let analysis: AnalysisResult | null = null;
318-
319-
if (existsSync(analysisPath)) {
320-
try {
321-
analysis = JSON.parse(readFileSync(analysisPath, 'utf-8'));
322-
} catch {}
323-
}
313+
const analysis = loadAnalysisResult(analysisPath);
324314

325315
allLoadedEntries.push({ result, analysis });
326316
} catch {}
@@ -512,13 +502,7 @@ function compareByChallengeId(challengeId: string): void {
512502
if (result.challenge !== challengeId) continue;
513503

514504
const analysisPath = pathResolve(resultsDir, file.replace('.json', '.analysis.json'));
515-
let analysis: AnalysisResult | null = null;
516-
517-
if (existsSync(analysisPath)) {
518-
try {
519-
analysis = JSON.parse(readFileSync(analysisPath, 'utf-8'));
520-
} catch {}
521-
}
505+
const analysis = loadAnalysisResult(analysisPath);
522506

523507
loadedEntries.push({ result, analysis });
524508
} catch {}

src/commands/run.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,12 @@ export const runCommand = new Command('run')
349349
});
350350

351351
const { jsonPath: analysisPath } = saveAnalysisResult(result.id, analysis, getResultsDir());
352-
spinnerAnalysis.succeed('Analysis complete');
352+
353+
if (analysis.parseFailed) {
354+
spinnerAnalysis.warn('Analysis failed — could not parse LLM response (truncated or malformed JSON). Retry with: oasis analyze ' + result.id);
355+
} else {
356+
spinnerAnalysis.succeed('Analysis complete');
357+
}
353358

354359
// Print analysis summary
355360
printAnalysisSummary(analysis);

src/interactive/helpers.ts

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { resolve } from 'path';
33
import { getChallengesDir, getResultsDir } from '../lib/config.js';
44
import { calculateKSM, calculateEfficacyFromResults, getTokenEfficiency } from '../lib/scoring.js';
55
import { fetchRegistryIndex, fetchChallengeConfig } from '../lib/registry.js';
6+
import { loadAnalysisResult } from '../lib/runner.js';
67
import type { RegistryEntry } from '../lib/registry.js';
78
import type { ChallengeConfig, RunResult, AnalysisResult } from '../lib/types.js';
89

@@ -111,13 +112,7 @@ export function loadRecentResults(limit = 20): LoadedResult[] {
111112
try {
112113
const result: RunResult = JSON.parse(readFileSync(file.path, 'utf-8'));
113114
const analysisPath = resolve(dir, `${file.id}.analysis.json`);
114-
let analysis: AnalysisResult | null = null;
115-
116-
if (existsSync(analysisPath)) {
117-
try {
118-
analysis = JSON.parse(readFileSync(analysisPath, 'utf-8'));
119-
} catch { /* skip */ }
120-
}
115+
const analysis = loadAnalysisResult(analysisPath);
121116

122117
allEntries.push({ id: file.id, result, analysis });
123118
} catch { /* skip malformed */ }

src/interactive/run-flow.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,12 @@ export async function runBenchmarkFlow(): Promise<void> {
552552

553553
runAnalysisResult = analysis;
554554
const { jsonPath: analysisPath } = saveAnalysisResult(result.id, analysis, getResultsDir());
555-
spinnerAnalysis.succeed('Analysis complete');
555+
556+
if (analysis.parseFailed) {
557+
spinnerAnalysis.warn('Analysis failed — could not parse LLM response (truncated or malformed JSON). Retry with: oasis analyze ' + result.id);
558+
} else {
559+
spinnerAnalysis.succeed('Analysis complete');
560+
}
556561

557562
printAnalysisSummary(analysis);
558563

src/lib/analyzer.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,12 +334,11 @@ async function parseAnalysisResponse(
334334

335335
return analysisResult;
336336
} catch (error) {
337-
console.error('Failed to parse analysis response:', error);
338-
339337
return {
340338
runId,
341339
analyzedAt: new Date(),
342340
analyzerModel: DEFAULT_ANALYZER_MODEL,
341+
parseFailed: true,
343342
attackChain: { phases: [], techniques: [], killChainCoverage: [] },
344343
narrative: { summary: 'Analysis parsing failed', detailed: `Error: ${error}`, keyFindings: [] },
345344
behavior: { approach: 'exploratory', approachDescription: 'Unable to determine', strengths: [], inefficiencies: [], decisionQuality: 0 },

src/lib/runner.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import Anthropic from '@anthropic-ai/sdk';
44
import OpenAI from 'openai';
55
import { execFileSync } from 'child_process';
66
import chalk from 'chalk';
7-
import { writeFileSync, mkdirSync, existsSync } from 'fs';
7+
import { writeFileSync, readFileSync, mkdirSync, existsSync } from 'fs';
88
import { randomUUID } from 'crypto';
99
import { resolve } from 'path';
1010
import { wasSuccessful, classifyToAttack, classifyCommand } from './classifier.js';
@@ -797,3 +797,18 @@ export function saveAnalysisResult(
797797

798798
return { jsonPath, txtPath };
799799
}
800+
801+
/**
 * Load a previously saved analysis result from disk.
 *
 * Returns null whenever there is no usable analysis, so callers can treat
 * every failure mode the same way as "no analysis was run":
 * - the file does not exist,
 * - the file cannot be read or is not valid JSON,
 * - the JSON is not a plain object (null, a primitive, or an array),
 * - the stored analysis is flagged `parseFailed`, i.e. it is the zeroed
 *   placeholder written when the analyzer response could not be parsed.
 *
 * @param analysisPath Path to the `.analysis.json` file.
 * @returns The stored analysis, or null if none is usable.
 */
export function loadAnalysisResult(analysisPath: string): AnalysisResult | null {
  if (!existsSync(analysisPath)) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(analysisPath, 'utf-8'));
  } catch {
    // Unreadable file or malformed JSON: same outcome as a missing analysis.
    return null;
  }

  // JSON.parse can legally produce null, primitives, or arrays. None of those
  // is an analysis, so reject them instead of handing them to callers typed
  // as AnalysisResult.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return null;
  }

  // Shape beyond "is an object" is not validated here; the cast reflects what
  // saveAnalysisResult is expected to have written.
  const analysis = parsed as AnalysisResult;

  // A parse-failed placeholder carries all-zero scores; hide it so it is not
  // rendered as if the model genuinely scored zero.
  if (analysis.parseFailed) return null;

  return analysis;
}

src/lib/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ export interface AnalysisResult {
142142
scoreBreakdown: string;
143143
};
144144
rubricScore?: RubricScore;
145+
parseFailed?: boolean;
145146
}
146147

147148
// =============================================================================

0 commit comments

Comments
 (0)