fix: stop lying to users when analysis parsing fails

Treelovah · Treelovah · commit 31235658b01e · 2026-02-26T17:02:24.000-07:00
When the analyzer LLM returns truncated JSON, parseAnalysisResponse was catching the error, printing to stderr where nobody reads it, and returning a zeroed-out result. Every caller then printed "Analysis complete" with a green checkmark. And then every load site that read the result back from disk would render all-zero scores as if the model genuinely scored zero on everything. Added a parseFailed flag to AnalysisResult so callers can distinguish "the model scored you low" from "we couldn't even read the response." All three analysis call sites now show a warning with retry instructions. Added a shared loadAnalysisResult helper that returns null for parseFailed results — all five disk load sites (results list, compare, challenge view, report, interactive browser) now use it instead of inline JSON.parse. Failed analyses show as "-" in tables and are excluded from reports, same as if no analysis was run. Closes #56
diff --git a/src/commands/analyze.ts b/src/commands/analyze.ts
@@ -153,7 +153,12 @@ export const analyzeCommand = new Command('analyze')
         });
 
         saveAnalysisResult(id, analysis, getResultsDir());
-        spinner.succeed(`Analysis complete for ${id}`);
+
+        if (analysis.parseFailed) {
+          spinner.warn(`Analysis failed for ${id} — could not parse LLM response (truncated or malformed JSON)`);
+        } else {
+          spinner.succeed(`Analysis complete for ${id}`);
+        }
 
         // Print summary for single runs
         if (runIds.length === 1) {
diff --git a/src/commands/report.ts b/src/commands/report.ts
@@ -5,6 +5,7 @@ import { colors, status } from '../lib/display.js';
 import { getResultsDir } from '../lib/config.js';
 import { calculateKSM, calculateEfficacyFromResults, getTokenEfficiency } from '../lib/scoring.js';
 import { resolveAnalysisPath, resolveResultPath, InvalidRunIdError, ResultPathEscapeError } from '../lib/results-path.js';
+import { loadAnalysisResult } from '../lib/runner.js';
 import {
   copyToClipboard,
   printColorReport,
@@ -74,13 +75,8 @@ export const reportCommand = new Command('report')
       process.exit(1);
     }
 
-    // Load analysis if available
-    let analysis: AnalysisResult | undefined;
-    if (existsSync(analysisPath)) {
-      try {
-        analysis = JSON.parse(readFileSync(analysisPath, 'utf-8'));
-      } catch {}
-    }
+    // Load analysis if available (null if parse failed)
+    const analysis = loadAnalysisResult(analysisPath) ?? undefined;
 
     const format = options.format.toLowerCase();
     let output = '';
diff --git a/src/commands/results.ts b/src/commands/results.ts
@@ -6,6 +6,7 @@ import { colors, status, formatScore, formatTime, printBox, sectionHeader } from
 import { calculateKSM, calculateEfficacyFromResults, getTokenEfficiency } from '../lib/scoring.js';
 import { getResultsDir, getChallengesDir } from '../lib/config.js';
 import { resolveAnalysisPath, resolveResultPath, InvalidRunIdError, ResultPathEscapeError } from '../lib/results-path.js';
+import { loadAnalysisResult } from '../lib/runner.js';
 import type { RunResult, AnalysisResult, ChallengeConfig } from '../lib/types.js';
 
 export const resultsCommand = new Command('results')
@@ -55,12 +56,7 @@ resultsCommand
         if (options.challenge && result.challenge !== options.challenge) continue;
 
         const analysisPath = pathResolve(getResultsDir(), `${file.id}.analysis.json`);
-        let analysis: AnalysisResult | null = null;
-        if (existsSync(analysisPath)) {
-          try {
-            analysis = JSON.parse(readFileSync(analysisPath, 'utf-8'));
-          } catch {}
-        }
+        const analysis = loadAnalysisResult(analysisPath);
 
         loaded.push({ id: file.id, result, analysis });
       } catch {
@@ -314,13 +310,7 @@ resultsCommand
         const filePath = pathResolve(resultsDir, file);
         const result: RunResult = JSON.parse(readFileSync(filePath, 'utf-8'));
         const analysisPath = pathResolve(resultsDir, file.replace('.json', '.analysis.json'));
-        let analysis: AnalysisResult | null = null;
-
-        if (existsSync(analysisPath)) {
-          try {
-            analysis = JSON.parse(readFileSync(analysisPath, 'utf-8'));
-          } catch {}
-        }
+        const analysis = loadAnalysisResult(analysisPath);
 
         allLoadedEntries.push({ result, analysis });
       } catch {}
@@ -512,13 +502,7 @@ function compareByChallengeId(challengeId: string): void {
       if (result.challenge !== challengeId) continue;
 
       const analysisPath = pathResolve(resultsDir, file.replace('.json', '.analysis.json'));
-      let analysis: AnalysisResult | null = null;
-
-      if (existsSync(analysisPath)) {
-        try {
-          analysis = JSON.parse(readFileSync(analysisPath, 'utf-8'));
-        } catch {}
-      }
+      const analysis = loadAnalysisResult(analysisPath);
 
       loadedEntries.push({ result, analysis });
     } catch {}
diff --git a/src/commands/run.ts b/src/commands/run.ts
@@ -349,7 +349,12 @@ export const runCommand = new Command('run')
           });
 
           const { jsonPath: analysisPath } = saveAnalysisResult(result.id, analysis, getResultsDir());
-          spinnerAnalysis.succeed('Analysis complete');
+
+          if (analysis.parseFailed) {
+            spinnerAnalysis.warn('Analysis failed — could not parse LLM response (truncated or malformed JSON). Retry with: oasis analyze ' + result.id);
+          } else {
+            spinnerAnalysis.succeed('Analysis complete');
+          }
 
           // Print analysis summary
           printAnalysisSummary(analysis);
diff --git a/src/interactive/helpers.ts b/src/interactive/helpers.ts
@@ -3,6 +3,7 @@ import { resolve } from 'path';
 import { getChallengesDir, getResultsDir } from '../lib/config.js';
 import { calculateKSM, calculateEfficacyFromResults, getTokenEfficiency } from '../lib/scoring.js';
 import { fetchRegistryIndex, fetchChallengeConfig } from '../lib/registry.js';
+import { loadAnalysisResult } from '../lib/runner.js';
 import type { RegistryEntry } from '../lib/registry.js';
 import type { ChallengeConfig, RunResult, AnalysisResult } from '../lib/types.js';
 
@@ -111,13 +112,7 @@ export function loadRecentResults(limit = 20): LoadedResult[] {
     try {
       const result: RunResult = JSON.parse(readFileSync(file.path, 'utf-8'));
       const analysisPath = resolve(dir, `${file.id}.analysis.json`);
-      let analysis: AnalysisResult | null = null;
-
-      if (existsSync(analysisPath)) {
-        try {
-          analysis = JSON.parse(readFileSync(analysisPath, 'utf-8'));
-        } catch { /* skip */ }
-      }
+      const analysis = loadAnalysisResult(analysisPath);
 
       allEntries.push({ id: file.id, result, analysis });
     } catch { /* skip malformed */ }
diff --git a/src/interactive/run-flow.ts b/src/interactive/run-flow.ts
@@ -552,7 +552,12 @@ export async function runBenchmarkFlow(): Promise<void> {
 
           runAnalysisResult = analysis;
           const { jsonPath: analysisPath } = saveAnalysisResult(result.id, analysis, getResultsDir());
-          spinnerAnalysis.succeed('Analysis complete');
+
+          if (analysis.parseFailed) {
+            spinnerAnalysis.warn('Analysis failed — could not parse LLM response (truncated or malformed JSON). Retry with: oasis analyze ' + result.id);
+          } else {
+            spinnerAnalysis.succeed('Analysis complete');
+          }
 
           printAnalysisSummary(analysis);
 
diff --git a/src/lib/analyzer.ts b/src/lib/analyzer.ts
@@ -334,12 +334,11 @@ async function parseAnalysisResponse(
 
     return analysisResult;
   } catch (error) {
-    console.error('Failed to parse analysis response:', error);
-
     return {
       runId,
       analyzedAt: new Date(),
       analyzerModel: DEFAULT_ANALYZER_MODEL,
+      parseFailed: true,
       attackChain: { phases: [], techniques: [], killChainCoverage: [] },
       narrative: { summary: 'Analysis parsing failed', detailed: `Error: ${error}`, keyFindings: [] },
       behavior: { approach: 'exploratory', approachDescription: 'Unable to determine', strengths: [], inefficiencies: [], decisionQuality: 0 },
diff --git a/src/lib/runner.ts b/src/lib/runner.ts
@@ -4,7 +4,7 @@ import Anthropic from '@anthropic-ai/sdk';
 import OpenAI from 'openai';
 import { execFileSync } from 'child_process';
 import chalk from 'chalk';
-import { writeFileSync, mkdirSync, existsSync } from 'fs';
+import { writeFileSync, readFileSync, mkdirSync, existsSync } from 'fs';
 import { randomUUID } from 'crypto';
 import { resolve } from 'path';
 import { wasSuccessful, classifyToAttack, classifyCommand } from './classifier.js';
@@ -797,3 +797,18 @@ export function saveAnalysisResult(
 
   return { jsonPath, txtPath };
 }
+
+/**
+ * Load an analysis result from disk. Returns null if the file doesn't exist,
+ * can't be parsed, or the analysis itself failed (parseFailed).
+ */
+export function loadAnalysisResult(analysisPath: string): AnalysisResult | null {
+  if (!existsSync(analysisPath)) return null;
+  try {
+    const analysis: AnalysisResult = JSON.parse(readFileSync(analysisPath, 'utf-8'));
+    if (analysis.parseFailed) return null;
+    return analysis;
+  } catch {
+    return null;
+  }
+}
diff --git a/src/lib/types.ts b/src/lib/types.ts
@@ -142,6 +142,7 @@ export interface AnalysisResult {
     scoreBreakdown: string;
   };
   rubricScore?: RubricScore;
+  parseFailed?: boolean;
 }
 
 // =============================================================================

Original file line number	Diff line number	Diff line change
`@@ -142,6 +142,7 @@ export interface AnalysisResult {`
`142`	`142`	`scoreBreakdown: string;`
`143`	`143`	`};`
`144`	`144`	`rubricScore?: RubricScore;`
	`145`	`+ parseFailed?: boolean;`
`145`	`146`	`}`
`146`	`147`
`147`	`148`	`// =============================================================================`