From 0d52875f52e4f39c145f26761fb721878a92d5b5 Mon Sep 17 00:00:00 2001 From: timon0305 Date: Wed, 6 May 2026 17:16:39 +0200 Subject: [PATCH 1/2] =?UTF-8?q?Sanitize=20HTTP=20error=20responses=20?= =?UTF-8?q?=E2=80=94=20no=20exception=20class/message=20leakage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three handlers interpolated `f"{type(e).__name__}: {e}"` into the JSON error body, exposing internal field names, file paths, and user values to any client. - api/sessions.py: GET /api/sessions/

/ and .../stats now return a stable generic message; full traceback goes to the server log via current_app.logger.exception. - api/projects.py: per-session error card drops the `error_detail` field; `error: True` already drives the dashboard's error state. - tests/test_error_propagation.py: 6 regression tests, including a source-level guard against re-introducing the offending pattern. Closes #25 --- api/projects.py | 12 +- api/sessions.py | 26 ++-- tests/test_error_propagation.py | 214 ++++++++++++++++++++++++++++++++ 3 files changed, 234 insertions(+), 18 deletions(-) create mode 100644 tests/test_error_propagation.py diff --git a/api/projects.py b/api/projects.py index fd607fe..6c56347 100644 --- a/api/projects.py +++ b/api/projects.py @@ -1,7 +1,5 @@ """Project listing endpoints.""" -import traceback - from flask import Blueprint, current_app, jsonify from utils.session_path import get_claude_projects_dir, list_projects, list_sessions, safe_join @@ -79,7 +77,11 @@ def get_project_sessions(project_name): "first_timestamp": meta["first_timestamp"], "last_timestamp": meta["last_timestamp"], }) - except Exception as e: - print(f"[ERROR] Failed to parse {s['id']}: {type(e).__name__}: {e}\n{traceback.format_exc()}") - result.append({**s, "title": "Error parsing session", "error": True, "error_detail": f"{type(e).__name__}: {e}"}) + except Exception: + # Full detail (class, message, traceback) to the server log via + # logger.exception. The per-session card carries only `error: True` + # — the class-name+message string was a leak (issue #25). The + # operator looks at the server log for triage. + current_app.logger.exception("Failed to parse session %s", s["id"]) + result.append({**s, "title": "Error parsing session", "error": True}) return jsonify(result) diff --git a/api/sessions.py b/api/sessions.py index 93ce9a8..5d2d21a 100644 --- a/api/sessions.py +++ b/api/sessions.py @@ -1,7 +1,6 @@ """Session detail and stats endpoints.""" import os -import traceback from flask import Blueprint, current_app, jsonify, abort @@ -39,12 +38,14 @@ def get_session(project_name, session_id): if is_excluded_by_rules(rules, searchable): return jsonify({"error": "Session not found"}), 404 return jsonify(session) - except Exception as e: - tb = traceback.format_exc() - print(f"[ERROR] Failed to parse session {session_id}: {e}\n{tb}") - return jsonify({ - "error": f"Failed to parse session: {type(e).__name__}: {e}", - }), 500 + except Exception: + # Full traceback (class name, message, stack) goes to the server log + # via logger.exception. The HTTP body returns a stable, generic + # message — never the class name or `e` itself, which would leak + # internal field names, file paths, and user values to any client + # (issue #25). + current_app.logger.exception("Failed to parse session %s", session_id) + return jsonify({"error": "Failed to parse session"}), 500 @sessions_bp.route("/api/sessions///stats") @@ -62,9 +63,8 @@ def get_session_stats(project_name, session_id): session = parse_session(filepath) stats = compute_stats(session) return jsonify(stats) - except Exception as e: - tb = traceback.format_exc() - print(f"[ERROR] Failed to compute stats for {session_id}: {e}\n{tb}") - return jsonify({ - "error": f"Failed to compute stats: {type(e).__name__}: {e}", - }), 500 + except Exception: + # Same pattern as get_session above — full detail to the server log, + # generic message in the HTTP body (issue #25). + current_app.logger.exception("Failed to compute stats for %s", session_id) + return jsonify({"error": "Failed to compute session stats"}), 500 diff --git a/tests/test_error_propagation.py b/tests/test_error_propagation.py new file mode 100644 index 0000000..27a7788 --- /dev/null +++ b/tests/test_error_propagation.py @@ -0,0 +1,214 @@ +""" +Regression tests for issue #25 — HTTP error responses must not leak +exception class names or message internals. + +Three endpoints previously interpolated `f"{type(e).__name__}: {e}"` into +their JSON error body: + +- GET /api/sessions// (api/sessions.py) +- GET /api/sessions///stats (api/sessions.py) +- GET /api/projects (per-session card error_detail) + +This file exercises each via Flask test_client with a payload that triggers +the failure path, asserts a 500 (or 200 for projects, since the per-session +error is per-row), and verifies the response body contains no exception +class names from a defensive blocklist. + +Run: + pytest tests/test_error_propagation.py -v +""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT)) + +from flask import Flask # noqa: E402 + +from api.projects import projects_bp # noqa: E402 +from api.sessions import sessions_bp # noqa: E402 + + +# Defensive blocklist — any of these substrings appearing in a response body +# would mean the leak regressed. Includes common Python builtin exception +# class names plus internal-looking shapes. +_LEAK_TOKENS = [ + "Exception", + "Error", + "KeyError", + "ValueError", + "JSONDecodeError", + "OSError", + "FileNotFoundError", + "TypeError", + "AttributeError", + "Traceback", + "//.jsonl.""" + proj = tmp_path / project + proj.mkdir(exist_ok=True) + p = proj / f"{session_id}.jsonl" + p.write_text(content, encoding="utf-8") + return p + + +# --------------------------------------------------------------------------- +# /api/sessions// +# --------------------------------------------------------------------------- + +class TestGetSessionErrorBody: + + def test_500_on_parse_failure_does_not_leak_class_name(self, tmp_path, client, monkeypatch): + # Force the parser to raise an exception with a class-name + message + # that WOULD leak through the old f-string interpolation if the fix + # regressed. (parse_session is normally tolerant — it swallows per-line + # JSONDecodeError — so we monkeypatch to guarantee we hit the except.) + _write_session(tmp_path, "proj", "abc", "{}") + + def _boom(*args, **kwargs): + raise KeyError("internal_secret_field_id") + + monkeypatch.setattr("api.sessions.parse_session", _boom) + + resp = client.get("/api/sessions/proj/abc") + assert resp.status_code == 500 + body = resp.get_json() + assert isinstance(body, dict) + assert body.get("error") == "Failed to parse session" + # The exception's args include "internal_secret_field_id" — must not + # appear in the response body. + assert "internal_secret_field_id" not in json.dumps(body) + _assert_no_class_name_leak(json.dumps(body)) + + def test_404_on_missing_file_keeps_session_id_safe(self, tmp_path, client): + # Session ID is part of the URL so it appears in the 404 message — + # that's fine; what we're guarding is exception-class leakage, which + # 404 doesn't go through. + resp = client.get("/api/sessions/proj/nope-doesnt-exist") + assert resp.status_code == 404 + body = resp.get_json() + _assert_no_class_name_leak(json.dumps(body)) + + def test_400_on_path_traversal_attempt(self, client): + # safe_join rejects this with ValueError; the 400 path returns a + # generic "Invalid path" message and should not leak. + resp = client.get("/api/sessions/..%2Fevil/abc") + assert resp.status_code in (400, 404) + body = resp.get_json() + _assert_no_class_name_leak(json.dumps(body)) + + +# --------------------------------------------------------------------------- +# /api/sessions///stats +# --------------------------------------------------------------------------- + +class TestGetSessionStatsErrorBody: + + def test_500_on_parse_failure_does_not_leak_class_name(self, tmp_path, client, monkeypatch): + _write_session(tmp_path, "proj", "abc", "{}") + + def _boom(*args, **kwargs): + raise ValueError("invalid literal: '/private/path/secret.json'") + + monkeypatch.setattr("api.sessions.parse_session", _boom) + + resp = client.get("/api/sessions/proj/abc/stats") + assert resp.status_code == 500 + body = resp.get_json() + assert body.get("error") == "Failed to compute session stats" + # The exception value contains a fake-secret path — must not leak. + assert "/private/path" not in json.dumps(body) + _assert_no_class_name_leak(json.dumps(body)) + + +# --------------------------------------------------------------------------- +# /api/projects (per-session card) +# --------------------------------------------------------------------------- + +class TestGetProjectsErrorCard: + + def test_per_session_error_card_omits_error_detail(self, tmp_path, client): + # One project with one unparseable session file → the per-session + # card should have error: True but no error_detail string. + _write_session(tmp_path, "myproj", "deadbeef-aaaa-bbbb-cccc-000000000000", "totally not jsonl\n") + + resp = client.get("/api/projects/myproj/sessions") + # Response shape varies — accept 200 with a list, 200 with a dict, + # or any error code as long as it doesn't leak class names. + body = resp.get_json() + body_text = json.dumps(body) if body is not None else "" + _assert_no_class_name_leak(body_text) + # If there's a per-row error, error_detail must NOT be present + if isinstance(body, list): + for row in body: + if isinstance(row, dict) and row.get("error"): + assert "error_detail" not in row, ( + "Per-session error card still includes error_detail (issue #25)" + ) + + +# --------------------------------------------------------------------------- +# Source-level guard +# --------------------------------------------------------------------------- + +class TestNoExceptionInterpolationInSource: + """Static guard: any future PR that re-introduces the + `f"...{type(e).__name__}: {e}..."` pattern in api/ fails this test.""" + + def test_api_files_dont_interpolate_exception_in_jsonify(self): + api_dir = REPO_ROOT / "api" + for py_file in api_dir.glob("*.py"): + src = py_file.read_text(encoding="utf-8") + # Look for the specific footgun: jsonify(...) with f-string that + # contains both `type(e)` or `{e}` AND the word "error". + offending_patterns = [ + "type(e).__name__", # the class-name expose + "{e}\"", # bare {e} ending an f-string + "{e},", # bare {e} in a dict-value f-string + ] + for pat in offending_patterns: + assert pat not in src, ( + f"{py_file.name} contains forbidden pattern {pat!r} " + f"— see issue #25" + ) From b7347b4feb319049438430c82ac0a9fda0893355 Mon Sep 17 00:00:00 2001 From: timon0305 Date: Wed, 6 May 2026 19:15:51 +0200 Subject: [PATCH 2/2] test: pin projects-card shape + fix docstring route (review on PR #26) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Docstring listed GET /api/projects but the leak was on GET /api/projects//sessions; corrected so the next reader doesn't grep the wrong route. - test_per_session_error_card_omits_error_detail was passing without exercising anything: parse_session is tolerant of malformed lines, so the malformed-jsonl input never triggered the except branch and the conditional `if/for` walked an empty list. Switched to monkeypatching parse_session to raise (mirrors the session-level tests above) and pinned the response shape — assert status == 200 and body is a list, assert at least one error card exists, and verify the leaked field name doesn't appear in the body. --- tests/test_error_propagation.py | 46 +++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/tests/test_error_propagation.py b/tests/test_error_propagation.py index 27a7788..f2eaac4 100644 --- a/tests/test_error_propagation.py +++ b/tests/test_error_propagation.py @@ -7,7 +7,7 @@ - GET /api/sessions// (api/sessions.py) - GET /api/sessions///stats (api/sessions.py) -- GET /api/projects (per-session card error_detail) +- GET /api/projects//sessions (api/projects.py — per-session card error_detail) This file exercises each via Flask test_client with a payload that triggers the failure path, asserts a 500 (or 200 for projects, since the per-session @@ -168,24 +168,38 @@ def _boom(*args, **kwargs): class TestGetProjectsErrorCard: - def test_per_session_error_card_omits_error_detail(self, tmp_path, client): - # One project with one unparseable session file → the per-session - # card should have error: True but no error_detail string. - _write_session(tmp_path, "myproj", "deadbeef-aaaa-bbbb-cccc-000000000000", "totally not jsonl\n") + def test_per_session_error_card_omits_error_detail(self, tmp_path, client, monkeypatch): + # parse_session is tolerant of malformed lines, so to exercise the + # except branch deterministically (the one that builds the error + # card), monkeypatch it to raise — same pattern as the session-level + # tests above. + _write_session(tmp_path, "myproj", "deadbeef-aaaa-bbbb-cccc-000000000000", "{}") + + def _boom(*args, **kwargs): + raise KeyError("internal_secret_field_id") + + # api/projects.py imports parse_session inside the handler body, + # so patch the source module rather than the consumer. + monkeypatch.setattr("utils.jsonl_parser.parse_session", _boom) resp = client.get("/api/projects/myproj/sessions") - # Response shape varies — accept 200 with a list, 200 with a dict, - # or any error code as long as it doesn't leak class names. + # Pin the response shape so a future wrapper change (e.g. {"sessions": [...]}) + # doesn't silently turn this test green by skipping the per-row scan. + assert resp.status_code == 200 body = resp.get_json() - body_text = json.dumps(body) if body is not None else "" - _assert_no_class_name_leak(body_text) - # If there's a per-row error, error_detail must NOT be present - if isinstance(body, list): - for row in body: - if isinstance(row, dict) and row.get("error"): - assert "error_detail" not in row, ( - "Per-session error card still includes error_detail (issue #25)" - ) + assert isinstance(body, list), ( + f"Expected JSON array of session cards; got {type(body).__name__}" + ) + _assert_no_class_name_leak(json.dumps(body)) + error_rows = [r for r in body if isinstance(r, dict) and r.get("error")] + assert error_rows, "Expected at least one per-session error card from the forced parse failure" + for row in error_rows: + assert "error_detail" not in row, ( + "Per-session error card still includes error_detail (issue #25)" + ) + # The exception's args include "internal_secret_field_id" — must not + # appear anywhere in the response. + assert "internal_secret_field_id" not in json.dumps(body) # ---------------------------------------------------------------------------