Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 54 additions & 3 deletions models/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,29 @@ class MessageDict(TypedDict):
parent_tool_use_id: NotRequired[str | None]


class SessionMetadataDict(TypedDict, total=False):
class SessionMetadataDict(TypedDict):
"""Metadata accumulated while parsing a Claude Code JSONL session.

``parse_session()`` always produces every field below via
``_finalize_session_metadata()``; defaults are zeros, empty collections,
or ``None`` where noted. Mypy treats the full shape as required so parser
and finalize code cannot drop a field silently.

The three identity/timing keys are also enforced at the runtime validation
boundary (``validate_session_dict``) with stricter type checks; remaining
keys must be present but are only type-checked lightly there.
"""

session_id: str
models_used: list[str]
first_timestamp: str | None
last_timestamp: str | None
total_input_tokens: int
total_output_tokens: int
total_cache_read_tokens: int
total_cache_creation_tokens: int
total_tool_calls: int
tool_call_counts: dict[str, int]
first_timestamp: str | None
last_timestamp: str | None
version: str | None
cwd: str | None
git_branch: str | None
Expand All @@ -84,6 +96,45 @@ class SessionMetadataDict(TypedDict, total=False):
entry_counts: dict[str, int]


# Both key sets are read straight off SessionMetadataDict, so parity tests
# compare against the TypedDict itself instead of a hand-maintained copy.
# FIELD_NAMES holds every annotated key; REQUIRED_KEYS holds the keys the
# TypedDict reports as required.
SESSION_METADATA_FIELD_NAMES: frozenset[str] = frozenset(
    SessionMetadataDict.__annotations__.keys()
)
SESSION_METADATA_REQUIRED_KEYS: frozenset[str] = SessionMetadataDict.__required_keys__


class SessionMetadataBuilderDict(TypedDict):
"""Mutable metadata accumulator during JSONL parsing; sets are sorted at finalize."""

session_id: str
models_used: set[str]
first_timestamp: str | None
last_timestamp: str | None
total_input_tokens: int
total_output_tokens: int
total_cache_read_tokens: int
total_cache_creation_tokens: int
total_tool_calls: int
tool_call_counts: dict[str, int]
version: str | None
cwd: str | None
git_branch: str | None
permission_mode: str | None
compactions: int
total_ephemeral_5m_tokens: int
total_ephemeral_1h_tokens: int
service_tiers: set[str]
session_wall_time_seconds: float | None
compact_boundaries: list[dict[str, Any]]
api_errors: int
files_read: set[str]
files_written: set[str]
files_created: set[str]
bash_commands: list[Any]
web_fetches: list[Any]
sidechain_messages: int
stop_reasons: dict[str, int]
entry_counts: dict[str, int]


class SessionDict(TypedDict):
session_id: str
title: str
Expand Down
6 changes: 5 additions & 1 deletion tests/test_exclusion_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ def _session(
) -> dict:
return {
"title": title,
"metadata": {"models_used": models or []},
"metadata": {
"session_id": "stub",
"models_used": models or [],
"first_timestamp": None,
},
"messages": messages or [],
}

Expand Down
37 changes: 36 additions & 1 deletion tests/test_jsonl_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,20 @@

sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from models.session import SESSION_METADATA_FIELD_NAMES
from utils.jsonl_helpers import (
extract_images,
extract_text,
infer_title,
normalize_content,
strip_system_tags,
)
from utils.jsonl_parser import parse_session, quick_session_info
from utils.jsonl_parser import (
_finalize_session_metadata,
_new_session_metadata_builder,
parse_session,
quick_session_info,
)
from utils.tool_dispatch import _parse_tool_result

# ---------------------------------------------------------------------------
Expand All @@ -38,6 +44,18 @@ def _parse_entries(entries: list) -> dict:
os.unlink(path)


class TestSessionMetadataFinalize:
    """Key-parity checks between the metadata builder, finalize, and the field-name constant."""

    def test_builder_keys_match_field_names_constant(self):
        """A fresh builder exposes exactly the keys in SESSION_METADATA_FIELD_NAMES."""
        builder = _new_session_metadata_builder("parity-test")
        builder_keys = set(builder)
        assert builder_keys == SESSION_METADATA_FIELD_NAMES

    def test_finalize_preserves_all_builder_keys(self):
        """Finalizing a fresh builder neither drops nor adds keys."""
        builder = _new_session_metadata_builder("parity-test")
        finalized_keys = set(_finalize_session_metadata(builder))
        assert finalized_keys == SESSION_METADATA_FIELD_NAMES
        # Builder keys are read after finalize, as in the original assertion order.
        assert finalized_keys == set(builder)


# ---------------------------------------------------------------------------
# _parse_tool_result
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -720,6 +738,23 @@ def test_file_history_snapshot_timestamp(self):
finally:
os.unlink(path)

def test_file_history_snapshot_timestamp_falls_back_when_top_level_invalid(self):
    """Use ``snapshot.timestamp`` when the entry's top-level ``timestamp`` is the integer 1.

    With a single file-history-snapshot entry, both ``first_timestamp`` and
    ``last_timestamp`` must equal the nested snapshot timestamp.
    """
    expected = "2026-01-02T12:00:00Z"
    entry = {
        "type": "file-history-snapshot",
        "timestamp": 1,
        "snapshot": {"timestamp": expected},
    }
    path = _write_jsonl([entry])
    try:
        metadata = parse_session(path)["metadata"]
        assert metadata["first_timestamp"] == expected
        assert metadata["last_timestamp"] == expected
    finally:
        # Always remove the temporary JSONL file, even if an assertion fails.
        os.unlink(path)

def test_summary_entry_type_produces_no_message(self, caplog):
path = _write_jsonl(
[
Expand Down
48 changes: 46 additions & 2 deletions tests/test_jsonl_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,28 @@
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from models.errors import SessionValidationError
from utils.jsonl_parser import parse_session
from utils.jsonl_parser import (
_finalize_session_metadata,
_new_session_metadata_builder,
parse_session,
)
from utils.validation import validate_session_dict

FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures")


def _full_metadata(**overrides: Any) -> dict[str, Any]:
    """Return finalized metadata for session ``"abc123"`` as a plain dict.

    Args:
        **overrides: Builder keys to set before finalizing, so the values pass
            through ``_finalize_session_metadata`` like parsed ones would.

    Returns:
        A new ``dict`` copy of the finalized metadata.
    """
    builder = _new_session_metadata_builder("abc123")
    for key, value in overrides.items():
        builder[key] = value
    finalized = _finalize_session_metadata(builder)
    return dict(finalized)


def _valid_payload(**overrides: Any) -> dict[str, Any]:
    """Build a baseline session payload for validation tests.

    The baseline carries complete, finalized metadata from ``_full_metadata()``.
    The ``"metadata"`` key appears once: a repeated key in a dict literal is
    silently overwritten by the later entry, so an earlier stub value would be
    dead code.

    Args:
        **overrides: Top-level payload keys to replace or add on the baseline.

    Returns:
        A new payload dict with ``overrides`` applied.
    """
    base: dict[str, Any] = {
        "session_id": "abc123",
        "title": "Test Session",
        "messages": [{"role": "user", "text": "hello"}],
        "metadata": _full_metadata(),
    }
    base.update(overrides)
    return base
Expand Down Expand Up @@ -60,6 +70,40 @@ def test_metadata_not_dict(self):
validate_session_dict(_valid_payload(metadata="not-a-dict"))
assert exc_info.value.path == "metadata"

def test_metadata_missing_session_id(self):
    """Metadata without ``session_id`` is rejected at path ``metadata.session_id``."""
    incomplete = _full_metadata()
    incomplete.pop("session_id")
    with pytest.raises(SessionValidationError) as caught:
        validate_session_dict(_valid_payload(metadata=incomplete))
    assert caught.value.path == "metadata.session_id"

def test_metadata_missing_models_used(self):
    """Metadata without ``models_used`` is rejected at path ``metadata.models_used``."""
    incomplete = _full_metadata()
    incomplete.pop("models_used")
    with pytest.raises(SessionValidationError) as caught:
        validate_session_dict(_valid_payload(metadata=incomplete))
    assert caught.value.path == "metadata.models_used"

def test_metadata_missing_first_timestamp(self):
    """Metadata without ``first_timestamp`` is rejected at path ``metadata.first_timestamp``."""
    incomplete = _full_metadata()
    incomplete.pop("first_timestamp")
    with pytest.raises(SessionValidationError) as caught:
        validate_session_dict(_valid_payload(metadata=incomplete))
    assert caught.value.path == "metadata.first_timestamp"

def test_metadata_first_timestamp_null_allowed(self):
    """A ``first_timestamp`` of ``None`` is present-but-null and passes validation."""
    payload = _valid_payload(metadata=_full_metadata(first_timestamp=None))
    validated = validate_session_dict(payload)
    assert validated["metadata"]["first_timestamp"] is None

def test_metadata_models_used_requires_string_elements(self):
    """A non-string item in ``models_used`` is reported at its list index."""
    mixed = _full_metadata()
    # Set after finalize so the mixed-type list reaches the validator unchanged.
    mixed.update(models_used=["claude-sonnet", 42])
    with pytest.raises(SessionValidationError) as caught:
        validate_session_dict(_valid_payload(metadata=mixed))
    assert caught.value.path == "metadata.models_used[1]"

def test_message_not_dict(self):
with pytest.raises(SessionValidationError) as exc_info:
validate_session_dict(_valid_payload(messages=["not-a-dict"]))
Expand Down
2 changes: 1 addition & 1 deletion utils/export_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def finalize(self, manifest: list[dict[str, Any]]) -> None:

def _resolve_first_timestamp(meta: SessionMetadataDict, sess_info: SessionListItemDict) -> str:
"""Return first_timestamp from metadata, or synthesise from mtime without mutating *meta*."""
ts = (meta.get("first_timestamp") or "").strip()
ts = (meta["first_timestamp"] or "").strip()
if not ts:
ts = datetime.fromtimestamp(sess_info["modified"], tz=timezone.utc).strftime(
"%Y-%m-%dT%H:%M:%S"
Expand Down
Loading
Loading