Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 43 additions & 17 deletions src/copilot_usage/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"session_sort_key",
"shutdown_output_tokens",
"total_output_tokens",
"parse_token_int",
]

# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -208,6 +209,36 @@ class ToolRequest(BaseModel):
type: str = ""


def parse_token_int(raw: object) -> int | None:
"""Parse a raw ``outputTokens`` value into a positive ``int``, or ``None``.

Centralises the token-validation rules shared by
:meth:`AssistantMessageData._sanitize_non_numeric_tokens` (Pydantic
boundary) and :func:`~copilot_usage.parser._extract_output_tokens`
(parser fast path).

Rules:

- ``bool`` / ``str`` → ``None`` (invalid, not coerced)
- non-whole ``float`` → ``None``
- zero or negative ``int`` / ``float`` → ``None``
- positive whole-number ``float`` → coerced to ``int``
- positive ``int`` → returned as-is
- any other type → ``None``
"""
if isinstance(raw, (bool, str)):
return None
if isinstance(raw, float):
if not raw.is_integer():
return None
tokens = int(raw)
elif isinstance(raw, int):
tokens = raw
else:
return None
return tokens if tokens > 0 else None


class AssistantMessageData(BaseModel):
"""Payload for ``assistant.message`` events."""

Expand All @@ -221,25 +252,20 @@ class AssistantMessageData(BaseModel):
def _sanitize_non_numeric_tokens(cls, v: object) -> object:
"""Map non-positive, non-numeric, and non-whole-float token counts to ``0``.

JSON ``true``/``false``, numeric strings like ``"100"``,
non-positive numeric values, and non-integer floats (e.g. ``1.5``)
are not valid token counts. Returning ``0`` preserves parsing of
the rest of the assistant message payload while preventing these
values from being lax-coerced into token counts.

This aligns with ``_extract_output_tokens`` in the parser fast path:
both paths agree that only positive whole-number values contribute
tokens.
Delegates to :func:`parse_token_int` for the actual validation
logic. ``None`` (JSON ``null``) and types the helper recognises
(``bool``, ``str``, ``int``, ``float``) are mapped to ``0`` when
they don't represent a positive whole-number count, so that
Pydantic's downstream ``int`` coercion always succeeds. Unknown
types are passed through so that Pydantic can raise its own
``ValidationError``.
"""
if isinstance(v, (bool, str)):
return 0
if isinstance(v, float):
if not v.is_integer() or v <= 0:
return 0
return int(v)
if isinstance(v, int) and v <= 0:
if v is None:
return 0
return v
if not isinstance(v, (bool, str, int, float)):
return v
result = parse_token_int(v)
return result if result is not None else 0

reasoningText: str | None = None
reasoningOpaque: str | None = None
Expand Down
33 changes: 8 additions & 25 deletions src/copilot_usage/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
TokenUsage,
add_to_model_metrics,
copy_model_metrics,
parse_token_int,
session_sort_key,
)

Expand Down Expand Up @@ -275,35 +276,17 @@ def _read_config_model(config_path: Path | None = None) -> str | None:
def _extract_output_tokens(ev: SessionEvent) -> int | None:
"""Extract ``outputTokens`` from an ``assistant.message`` event via direct dict access.

Mirrors the domain intent of :class:`AssistantMessageData`'s
``_sanitize_non_numeric_tokens`` field-validator: only positive
whole-number values contribute tokens. Both paths agree on whether a
value contributes tokens; the representation differs for
non-contributing values — this function returns ``None``, whereas the
Pydantic model stores ``0``.

Specifically:

- ``bool`` / ``str`` → ``None`` (invalid, not coerced)
- zero or negative ``int`` / ``float`` → ``None`` (non-positive)
- whole-number positive ``float`` → coerced to ``int``
- non-whole ``float`` / other non-numeric → ``None``
Delegates to :func:`~copilot_usage.models.parse_token_int` for the
actual validation logic, ensuring that the fast path and the Pydantic
model path agree on whether a value contributes to token totals. For
contributing values, they also agree on the numeric token count;
non-contributing values may be represented differently here as ``None``
and in the model as ``0``.

Callers are responsible for verifying ``ev.type`` before calling; this
function reads only the ``outputTokens`` key from the event data dict.
"""
raw = ev.data.get("outputTokens")
if raw is None or isinstance(raw, (bool, str)):
return None
if isinstance(raw, float):
if not raw.is_integer():
return None
tokens = int(raw)
elif isinstance(raw, int):
tokens = raw
else:
return None
return tokens if tokens > 0 else None
return parse_token_int(ev.data.get("outputTokens"))


def _full_scandir_discovery(
Expand Down
44 changes: 44 additions & 0 deletions tests/copilot_usage/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4808,6 +4808,7 @@ def test_missing_key_no_pydantic(self) -> None:
# ---------------------------------------------------------------------------

_EQUIVALENCE_CASES: list[tuple[str, object]] = [
("none_null", None),
("bool_true", True),
("bool_false", False),
("str_numeric", "100"),
Expand Down Expand Up @@ -4850,6 +4851,49 @@ def test_positive_contribution_agreement(
)


_VALUE_EQUALITY_CASES: list[tuple[str, object, int]] = [
("positive_int_1", 1, 1),
("positive_int_42", 42, 42),
("whole_float_1234", 1234.0, 1234),
("whole_float_1", 1.0, 1),
]


class TestExtractOutputTokensValueEquality:
"""When both paths agree a value contributes, the numeric counts must be identical."""

@pytest.mark.parametrize(
("label", "raw_value", "expected"),
_VALUE_EQUALITY_CASES,
ids=[c[0] for c in _VALUE_EQUALITY_CASES],
)
def test_token_value_agreement(
self, label: str, raw_value: object, expected: int
) -> None:
"""For contributing values, fast-path and model-path produce the same integer count."""
_ = label
fast_path_result = _extract_output_tokens(_make_assistant_event(raw_value))
model = AssistantMessageData.model_validate({"outputTokens": raw_value})

assert fast_path_result is not None, (
f"Expected fast path to contribute for {raw_value!r}"
)
assert model.outputTokens > 0, f"Expected model to contribute for {raw_value!r}"
assert fast_path_result == expected, (
f"Fast-path count mismatch for {raw_value!r}: "
f"got {fast_path_result}, expected {expected}"
)
assert model.outputTokens == expected, (
f"Model count mismatch for {raw_value!r}: "
f"got {model.outputTokens}, expected {expected}"
)
assert fast_path_result == model.outputTokens, (
f"Value divergence for {raw_value!r}: "
f"_extract_output_tokens → {fast_path_result}, "
f"AssistantMessageData.outputTokens → {model.outputTokens}"
)


class TestExtractOutputTokensIntegration:
"""Integration tests exercising _extract_output_tokens through parse → build_session_summary."""

Expand Down
Loading