From ce7210005306b66edabeeb31ca335ace9f09e19d Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Wed, 25 Feb 2026 12:05:46 +0000 Subject: [PATCH 1/5] feat: add $ai_tokens_source property to detect token value overrides When users pass token properties (e.g. $ai_input_tokens) via posthog_properties, these override the SDK-computed values. This new $ai_tokens_source property ("sdk" or "passthrough") lets us distinguish whether token values came from the SDK or were externally injected, which is critical for diagnosing cost calculation discrepancies. --- posthog/ai/utils.py | 61 +++++++++++++++++---- posthog/test/ai/anthropic/test_anthropic.py | 19 +++++++ posthog/test/ai/test_tokens_source.py | 58 ++++++++++++++++++++ 3 files changed, 128 insertions(+), 10 deletions(-) create mode 100644 posthog/test/ai/test_tokens_source.py diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py index 3c4afe05..d9177686 100644 --- a/posthog/ai/utils.py +++ b/posthog/ai/utils.py @@ -13,6 +13,28 @@ from posthog.client import Client as PostHogClient +_TOKEN_PROPERTY_KEYS = frozenset( + { + "$ai_input_tokens", + "$ai_output_tokens", + "$ai_cache_read_input_tokens", + "$ai_cache_creation_input_tokens", + "$ai_total_tokens", + "$ai_reasoning_tokens", + } +) + + +def _get_tokens_source( + sdk_tags: Dict[str, Any], posthog_properties: Optional[Dict[str, Any]] +) -> str: + if posthog_properties and any( + key in posthog_properties for key in _TOKEN_PROPERTY_KEYS + ): + return "passthrough" + return "sdk" + + def serialize_raw_usage(raw_usage: Any) -> Optional[Dict[str, Any]]: """ Convert raw provider usage objects to JSON-serializable dicts. @@ -413,14 +435,19 @@ def call_llm_and_track_usage( # send the event to posthog if hasattr(ph_client, "capture") and callable(ph_client.capture): + sdk_tags = get_tags() + merged_properties = { + **sdk_tags, + **(posthog_properties or {}), + **(error_params or {}), + } + merged_properties["$ai_tokens_source"] = _get_tokens_source( + sdk_tags, posthog_properties + ) ph_client.capture( distinct_id=posthog_distinct_id or posthog_trace_id, event="$ai_generation", - properties={ - **get_tags(), - **(posthog_properties or {}), - **(error_params or {}), - }, + properties=merged_properties, groups=posthog_groups, ) @@ -543,14 +570,19 @@ async def call_llm_and_track_usage_async( # send the event to posthog if hasattr(ph_client, "capture") and callable(ph_client.capture): + sdk_tags = get_tags() + merged_properties = { + **sdk_tags, + **(posthog_properties or {}), + **(error_params or {}), + } + merged_properties["$ai_tokens_source"] = _get_tokens_source( + sdk_tags, posthog_properties + ) ph_client.capture( distinct_id=posthog_distinct_id or posthog_trace_id, event="$ai_generation", - properties={ - **get_tags(), - **(posthog_properties or {}), - **(error_params or {}), - }, + properties=merged_properties, groups=posthog_groups, ) @@ -627,6 +659,15 @@ def capture_streaming_event( **(event_data.get("properties") or {}), } + # Determine token source: SDK-computed vs externally overridden + sdk_token_tags = { + "$ai_input_tokens": event_data["usage_stats"].get("input_tokens", 0), + "$ai_output_tokens": event_data["usage_stats"].get("output_tokens", 0), + } + event_properties["$ai_tokens_source"] = _get_tokens_source( + sdk_token_tags, event_data.get("properties") + ) + # Extract and add tools based on provider available_tools = extract_available_tool_calls( event_data["provider"], diff --git a/posthog/test/ai/anthropic/test_anthropic.py b/posthog/test/ai/anthropic/test_anthropic.py index 384f4761..bdf71c83 100644 --- a/posthog/test/ai/anthropic/test_anthropic.py +++ b/posthog/test/ai/anthropic/test_anthropic.py @@ -306,6 +306,7 @@ def test_basic_completion(mock_client, mock_anthropic_response): assert props["$ai_output_tokens"] == 10 assert props["$ai_http_status"] == 200 assert props["foo"] == "bar" + assert props["$ai_tokens_source"] == "sdk" assert isinstance(props["$ai_latency"], float) # Verify raw usage metadata is passed for backend processing assert "$ai_usage" in props @@ -318,6 +319,23 @@ def test_basic_completion(mock_client, mock_anthropic_response): assert "output_tokens" in props["$ai_usage"] +def test_tokens_source_passthrough(mock_client, mock_anthropic_response): + with patch( + "anthropic.resources.Messages.create", return_value=mock_anthropic_response + ): + client = Anthropic(api_key="test-key", posthog_client=mock_client) + client.messages.create( + model="claude-3-opus-20240229", + messages=[{"role": "user", "content": "Hello"}], + posthog_distinct_id="test-id", + posthog_properties={"$ai_input_tokens": 99999}, + ) + + props = mock_client.capture.call_args[1]["properties"] + assert props["$ai_tokens_source"] == "passthrough" + assert props["$ai_input_tokens"] == 99999 + + def test_groups(mock_client, mock_anthropic_response): with patch( "anthropic.resources.Messages.create", return_value=mock_anthropic_response @@ -927,6 +945,7 @@ def test_streaming_with_tool_calls(mock_client, mock_anthropic_stream_with_tools assert props["$ai_output_tokens"] == 25 assert props["$ai_cache_read_input_tokens"] == 5 assert props["$ai_cache_creation_input_tokens"] == 0 + assert props["$ai_tokens_source"] == "sdk" # Verify raw usage is captured in streaming mode (merged from events) assert "$ai_usage" in props diff --git a/posthog/test/ai/test_tokens_source.py b/posthog/test/ai/test_tokens_source.py new file mode 100644 index 00000000..2af310f0 --- /dev/null +++ b/posthog/test/ai/test_tokens_source.py @@ -0,0 +1,58 @@ +import pytest +from parameterized import parameterized + +from posthog.ai.utils import _get_tokens_source + + +@parameterized.expand( + [ + ("no_posthog_properties", {"$ai_input_tokens": 100}, None, "sdk"), + ("empty_posthog_properties", {"$ai_input_tokens": 100}, {}, "sdk"), + ("unrelated_posthog_properties", {"$ai_input_tokens": 100}, {"foo": "bar"}, "sdk"), + ( + "override_input_tokens", + {"$ai_input_tokens": 100}, + {"$ai_input_tokens": 999}, + "passthrough", + ), + ( + "override_output_tokens", + {"$ai_output_tokens": 50}, + {"$ai_output_tokens": 999}, + "passthrough", + ), + ( + "override_total_tokens", + {"$ai_input_tokens": 100}, + {"$ai_total_tokens": 999}, + "passthrough", + ), + ( + "override_cache_read", + {"$ai_input_tokens": 100}, + {"$ai_cache_read_input_tokens": 500}, + "passthrough", + ), + ( + "override_cache_creation", + {"$ai_input_tokens": 100}, + {"$ai_cache_creation_input_tokens": 200}, + "passthrough", + ), + ( + "override_reasoning_tokens", + {"$ai_input_tokens": 100}, + {"$ai_reasoning_tokens": 300}, + "passthrough", + ), + ( + "mixed_override_and_custom", + {"$ai_input_tokens": 100}, + {"$ai_input_tokens": 999, "custom_key": "value"}, + "passthrough", + ), + ] +) +def test_get_tokens_source(name, sdk_tags, posthog_properties, expected): + result = _get_tokens_source(sdk_tags, posthog_properties) + assert result == expected From a9e6f0b0b363308cc53dd53958ff9d0ff4fb049c Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Wed, 25 Feb 2026 12:07:36 +0000 Subject: [PATCH 2/5] chore: bump version to 7.9.4 --- posthog/version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/posthog/version.py b/posthog/version.py index b9c5371e..63e7f1f4 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1 +1 @@ -VERSION = "7.9.3" +VERSION = "7.9.4" diff --git a/pyproject.toml b/pyproject.toml index 6928b94c..65a3c8fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "posthog" -version = "7.9.3" +version = "7.9.4" description = "Integrate PostHog into any python application." authors = [{ name = "PostHog", email = "hey@posthog.com" }] maintainers = [{ name = "PostHog", email = "hey@posthog.com" }] From caa830b31376bd1857819fa7055e266b2f6e16fc Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Wed, 25 Feb 2026 12:08:10 +0000 Subject: [PATCH 3/5] chore: add changelog entry for 7.9.4 --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b01d811c..1f3315ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # posthog +## 7.9.4 — 2026-02-25 + +feat(llma): add `$ai_tokens_source` property ("sdk" or "passthrough") to all `$ai_generation` events to detect when token values are externally overridden via `posthog_properties` + ## 7.9.3 — 2026-02-18 ### Patch changes From 5d4e83b85ec77722e7f5f1c21039a0e31b44b356 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Wed, 25 Feb 2026 12:11:36 +0000 Subject: [PATCH 4/5] chore: fix ruff formatting --- posthog/test/ai/test_tokens_source.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/posthog/test/ai/test_tokens_source.py b/posthog/test/ai/test_tokens_source.py index 2af310f0..715a1753 100644 --- a/posthog/test/ai/test_tokens_source.py +++ b/posthog/test/ai/test_tokens_source.py @@ -8,7 +8,12 @@ [ ("no_posthog_properties", {"$ai_input_tokens": 100}, None, "sdk"), ("empty_posthog_properties", {"$ai_input_tokens": 100}, {}, "sdk"), - ("unrelated_posthog_properties", {"$ai_input_tokens": 100}, {"foo": "bar"}, "sdk"), + ( + "unrelated_posthog_properties", + {"$ai_input_tokens": 100}, + {"foo": "bar"}, + "sdk", + ), ( "override_input_tokens", {"$ai_input_tokens": 100}, From 78a821200ecef530ffbdf3f9b137525783e43d6a Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Wed, 25 Feb 2026 12:29:21 +0000 Subject: [PATCH 5/5] chore: remove unused pytest import --- posthog/test/ai/test_tokens_source.py | 1 - 1 file changed, 1 deletion(-) diff --git a/posthog/test/ai/test_tokens_source.py b/posthog/test/ai/test_tokens_source.py index 715a1753..c9adcf95 100644 --- a/posthog/test/ai/test_tokens_source.py +++ b/posthog/test/ai/test_tokens_source.py @@ -1,4 +1,3 @@ -import pytest from parameterized import parameterized from posthog.ai.utils import _get_tokens_source