diff --git a/CHANGELOG.md b/CHANGELOG.md index b01d811c..1f3315ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # posthog +## 7.9.4 — 2026-02-25 + +feat(llma): add `$ai_tokens_source` property ("sdk" or "passthrough") to all `$ai_generation` events to detect when token values are externally overridden via `posthog_properties` + ## 7.9.3 — 2026-02-18 ### Patch changes diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py index 3c4afe05..d9177686 100644 --- a/posthog/ai/utils.py +++ b/posthog/ai/utils.py @@ -13,6 +13,28 @@ from posthog.client import Client as PostHogClient +_TOKEN_PROPERTY_KEYS = frozenset( + { + "$ai_input_tokens", + "$ai_output_tokens", + "$ai_cache_read_input_tokens", + "$ai_cache_creation_input_tokens", + "$ai_total_tokens", + "$ai_reasoning_tokens", + } +) + + +def _get_tokens_source( + sdk_tags: Dict[str, Any], posthog_properties: Optional[Dict[str, Any]] +) -> str: + if posthog_properties and any( + key in posthog_properties for key in _TOKEN_PROPERTY_KEYS + ): + return "passthrough" + return "sdk" + + def serialize_raw_usage(raw_usage: Any) -> Optional[Dict[str, Any]]: """ Convert raw provider usage objects to JSON-serializable dicts. @@ -413,14 +435,19 @@ def call_llm_and_track_usage( # send the event to posthog if hasattr(ph_client, "capture") and callable(ph_client.capture): + sdk_tags = get_tags() + merged_properties = { + **sdk_tags, + **(posthog_properties or {}), + **(error_params or {}), + } + merged_properties["$ai_tokens_source"] = _get_tokens_source( + sdk_tags, posthog_properties + ) ph_client.capture( distinct_id=posthog_distinct_id or posthog_trace_id, event="$ai_generation", - properties={ - **get_tags(), - **(posthog_properties or {}), - **(error_params or {}), - }, + properties=merged_properties, groups=posthog_groups, ) @@ -543,14 +570,19 @@ async def call_llm_and_track_usage_async( # send the event to posthog if hasattr(ph_client, "capture") and callable(ph_client.capture): + sdk_tags = get_tags() + merged_properties = { + **sdk_tags, + **(posthog_properties or {}), + **(error_params or {}), + } + merged_properties["$ai_tokens_source"] = _get_tokens_source( + sdk_tags, posthog_properties + ) ph_client.capture( distinct_id=posthog_distinct_id or posthog_trace_id, event="$ai_generation", - properties={ - **get_tags(), - **(posthog_properties or {}), - **(error_params or {}), - }, + properties=merged_properties, groups=posthog_groups, ) @@ -627,6 +659,15 @@ def capture_streaming_event( **(event_data.get("properties") or {}), } + # Determine token source: SDK-computed vs externally overridden + sdk_token_tags = { + "$ai_input_tokens": event_data["usage_stats"].get("input_tokens", 0), + "$ai_output_tokens": event_data["usage_stats"].get("output_tokens", 0), + } + event_properties["$ai_tokens_source"] = _get_tokens_source( + sdk_token_tags, event_data.get("properties") + ) + # Extract and add tools based on provider available_tools = extract_available_tool_calls( event_data["provider"], diff --git a/posthog/test/ai/anthropic/test_anthropic.py b/posthog/test/ai/anthropic/test_anthropic.py index 384f4761..bdf71c83 100644 --- a/posthog/test/ai/anthropic/test_anthropic.py +++ b/posthog/test/ai/anthropic/test_anthropic.py @@ -306,6 +306,7 @@ def test_basic_completion(mock_client, mock_anthropic_response): assert props["$ai_output_tokens"] == 10 assert props["$ai_http_status"] == 200 assert props["foo"] == "bar" + assert props["$ai_tokens_source"] == "sdk" assert isinstance(props["$ai_latency"], float) # Verify raw usage metadata is passed for backend processing assert "$ai_usage" in props @@ -318,6 +319,23 @@ def test_basic_completion(mock_client, mock_anthropic_response): assert "output_tokens" in props["$ai_usage"] +def test_tokens_source_passthrough(mock_client, mock_anthropic_response): + with patch( + "anthropic.resources.Messages.create", return_value=mock_anthropic_response + ): + client = Anthropic(api_key="test-key", posthog_client=mock_client) + client.messages.create( + model="claude-3-opus-20240229", + messages=[{"role": "user", "content": "Hello"}], + posthog_distinct_id="test-id", + posthog_properties={"$ai_input_tokens": 99999}, + ) + + props = mock_client.capture.call_args[1]["properties"] + assert props["$ai_tokens_source"] == "passthrough" + assert props["$ai_input_tokens"] == 99999 + + def test_groups(mock_client, mock_anthropic_response): with patch( "anthropic.resources.Messages.create", return_value=mock_anthropic_response @@ -927,6 +945,7 @@ def test_streaming_with_tool_calls(mock_client, mock_anthropic_stream_with_tools assert props["$ai_output_tokens"] == 25 assert props["$ai_cache_read_input_tokens"] == 5 assert props["$ai_cache_creation_input_tokens"] == 0 + assert props["$ai_tokens_source"] == "sdk" # Verify raw usage is captured in streaming mode (merged from events) assert "$ai_usage" in props diff --git a/posthog/test/ai/test_tokens_source.py b/posthog/test/ai/test_tokens_source.py new file mode 100644 index 00000000..c9adcf95 --- /dev/null +++ b/posthog/test/ai/test_tokens_source.py @@ -0,0 +1,62 @@ +from parameterized import parameterized + +from posthog.ai.utils import _get_tokens_source + + +@parameterized.expand( + [ + ("no_posthog_properties", {"$ai_input_tokens": 100}, None, "sdk"), + ("empty_posthog_properties", {"$ai_input_tokens": 100}, {}, "sdk"), + ( + "unrelated_posthog_properties", + {"$ai_input_tokens": 100}, + {"foo": "bar"}, + "sdk", + ), + ( + "override_input_tokens", + {"$ai_input_tokens": 100}, + {"$ai_input_tokens": 999}, + "passthrough", + ), + ( + "override_output_tokens", + {"$ai_output_tokens": 50}, + {"$ai_output_tokens": 999}, + "passthrough", + ), + ( + "override_total_tokens", + {"$ai_input_tokens": 100}, + {"$ai_total_tokens": 999}, + "passthrough", + ), + ( + "override_cache_read", + {"$ai_input_tokens": 100}, + {"$ai_cache_read_input_tokens": 500}, + "passthrough", + ), + ( + "override_cache_creation", + {"$ai_input_tokens": 100}, + {"$ai_cache_creation_input_tokens": 200}, + "passthrough", + ), + ( + "override_reasoning_tokens", + {"$ai_input_tokens": 100}, + {"$ai_reasoning_tokens": 300}, + "passthrough", + ), + ( + "mixed_override_and_custom", + {"$ai_input_tokens": 100}, + {"$ai_input_tokens": 999, "custom_key": "value"}, + "passthrough", + ), + ] +) +def test_get_tokens_source(name, sdk_tags, posthog_properties, expected): + result = _get_tokens_source(sdk_tags, posthog_properties) + assert result == expected diff --git a/posthog/version.py b/posthog/version.py index b9c5371e..63e7f1f4 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1 +1 @@ -VERSION = "7.9.3" +VERSION = "7.9.4" diff --git a/pyproject.toml b/pyproject.toml index 6928b94c..65a3c8fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "posthog" -version = "7.9.3" +version = "7.9.4" description = "Integrate PostHog into any python application." authors = [{ name = "PostHog", email = "hey@posthog.com" }] maintainers = [{ name = "PostHog", email = "hey@posthog.com" }]