NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Added/removed rows and hunk line counts were checked against the @@ headers,
but leading indentation (especially of context rows in posthog/ai/utils.py)
is inferred - confirm against the target files, or apply with
`git apply --ignore-whitespace`.

diff --git a/.sampo/changesets/steady-context-resolver.md b/.sampo/changesets/steady-context-resolver.md
new file mode 100644
index 00000000..79b122fe
--- /dev/null
+++ b/.sampo/changesets/steady-context-resolver.md
@@ -0,0 +1,5 @@
+---
+pypi/posthog: patch
+---
+
+fix(llma): use distinct_id from outer context if not provided, fix $process_person_profile for context-based identity
diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py
index d9177686..6e7a7037 100644
--- a/posthog/ai/utils.py
+++ b/posthog/ai/utils.py
@@ -2,7 +2,7 @@
 import uuid
 from typing import Any, Callable, Dict, List, Optional, cast
 
-from posthog import get_tags, identify_context, new_context, tag
+from posthog import get_tags, identify_context, new_context, tag, contexts
 from posthog.ai.sanitization import (
     sanitize_anthropic,
     sanitize_gemini,
@@ -366,6 +366,16 @@ def call_llm_and_track_usage(
             if posthog_trace_id is None:
                 posthog_trace_id = str(uuid.uuid4())
 
+            # Check if we have a real user distinct_id (from param or outer context)
+            has_person_distinct_id = (
+                posthog_distinct_id is not None
+                or contexts.get_context_distinct_id() is not None
+            )
+
+            if not has_person_distinct_id:
+                # Fall back to trace_id as distinct_id when no real user id is available.
+                identify_context(posthog_trace_id)
+
             if response and (
                 hasattr(response, "usage")
                 or (provider == "gemini" and hasattr(response, "usage_metadata"))
@@ -421,7 +431,7 @@ def call_llm_and_track_usage(
                 # Already serialized by converters
                 tag("$ai_usage", raw_usage)
 
-            if posthog_distinct_id is None:
+            if not has_person_distinct_id:
                 tag("$process_person_profile", False)
 
             # Process instructions for Responses API
@@ -445,7 +455,7 @@ def call_llm_and_track_usage(
                     sdk_tags, posthog_properties
                 )
                 ph_client.capture(
-                    distinct_id=posthog_distinct_id or posthog_trace_id,
+                    distinct_id=contexts.get_context_distinct_id(),
                     event="$ai_generation",
                     properties=merged_properties,
                     groups=posthog_groups,
@@ -501,6 +511,16 @@ async def call_llm_and_track_usage_async(
             if posthog_trace_id is None:
                 posthog_trace_id = str(uuid.uuid4())
 
+            # Check if we have a real user distinct_id (from param or outer context)
+            has_person_distinct_id = (
+                posthog_distinct_id is not None
+                or contexts.get_context_distinct_id() is not None
+            )
+
+            if not has_person_distinct_id:
+                # Fall back to trace_id as distinct_id when no real user id is available.
+                identify_context(posthog_trace_id)
+
             if response and (
                 hasattr(response, "usage")
                 or (provider == "gemini" and hasattr(response, "usage_metadata"))
@@ -556,7 +576,7 @@ async def call_llm_and_track_usage_async(
                 # Already serialized by converters
                 tag("$ai_usage", raw_usage)
 
-            if posthog_distinct_id is None:
+            if not has_person_distinct_id:
                 tag("$process_person_profile", False)
 
             # Process instructions for Responses API
@@ -580,7 +600,7 @@ async def call_llm_and_track_usage_async(
                     sdk_tags, posthog_properties
                 )
                 ph_client.capture(
-                    distinct_id=posthog_distinct_id or posthog_trace_id,
+                    distinct_id=contexts.get_context_distinct_id(),
                     event="$ai_generation",
                     properties=merged_properties,
                     groups=posthog_groups,
diff --git a/posthog/test/ai/anthropic/test_anthropic.py b/posthog/test/ai/anthropic/test_anthropic.py
index bdf71c83..7900b82c 100644
--- a/posthog/test/ai/anthropic/test_anthropic.py
+++ b/posthog/test/ai/anthropic/test_anthropic.py
@@ -3,6 +3,8 @@
 
 import pytest
 
+from posthog import identify_context, new_context
+
 try:
     from anthropic.types import Message, Usage
 
@@ -1302,3 +1304,99 @@ async def run_test():
         assert props["$ai_web_search_count"] == 2
         assert props["$ai_input_tokens"] == 50
         assert props["$ai_output_tokens"] == 25
+
+
+# =======================
+# Distinct ID Context Tests
+# =======================
+
+
+def test_no_distinct_id_uses_trace_id_and_personless(
+    mock_client, mock_anthropic_response
+):
+    """When no distinct_id is provided and no outer context, trace_id is used and event is personless."""
+    with patch(
+        "anthropic.resources.Messages.create", return_value=mock_anthropic_response
+    ):
+        client = Anthropic(api_key="test-key", posthog_client=mock_client)
+        client.messages.create(
+            model="claude-3-opus-20240229",
+            messages=[{"role": "user", "content": "Hello"}],
+            posthog_trace_id="trace-123",
+        )
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        assert call_args["distinct_id"] == "trace-123"
+        assert props["$process_person_profile"] is False
+
+
+def test_explicit_distinct_id_creates_person_profile(
+    mock_client, mock_anthropic_response
+):
+    """When posthog_distinct_id is explicitly passed, it is used and event is not personless."""
+    with patch(
+        "anthropic.resources.Messages.create", return_value=mock_anthropic_response
+    ):
+        client = Anthropic(api_key="test-key", posthog_client=mock_client)
+        client.messages.create(
+            model="claude-3-opus-20240229",
+            messages=[{"role": "user", "content": "Hello"}],
+            posthog_distinct_id="user-123",
+            posthog_trace_id="trace-123",
+        )
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        assert call_args["distinct_id"] == "user-123"
+        assert (
+            "$process_person_profile" not in props
+            or props["$process_person_profile"] is not False
+        )
+
+
+def test_outer_context_distinct_id_is_used(mock_client, mock_anthropic_response):
+    """When an outer context has a distinct_id, it should be used instead of trace_id."""
+    with patch(
+        "anthropic.resources.Messages.create", return_value=mock_anthropic_response
+    ):
+        client = Anthropic(api_key="test-key", posthog_client=mock_client)
+        with new_context():
+            identify_context("outer-user-456")
+            client.messages.create(
+                model="claude-3-opus-20240229",
+                messages=[{"role": "user", "content": "Hello"}],
+                posthog_trace_id="trace-123",
+            )
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        assert call_args["distinct_id"] == "outer-user-456"
+        assert (
+            "$process_person_profile" not in props
+            or props["$process_person_profile"] is not False
+        )
+
+
+def test_explicit_distinct_id_overrides_outer_context(
+    mock_client, mock_anthropic_response
+):
+    """When both outer context and explicit posthog_distinct_id are set, explicit wins."""
+    with patch(
+        "anthropic.resources.Messages.create", return_value=mock_anthropic_response
+    ):
+        client = Anthropic(api_key="test-key", posthog_client=mock_client)
+        with new_context():
+            identify_context("outer-user-456")
+            client.messages.create(
+                model="claude-3-opus-20240229",
+                messages=[{"role": "user", "content": "Hello"}],
+                posthog_distinct_id="explicit-user-789",
+                posthog_trace_id="trace-123",
+            )
+
+        call_args = mock_client.capture.call_args[1]
+        assert call_args["distinct_id"] == "explicit-user-789"