diff --git a/.changeset/quiet-llamas-join.md b/.changeset/quiet-llamas-join.md new file mode 100644 index 00000000..20cacbc6 --- /dev/null +++ b/.changeset/quiet-llamas-join.md @@ -0,0 +1,5 @@ +--- +'pypi/posthog': patch +--- + +Generate SDK-created event, personless, and AI telemetry IDs as UUID v7. diff --git a/posthog/_uuid.py b/posthog/_uuid.py new file mode 100644 index 00000000..909f6fc5 --- /dev/null +++ b/posthog/_uuid.py @@ -0,0 +1,31 @@ +"""UUID generation helpers.""" + +import os +import time +import uuid + + +_UUID7_RANDOM_BITS = 74 +_UUID7_RANDOM_MASK = (1 << _UUID7_RANDOM_BITS) - 1 +_UUID7_TIMESTAMP_MASK = (1 << 48) - 1 + + +def uuid7() -> str: + """Return a UUID v7 string. + + Python 3.14+ includes ``uuid.uuid7`` in the standard library. Older + supported runtimes do not, so fall back to a small RFC 9562-compatible + implementation using the current Unix epoch milliseconds and 74 random bits. + """ + + stdlib_uuid7 = getattr(uuid, "uuid7", None) + if stdlib_uuid7 is not None: + return str(stdlib_uuid7()) + + unix_ts_ms = int(time.time() * 1000) & _UUID7_TIMESTAMP_MASK + random_bits = int.from_bytes(os.urandom(10), "big") & _UUID7_RANDOM_MASK + rand_a = random_bits >> 62 + rand_b = random_bits & ((1 << 62) - 1) + + uuid_int = (unix_ts_ms << 80) | (0x7 << 76) | (rand_a << 64) | (0b10 << 62) | rand_b + return str(uuid.UUID(int=uuid_int)) diff --git a/posthog/ai/anthropic/anthropic.py b/posthog/ai/anthropic/anthropic.py index 288eabac..f0022d98 100644 --- a/posthog/ai/anthropic/anthropic.py +++ b/posthog/ai/anthropic/anthropic.py @@ -7,7 +7,6 @@ ) import time -import uuid from typing import Any, Dict, List, Optional from posthog.ai.types import StreamingContentBlock, TokenUsage, ToolInProgress @@ -24,6 +23,7 @@ ) from posthog.ai.sanitization import sanitize_anthropic from posthog.client import Client as PostHogClient +from posthog._uuid import uuid7 from posthog import setup @@ -70,7 +70,7 @@ def create( """ if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() if kwargs.get("stream", False): return self._create_streaming( @@ -119,7 +119,7 @@ def stream( A streaming iterator yielding Anthropic events. """ if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() return self._create_streaming( posthog_distinct_id, diff --git a/posthog/ai/anthropic/anthropic_async.py b/posthog/ai/anthropic/anthropic_async.py index df098955..ebcf1989 100644 --- a/posthog/ai/anthropic/anthropic_async.py +++ b/posthog/ai/anthropic/anthropic_async.py @@ -7,7 +7,6 @@ ) import time -import uuid from typing import Any, Dict, List, Optional from posthog import setup @@ -26,6 +25,7 @@ ) from posthog.ai.sanitization import sanitize_anthropic from posthog.client import Client as PostHogClient +from posthog._uuid import uuid7 class AsyncAnthropic(anthropic.AsyncAnthropic): @@ -71,7 +71,7 @@ async def create( """ if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() if kwargs.get("stream", False): return await self._create_streaming( @@ -120,7 +120,7 @@ async def stream( An async streaming iterator yielding Anthropic events. """ if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() return await self._create_streaming( posthog_distinct_id, diff --git a/posthog/ai/claude_agent_sdk/client.py b/posthog/ai/claude_agent_sdk/client.py index 689fe3db..5a19f244 100644 --- a/posthog/ai/claude_agent_sdk/client.py +++ b/posthog/ai/claude_agent_sdk/client.py @@ -6,7 +6,6 @@ import logging import time -import uuid from typing import Any, Callable, Dict, List, Optional, Union try: @@ -28,6 +27,7 @@ _GenerationTracker, ) from posthog.client import Client +from posthog._uuid import uuid7 log = logging.getLogger("posthog") @@ -91,7 +91,7 @@ def __init__( groups=posthog_groups, properties=posthog_properties or {}, ) - self._trace_id = posthog_trace_id or str(uuid.uuid4()) + self._trace_id = posthog_trace_id or uuid7() self._distinct_id = posthog_distinct_id self._extra_props = posthog_properties or {} self._privacy = posthog_privacy_mode diff --git a/posthog/ai/claude_agent_sdk/processor.py b/posthog/ai/claude_agent_sdk/processor.py index 09d8187e..3ca952c8 100644 --- a/posthog/ai/claude_agent_sdk/processor.py +++ b/posthog/ai/claude_agent_sdk/processor.py @@ -6,7 +6,6 @@ import logging import time -import uuid from dataclasses import dataclass, field from typing import Any, Callable, Dict, List, Optional, Union @@ -26,6 +25,7 @@ from posthog import setup from posthog.client import Client +from posthog._uuid import uuid7 log = logging.getLogger("posthog") @@ -41,7 +41,7 @@ class _GenerationData: cache_creation_input_tokens: int = 0 start_time: float = 0.0 end_time: float = 0.0 - span_id: str = field(default_factory=lambda: str(uuid.uuid4())) + span_id: str = field(default_factory=lambda: uuid7()) stop_reason: Optional[str] = None @@ -240,7 +240,7 @@ async def query( # Per-call overrides distinct_id_override = posthog_distinct_id or self._distinct_id - trace_id = posthog_trace_id or str(uuid.uuid4()) + trace_id = posthog_trace_id or uuid7() extra_props = posthog_properties or {} privacy = ( posthog_privacy_mode @@ -480,7 +480,7 @@ def _emit_generation_from_result( properties: Dict[str, Any] = { "$ai_trace_id": trace_id, - "$ai_span_id": str(uuid.uuid4()), + "$ai_span_id": uuid7(), "$ai_span_name": "generation_1", "$ai_provider": "anthropic", "$ai_framework": "claude-agent-sdk", @@ -534,7 +534,7 @@ def _emit_tool_span( properties: Dict[str, Any] = { "$ai_trace_id": trace_id, - "$ai_span_id": str(uuid.uuid4()), + "$ai_span_id": uuid7(), "$ai_parent_id": parent_span_id, "$ai_span_name": block.name, "$ai_span_type": "tool", diff --git a/posthog/ai/gemini/gemini.py b/posthog/ai/gemini/gemini.py index dd749a27..43f28f4b 100644 --- a/posthog/ai/gemini/gemini.py +++ b/posthog/ai/gemini/gemini.py @@ -1,6 +1,5 @@ import os import time -import uuid from typing import Any, Dict, Optional from posthog.ai.types import TokenUsage, StreamingEventData @@ -29,6 +28,7 @@ from posthog.ai.utils import with_privacy_mode from posthog.ai.sanitization import sanitize_gemini from posthog.client import Client as PostHogClient +from posthog._uuid import uuid7 class Client: @@ -229,7 +229,7 @@ def _merge_posthog_params( properties.update(call_properties) if call_trace_id is None: - call_trace_id = str(uuid.uuid4()) + call_trace_id = uuid7() return distinct_id, call_trace_id, properties, privacy_mode, groups diff --git a/posthog/ai/gemini/gemini_async.py b/posthog/ai/gemini/gemini_async.py index 07ba3f02..225018b9 100644 --- a/posthog/ai/gemini/gemini_async.py +++ b/posthog/ai/gemini/gemini_async.py @@ -1,6 +1,5 @@ import os import time -import uuid from typing import Any, Dict, Optional from posthog.ai.stream import AsyncStreamWrapper @@ -30,6 +29,7 @@ from posthog.ai.utils import with_privacy_mode from posthog.ai.sanitization import sanitize_gemini from posthog.client import Client as PostHogClient +from posthog._uuid import uuid7 class AsyncClient: @@ -230,7 +230,7 @@ def _merge_posthog_params( properties.update(call_properties) if call_trace_id is None: - call_trace_id = str(uuid.uuid4()) + call_trace_id = uuid7() return distinct_id, call_trace_id, properties, privacy_mode, groups diff --git a/posthog/ai/openai/openai.py b/posthog/ai/openai/openai.py index 539d2a6b..3f68346e 100644 --- a/posthog/ai/openai/openai.py +++ b/posthog/ai/openai/openai.py @@ -1,5 +1,4 @@ import time -import uuid from typing import Any, Dict, List, Optional from posthog.ai.types import TokenUsage @@ -25,6 +24,7 @@ ) from posthog.ai.sanitization import sanitize_openai, sanitize_openai_response from posthog.client import Client as PostHogClient +from posthog._uuid import uuid7 from posthog import setup from posthog.ai.openai.wrapper_utils import _OpenAIWrapperResource @@ -118,7 +118,7 @@ def create( The OpenAI response, or a streaming iterator when ``stream=True``. """ if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() if kwargs.get("stream", False): return self._create_streaming( @@ -372,7 +372,7 @@ def create( The OpenAI chat completion, or a streaming iterator when ``stream=True``. """ if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() if kwargs.get("stream", False): return self._create_streaming( @@ -567,7 +567,7 @@ def create( """ if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() start_time = time.time() response = self._original.create(**kwargs) diff --git a/posthog/ai/openai/openai_async.py b/posthog/ai/openai/openai_async.py index 7e7b5838..1e244a98 100644 --- a/posthog/ai/openai/openai_async.py +++ b/posthog/ai/openai/openai_async.py @@ -1,5 +1,4 @@ import time -import uuid from typing import Any, Dict, List, Optional from posthog.ai.stream import AsyncStreamWrapper @@ -29,6 +28,7 @@ ) from posthog.ai.sanitization import sanitize_openai, sanitize_openai_response from posthog.client import Client as PostHogClient +from posthog._uuid import uuid7 from posthog.ai.openai.wrapper_utils import _OpenAIWrapperResource @@ -121,7 +121,7 @@ async def create( The OpenAI response, or an async streaming iterator when ``stream=True``. """ if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() if kwargs.get("stream", False): return await self._create_streaming( @@ -240,7 +240,7 @@ async def _capture_streaming_event( stop_reason: Optional[str] = None, ): if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() # Use model from kwargs, fallback to model from response model = kwargs.get("model") or model_from_response or "unknown" @@ -401,7 +401,7 @@ async def create( The OpenAI chat completion, or an async streaming iterator when ``stream=True``. """ if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() # If streaming, handle streaming specifically if kwargs.get("stream", False): @@ -535,7 +535,7 @@ async def _capture_streaming_event( stop_reason: Optional[str] = None, ): if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() # Use model from kwargs, fallback to model from response model = kwargs.get("model") or model_from_response or "unknown" @@ -623,7 +623,7 @@ async def create( """ if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() start_time = time.time() response = await self._original.create(**kwargs) diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py index 127f5d11..b8fe05cd 100644 --- a/posthog/ai/utils.py +++ b/posthog/ai/utils.py @@ -1,5 +1,4 @@ import time -import uuid from typing import Any, Callable, Dict, List, Optional, cast from posthog import get_tags, identify_context, new_context, tag, contexts @@ -12,6 +11,7 @@ ) from posthog.ai.types import FormattedMessage, StreamingEventData, TokenUsage from posthog.client import Client as PostHogClient +from posthog._uuid import uuid7 _TOKEN_PROPERTY_KEYS = frozenset( @@ -384,7 +384,7 @@ def call_llm_and_track_usage( latency = end_time - start_time if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() # Check if we have a real user distinct_id (from param or outer context) has_person_distinct_id = ( @@ -534,7 +534,7 @@ async def call_llm_and_track_usage_async( latency = end_time - start_time if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) + posthog_trace_id = uuid7() # Check if we have a real user distinct_id (from param or outer context) has_person_distinct_id = ( @@ -683,7 +683,7 @@ def capture_streaming_event( ph_client: PostHog client instance event_data: Standardized streaming event data containing all necessary information """ - trace_id = event_data.get("trace_id") or str(uuid.uuid4()) + trace_id = event_data.get("trace_id") or uuid7() # Build base event properties event_properties = { diff --git a/posthog/client.py b/posthog/client.py index 800cc577..a20f7681 100644 --- a/posthog/client.py +++ b/posthog/client.py @@ -9,8 +9,6 @@ import weakref from datetime import datetime, timedelta, timezone from typing import Any, Dict, List, Optional, Union -from uuid import uuid4 - from typing_extensions import Unpack from posthog._async_utils import _BackgroundEventLoopRunner @@ -92,6 +90,7 @@ system_context, ) from posthog.version import VERSION +from posthog._uuid import uuid7 from queue import Queue, Full @@ -110,7 +109,7 @@ def get_identity_state(passed) -> tuple[str, bool]: if context_id: return (context_id, False) - return (str(uuid4()), True) + return (uuid7(), True) def add_context_tags(properties): @@ -1353,7 +1352,7 @@ def _enqueue(self, msg, disable_geoip): if "uuid" not in msg: # Always send a uuid, so we can always return one - msg["uuid"] = stringify_id(uuid4()) + msg["uuid"] = uuid7() sent_uuid = msg["uuid"] diff --git a/posthog/test/ai/test_uuid_generation.py b/posthog/test/ai/test_uuid_generation.py new file mode 100644 index 00000000..5d91b396 --- /dev/null +++ b/posthog/test/ai/test_uuid_generation.py @@ -0,0 +1,34 @@ +from unittest.mock import MagicMock +from uuid import UUID + +from posthog.ai.utils import capture_streaming_event + + +def test_capture_streaming_event_generates_uuid_v7_trace_id_when_missing(): + ph_client = MagicMock() + + capture_streaming_event( + ph_client, + { + "provider": "openai", + "model": "gpt-test", + "base_url": "https://api.openai.com/v1", + "kwargs": {}, + "formatted_input": [], + "formatted_output": [], + "usage_stats": {}, + "latency": 0.1, + "distinct_id": None, + "trace_id": None, + "properties": None, + "privacy_mode": False, + "groups": None, + "stop_reason": None, + }, + ) + + capture_kwargs = ph_client.capture.call_args.kwargs + trace_id = capture_kwargs["properties"]["$ai_trace_id"] + assert UUID(trace_id).version == 7 + assert capture_kwargs["distinct_id"] == trace_id + assert capture_kwargs["properties"]["$process_person_profile"] is False diff --git a/posthog/test/test_client.py b/posthog/test/test_client.py index e9d7054c..753536a8 100644 --- a/posthog/test/test_client.py +++ b/posthog/test/test_client.py @@ -1,7 +1,7 @@ import time import unittest from datetime import datetime -from uuid import uuid4 +from uuid import UUID, uuid4 from unittest import mock from parameterized import parameterized @@ -156,6 +156,7 @@ def test_basic_capture(self): self.assertEqual(msg["event"], "python test event") self.assertTrue(isinstance(msg["timestamp"], str)) self.assertIsNotNone(msg.get("uuid")) + self.assertEqual(UUID(msg["uuid"]).version, 7) self.assertEqual(msg["distinct_id"], "distinct_id") self.assertEqual(msg["properties"]["$lib"], "posthog-python") self.assertEqual(msg["properties"]["$lib_version"], VERSION) @@ -166,6 +167,16 @@ def test_basic_capture(self): assert msg["properties"]["$os"] == mock.ANY assert msg["properties"]["$os_version"] == mock.ANY + def test_capture_without_distinct_id_generates_uuid_v7_personless_distinct_id(self): + with mock.patch("posthog.client.batch_post") as mock_post: + client = Client(FAKE_TEST_API_KEY, on_error=self.set_fail, sync_mode=True) + client.capture("personless event") + + msg = mock_post.call_args[1]["batch"][0] + self.assertEqual(UUID(msg["uuid"]).version, 7) + self.assertEqual(UUID(msg["distinct_id"]).version, 7) + self.assertFalse(msg["properties"]["$process_person_profile"]) + def test_capture_omits_is_server_when_disabled(self): with mock.patch("posthog.client.batch_post") as mock_post: client = Client( diff --git a/posthog/test/test_uuid_utils.py b/posthog/test/test_uuid_utils.py new file mode 100644 index 00000000..ca495cb9 --- /dev/null +++ b/posthog/test/test_uuid_utils.py @@ -0,0 +1,33 @@ +from uuid import UUID + +import pytest + +from posthog import _uuid as uuid_utils + + +@pytest.mark.parametrize( + ("stdlib_uuid7", "expected"), + [ + ( + UUID("01920000-0000-7000-8000-000000000001"), + "01920000-0000-7000-8000-000000000001", + ), + (None, "01234567-89ab-7fff-bfff-ffffffffffff"), + ], +) +def test_uuid7_generates_version_7_uuid_string(monkeypatch, stdlib_uuid7, expected): + if stdlib_uuid7 is None: + monkeypatch.delattr(uuid_utils.uuid, "uuid7", raising=False) + monkeypatch.setattr(uuid_utils.time, "time", lambda: 0x0123456789AB / 1000) + monkeypatch.setattr(uuid_utils.os, "urandom", lambda length: b"\xff" * length) + else: + monkeypatch.setattr( + uuid_utils.uuid, "uuid7", lambda: stdlib_uuid7, raising=False + ) + + generated = uuid_utils.uuid7() + parsed = UUID(generated) + + assert generated == expected + assert parsed.version == 7 + assert str(parsed) == generated