From 9dcb4378120657d4c35f95ec1677031eff30250c Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 3 Mar 2026 11:05:55 +0100 Subject: [PATCH 1/4] test(openai-agents): Replace mocks with library types for streamed responses --- tests/conftest.py | 9 + .../integrations/anthropic/test_anthropic.py | 11 +- tests/integrations/openai/test_openai.py | 25 +- .../openai_agents/test_openai_agents.py | 324 ++++++++++-------- 4 files changed, 210 insertions(+), 159 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index d6240e17eb..0853013dfd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1004,6 +1004,15 @@ async def parse_stream(): return inner +@pytest.fixture() +def async_iterator(): + async def inner(values): + for value in values: + yield value + + return inner + + class MockServerRequestHandler(BaseHTTPRequestHandler): def do_GET(self): # noqa: N802 # Process an HTTP GET request and return a response. diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 4361ba9629..ea48f5d4db 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -67,11 +67,6 @@ async def __call__(self, *args, **kwargs): ) -async def async_iterator(values): - for value in values: - yield value - - @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -324,7 +319,7 @@ def test_streaming_create_message( ], ) async def test_streaming_create_message_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_events, send_default_pii, include_prompts, async_iterator ): client = AsyncAnthropic(api_key="z") returned_stream = AsyncStream(cast_to=None, response=None, client=client) @@ -567,7 +562,7 @@ def test_streaming_create_message_with_input_json_delta( ], ) async def test_streaming_create_message_with_input_json_delta_async( - sentry_init, capture_events, send_default_pii, include_prompts + 
sentry_init, capture_events, send_default_pii, include_prompts, async_iterator ): client = AsyncAnthropic(api_key="z") returned_stream = AsyncStream(cast_to=None, response=None, client=client) @@ -1361,7 +1356,7 @@ def test_streaming_create_message_with_system_prompt( ], ) async def test_streaming_create_message_with_system_prompt_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_events, send_default_pii, include_prompts, async_iterator ): """Test that system prompts are properly captured in streaming mode (async).""" client = AsyncAnthropic(api_key="z") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index b8701a65c0..7f75bb977e 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -124,11 +124,6 @@ async def __call__(self, *args, **kwargs): ) -async def async_iterator(values): - for value in values: - yield value - - @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -689,7 +684,7 @@ def test_streaming_chat_completion(sentry_init, capture_events, messages, reques ], ) async def test_streaming_chat_completion_async_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_events, send_default_pii, include_prompts, async_iterator ): sentry_init( integrations=[ @@ -829,7 +824,7 @@ async def test_streaming_chat_completion_async_no_prompts( ], ) async def test_streaming_chat_completion_async( - sentry_init, capture_events, messages, request + sentry_init, capture_events, messages, request, async_iterator ): sentry_init( integrations=[ @@ -1463,7 +1458,9 @@ def test_span_origin_streaming_chat(sentry_init, capture_events): @pytest.mark.asyncio -async def test_span_origin_streaming_chat_async(sentry_init, capture_events): +async def test_span_origin_streaming_chat_async( + sentry_init, capture_events, async_iterator +): sentry_init( integrations=[OpenAIIntegration()], 
traces_sample_rate=1.0, @@ -2420,7 +2417,7 @@ async def test_ai_client_span_responses_async_api( ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_ai_client_span_streaming_responses_async_api( - sentry_init, capture_events, instructions, input, request + sentry_init, capture_events, instructions, input, request, async_iterator ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], @@ -2799,7 +2796,7 @@ def test_streaming_responses_api( ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_events, send_default_pii, include_prompts, async_iterator ): sentry_init( integrations=[ @@ -3037,7 +3034,9 @@ def test_streaming_chat_completion_ttft(sentry_init, capture_events): # noinspection PyTypeChecker @pytest.mark.asyncio -async def test_streaming_chat_completion_ttft_async(sentry_init, capture_events): +async def test_streaming_chat_completion_ttft_async( + sentry_init, capture_events, async_iterator +): """ Test that async streaming chat completions capture time-to-first-token (TTFT). """ @@ -3142,7 +3141,9 @@ def test_streaming_responses_api_ttft(sentry_init, capture_events): # noinspection PyTypeChecker @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -async def test_streaming_responses_api_ttft_async(sentry_init, capture_events): +async def test_streaming_responses_api_ttft_async( + sentry_init, capture_events, async_iterator +): """ Test that async streaming responses API captures time-to-first-token (TTFT). 
""" diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 491223e804..2276010d99 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -5,6 +5,7 @@ import os import json import logging +import httpx import sentry_sdk from sentry_sdk import start_span @@ -14,7 +15,7 @@ from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize from sentry_sdk.utils import parse_version -from openai import AsyncOpenAI +from openai import AsyncOpenAI, AsyncStream from agents.models.openai_responses import OpenAIResponsesModel from unittest import mock @@ -43,12 +44,16 @@ ResponseCompletedEvent, Response, ResponseUsage, + ResponseStreamEvent, ) from openai.types.responses.response_usage import ( InputTokensDetails, OutputTokensDetails, ) +from openai._response import AsyncAPIResponse +from openai._models import FinalRequestOptions + test_run_config = agents.RunConfig(tracing_disabled=True) EXAMPLE_RESPONSE = Response( @@ -88,140 +93,6 @@ ) -async def EXAMPLE_STREAMED_RESPONSE(*args, **kwargs): - yield ResponseCreatedEvent( - response=Response( - id="chat-id", - output=[], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="response-model-id", - object="response", - ), - type="response.created", - sequence_number=0, - ) - - yield ResponseCompletedEvent( - response=Response( - id="chat-id", - output=[ - ResponseOutputMessage( - id="message-id", - content=[ - ResponseOutputText( - annotations=[], - text="the model response", - type="output_text", - ), - ], - role="assistant", - status="completed", - type="message", - ), - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="response-model-id", - object="response", - usage=ResponseUsage( - input_tokens=20, - input_tokens_details=InputTokensDetails( - cached_tokens=5, - ), 
- output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=8, - ), - total_tokens=30, - ), - ), - type="response.completed", - sequence_number=1, - ) - - -async def EXAMPLE_STREAMED_RESPONSE_WITH_DELTA(*args, **kwargs): - yield ResponseCreatedEvent( - response=Response( - id="chat-id", - output=[], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="response-model-id", - object="response", - ), - type="response.created", - sequence_number=0, - ) - - yield ResponseTextDeltaEvent( - type="response.output_text.delta", - item_id="message-id", - output_index=0, - content_index=0, - delta="Hello", - logprobs=[], - sequence_number=1, - ) - - yield ResponseTextDeltaEvent( - type="response.output_text.delta", - item_id="message-id", - output_index=0, - content_index=0, - delta=" world!", - logprobs=[], - sequence_number=2, - ) - - yield ResponseCompletedEvent( - response=Response( - id="chat-id", - output=[ - ResponseOutputMessage( - id="message-id", - content=[ - ResponseOutputText( - annotations=[], - text="Hello world!", - type="output_text", - ), - ], - role="assistant", - status="completed", - type="message", - ), - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="response-model-id", - object="response", - usage=ResponseUsage( - input_tokens=20, - input_tokens_details=InputTokensDetails( - cached_tokens=5, - ), - output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=8, - ), - total_tokens=30, - ), - ), - type="response.completed", - sequence_number=3, - ) - - @pytest.fixture def mock_usage(): return Usage( @@ -1332,8 +1203,17 @@ def simple_test_tool(message: str) -> str: assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 +def sse_chunks(events): + for event in events: + payload = event.model_dump() + chunk = f"event: {payload['type']}\ndata: {json.dumps(payload)}\n\n" + yield chunk.encode("utf-8") + + 
@pytest.mark.asyncio -async def test_hosted_mcp_tool_propagation_header_streamed(sentry_init, test_agent): +async def test_hosted_mcp_tool_propagation_header_streamed( + sentry_init, test_agent, async_iterator +): """ Test responses API is given trace propagation headers with HostedMCPTool. """ @@ -1365,10 +1245,84 @@ async def test_hosted_mcp_tool_propagation_header_streamed(sentry_init, test_age release="d08ebdb9309e1b004c6f52202de58a09c2268e42", ) + response = httpx.Response( + 200, + content=async_iterator( + sse_chunks( + [ + ResponseCreatedEvent( + response=Response( + id="chat-id", + output=[], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="response-model-id", + object="response", + ), + type="response.created", + sequence_number=0, + ), + ResponseCompletedEvent( + response=Response( + id="chat-id", + output=[ + ResponseOutputMessage( + id="message-id", + content=[ + ResponseOutputText( + annotations=[], + text="the model response", + type="output_text", + ), + ], + role="assistant", + status="completed", + type="message", + ), + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="response-model-id", + object="response", + usage=ResponseUsage( + input_tokens=20, + input_tokens_details=InputTokensDetails( + cached_tokens=5, + ), + output_tokens=10, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=8, + ), + total_tokens=30, + ), + ), + type="response.completed", + sequence_number=1, + ), + ] + ) + ), + ) + + # Emulate https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1751 + api_response = AsyncAPIResponse( + raw=response, + cast_to=Response, + client=client, + stream=True, + stream_cls=AsyncStream[ResponseStreamEvent], + options=FinalRequestOptions.construct(method="post", url="/responses"), + retries_taken=0, + ) + with patch.object( model._client.responses, "create", - 
side_effect=EXAMPLE_STREAMED_RESPONSE, + return_value=api_response, ) as create, mock.patch( "sentry_sdk.tracing_utils.Random.randrange", return_value=500000 ): @@ -2848,7 +2802,7 @@ async def test_streaming_span_update_captures_response_data( @pytest.mark.asyncio -async def test_streaming_ttft_on_chat_span(sentry_init, test_agent): +async def test_streaming_ttft_on_chat_span(sentry_init, test_agent, async_iterator): """ Test that time-to-first-token (TTFT) is recorded on chat spans during streaming. @@ -2876,10 +2830,102 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent): traces_sample_rate=1.0, ) + response = httpx.Response( + 200, + content=async_iterator( + sse_chunks( + [ + ResponseCreatedEvent( + response=Response( + id="chat-id", + output=[], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="response-model-id", + object="response", + ), + type="response.created", + sequence_number=0, + ), + ResponseTextDeltaEvent( + type="response.output_text.delta", + item_id="message-id", + output_index=0, + content_index=0, + delta="Hello", + logprobs=[], + sequence_number=1, + ), + ResponseTextDeltaEvent( + type="response.output_text.delta", + item_id="message-id", + output_index=0, + content_index=0, + delta=" world!", + logprobs=[], + sequence_number=2, + ), + ResponseCompletedEvent( + response=Response( + id="chat-id", + output=[ + ResponseOutputMessage( + id="message-id", + content=[ + ResponseOutputText( + annotations=[], + text="Hello world!", + type="output_text", + ), + ], + role="assistant", + status="completed", + type="message", + ), + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="response-model-id", + object="response", + usage=ResponseUsage( + input_tokens=20, + input_tokens_details=InputTokensDetails( + cached_tokens=5, + ), + output_tokens=10, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=8, + ), + total_tokens=30, + ), + 
), + type="response.completed", + sequence_number=3, + ), + ] + ) + ), + ) + + # Emulate https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1751 + api_response = AsyncAPIResponse( + raw=response, + cast_to=Response, + client=client, + stream=True, + stream_cls=AsyncStream[ResponseStreamEvent], + options=FinalRequestOptions.construct(method="post", url="/responses"), + retries_taken=0, + ) + with patch.object( model._client.responses, "create", - side_effect=EXAMPLE_STREAMED_RESPONSE_WITH_DELTA, + return_value=api_response, ) as _: with sentry_sdk.start_transaction( name="test_ttft", sampled=True From 0de90f0093901cae4fce8b6deb1fe78d8a762495 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 3 Mar 2026 11:28:10 +0100 Subject: [PATCH 2/4] return parsed AsyncAPIResponse from mocked responses.create --- tests/integrations/openai_agents/test_openai_agents.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 2276010d99..f3320d74fb 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1322,7 +1322,8 @@ async def test_hosted_mcp_tool_propagation_header_streamed( with patch.object( model._client.responses, "create", - return_value=api_response, + # based on https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1763 + return_value=await api_response.parse(), ) as create, mock.patch( "sentry_sdk.tracing_utils.Random.randrange", return_value=500000 ): @@ -2925,7 +2926,8 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent, async_iterat with patch.object( model._client.responses, "create", - return_value=api_response, + # based on https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1763 + return_value=await
api_response.parse(), ) as _: with sentry_sdk.start_transaction( name="test_ttft", sampled=True From 01022593d3262774e7fb0abbe9650bba4e145043 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 3 Mar 2026 14:45:10 +0100 Subject: [PATCH 3/4] patch further down call stack --- .../openai_agents/test_openai_agents.py | 63 ++++++++----------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index f3320d74fb..d5bd38c97c 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1230,7 +1230,6 @@ async def test_hosted_mcp_tool_propagation_header_streamed( ) client = AsyncOpenAI(api_key="z") - client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) model = OpenAIResponsesModel(model="gpt-4", openai_client=client) @@ -1245,8 +1244,14 @@ async def test_hosted_mcp_tool_propagation_header_streamed( release="d08ebdb9309e1b004c6f52202de58a09c2268e42", ) + request = httpx.Request( + "POST", + "/responses", + ) + response = httpx.Response( 200, + request=request, content=async_iterator( sse_chunks( [ @@ -1308,22 +1313,10 @@ async def test_hosted_mcp_tool_propagation_header_streamed( ), ) - # Emulate https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1751 - api_response = AsyncAPIResponse( - raw=response, - cast_to=Response, - client=client, - stream=True, - stream_cls=AsyncStream[ResponseStreamEvent], - options=FinalRequestOptions.construct(method="post", url="/responses"), - retries_taken=0, - ) - with patch.object( - model._client.responses, - "create", - # based on https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1763 - return_value=await api_response.parse(), + agent_with_tool.model._client._client, + "send", + 
return_value=response, ) as create, mock.patch( "sentry_sdk.tracing_utils.Random.randrange", return_value=500000 ): @@ -1341,13 +1334,17 @@ async def test_hosted_mcp_tool_propagation_header_streamed( async for event in result.stream_events(): pass - ai_client_span = transaction._span_recorder.spans[-1] + ai_client_span = next( + span + for span in transaction._span_recorder.spans + if span.op == "gen_ai.chat" + ) args, kwargs = create.call_args - assert "tools" in kwargs - assert len(kwargs["tools"]) == 1 - hosted_mcp_tool = kwargs["tools"][0] + request = args[0] + body = json.loads(request.content.decode("utf-8")) + hosted_mcp_tool = body["tools"][0] assert hosted_mcp_tool["headers"][ "sentry-trace" @@ -2818,7 +2815,6 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent, async_iterat should NOT trigger TTFT. """ client = AsyncOpenAI(api_key="z") - client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) model = OpenAIResponsesModel(model="gpt-4", openai_client=client) @@ -2831,8 +2827,14 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent, async_iterat traces_sample_rate=1.0, ) + request = httpx.Request( + "POST", + "/responses", + ) + response = httpx.Response( 200, + request=request, content=async_iterator( sse_chunks( [ @@ -2912,22 +2914,11 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent, async_iterat ), ) - # Emulate https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1751 - api_response = AsyncAPIResponse( - raw=response, - cast_to=Response, - client=client, - stream=True, - stream_cls=AsyncStream[ResponseStreamEvent], - options=FinalRequestOptions.construct(method="post", url="/responses"), - retries_taken=0, - ) - + # Patching https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1604 with patch.object( - model._client.responses, - "create", - # based on 
https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1763 - return_value=await api_response.parse(), + agent_with_tool.model._client._client, + "send", + return_value=response, ) as _: with sentry_sdk.start_transaction( name="test_ttft", sampled=True From a920f4e51d735263e2a800c82ca3e41c94611aea Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 3 Mar 2026 15:00:58 +0100 Subject: [PATCH 4/4] add comment referencing the patched call site --- tests/integrations/openai_agents/test_openai_agents.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index d5bd38c97c..b9c5910501 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1313,6 +1313,7 @@ async def test_hosted_mcp_tool_propagation_header_streamed( ), ) + # Patching https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1604 with patch.object( agent_with_tool.model._client._client, "send",