diff --git a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py index 5cda4991c8..8d731744b9 100644 --- a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py +++ b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py @@ -716,12 +716,49 @@ def _prepare_message_for_anthropic(self, message: Message) -> dict[str, Any]: "input": content.parse_arguments(), }) case "function_result": - a_content.append({ - "type": "tool_result", - "tool_use_id": content.call_id, - "content": content.result if content.result is not None else "", - "is_error": content.exception is not None, - }) + if content.items: + # Rich content: build array with text + image blocks + tool_content: list[dict[str, Any]] = [] + if content.result: + tool_content.append({"type": "text", "text": content.result}) + for item in content.items: + if item.type == "data" and item.has_top_level_media_type("image"): + tool_content.append({ + "type": "image", + "source": { + "data": _get_data_bytes_as_str(item), # type: ignore[attr-defined] + "media_type": item.media_type, + "type": "base64", + }, + }) + elif item.type == "uri" and item.has_top_level_media_type("image"): + tool_content.append({ + "type": "image", + "source": {"type": "url", "url": item.uri}, + }) + else: + logger.debug( + "Ignoring unsupported rich content media type in tool result: %s", + item.media_type, + ) + tool_result_content = ( + tool_content + if tool_content + else (content.result if content.result is not None else "") + ) + a_content.append({ + "type": "tool_result", + "tool_use_id": content.call_id, + "content": tool_result_content, + "is_error": content.exception is not None, + }) + else: + a_content.append({ + "type": "tool_result", + "tool_use_id": content.call_id, + "content": content.result if content.result is not None else "", + "is_error": content.exception is not None, + }) case "mcp_server_tool_call": mcp_call: dict[str, Any] = { "type": "mcp_tool_use", diff --git a/python/packages/anthropic/tests/test_anthropic_client.py b/python/packages/anthropic/tests/test_anthropic_client.py index 4f86c3eac2..22d6cd4ed0 100644 --- a/python/packages/anthropic/tests/test_anthropic_client.py +++ b/python/packages/anthropic/tests/test_anthropic_client.py @@ -96,7 +96,9 @@ def test_anthropic_settings_init_with_explicit_values() -> None: @pytest.mark.parametrize("exclude_list", [["ANTHROPIC_API_KEY"]], indirect=True) -def test_anthropic_settings_missing_api_key(anthropic_unit_test_env: dict[str, str]) -> None: +def test_anthropic_settings_missing_api_key( + anthropic_unit_test_env: dict[str, str], +) -> None: """Test AnthropicSettings when API key is missing.""" settings = load_settings(AnthropicSettings, env_prefix="ANTHROPIC_") assert settings["api_key"] is None @@ -115,7 +117,9 @@ def test_anthropic_client_init_with_client(mock_anthropic_client: MagicMock) -> assert isinstance(client, SupportsChatGetResponse) -def test_anthropic_client_init_auto_create_client(anthropic_unit_test_env: dict[str, str]) -> None: +def test_anthropic_client_init_auto_create_client( + anthropic_unit_test_env: dict[str, str], +) -> None: """Test AnthropicClient initialization with auto-created anthropic_client.""" client = AnthropicClient( api_key=anthropic_unit_test_env["ANTHROPIC_API_KEY"], @@ -129,7 +133,10 @@ def test_anthropic_client_init_auto_create_client(anthropic_unit_test_env: dict[ def test_anthropic_client_init_missing_api_key() -> None: """Test AnthropicClient initialization when API key is missing.""" with patch("agent_framework_anthropic._chat_client.load_settings") as mock_load: - mock_load.return_value = {"api_key": None, "chat_model_id": "claude-3-5-sonnet-20241022"} + mock_load.return_value = { + "api_key": None, + "chat_model_id": "claude-3-5-sonnet-20241022", + } with pytest.raises(ValueError, match="Anthropic API key is required"): AnthropicClient() @@ -157,7 +164,9 @@ def test_prepare_message_for_anthropic_text(mock_anthropic_client: MagicMock) -> assert result["content"][0]["text"] == "Hello, world!" -def test_prepare_message_for_anthropic_function_call(mock_anthropic_client: MagicMock) -> None: +def test_prepare_message_for_anthropic_function_call( + mock_anthropic_client: MagicMock, +) -> None: """Test converting function call message to Anthropic format.""" client = create_test_anthropic_client(mock_anthropic_client) message = Message( @@ -181,7 +190,9 @@ def test_prepare_message_for_anthropic_function_call(mock_anthropic_client: Magi assert result["content"][0]["input"] == {"location": "San Francisco"} -def test_prepare_message_for_anthropic_function_result(mock_anthropic_client: MagicMock) -> None: +def test_prepare_message_for_anthropic_function_result( + mock_anthropic_client: MagicMock, +) -> None: """Test converting function result message to Anthropic format.""" client = create_test_anthropic_client(mock_anthropic_client) message = Message( @@ -206,7 +217,115 @@ def test_prepare_message_for_anthropic_function_result(mock_anthropic_client: Ma assert result["content"][0]["is_error"] is False -def test_prepare_message_for_anthropic_text_reasoning(mock_anthropic_client: MagicMock) -> None: +def test_prepare_message_for_anthropic_function_result_with_data_image( + mock_anthropic_client: MagicMock, +) -> None: + """Test function result with a data-type image item produces a base64 image block.""" + client = create_test_anthropic_client(mock_anthropic_client) + image_content = Content.from_data(data=b"fake_image_bytes", media_type="image/png") + message = Message( + role="tool", + contents=[ + Content.from_function_result( + call_id="call_img", + result=[Content.from_text("Here is the image"), image_content], + ) + ], + ) + + result = client._prepare_message_for_anthropic(message) + + assert result["role"] == "user" + tool_result = result["content"][0] + assert tool_result["type"] == "tool_result" + assert tool_result["tool_use_id"] == "call_img" + content = tool_result["content"] + assert len(content) == 2 + assert content[0]["type"] == "text" + assert content[0]["text"] == "Here is the image" + assert content[1]["type"] == "image" + assert content[1]["source"]["type"] == "base64" + assert content[1]["source"]["media_type"] == "image/png" + + +def test_prepare_message_for_anthropic_function_result_with_uri_image( + mock_anthropic_client: MagicMock, +) -> None: + """Test function result with a uri-type image item produces a URL image block.""" + client = create_test_anthropic_client(mock_anthropic_client) + uri_content = Content.from_uri(uri="https://example.com/image.png", media_type="image/png") + message = Message( + role="tool", + contents=[ + Content.from_function_result( + call_id="call_uri", + result=[uri_content], + ) + ], + ) + + result = client._prepare_message_for_anthropic(message) + + tool_result = result["content"][0] + content = tool_result["content"] + assert len(content) == 1 + assert content[0]["type"] == "image" + assert content[0]["source"]["type"] == "url" + assert content[0]["source"]["url"] == "https://example.com/image.png" + + +def test_prepare_message_for_anthropic_function_result_with_unsupported_media( + mock_anthropic_client: MagicMock, +) -> None: + """Test function result with unsupported media type skips the item.""" + client = create_test_anthropic_client(mock_anthropic_client) + audio_content = Content.from_data(data=b"audio_bytes", media_type="audio/wav") + message = Message( + role="tool", + contents=[ + Content.from_function_result( + call_id="call_audio", + result=[Content.from_text("Some text"), audio_content], + ) + ], + ) + + result = client._prepare_message_for_anthropic(message) + + tool_result = result["content"][0] + content = tool_result["content"] + # Audio should be skipped, only text remains + assert len(content) == 1 + assert content[0]["type"] == "text" + assert content[0]["text"] == "Some text" + + +def test_prepare_message_for_anthropic_function_result_all_unsupported_media( + mock_anthropic_client: MagicMock, +) -> None: + """Test function result where all items are unsupported falls back to string result.""" + client = create_test_anthropic_client(mock_anthropic_client) + audio_content = Content.from_data(data=b"audio_bytes", media_type="audio/wav") + message = Message( + role="tool", + contents=[ + Content.from_function_result( + call_id="call_all_unsupported", + result=[audio_content], + ) + ], + ) + + result = client._prepare_message_for_anthropic(message) + + tool_result = result["content"][0] + # All items unsupported → tool_content is empty → falls back to string result + assert tool_result["content"] == "" + + +def test_prepare_message_for_anthropic_text_reasoning( + mock_anthropic_client: MagicMock, +) -> None: """Test converting text reasoning message to Anthropic format.""" client = create_test_anthropic_client(mock_anthropic_client) message = Message( @@ -223,7 +342,9 @@ def test_prepare_message_for_anthropic_text_reasoning(mock_anthropic_client: Mag assert "signature" not in result["content"][0] -def test_prepare_message_for_anthropic_text_reasoning_with_signature(mock_anthropic_client: MagicMock) -> None: +def test_prepare_message_for_anthropic_text_reasoning_with_signature( + mock_anthropic_client: MagicMock, +) -> None: """Test converting text reasoning message with signature to Anthropic format.""" client = create_test_anthropic_client(mock_anthropic_client) message = Message( @@ -240,7 +361,9 @@ def test_prepare_message_for_anthropic_text_reasoning_with_signature(mock_anthro assert result["content"][0]["signature"] == "sig_abc123" -def test_prepare_message_for_anthropic_mcp_server_tool_call(mock_anthropic_client: MagicMock) -> None: +def test_prepare_message_for_anthropic_mcp_server_tool_call( + mock_anthropic_client: MagicMock, +) -> None: """Test converting MCP server tool call message to Anthropic format.""" client = create_test_anthropic_client(mock_anthropic_client) message = Message( @@ -266,7 +389,9 @@ def test_prepare_message_for_anthropic_mcp_server_tool_call(mock_anthropic_clien assert result["content"][0]["input"] == {"query": "Azure Functions"} -def test_prepare_message_for_anthropic_mcp_server_tool_call_no_server_name(mock_anthropic_client: MagicMock) -> None: +def test_prepare_message_for_anthropic_mcp_server_tool_call_no_server_name( + mock_anthropic_client: MagicMock, +) -> None: """Test converting MCP server tool call with no server name defaults to empty string.""" client = create_test_anthropic_client(mock_anthropic_client) message = Message( @@ -291,7 +416,9 @@ def test_prepare_message_for_anthropic_mcp_server_tool_call_no_server_name(mock_ assert result["content"][0]["input"] == {} -def test_prepare_message_for_anthropic_mcp_server_tool_result(mock_anthropic_client: MagicMock) -> None: +def test_prepare_message_for_anthropic_mcp_server_tool_result( + mock_anthropic_client: MagicMock, +) -> None: """Test converting MCP server tool result message to Anthropic format.""" client = create_test_anthropic_client(mock_anthropic_client) message = Message( @@ -313,7 +440,9 @@ def test_prepare_message_for_anthropic_mcp_server_tool_result(mock_anthropic_cli assert result["content"][0]["content"] == "Found 3 results for Azure Functions." -def test_prepare_message_for_anthropic_mcp_server_tool_result_none_output(mock_anthropic_client: MagicMock) -> None: +def test_prepare_message_for_anthropic_mcp_server_tool_result_none_output( + mock_anthropic_client: MagicMock, +) -> None: """Test converting MCP server tool result with None output defaults to empty string.""" client = create_test_anthropic_client(mock_anthropic_client) message = Message( @@ -335,7 +464,9 @@ def test_prepare_message_for_anthropic_mcp_server_tool_result_none_output(mock_a assert result["content"][0]["content"] == "" -def test_prepare_messages_for_anthropic_with_system(mock_anthropic_client: MagicMock) -> None: +def test_prepare_messages_for_anthropic_with_system( + mock_anthropic_client: MagicMock, +) -> None: """Test converting messages list with system message.""" client = create_test_anthropic_client(mock_anthropic_client) messages = [ @@ -351,7 +482,9 @@ def test_prepare_messages_for_anthropic_with_system(mock_anthropic_client: Magic assert result[0]["content"][0]["text"] == "Hello!" -def test_prepare_messages_for_anthropic_without_system(mock_anthropic_client: MagicMock) -> None: +def test_prepare_messages_for_anthropic_without_system( + mock_anthropic_client: MagicMock, +) -> None: """Test converting messages list without system message.""" client = create_test_anthropic_client(mock_anthropic_client) messages = [ @@ -374,7 +507,9 @@ def test_prepare_tools_for_anthropic_tool(mock_anthropic_client: MagicMock) -> N client = create_test_anthropic_client(mock_anthropic_client) @tool(approval_mode="never_require") - def get_weather(location: Annotated[str, Field(description="Location to get weather for")]) -> str: + def get_weather( + location: Annotated[str, Field(description="Location to get weather for")], + ) -> str: """Get weather for a location.""" return f"Weather for {location}" @@ -389,7 +524,9 @@ def get_weather(location: Annotated[str, Field(description="Location to get weat assert "Get weather for a location" in result["tools"][0]["description"] -def test_prepare_tools_for_anthropic_web_search(mock_anthropic_client: MagicMock) -> None: +def test_prepare_tools_for_anthropic_web_search( + mock_anthropic_client: MagicMock, +) -> None: """Test converting web_search dict tool to Anthropic format.""" client = create_test_anthropic_client(mock_anthropic_client) chat_options = ChatOptions(tools=[client.get_web_search_tool()]) @@ -403,7 +540,9 @@ def test_prepare_tools_for_anthropic_web_search(mock_anthropic_client: MagicMock assert result["tools"][0]["name"] == "web_search" -def test_prepare_tools_for_anthropic_code_interpreter(mock_anthropic_client: MagicMock) -> None: +def test_prepare_tools_for_anthropic_code_interpreter( + mock_anthropic_client: MagicMock, +) -> None: """Test converting code_interpreter dict tool to Anthropic format.""" client = create_test_anthropic_client(mock_anthropic_client) chat_options = ChatOptions(tools=[client.get_code_interpreter_tool()]) @@ -421,7 +560,9 @@ def _dummy_bash(command: str) -> str: return f"executed: {command}" -def test_prepare_tools_for_anthropic_shell_tool(mock_anthropic_client: MagicMock) -> None: +def test_prepare_tools_for_anthropic_shell_tool( + mock_anthropic_client: MagicMock, +) -> None: """Test converting tool-decorated FunctionTool to Anthropic bash format.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -440,7 +581,9 @@ def run_bash(command: str) -> str: assert result["tools"][0]["name"] == "bash" -def test_prepare_tools_for_anthropic_shell_tool_custom_type(mock_anthropic_client: MagicMock) -> None: +def test_prepare_tools_for_anthropic_shell_tool_custom_type( + mock_anthropic_client: MagicMock, +) -> None: """Test shell tool with custom type via additional_properties.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -458,7 +601,9 @@ def run_bash(command: str) -> str: assert result["tools"][0]["name"] == "bash" -def test_prepare_tools_for_anthropic_shell_tool_does_not_mutate_name(mock_anthropic_client: MagicMock) -> None: +def test_prepare_tools_for_anthropic_shell_tool_does_not_mutate_name( + mock_anthropic_client: MagicMock, +) -> None: """Shell tool API name should be 'bash' without mutating local FunctionTool name.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -478,7 +623,9 @@ def run_local_shell(command: str) -> str: assert run_local_shell.name == "run_local_shell" -def test_get_shell_tool_reuses_function_tool_instance(mock_anthropic_client: MagicMock) -> None: +def test_get_shell_tool_reuses_function_tool_instance( + mock_anthropic_client: MagicMock, +) -> None: """Passing a FunctionTool should update and return the same tool instance.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -513,7 +660,9 @@ def test_prepare_tools_for_anthropic_mcp_tool(mock_anthropic_client: MagicMock) assert result["mcp_servers"][0]["url"] == "https://example.com/mcp" -def test_prepare_tools_for_anthropic_mcp_with_auth(mock_anthropic_client: MagicMock) -> None: +def test_prepare_tools_for_anthropic_mcp_with_auth( + mock_anthropic_client: MagicMock, +) -> None: """Test converting MCP dict tool with authorization token.""" client = create_test_anthropic_client(mock_anthropic_client) # Use the static method with authorization_token @@ -533,7 +682,9 @@ def test_prepare_tools_for_anthropic_mcp_with_auth(mock_anthropic_client: MagicM assert result["mcp_servers"][0]["authorization_token"] == "Bearer token123" -def test_prepare_tools_for_anthropic_dict_tool(mock_anthropic_client: MagicMock) -> None: +def test_prepare_tools_for_anthropic_dict_tool( + mock_anthropic_client: MagicMock, +) -> None: """Test converting dict tool to Anthropic format.""" client = create_test_anthropic_client(mock_anthropic_client) chat_options = ChatOptions(tools=[{"type": "custom", "name": "custom_tool", "description": "A custom tool"}]) @@ -574,7 +725,9 @@ async def test_prepare_options_basic(mock_anthropic_client: MagicMock) -> None: assert "messages" in run_options -async def test_prepare_options_with_system_message(mock_anthropic_client: MagicMock) -> None: +async def test_prepare_options_with_system_message( + mock_anthropic_client: MagicMock, +) -> None: """Test _prepare_options with system message.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -590,7 +743,9 @@ async def test_prepare_options_with_system_message(mock_anthropic_client: MagicM assert len(run_options["messages"]) == 1 # System message not in messages list -async def test_anthropic_shell_tool_is_invoked_in_function_loop(mock_anthropic_client: MagicMock) -> None: +async def test_anthropic_shell_tool_is_invoked_in_function_loop( + mock_anthropic_client: MagicMock, +) -> None: """Function invocation loop should execute shell tool when Anthropic returns bash tool_use.""" client = create_test_anthropic_client(mock_anthropic_client) executed_commands: list[str] = [] @@ -625,7 +780,10 @@ def run_local_shell(command: str) -> str: second_message.model = "claude-test" second_message.stop_reason = "end_turn" - mock_anthropic_client.beta.messages.create.side_effect = [first_message, second_message] + mock_anthropic_client.beta.messages.create.side_effect = [ + first_message, + second_message, + ] await client.get_response( messages=[Message(role="user", text="Run pwd")], @@ -646,7 +804,9 @@ def run_local_shell(command: str) -> str: assert "executed: pwd" in tool_results[0]["content"] -async def test_prepare_options_with_tool_choice_auto(mock_anthropic_client: MagicMock) -> None: +async def test_prepare_options_with_tool_choice_auto( + mock_anthropic_client: MagicMock, +) -> None: """Test _prepare_options with auto tool choice.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -660,7 +820,9 @@ async def test_prepare_options_with_tool_choice_auto(mock_anthropic_client: Magi assert "allow_multiple_tool_calls" not in run_options -async def test_prepare_options_with_tool_choice_required(mock_anthropic_client: MagicMock) -> None: +async def test_prepare_options_with_tool_choice_required( + mock_anthropic_client: MagicMock, +) -> None: """Test _prepare_options with required tool choice.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -674,7 +836,9 @@ async def test_prepare_options_with_tool_choice_required(mock_anthropic_client: assert run_options["tool_choice"]["name"] == "get_weather" -async def test_prepare_options_with_tool_choice_none(mock_anthropic_client: MagicMock) -> None: +async def test_prepare_options_with_tool_choice_none( + mock_anthropic_client: MagicMock, +) -> None: """Test _prepare_options with none tool choice.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -704,7 +868,9 @@ def get_weather(location: str) -> str: assert len(run_options["tools"]) == 1 -async def test_prepare_options_with_stop_sequences(mock_anthropic_client: MagicMock) -> None: +async def test_prepare_options_with_stop_sequences( + mock_anthropic_client: MagicMock, +) -> None: """Test _prepare_options with stop sequences.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -728,7 +894,9 @@ async def test_prepare_options_with_top_p(mock_anthropic_client: MagicMock) -> N assert run_options["top_p"] == 0.9 -async def test_prepare_options_excludes_stream_option(mock_anthropic_client: MagicMock) -> None: +async def test_prepare_options_excludes_stream_option( + mock_anthropic_client: MagicMock, +) -> None: """Test _prepare_options excludes stream when stream is provided in options.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -740,7 +908,9 @@ async def test_prepare_options_excludes_stream_option(mock_anthropic_client: Mag assert "stream" not in run_options -async def test_prepare_options_filters_internal_kwargs(mock_anthropic_client: MagicMock) -> None: +async def test_prepare_options_filters_internal_kwargs( + mock_anthropic_client: MagicMock, +) -> None: """Test _prepare_options filters internal framework kwargs. Internal kwargs like _function_middleware_pipeline, thread, and middleware @@ -859,7 +1029,9 @@ def test_parse_contents_from_anthropic_text(mock_anthropic_client: MagicMock) -> assert result[0].text == "Hello!" -def test_parse_contents_from_anthropic_tool_use(mock_anthropic_client: MagicMock) -> None: +def test_parse_contents_from_anthropic_tool_use( + mock_anthropic_client: MagicMock, +) -> None: """Test _parse_contents_from_anthropic with tool use.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -879,7 +1051,9 @@ def test_parse_contents_from_anthropic_tool_use(mock_anthropic_client: MagicMock assert result[0].name == "get_weather" -def test_parse_contents_from_anthropic_input_json_delta_no_duplicate_name(mock_anthropic_client: MagicMock) -> None: +def test_parse_contents_from_anthropic_input_json_delta_no_duplicate_name( + mock_anthropic_client: MagicMock, +) -> None: """Test that input_json_delta events have empty name to prevent duplicate ToolCallStartEvents. When streaming tool calls, the initial tool_use event provides the name, @@ -969,7 +1143,9 @@ async def test_inner_get_response(mock_anthropic_client: MagicMock) -> None: assert len(response.messages) == 1 -async def test_inner_get_response_ignores_options_stream_non_streaming(mock_anthropic_client: MagicMock) -> None: +async def test_inner_get_response_ignores_options_stream_non_streaming( + mock_anthropic_client: MagicMock, +) -> None: """Test stream option in options does not conflict in non-streaming mode.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -1019,7 +1195,9 @@ async def mock_stream(): assert isinstance(chunks, list) -async def test_inner_get_response_ignores_options_stream_streaming(mock_anthropic_client: MagicMock) -> None: +async def test_inner_get_response_ignores_options_stream_streaming( + mock_anthropic_client: MagicMock, +) -> None: """Test stream option in options does not conflict in streaming mode.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -1368,7 +1546,9 @@ def test_prepare_response_format_openai_style(mock_anthropic_client: MagicMock) assert result["schema"]["properties"]["name"]["type"] == "string" -def test_prepare_response_format_direct_schema(mock_anthropic_client: MagicMock) -> None: +def test_prepare_response_format_direct_schema( + mock_anthropic_client: MagicMock, +) -> None: """Test response_format with direct schema key.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -1402,7 +1582,9 @@ def test_prepare_response_format_raw_schema(mock_anthropic_client: MagicMock) -> assert result["schema"]["properties"]["count"]["type"] == "integer" -def test_prepare_response_format_pydantic_model(mock_anthropic_client: MagicMock) -> None: +def test_prepare_response_format_pydantic_model( + mock_anthropic_client: MagicMock, +) -> None: """Test response_format with Pydantic BaseModel.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -1475,7 +1657,9 @@ def test_prepare_message_with_unsupported_data_type( assert len(result["content"]) == 0 -def test_prepare_message_with_unsupported_uri_type(mock_anthropic_client: MagicMock) -> None: +def test_prepare_message_with_unsupported_uri_type( + mock_anthropic_client: MagicMock, +) -> None: """Test preparing messages with unsupported URI content type.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -1612,7 +1796,9 @@ def test_parse_contents_mcp_tool_result_object_content( assert result[0].type == "mcp_server_tool_result" -def test_parse_contents_web_search_tool_result(mock_anthropic_client: MagicMock) -> None: +def test_parse_contents_web_search_tool_result( + mock_anthropic_client: MagicMock, +) -> None: """Test parsing web search tool result.""" client = create_test_anthropic_client(mock_anthropic_client) client._last_call_id_name = ("call_789", "web_search") @@ -1742,7 +1928,9 @@ def test_func() -> str: assert result["tool_choice"]["type"] == "any" -def test_tool_choice_required_specific_function(mock_anthropic_client: MagicMock) -> None: +def test_tool_choice_required_specific_function( + mock_anthropic_client: MagicMock, +) -> None: """Test tool_choice required mode with specific function.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -1782,7 +1970,9 @@ def test_func() -> str: assert result["tool_choice"]["type"] == "none" -def test_tool_choice_required_allows_parallel_use(mock_anthropic_client: MagicMock) -> None: +def test_tool_choice_required_allows_parallel_use( + mock_anthropic_client: MagicMock, +) -> None: """Test tool choice required mode with allow_multiple=True.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -1902,7 +2092,9 @@ def test_parse_usage_with_cache_tokens(mock_anthropic_client: MagicMock) -> None # Code Execution Result Tests -def test_parse_code_execution_result_with_error(mock_anthropic_client: MagicMock) -> None: +def test_parse_code_execution_result_with_error( + mock_anthropic_client: MagicMock, +) -> None: """Test parsing code execution result with error.""" client = create_test_anthropic_client(mock_anthropic_client) client._last_call_id_name = ("call_code1", "code_execution_tool") @@ -1925,7 +2117,9 @@ def test_parse_code_execution_result_with_error(mock_anthropic_client: MagicMock assert result[0].type == "code_interpreter_tool_result" -def test_parse_code_execution_result_with_stdout(mock_anthropic_client: MagicMock) -> None: +def test_parse_code_execution_result_with_stdout( + mock_anthropic_client: MagicMock, +) -> None: """Test parsing code execution result with stdout.""" client = create_test_anthropic_client(mock_anthropic_client) client._last_call_id_name = ("call_code2", "code_execution_tool") @@ -1947,7 +2141,9 @@ def test_parse_code_execution_result_with_stdout(mock_anthropic_client: MagicMoc assert result[0].type == "code_interpreter_tool_result" -def test_parse_code_execution_result_with_stderr(mock_anthropic_client: MagicMock) -> None: +def test_parse_code_execution_result_with_stderr( + mock_anthropic_client: MagicMock, +) -> None: """Test parsing code execution result with stderr.""" client = create_test_anthropic_client(mock_anthropic_client) client._last_call_id_name = ("call_code3", "code_execution_tool") @@ -1969,7 +2165,9 @@ def test_parse_code_execution_result_with_stderr(mock_anthropic_client: MagicMoc assert result[0].type == "code_interpreter_tool_result" -def test_parse_code_execution_result_with_files(mock_anthropic_client: MagicMock) -> None: +def test_parse_code_execution_result_with_files( + mock_anthropic_client: MagicMock, +) -> None: """Test parsing code execution result with file outputs.""" client = create_test_anthropic_client(mock_anthropic_client) client._last_call_id_name = ("call_code4", "code_execution_tool") @@ -1998,8 +2196,10 @@ def test_parse_code_execution_result_with_files(mock_anthropic_client: MagicMock # Bash Execution Result Tests -def test_parse_bash_execution_result_with_stdout(mock_anthropic_client: MagicMock) -> None: - """Test parsing bash execution result with stdout produces shell_tool_result.""" +def test_parse_bash_execution_result_with_stdout( + mock_anthropic_client: MagicMock, +) -> None: + """Test parsing bash execution result with stdout.""" client = create_test_anthropic_client(mock_anthropic_client) client._last_call_id_name = ("call_bash2", "bash_code_execution") @@ -2028,8 +2228,10 @@ def test_parse_bash_execution_result_with_stdout(mock_anthropic_client: MagicMoc assert result[0].outputs[0].timed_out is False -def test_parse_bash_execution_result_with_stderr(mock_anthropic_client: MagicMock) -> None: - """Test parsing bash execution result with stderr produces shell_tool_result.""" +def test_parse_bash_execution_result_with_stderr( + mock_anthropic_client: MagicMock, +) -> None: + """Test parsing bash execution result with stderr.""" client = create_test_anthropic_client(mock_anthropic_client) client._last_call_id_name = ("call_bash3", "bash_code_execution") @@ -2056,7 +2258,9 @@ def test_parse_bash_execution_result_with_stderr(mock_anthropic_client: MagicMoc assert result[0].outputs[0].exit_code == 1 -def test_parse_bash_execution_result_with_error(mock_anthropic_client: MagicMock) -> None: +def test_parse_bash_execution_result_with_error( + mock_anthropic_client: MagicMock, +) -> None: """Test parsing bash execution error produces shell_tool_result with error info.""" from anthropic.types.beta.beta_bash_code_execution_tool_result_error import ( BetaBashCodeExecutionToolResultError, @@ -2277,7 +2481,9 @@ def test_parse_citations_page_location(mock_anthropic_client: MagicMock) -> None assert len(result) > 0 -def test_parse_citations_content_block_location(mock_anthropic_client: MagicMock) -> None: +def test_parse_citations_content_block_location( + mock_anthropic_client: MagicMock, +) -> None: """Test parsing citations with content_block_location.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -2322,7 +2528,9 @@ def test_parse_citations_web_search_location(mock_anthropic_client: MagicMock) - assert len(result) > 0 -def test_parse_citations_search_result_location(mock_anthropic_client: MagicMock) -> None: +def test_parse_citations_search_result_location( + mock_anthropic_client: MagicMock, +) -> None: """Test parsing citations with search_result_location.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -2344,3 +2552,32 @@ def test_parse_citations_search_result_location(mock_anthropic_client: MagicMock result = client._parse_citations_from_anthropic(mock_block) assert len(result) > 0 + + +@pytest.mark.flaky +@pytest.mark.integration +@skip_if_anthropic_integration_tests_disabled +async def test_anthropic_client_integration_tool_rich_content_image() -> None: + """Integration test: a tool returns an image and the model describes it.""" + image_path = Path(__file__).parent / "assets" / "sample_image.jpg" + image_bytes = image_path.read_bytes() + + @tool(approval_mode="never_require") + def get_test_image() -> Content: + """Return a test image for analysis.""" + return Content.from_data(data=image_bytes, media_type="image/jpeg") + + client = AnthropicClient() + client.function_invocation_configuration["max_iterations"] = 2 + + messages = [Message(role="user", text="Call the get_test_image tool and describe what you see.")] + + response = await client.get_response( + messages=messages, + options={"tools": [get_test_image], "tool_choice": "auto", "max_tokens": 200}, + ) + + assert response is not None + assert response.text is not None + assert len(response.text) > 0 + assert "house" in response.text.lower(), f"Model did not describe the house image. Response: {response.text}" diff --git a/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py b/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py index 4c0e3a56e7..dbab99c3ef 100644 --- a/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py +++ b/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py @@ -1402,6 +1402,11 @@ def _prepare_tool_outputs_for_azure_ai( call_id = run_and_call_ids[1] if content.type == "function_result": + if content.items: + logger.warning( + "Azure AI Agents does not support rich content (images, audio) in tool results. " + "Rich content items will be omitted." + ) if tool_outputs is None: tool_outputs = [] tool_outputs.append( diff --git a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py index 5bc9735846..61f44e26a6 100644 --- a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py +++ b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py @@ -518,10 +518,16 @@ def _convert_content_to_bedrock_block(self, content: Content) -> dict[str, Any] } } case "function_result": + tool_result_blocks = self._convert_tool_result_to_blocks(content.result) + if content.items: + logger.warning( + "Bedrock does not support rich content (images, audio) in tool results. " + "Rich content items will be omitted." + ) tool_result_block = { "toolResult": { "toolUseId": content.call_id, - "content": self._convert_tool_result_to_blocks(content.result), + "content": tool_result_blocks, "status": "error" if content.exception else "success", } } @@ -543,6 +549,8 @@ def _convert_content_to_bedrock_block(self, content: Content) -> dict[str, Any] def _convert_tool_result_to_blocks(self, result: Any) -> list[dict[str, Any]]: prepared_result = result if isinstance(result, str) else FunctionTool.parse_result(result) + if not isinstance(prepared_result, str): + return [{"text": str(prepared_result)}] try: parsed_result: object = json.loads(prepared_result) except json.JSONDecodeError: diff --git a/python/packages/core/agent_framework/_mcp.py b/python/packages/core/agent_framework/_mcp.py index b07a872204..e35e1d6fb3 100644 --- a/python/packages/core/agent_framework/_mcp.py +++ b/python/packages/core/agent_framework/_mcp.py @@ -142,69 +142,86 @@ def _parse_message_from_mcp( def _parse_tool_result_from_mcp( mcp_type: types.CallToolResult, -) -> str: - """Parse an MCP CallToolResult directly into a string representation. +) -> str | list[Content]: + """Parse an MCP CallToolResult into a string or rich content list. - Converts each content item in the MCP result to its string form and combines them. - This skips the intermediate Content object step for tool results. + Converts each content item in the MCP result to its appropriate form. + Text-only results are returned as strings. When the result contains + image or audio content, returns a list of Content objects so the + framework can forward the rich media to the model. Args: mcp_type: The MCP CallToolResult object to convert. Returns: - A string representation of the tool result — either plain text or serialized JSON. + A string for text-only results, or a list of Content for rich media results. """ import json - parts: list[str] = [] + result: list[Content] = [] + has_rich = False for item in mcp_type.content: match item: case types.TextContent(): - parts.append(item.text) - case types.ImageContent() | types.AudioContent(): - parts.append( - json.dumps( - { - "type": "image" if isinstance(item, types.ImageContent) else "audio", - "data": item.data, - "mimeType": item.mimeType, - }, - default=str, + result.append(Content.from_text(item.text)) + case types.ImageContent(): + has_rich = True + result.append( + Content.from_uri( + uri=f"data:{item.mimeType};base64,{item.data}", + media_type=item.mimeType, + ) + ) + case types.AudioContent(): + has_rich = True + result.append( + Content.from_uri( + uri=f"data:{item.mimeType};base64,{item.data}", + media_type=item.mimeType, ) ) case types.ResourceLink(): - parts.append( - json.dumps( - { - "type": "resource_link", - "uri": str(item.uri), - "mimeType": item.mimeType, - }, - default=str, + result.append( + Content.from_text( + json.dumps( + { + "type": "resource_link", + "uri": str(item.uri), + "mimeType": item.mimeType, + }, + default=str, + ) ) ) case types.EmbeddedResource(): match item.resource: case types.TextResourceContents(): - parts.append(item.resource.text) + result.append(Content.from_text(item.resource.text)) case types.BlobResourceContents(): - parts.append( - json.dumps( - { - "type": "blob", - "data": item.resource.blob, - "mimeType": item.resource.mimeType, - }, - default=str, + result.append( + Content.from_text( + json.dumps( + { + "type": "blob", + "data": item.resource.blob, + "mimeType": item.resource.mimeType, + }, + default=str, + ) ) ) case _: - parts.append(str(item)) - if not parts: + result.append(Content.from_text(str(item))) + + if has_rich: + return result + + text_parts = [c.text for c in result if c.text] + if not text_parts: return "" - if len(parts) == 1: - return parts[0] - return json.dumps(parts, default=str) + if len(text_parts) == 1: + return text_parts[0] + return json.dumps(text_parts, default=str) def _parse_content_from_mcp( @@ -425,7 +442,7 @@ def __init__( approval_mode: (Literal["always_require", "never_require"] | MCPSpecificApproval | None) = None, allowed_tools: Collection[str] | None = None, load_tools: bool = True, - parse_tool_results: Callable[[types.CallToolResult], str] | None = None, + parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None, load_prompts: bool = True, parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None, session: ClientSession | None = None, @@ -850,7 +867,7 @@ async def _ensure_connected(self) -> None: inner_exception=ex, ) from ex - async def call_tool(self, tool_name: str, **kwargs: Any) -> str: + async def call_tool(self, tool_name: str, **kwargs: Any) -> str | list[Content]: """Call a tool with the given arguments. Args: @@ -860,7 +877,7 @@ async def call_tool(self, tool_name: str, **kwargs: Any) -> str: kwargs: Arguments to pass to the tool. Returns: - A string representation of the tool result — either plain text or serialized JSON. + A string for text-only results, or a list of Content for rich media results. Raises: ToolExecutionException: If the MCP server is not connected, tools are not loaded, @@ -902,7 +919,8 @@ async def call_tool(self, tool_name: str, **kwargs: Any) -> str: try: result = await self.session.call_tool(tool_name, arguments=filtered_kwargs, meta=otel_meta) # type: ignore if result.isError: - raise ToolExecutionException(parser(result)) + parsed = parser(result) + raise ToolExecutionException(str(parsed) if not isinstance(parsed, str) else parsed) return parser(result) except ToolExecutionException: raise @@ -1057,7 +1075,7 @@ def __init__( command: str, *, load_tools: bool = True, - parse_tool_results: Callable[[types.CallToolResult], str] | None = None, + parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None, load_prompts: bool = True, parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None, request_timeout: int | None = None, @@ -1182,7 +1200,7 @@ def __init__( url: str, *, load_tools: bool = True, - parse_tool_results: Callable[[types.CallToolResult], str] | None = None, + parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None, load_prompts: bool = True, parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None, request_timeout: int | None = None, @@ -1301,7 +1319,7 @@ def __init__( url: str, *, load_tools: bool = True, - parse_tool_results: Callable[[types.CallToolResult], str] | None = None, + parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None, load_prompts: bool = True, parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None, request_timeout: int | None = None, diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 3f11189fdc..e7f007228f 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -245,7 +245,7 @@ def __init__( additional_properties: dict[str, Any] | None = None, func: Callable[..., Any] | None = None, input_model: type[BaseModel] | Mapping[str, Any] | None = None, - result_parser: Callable[[Any], str] | None = None, + result_parser: Callable[[Any], str | list[Content]] | None = None, **kwargs: Any, ) -> None: """Initialize the FunctionTool. @@ -448,19 +448,19 @@ async def invoke( *, arguments: BaseModel | Mapping[str, Any] | None = None, **kwargs: Any, - ) -> str: + ) -> str | list[Content]: """Run the AI function with the provided arguments as a Pydantic model. The raw return value of the wrapped function is automatically parsed into a ``str`` - (either plain text or serialized JSON) using :meth:`parse_result` or the custom - ``result_parser`` if one was provided. + (either plain text or serialized JSON) or a ``list[Content]`` (for rich content like + images) using :meth:`parse_result` or the custom ``result_parser`` if one was provided. Keyword Args: arguments: A mapping or model instance containing the arguments for the function. kwargs: Keyword arguments to pass to the function, will not be used if ``arguments`` is provided. Returns: - The parsed result as a string — either plain text or serialized JSON. + The parsed result as a string, or a list of Content items for rich results. Raises: TypeError: If arguments is not mapping-like or fails schema checks. @@ -566,8 +566,9 @@ async def invoke( parsed = str(result) logger.info(f"Function {self.name} succeeded.") if OBSERVABILITY_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore[name-defined] - span.set_attribute(OtelAttr.TOOL_RESULT, parsed) - logger.debug(f"Function result: {parsed}") + result_str = parsed if isinstance(parsed, str) else str(parsed) + span.set_attribute(OtelAttr.TOOL_RESULT, result_str) + logger.debug(f"Function result: {result_str}") return parsed finally: duration = (end_time_stamp or perf_counter()) - start_time_stamp @@ -621,10 +622,13 @@ def _make_dumpable(value: Any) -> Any: return value @staticmethod - def parse_result(result: Any) -> str: - """Convert a raw function return value to a string representation. + def parse_result(result: Any) -> str | list[Content]: + """Convert a raw function return value to a string or rich content list. + + Returns a ``str`` for text-only results, or a ``list[Content]`` when the + function produced rich content (images, audio, files) that should be + forwarded to the model as visual/multi-modal input. - The return value is always a ``str`` — either plain text or serialized JSON. This is called automatically by :meth:`invoke` before returning the result, ensuring that the result stored in ``Content.from_function_result`` is already in a form that can be passed directly to LLM APIs. @@ -633,12 +637,22 @@ def parse_result(result: Any) -> str: result: The raw return value from the wrapped function. Returns: - A string representation of the result, either plain text or serialized JSON. + A string representation, or a list of Content items for rich results. """ + from ._types import Content + if result is None: return "" if isinstance(result, str): return result + # Preserve rich Content (images, audio, files) instead of serializing to JSON + if isinstance(result, Content): + if result.type in ("data", "uri"): + return [result] + if result.type == "text": + return result.text or "" + if isinstance(result, list) and any(isinstance(item, Content) for item in result): # type: ignore[reportUnknownVariableType] + return [item if isinstance(item, Content) else Content.from_text(str(item)) for item in result] # type: ignore[reportUnknownVariableType, reportUnknownArgumentType] dumpable = FunctionTool._make_dumpable(result) if isinstance(dumpable, str): return dumpable @@ -859,7 +873,7 @@ def tool( max_invocations: int | None = None, max_invocation_exceptions: int | None = None, additional_properties: dict[str, Any] | None = None, - result_parser: Callable[[Any], str] | None = None, + result_parser: Callable[[Any], str | list[Content]] | None = None, ) -> FunctionTool: ... @@ -875,7 +889,7 @@ def tool( max_invocations: int | None = None, max_invocation_exceptions: int | None = None, additional_properties: dict[str, Any] | None = None, - result_parser: Callable[[Any], str] | None = None, + result_parser: Callable[[Any], str | list[Content]] | None = None, ) -> Callable[[Callable[..., Any]], FunctionTool]: ... @@ -890,7 +904,7 @@ def tool( max_invocations: int | None = None, max_invocation_exceptions: int | None = None, additional_properties: dict[str, Any] | None = None, - result_parser: Callable[[Any], str] | None = None, + result_parser: Callable[[Any], str | list[Content]] | None = None, ) -> FunctionTool | Callable[[Callable[..., Any]], FunctionTool]: """Decorate a function to turn it into a FunctionTool that can be passed to models and executed automatically. diff --git a/python/packages/core/agent_framework/_types.py b/python/packages/core/agent_framework/_types.py index 7ae9dbaa3d..60ec77b9d0 100644 --- a/python/packages/core/agent_framework/_types.py +++ b/python/packages/core/agent_framework/_types.py @@ -467,6 +467,7 @@ def __init__( arguments: str | Mapping[str, Any] | None = None, exception: str | None = None, result: Any = None, + items: Sequence[Content] | None = None, # Hosted file/vector store fields file_id: str | None = None, vector_store_id: str | None = None, @@ -524,6 +525,7 @@ def __init__( self.arguments = arguments self.exception = exception self.result = result + self.items = items self.file_id = file_id self.vector_store_id = vector_store_id self.inputs = inputs @@ -776,16 +778,57 @@ def from_function_result( call_id: str, *, result: Any = None, + items: Sequence[Content] | None = None, exception: str | None = None, annotations: Sequence[Annotation] | None = None, additional_properties: MutableMapping[str, Any] | None = None, raw_representation: Any = None, ) -> ContentT: - """Create function result content.""" + """Create function result content. + + Args: + call_id: The ID of the function call this result corresponds to. + + Keyword Args: + result: The text result, or a list of Content items. When a list is + provided, text items are concatenated as the text result and + media items (images, audio, files) are stored in ``items``. + items: Optional rich content items (e.g. images, audio) produced by the tool. + Ignored when ``result`` is a list (items are extracted from it instead). + exception: The exception message if the function call failed. + annotations: Optional annotations for the content. + additional_properties: Optional additional properties. + raw_representation: Optional raw representation from the provider. + """ + if isinstance(result, list): + if not all(isinstance(c, Content) for c in result): # type: ignore[reportUnknownVariableType] + return cls( + "function_result", + call_id=call_id, + result=str(result), # type: ignore[reportUnknownArgumentType] + items=list(items) if items else None, + exception=exception, + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + ) + text_parts = [c.text for c in result if c.type == "text" and c.text] # type: ignore[reportUnknownVariableType, reportUnknownMemberType] + rich_items = [c for c in result if c.type in ("data", "uri")] # type: ignore[reportUnknownVariableType, reportUnknownMemberType] + return cls( + "function_result", + call_id=call_id, + result="\n".join(text_parts) if text_parts else "", # type: ignore[reportUnknownArgumentType] + items=rich_items or None, # type: ignore[reportUnknownArgumentType] + exception=exception, + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + ) return cls( "function_result", call_id=call_id, result=result, + items=list(items) if items else None, exception=exception, annotations=annotations, additional_properties=additional_properties, @@ -1186,6 +1229,7 @@ def to_dict(self, *, exclude_none: bool = True, exclude: set[str] | None = None) "arguments", "exception", "result", + "items", "file_id", "vector_store_id", "inputs", diff --git a/python/packages/core/agent_framework/openai/_chat_client.py b/python/packages/core/agent_framework/openai/_chat_client.py index 0214c8df20..df04b2426b 100644 --- a/python/packages/core/agent_framework/openai/_chat_client.py +++ b/python/packages/core/agent_framework/openai/_chat_client.py @@ -580,6 +580,15 @@ def _prepare_message_for_openai(self, message: Message) -> list[dict[str, Any]]: # Always include content for tool results - API requires it even if empty # Functions returning None should still have a tool result message args["content"] = content.result if content.result is not None else "" + if content.items: + logger.warning( + "OpenAI Chat Completions API does not support rich content (images, audio) " + "in tool results. Rich content items will be omitted. " + "Use the Responses API client for rich tool results." + ) + if args: + all_messages.append(args) + continue case "text_reasoning" if (protected_data := content.protected_data) is not None: # Buffer reasoning to attach to the next message with content/tool_calls pending_reasoning = json.loads(protected_data) diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py index 726616adbb..70485a1629 100644 --- a/python/packages/core/agent_framework/openai/_responses_client.py +++ b/python/packages/core/agent_framework/openai/_responses_client.py @@ -16,7 +16,16 @@ ) from datetime import datetime, timezone from itertools import chain -from typing import TYPE_CHECKING, Any, ClassVar, Generic, Literal, NoReturn, TypedDict, cast +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Generic, + Literal, + NoReturn, + TypedDict, + cast, +) from openai import AsyncOpenAI, BadRequestError from openai.types.responses import FunctionShellTool @@ -309,23 +318,33 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: ) async for chunk in stream_response: yield self._parse_chunk_from_openai( - chunk, options=validated_options, function_call_ids=function_call_ids + chunk, + options=validated_options, + function_call_ids=function_call_ids, ) except Exception as ex: self._handle_request_error(ex) else: - client, run_options, validated_options = await self._prepare_request(messages, options, **kwargs) + ( + client, + run_options, + validated_options, + ) = await self._prepare_request(messages, options, **kwargs) try: if "text_format" in run_options: async with client.responses.stream(**run_options) as response: async for chunk in response: yield self._parse_chunk_from_openai( - chunk, options=validated_options, function_call_ids=function_call_ids + chunk, + options=validated_options, + function_call_ids=function_call_ids, ) else: async for chunk in await client.responses.create(stream=True, **run_options): yield self._parse_chunk_from_openai( - chunk, options=validated_options, function_call_ids=function_call_ids + chunk, + options=validated_options, + function_call_ids=function_call_ids, ) except Exception as ex: self._handle_request_error(ex) @@ -439,7 +458,8 @@ def _get_conversation_id( # region Prep methods def _prepare_tools_for_openai( - self, tools: ToolTypes | Callable[..., Any] | Sequence[ToolTypes | Callable[..., Any]] | None + self, + tools: ToolTypes | Callable[..., Any] | Sequence[ToolTypes | Callable[..., Any]] | None, ) -> list[Any]: """Prepare tools for the OpenAI Responses API. @@ -1190,10 +1210,21 @@ def _prepare_content_for_openai( "output": self._to_local_shell_output_payload(content), } # call_id for the result needs to be the same as the call_id for the function call + output: str | list[dict[str, Any]] = content.result if content.result is not None else "" + if content.items: + output_parts: list[dict[str, Any]] = [] + if content.result: + output_parts.append({"type": "input_text", "text": content.result}) + for item in content.items: + part = self._prepare_content_for_openai("user", item, call_id_to_id) # type: ignore[arg-type] + if part: + output_parts.append(part) + if output_parts: + output = output_parts return { "call_id": content.call_id, "type": "function_call_output", - "output": content.result if content.result is not None else "", + "output": output, } case "function_approval_request": return { @@ -1821,7 +1852,10 @@ def _parse_chunk_from_openai( case "response.created": response_id = event.response.id conversation_id = self._get_conversation_id(event.response, options.get("store")) - if event.response.status and event.response.status in ("in_progress", "queued"): + if event.response.status and event.response.status in ( + "in_progress", + "queued", + ): continuation_token = OpenAIContinuationToken(response_id=event.response.id) case "response.in_progress": response_id = event.response.id @@ -1999,7 +2033,11 @@ def _parse_chunk_from_openai( Content.from_shell_tool_call( call_id=local_call_id, commands=[local_command] if local_command else [], - timeout_ms=getattr(getattr(event_item, "action", None), "timeout_ms", None), + timeout_ms=getattr( + getattr(event_item, "action", None), + "timeout_ms", + None, + ), status=getattr(event_item, "status", None), raw_representation=event_item, ) diff --git a/python/packages/core/tests/assets/sample_image.jpg b/python/packages/core/tests/assets/sample_image.jpg new file mode 100644 index 0000000000..ea6486656f Binary files /dev/null and b/python/packages/core/tests/assets/sample_image.jpg differ diff --git a/python/packages/core/tests/azure/test_azure_chat_client.py b/python/packages/core/tests/azure/test_azure_chat_client.py index 3e88504493..cae5b13911 100644 --- a/python/packages/core/tests/azure/test_azure_chat_client.py +++ b/python/packages/core/tests/azure/test_azure_chat_client.py @@ -89,18 +89,26 @@ def test_init_endpoint(azure_openai_unit_test_env: dict[str, str]) -> None: @pytest.mark.parametrize("exclude_list", [["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"]], indirect=True) -def test_init_with_empty_deployment_name(azure_openai_unit_test_env: dict[str, str]) -> None: +def test_init_with_empty_deployment_name( + azure_openai_unit_test_env: dict[str, str], +) -> None: with pytest.raises(ValueError): AzureOpenAIChatClient() @pytest.mark.parametrize("exclude_list", [["AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_BASE_URL"]], indirect=True) -def test_init_with_empty_endpoint_and_base_url(azure_openai_unit_test_env: dict[str, str]) -> None: +def test_init_with_empty_endpoint_and_base_url( + azure_openai_unit_test_env: dict[str, str], +) -> None: with pytest.raises(ValueError): AzureOpenAIChatClient() -@pytest.mark.parametrize("override_env_param_dict", [{"AZURE_OPENAI_ENDPOINT": "http://test.com"}], indirect=True) +@pytest.mark.parametrize( + "override_env_param_dict", + [{"AZURE_OPENAI_ENDPOINT": "http://test.com"}], + indirect=True, +) def test_init_with_invalid_endpoint(azure_openai_unit_test_env: dict[str, str]) -> None: # Note: URL scheme validation was previously handled by pydantic's HTTPsUrl type. # After migrating to load_settings with TypedDict, endpoint is a plain string and no longer @@ -147,7 +155,11 @@ def mock_chat_completion_response() -> ChatCompletion: return ChatCompletion( id="test_id", choices=[ - Choice(index=0, message=ChatCompletionMessage(content="test", role="assistant"), finish_reason="stop") + Choice( + index=0, + message=ChatCompletionMessage(content="test", role="assistant"), + finish_reason="stop", + ) ], created=0, model="test", @@ -159,7 +171,13 @@ def mock_chat_completion_response() -> ChatCompletion: def mock_streaming_chat_completion_response() -> AsyncStream[ChatCompletionChunk]: content = ChatCompletionChunk( id="test_id", - choices=[ChunkChoice(index=0, delta=ChunkChoiceDelta(content="test", role="assistant"), finish_reason="stop")], + choices=[ + ChunkChoice( + index=0, + delta=ChunkChoiceDelta(content="test", role="assistant"), + finish_reason="stop", + ) + ], created=0, model="test", object="chat.completion.chunk", @@ -546,7 +564,9 @@ async def test_bad_request_non_content_filter( test_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") assert test_endpoint is not None mock_create.side_effect = openai.BadRequestError( - "The request was bad.", response=Response(400, request=Request("POST", test_endpoint)), body={} + "The request was bad.", + response=Response(400, request=Request("POST", test_endpoint)), + body={}, ) azure_chat_client = AzureOpenAIChatClient() @@ -605,7 +625,13 @@ async def test_streaming_with_none_delta( # Second chunk has actual content chunk_with_content = ChatCompletionChunk( id="test_id", - choices=[ChunkChoice(index=0, delta=ChunkChoiceDelta(content="test", role="assistant"), finish_reason="stop")], + choices=[ + ChunkChoice( + index=0, + delta=ChunkChoiceDelta(content="test", role="assistant"), + finish_reason="stop", + ) + ], created=0, model="test", object="chat.completion.chunk", @@ -787,7 +813,10 @@ async def test_azure_openai_chat_client_agent_basic_run_streaming(): ) as agent: # Test streaming run full_text = "" - async for chunk in agent.run("Please respond with exactly: 'This is a streaming response test.'", stream=True): + async for chunk in agent.run( + "Please respond with exactly: 'This is a streaming response test.'", + stream=True, + ): assert isinstance(chunk, AgentResponseUpdate) if chunk.text: full_text += chunk.text diff --git a/python/packages/core/tests/azure/test_azure_responses_client.py b/python/packages/core/tests/azure/test_azure_responses_client.py index 37efff16ca..7bc02f4e59 100644 --- a/python/packages/core/tests/azure/test_azure_responses_client.py +++ b/python/packages/core/tests/azure/test_azure_responses_client.py @@ -3,6 +3,7 @@ import json import logging import os +from pathlib import Path from typing import Annotated, Any from unittest.mock import MagicMock @@ -44,10 +45,13 @@ async def get_weather(location: Annotated[str, "The location as a city name"]) - return f"The weather in {location} is sunny and 72°F." -async def create_vector_store(client: AzureOpenAIResponsesClient) -> tuple[str, Content]: +async def create_vector_store( + client: AzureOpenAIResponsesClient, +) -> tuple[str, Content]: """Create a vector store with sample documents for testing.""" file = await client.client.files.create( - file=("todays_weather.txt", b"The weather today is sunny with a high of 75F."), purpose="assistants" + file=("todays_weather.txt", b"The weather today is sunny with a high of 75F."), + purpose="assistants", ) vector_store = await client.client.vector_stores.create( name="knowledge_base", @@ -98,7 +102,9 @@ def test_init_model_id_kwarg(azure_openai_unit_test_env: dict[str, str]) -> None assert isinstance(azure_responses_client, SupportsChatGetResponse) -def test_init_model_id_kwarg_does_not_override_deployment_name(azure_openai_unit_test_env: dict[str, str]) -> None: +def test_init_model_id_kwarg_does_not_override_deployment_name( + azure_openai_unit_test_env: dict[str, str], +) -> None: """Test that deployment_name takes precedence over model_id kwarg (issue #4299).""" azure_responses_client = AzureOpenAIResponsesClient(deployment_name="my-deployment", model_id="gpt-4o") @@ -323,7 +329,12 @@ def test_serialize(azure_openai_unit_test_env: dict[str, str]) -> None: "temperature_c": {"type": "number"}, "advisory": {"type": "string"}, }, - "required": ["location", "conditions", "temperature_c", "advisory"], + "required": [ + "location", + "conditions", + "temperature_c", + "advisory", + ], "additionalProperties": False, }, }, @@ -445,7 +456,12 @@ async def test_integration_web_search() -> None: # Test that the client will use the web search tool with location content = { - "messages": [Message(role="user", text="What is the current weather? Do not ask for my current location.")], + "messages": [ + Message( + role="user", + text="What is the current weather? Do not ask for my current location.", + ) + ], "options": { "tool_choice": "auto", "tools": [ @@ -556,7 +572,12 @@ async def test_integration_client_agent_hosted_code_interpreter_tool(): client = AzureOpenAIResponsesClient(credential=AzureCliCredential()) response = await client.get_response( - messages=[Message(role="user", text="Calculate the sum of numbers from 1 to 10 using Python code.")], + messages=[ + Message( + role="user", + text="Calculate the sum of numbers from 1 to 10 using Python code.", + ) + ], options={ "tools": [AzureOpenAIResponsesClient.get_code_interpreter_tool()], }, @@ -602,3 +623,40 @@ async def test_integration_client_agent_existing_session(): assert isinstance(second_response, AgentResponse) assert second_response.text is not None assert "photography" in second_response.text.lower() + + +@pytest.mark.flaky +@pytest.mark.integration +@skip_if_azure_integration_tests_disabled +async def test_azure_openai_responses_client_tool_rich_content_image() -> None: + """Test that Azure OpenAI Responses client can handle tool results containing images.""" + image_path = Path(__file__).parent.parent / "assets" / "sample_image.jpg" + image_bytes = image_path.read_bytes() + + @tool(approval_mode="never_require") + def get_test_image() -> Content: + """Return a test image for analysis.""" + return Content.from_data(data=image_bytes, media_type="image/jpeg") + + client = AzureOpenAIResponsesClient(credential=AzureCliCredential()) + client.function_invocation_configuration["max_iterations"] = 2 + + for streaming in [False, True]: + messages = [ + Message( + role="user", + text="Call the get_test_image tool and describe what you see.", + ) + ] + options: dict[str, Any] = {"tools": [get_test_image], "tool_choice": "auto"} + + if streaming: + response = await client.get_response(messages=messages, stream=True, options=options).get_final_response() + else: + response = await client.get_response(messages=messages, options=options) + + assert response is not None + assert isinstance(response, ChatResponse) + assert response.text is not None + assert len(response.text) > 0 + assert "house" in response.text.lower(), f"Model did not describe the house image. Response: {response.text}" diff --git a/python/packages/core/tests/core/test_mcp.py b/python/packages/core/tests/core/test_mcp.py index 867e7183cf..4ea911f8a2 100644 --- a/python/packages/core/tests/core/test_mcp.py +++ b/python/packages/core/tests/core/test_mcp.py @@ -67,30 +67,31 @@ def test_mcp_prompt_message_to_ai_content(): def test_parse_tool_result_from_mcp(): - """Test conversion from MCP tool result to string representation.""" + """Test conversion from MCP tool result with images preserves original order.""" mcp_result = types.CallToolResult( content=[ types.TextContent(type="text", text="Result text"), types.ImageContent(type="image", data="eHl6", mimeType="image/png"), + types.TextContent(type="text", text="After image"), types.ImageContent(type="image", data="YWJj", mimeType="image/webp"), ] ) result = _parse_tool_result_from_mcp(mcp_result) - # Multiple items produce a JSON array of strings - assert isinstance(result, str) - import json - - parsed = json.loads(result) - assert len(parsed) == 3 - assert parsed[0] == "Result text" - # Image items are JSON-encoded strings within the array - img1 = json.loads(parsed[1]) - assert img1["type"] == "image" - assert img1["data"] == "eHl6" - img2 = json.loads(parsed[2]) - assert img2["type"] == "image" - assert img2["data"] == "YWJj" + # Results with images return a list of Content objects in original order + assert isinstance(result, list) + assert len(result) == 4 + # Order is preserved: text, image, text, image + assert result[0].type == "text" + assert result[0].text == "Result text" + assert result[1].type == "data" + assert result[1].media_type == "image/png" + assert "eHl6" in result[1].uri + assert result[2].type == "text" + assert result[2].text == "After image" + assert result[3].type == "data" + assert result[3].media_type == "image/webp" + assert "YWJj" in result[3].uri def test_parse_tool_result_from_mcp_single_text(): @@ -120,6 +121,22 @@ def test_parse_tool_result_from_mcp_empty_content(): assert result == "" +def test_parse_tool_result_from_mcp_audio_content(): + """Test conversion from MCP tool result with audio returns rich content list.""" + mcp_result = types.CallToolResult( + content=[ + types.AudioContent(type="audio", data="YXVkaW8=", mimeType="audio/wav"), + ] + ) + result = _parse_tool_result_from_mcp(mcp_result) + + assert isinstance(result, list) + assert len(result) == 1 + assert result[0].type == "data" + assert result[0].media_type == "audio/wav" + assert "YXVkaW8=" in result[0].uri + + def test_mcp_content_types_to_ai_content_text(): """Test conversion of MCP text content to AI content.""" mcp_content = types.TextContent(type="text", text="Sample text") diff --git a/python/packages/core/tests/core/test_types.py b/python/packages/core/tests/core/test_types.py index 0d314c1aa5..e868aa9124 100644 --- a/python/packages/core/tests/core/test_types.py +++ b/python/packages/core/tests/core/test_types.py @@ -2332,12 +2332,120 @@ def test_parse_result_content_object(): def test_parse_result_list_of_content(): - """Test that list[Content] is serialized to JSON.""" + """Test that list[Content] with text-only items is returned as list[Content].""" contents = [Content.from_text("hello"), Content.from_text("world")] result = FunctionTool.parse_result(contents) + assert isinstance(result, list) + assert len(result) == 2 + assert result[0].text == "hello" + assert result[1].text == "world" + + +def test_parse_result_single_image_content(): + """Test that a single image Content is preserved as list[Content].""" + image_content = Content.from_data(data=b"fake_png_bytes", media_type="image/png") + result = FunctionTool.parse_result(image_content) + assert isinstance(result, list) + assert len(result) == 1 + assert result[0].type == "data" + assert result[0].media_type == "image/png" + + +def test_parse_result_single_text_content(): + """Test that a single text Content returns its text string.""" + text_content = Content.from_text("just text") + result = FunctionTool.parse_result(text_content) assert isinstance(result, str) - assert "hello" in result - assert "world" in result + assert result == "just text" + + +def test_parse_result_mixed_content_list(): + """Test that list with text and image Content is preserved.""" + contents = [ + Content.from_text("Chart rendered."), + Content.from_data(data=b"image_bytes", media_type="image/png"), + ] + result = FunctionTool.parse_result(contents) + assert isinstance(result, list) + assert len(result) == 2 + assert result[0].type == "text" + assert result[1].type == "data" + + +def test_from_function_result_with_content_list(): + """Test Content.from_function_result separates text and rich items from a list.""" + content_list = [ + Content.from_text("Chart rendered."), + Content.from_data(data=b"image_bytes", media_type="image/png"), + ] + result = Content.from_function_result(call_id="test-123", result=content_list) + assert result.type == "function_result" + assert result.call_id == "test-123" + assert result.result == "Chart rendered." + assert result.items is not None + assert len(result.items) == 1 + assert result.items[0].type == "data" + assert result.items[0].media_type == "image/png" + + +def test_from_function_result_with_string(): + """Test Content.from_function_result with plain string result.""" + result = Content.from_function_result(call_id="test-123", result="just text") + assert result.type == "function_result" + assert result.call_id == "test-123" + assert result.result == "just text" + assert result.items is None + + +def test_content_from_function_result_with_items(): + """Test Content.from_function_result with items parameter.""" + image = Content.from_data(data=b"png_data", media_type="image/png") + result = Content.from_function_result( + call_id="call-1", + result="Screenshot captured.", + items=[image], + ) + assert result.type == "function_result" + assert result.call_id == "call-1" + assert result.result == "Screenshot captured." + assert result.items is not None + assert len(result.items) == 1 + assert result.items[0].media_type == "image/png" + + +def test_content_from_function_result_items_in_to_dict(): + """Test that items are included in to_dict serialization.""" + image = Content.from_data(data=b"png_data", media_type="image/png") + result = Content.from_function_result( + call_id="call-1", + result="done", + items=[image], + ) + d = result.to_dict() + assert "items" in d + assert len(d["items"]) == 1 + assert d["items"][0]["type"] == "data" + + +def test_from_function_result_with_only_rich_content_list(): + """Test Content.from_function_result with only image items and no text.""" + content_list = [ + Content.from_data(data=b"image_bytes", media_type="image/png"), + ] + result = Content.from_function_result(call_id="test-456", result=content_list) + assert result.type == "function_result" + assert result.result == "" + assert result.items is not None + assert len(result.items) == 1 + assert result.items[0].type == "data" + + +def test_from_function_result_with_non_content_list(): + """Test Content.from_function_result with a list of non-Content objects falls back to str.""" + result = Content.from_function_result(call_id="test-789", result=["hello", "world"]) + assert result.type == "function_result" + assert result.result == "['hello', 'world']" + assert result.items is None # endregion diff --git a/python/packages/core/tests/openai/test_openai_chat_client.py b/python/packages/core/tests/openai/test_openai_chat_client.py index 58faac42a3..6243b61896 100644 --- a/python/packages/core/tests/openai/test_openai_chat_client.py +++ b/python/packages/core/tests/openai/test_openai_chat_client.py @@ -142,7 +142,9 @@ def test_serialize_with_org_id(openai_unit_test_env: dict[str, str]) -> None: assert "User-Agent" not in dumped_settings.get("default_headers", {}) -async def test_content_filter_exception_handling(openai_unit_test_env: dict[str, str]) -> None: +async def test_content_filter_exception_handling( + openai_unit_test_env: dict[str, str], +) -> None: """Test that content filter errors are properly handled.""" client = OpenAIChatClient() messages = [Message(role="user", text="test message")] @@ -150,7 +152,9 @@ async def test_content_filter_exception_handling(openai_unit_test_env: dict[str, # Create a mock BadRequestError with content_filter code mock_response = MagicMock() mock_error = BadRequestError( - message="Content filter error", response=mock_response, body={"error": {"code": "content_filter"}} + message="Content filter error", + response=mock_response, + body={"error": {"code": "content_filter"}}, ) mock_error.code = "content_filter" @@ -184,7 +188,9 @@ class UnsupportedTool: assert result["tools"] == [dict_tool] -def test_prepare_tools_with_single_function_tool(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_tools_with_single_function_tool( + openai_unit_test_env: dict[str, str], +) -> None: """Test that a single FunctionTool is accepted for tool preparation.""" client = OpenAIChatClient() @@ -241,12 +247,17 @@ async def test_exception_message_includes_original_error_details() -> None: assert original_error_message in exception_message -def test_chat_response_content_order_text_before_tool_calls(openai_unit_test_env: dict[str, str]): +def test_chat_response_content_order_text_before_tool_calls( + openai_unit_test_env: dict[str, str], +): """Test that text content appears before tool calls in ChatResponse contents.""" # Import locally to avoid break other tests when the import changes from openai.types.chat.chat_completion import ChatCompletion, Choice from openai.types.chat.chat_completion_message import ChatCompletionMessage - from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall, Function + from openai.types.chat.chat_completion_message_tool_call import ( + ChatCompletionMessageToolCall, + Function, + ) # Create a mock OpenAI response with both text and tool calls mock_response = ChatCompletion( @@ -298,7 +309,8 @@ def test_function_result_falsy_values_handling(openai_unit_test_env: dict[str, s # Test with empty list serialized as JSON string (as FunctionTool.invoke would produce) message_with_empty_list = Message( - role="tool", contents=[Content.from_function_result(call_id="call-123", result="[]")] + role="tool", + contents=[Content.from_function_result(call_id="call-123", result="[]")], ) openai_messages = client._prepare_message_for_openai(message_with_empty_list) @@ -307,7 +319,8 @@ def test_function_result_falsy_values_handling(openai_unit_test_env: dict[str, s # Test with empty string (falsy but not None) message_with_empty_string = Message( - role="tool", contents=[Content.from_function_result(call_id="call-456", result="")] + role="tool", + contents=[Content.from_function_result(call_id="call-456", result="")], ) openai_messages = client._prepare_message_for_openai(message_with_empty_string) @@ -316,7 +329,8 @@ def test_function_result_falsy_values_handling(openai_unit_test_env: dict[str, s # Test with False serialized as JSON string (as FunctionTool.invoke would produce) message_with_false = Message( - role="tool", contents=[Content.from_function_result(call_id="call-789", result="false")] + role="tool", + contents=[Content.from_function_result(call_id="call-789", result="false")], ) openai_messages = client._prepare_message_for_openai(message_with_false) @@ -336,7 +350,11 @@ def test_function_result_exception_handling(openai_unit_test_env: dict[str, str] message_with_exception = Message( role="tool", contents=[ - Content.from_function_result(call_id="call-123", result="Error: Function failed.", exception=test_exception) + Content.from_function_result( + call_id="call-123", + result="Error: Function failed.", + exception=test_exception, + ) ], ) @@ -346,6 +364,37 @@ def test_function_result_exception_handling(openai_unit_test_env: dict[str, str] assert openai_messages[0]["tool_call_id"] == "call-123" +def test_function_result_with_rich_items_warns_and_omits( + openai_unit_test_env: dict[str, str], +) -> None: + """Test that function_result with items logs a warning and omits rich items.""" + + client = OpenAIChatClient() + image_content = Content.from_data(data=b"image_bytes", media_type="image/png") + message = Message( + role="tool", + contents=[ + Content.from_function_result( + call_id="call_rich", + result=[Content.from_text("Result text"), image_content], + ) + ], + ) + + with patch("agent_framework.openai._chat_client.logger") as mock_logger: + openai_messages = client._prepare_message_for_openai(message) + + # Warning should be logged + mock_logger.warning.assert_called_once() + assert "does not support rich content" in mock_logger.warning.call_args[0][0] + + # Tool message should still be emitted with text result + assert len(openai_messages) == 1 + assert openai_messages[0]["role"] == "tool" + assert openai_messages[0]["tool_call_id"] == "call_rich" + assert openai_messages[0]["content"] == "Result text" + + def test_parse_result_string_passthrough(): """Test that string values are passed through directly without JSON encoding.""" from agent_framework import FunctionTool @@ -355,7 +404,9 @@ def test_parse_result_string_passthrough(): assert isinstance(result, str) -def test_prepare_content_for_openai_data_content_image(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_content_for_openai_data_content_image( + openai_unit_test_env: dict[str, str], +) -> None: """Test _prepare_content_for_openai converts DataContent with image media type to OpenAI format.""" client = OpenAIChatClient() @@ -397,7 +448,8 @@ def test_prepare_content_for_openai_data_content_image(openai_unit_test_env: dic # Test DataContent with MP3 audio mp3_data_content = Content.from_uri( - uri="data:audio/mp3;base64,//uQAAAAWGluZwAAAA8AAAACAAACcQ==", media_type="audio/mp3" + uri="data:audio/mp3;base64,//uQAAAAWGluZwAAAA8AAAACAAACcQ==", + media_type="audio/mp3", ) result = client._prepare_content_for_openai(mp3_data_content) # type: ignore @@ -409,7 +461,9 @@ def test_prepare_content_for_openai_data_content_image(openai_unit_test_env: dic assert result["input_audio"]["format"] == "mp3" -def test_prepare_content_for_openai_document_file_mapping(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_content_for_openai_document_file_mapping( + openai_unit_test_env: dict[str, str], +) -> None: """Test _prepare_content_for_openai converts document files (PDF, DOCX, etc.) to OpenAI file format.""" client = OpenAIChatClient() @@ -515,7 +569,9 @@ def test_prepare_content_for_openai_document_file_mapping(openai_unit_test_env: assert "filename" not in result["file"] # None filename should be omitted -def test_parse_text_reasoning_content_from_response(openai_unit_test_env: dict[str, str]) -> None: +def test_parse_text_reasoning_content_from_response( + openai_unit_test_env: dict[str, str], +) -> None: """Test that TextReasoningContent is correctly parsed from OpenAI response with reasoning_details.""" client = OpenAIChatClient() @@ -563,7 +619,9 @@ def test_parse_text_reasoning_content_from_response(openai_unit_test_env: dict[s assert parsed_details == mock_reasoning_details -def test_parse_text_reasoning_content_from_streaming_chunk(openai_unit_test_env: dict[str, str]) -> None: +def test_parse_text_reasoning_content_from_streaming_chunk( + openai_unit_test_env: dict[str, str], +) -> None: """Test that TextReasoningContent is correctly parsed from streaming OpenAI chunk with reasoning_details.""" from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice @@ -611,7 +669,9 @@ def test_parse_text_reasoning_content_from_streaming_chunk(openai_unit_test_env: assert parsed_details == mock_reasoning_details -def test_prepare_message_with_text_reasoning_content(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_message_with_text_reasoning_content( + openai_unit_test_env: dict[str, str], +) -> None: """Test that TextReasoningContent with protected_data is correctly prepared for OpenAI.""" client = OpenAIChatClient() @@ -643,7 +703,9 @@ def test_prepare_message_with_text_reasoning_content(openai_unit_test_env: dict[ assert prepared[0]["content"] == "The answer is 42." -def test_prepare_message_with_only_text_reasoning_content(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_message_with_only_text_reasoning_content( + openai_unit_test_env: dict[str, str], +) -> None: """Test that a message with only text_reasoning content does not raise IndexError. Regression test for https://github.com/microsoft/agent-framework/issues/4384 @@ -677,7 +739,9 @@ def test_prepare_message_with_only_text_reasoning_content(openai_unit_test_env: assert prepared[0]["content"] == "" -def test_prepare_message_with_text_reasoning_before_text(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_message_with_text_reasoning_before_text( + openai_unit_test_env: dict[str, str], +) -> None: """Test that text_reasoning content appearing before text content is handled correctly. Regression test for https://github.com/microsoft/agent-framework/issues/4384 @@ -711,7 +775,9 @@ def test_prepare_message_with_text_reasoning_before_text(openai_unit_test_env: d assert prepared[0]["content"] == "The answer is 42." -def test_prepare_message_with_text_reasoning_before_function_call(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_message_with_text_reasoning_before_function_call( + openai_unit_test_env: dict[str, str], +) -> None: """Test that text_reasoning content appearing before a function call is handled correctly. Regression test for https://github.com/microsoft/agent-framework/issues/4384 @@ -747,7 +813,9 @@ def test_prepare_message_with_text_reasoning_before_function_call(openai_unit_te assert prepared[0]["role"] == "assistant" -def test_function_approval_content_is_skipped_in_preparation(openai_unit_test_env: dict[str, str]) -> None: +def test_function_approval_content_is_skipped_in_preparation( + openai_unit_test_env: dict[str, str], +) -> None: """Test that function approval request and response content are skipped.""" client = OpenAIChatClient() @@ -793,7 +861,9 @@ def test_function_approval_content_is_skipped_in_preparation(openai_unit_test_en assert prepared_mixed[0]["content"] == "I need approval for this action." -def test_usage_content_in_streaming_response(openai_unit_test_env: dict[str, str]) -> None: +def test_usage_content_in_streaming_response( + openai_unit_test_env: dict[str, str], +) -> None: """Test that UsageContent is correctly parsed from streaming response with usage data.""" from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from openai.types.completion_usage import CompletionUsage @@ -829,13 +899,19 @@ def test_usage_content_in_streaming_response(openai_unit_test_env: dict[str, str assert usage_content.usage_details["total_token_count"] == 150 -def test_streaming_chunk_with_usage_and_text(openai_unit_test_env: dict[str, str]) -> None: +def test_streaming_chunk_with_usage_and_text( + openai_unit_test_env: dict[str, str], +) -> None: """Test that text content is not lost when usage data is in the same chunk. Some providers (e.g. Gemini) include both usage and text content in the same streaming chunk. See https://github.com/microsoft/agent-framework/issues/3434 """ - from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice, ChoiceDelta + from openai.types.chat.chat_completion_chunk import ( + ChatCompletionChunk, + Choice, + ChoiceDelta, + ) from openai.types.completion_usage import CompletionUsage client = OpenAIChatClient() @@ -923,7 +999,9 @@ def test_prepare_options_without_messages(openai_unit_test_env: dict[str, str]) client._prepare_options([], {}) -def test_prepare_tools_with_web_search_no_location(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_tools_with_web_search_no_location( + openai_unit_test_env: dict[str, str], +) -> None: """Test preparing web search tool without user location.""" client = OpenAIChatClient() @@ -937,7 +1015,9 @@ def test_prepare_tools_with_web_search_no_location(openai_unit_test_env: dict[st assert result["web_search_options"] == {} -def test_prepare_options_with_instructions(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_options_with_instructions( + openai_unit_test_env: dict[str, str], +) -> None: """Test that instructions are prepended as system message.""" client = OpenAIChatClient() @@ -969,7 +1049,9 @@ def test_prepare_message_with_author_name(openai_unit_test_env: dict[str, str]) assert prepared[0]["name"] == "TestUser" -def test_prepare_message_with_tool_result_author_name(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_message_with_tool_result_author_name( + openai_unit_test_env: dict[str, str], +) -> None: """Test that author_name is not included for TOOL role messages.""" client = OpenAIChatClient() @@ -987,7 +1069,9 @@ def test_prepare_message_with_tool_result_author_name(openai_unit_test_env: dict assert "name" not in prepared[0] -def test_prepare_system_message_content_is_string(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_system_message_content_is_string( + openai_unit_test_env: dict[str, str], +) -> None: """Test that system message content is a plain string, not a list. Some OpenAI-compatible endpoints (e.g. NVIDIA NIM) reject system messages @@ -1005,7 +1089,9 @@ def test_prepare_system_message_content_is_string(openai_unit_test_env: dict[str assert prepared[0]["content"] == "You are a helpful assistant." -def test_prepare_developer_message_content_is_string(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_developer_message_content_is_string( + openai_unit_test_env: dict[str, str], +) -> None: """Test that developer message content is a plain string, not a list.""" client = OpenAIChatClient() @@ -1019,7 +1105,9 @@ def test_prepare_developer_message_content_is_string(openai_unit_test_env: dict[ assert prepared[0]["content"] == "Follow these rules." -def test_prepare_system_message_multiple_text_contents_joined(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_system_message_multiple_text_contents_joined( + openai_unit_test_env: dict[str, str], +) -> None: """Test that system messages with multiple text contents are joined into a single string.""" client = OpenAIChatClient() @@ -1039,7 +1127,9 @@ def test_prepare_system_message_multiple_text_contents_joined(openai_unit_test_e assert prepared[0]["content"] == "You are a helpful assistant.\nBe concise." -def test_prepare_user_message_text_content_is_string(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_user_message_text_content_is_string( + openai_unit_test_env: dict[str, str], +) -> None: """Test that text-only user message content is flattened to a plain string. Some OpenAI-compatible endpoints (e.g. Foundry Local) cannot deserialize @@ -1057,7 +1147,9 @@ def test_prepare_user_message_text_content_is_string(openai_unit_test_env: dict[ assert prepared[0]["content"] == "Hello" -def test_prepare_user_message_multimodal_content_remains_list(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_user_message_multimodal_content_remains_list( + openai_unit_test_env: dict[str, str], +) -> None: """Test that multimodal user message content remains a list.""" client = OpenAIChatClient() @@ -1076,7 +1168,9 @@ def test_prepare_user_message_multimodal_content_remains_list(openai_unit_test_e assert has_list_content -def test_prepare_assistant_message_text_content_is_string(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_assistant_message_text_content_is_string( + openai_unit_test_env: dict[str, str], +) -> None: """Test that text-only assistant message content is flattened to a plain string.""" client = OpenAIChatClient() @@ -1090,7 +1184,9 @@ def test_prepare_assistant_message_text_content_is_string(openai_unit_test_env: assert prepared[0]["content"] == "Sure, I can help." -def test_tool_choice_required_with_function_name(openai_unit_test_env: dict[str, str]) -> None: +def test_tool_choice_required_with_function_name( + openai_unit_test_env: dict[str, str], +) -> None: """Test that tool_choice with required mode and function name is correctly prepared.""" client = OpenAIChatClient() @@ -1125,7 +1221,9 @@ def test_response_format_dict_passthrough(openai_unit_test_env: dict[str, str]) assert prepared_options["response_format"] == custom_format -def test_multiple_function_calls_in_single_message(openai_unit_test_env: dict[str, str]) -> None: +def test_multiple_function_calls_in_single_message( + openai_unit_test_env: dict[str, str], +) -> None: """Test that multiple function calls in a message are correctly prepared.""" client = OpenAIChatClient() @@ -1148,7 +1246,9 @@ def test_multiple_function_calls_in_single_message(openai_unit_test_env: dict[st assert prepared[0]["tool_calls"][1]["id"] == "call_2" -def test_prepare_options_removes_parallel_tool_calls_when_no_tools(openai_unit_test_env: dict[str, str]) -> None: +def test_prepare_options_removes_parallel_tool_calls_when_no_tools( + openai_unit_test_env: dict[str, str], +) -> None: """Test that parallel_tool_calls is removed when no tools are present.""" client = OpenAIChatClient() @@ -1161,7 +1261,9 @@ def test_prepare_options_removes_parallel_tool_calls_when_no_tools(openai_unit_t assert "parallel_tool_calls" not in prepared_options -async def test_streaming_exception_handling(openai_unit_test_env: dict[str, str]) -> None: +async def test_streaming_exception_handling( + openai_unit_test_env: dict[str, str], +) -> None: """Test that streaming errors are properly handled.""" client = OpenAIChatClient() messages = [Message(role="user", text="test")] @@ -1205,7 +1307,12 @@ class OutputStruct(BaseModel): param("allow_multiple_tool_calls", True, False, id="allow_multiple_tool_calls"), # OpenAIChatOptions - just verify they don't fail param("logit_bias", {"50256": -1}, False, id="logit_bias"), - param("prediction", {"type": "content", "content": "hello world"}, False, id="prediction"), + param( + "prediction", + {"type": "content", "content": "hello world"}, + False, + id="prediction", + ), # Complex options requiring output validation param("tools", [get_weather], True, id="tools_function"), param("tool_choice", "auto", True, id="tool_choice_auto"), @@ -1234,7 +1341,12 @@ class OutputStruct(BaseModel): "temperature_c": {"type": "number"}, "advisory": {"type": "string"}, }, - "required": ["location", "conditions", "temperature_c", "advisory"], + "required": [ + "location", + "conditions", + "temperature_c", + "advisory", + ], "additionalProperties": False, }, }, @@ -1368,7 +1480,12 @@ async def test_integration_web_search() -> None: } ) content = { - "messages": [Message(role="user", text="What is the current weather? Do not ask for my current location.")], + "messages": [ + Message( + role="user", + text="What is the current weather? Do not ask for my current location.", + ) + ], "options": { "tool_choice": "auto", "tools": [web_search_tool_with_location], diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py index e049dbd16e..d9dd0830de 100644 --- a/python/packages/core/tests/openai/test_openai_responses_client.py +++ b/python/packages/core/tests/openai/test_openai_responses_client.py @@ -4,6 +4,7 @@ import json import os from datetime import datetime, timezone +from pathlib import Path from typing import Annotated, Any from unittest.mock import MagicMock, patch @@ -36,7 +37,10 @@ SupportsChatGetResponse, tool, ) -from agent_framework.exceptions import ChatClientException, ChatClientInvalidRequestException +from agent_framework.exceptions import ( + ChatClientException, + ChatClientInvalidRequestException, +) from agent_framework.openai import OpenAIResponsesClient from agent_framework.openai._exceptions import OpenAIContentFilterException from agent_framework.openai._responses_client import OPENAI_LOCAL_SHELL_CALL_ITEM_ID_KEY @@ -1261,7 +1265,10 @@ def test_prepare_messages_for_openai_full_conversation_with_reasoning() -> None: ), ], ), - Message(role="assistant", contents=[Content.from_text(text="I found hotels for you")]), + Message( + role="assistant", + contents=[Content.from_text(text="I found hotels for you")], + ), ] result = client._prepare_messages_for_openai(messages) @@ -1370,10 +1377,16 @@ def test_response_format_with_conflicting_definitions() -> None: client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") # Mock response_format and text_config that conflict - response_format = {"type": "json_schema", "format": {"type": "json_schema", "name": "Test", "schema": {}}} + response_format = { + "type": "json_schema", + "format": {"type": "json_schema", "name": "Test", "schema": {}}, + } text_config = {"format": {"type": "json_object"}} - with pytest.raises(ChatClientInvalidRequestException, match="Conflicting response_format definitions"): + with pytest.raises( + ChatClientInvalidRequestException, + match="Conflicting response_format definitions", + ): client._prepare_response_and_text_format(response_format=response_format, text_config=text_config) @@ -1405,7 +1418,13 @@ def test_response_format_with_format_key() -> None: """Test response_format that already has a format key.""" client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") - response_format = {"format": {"type": "json_schema", "name": "MySchema", "schema": {"type": "object"}}} + response_format = { + "format": { + "type": "json_schema", + "name": "MySchema", + "schema": {"type": "object"}, + } + } _, text_config = client._prepare_response_and_text_format(response_format=response_format, text_config=None) @@ -1435,7 +1454,11 @@ def test_response_format_json_schema_with_strict() -> None: response_format = { "type": "json_schema", - "json_schema": {"name": "StrictSchema", "schema": {"type": "object"}, "strict": True}, + "json_schema": { + "name": "StrictSchema", + "schema": {"type": "object"}, + "strict": True, + }, } _, text_config = client._prepare_response_and_text_format(response_format=response_format, text_config=None) @@ -1469,7 +1492,10 @@ def test_response_format_json_schema_missing_schema() -> None: response_format = {"type": "json_schema", "json_schema": {"name": "NoSchema"}} - with pytest.raises(ChatClientInvalidRequestException, match="json_schema response_format requires a schema"): + with pytest.raises( + ChatClientInvalidRequestException, + match="json_schema response_format requires a schema", + ): client._prepare_response_and_text_format(response_format=response_format, text_config=None) @@ -1489,7 +1515,10 @@ def test_response_format_invalid_type() -> None: response_format = "invalid" # Not a Pydantic model or mapping - with pytest.raises(ChatClientInvalidRequestException, match="response_format must be a Pydantic model or mapping"): + with pytest.raises( + ChatClientInvalidRequestException, + match="response_format must be a Pydantic model or mapping", + ): client._prepare_response_and_text_format(response_format=response_format, text_config=None) # type: ignore @@ -2146,7 +2175,9 @@ async def test_get_response_streaming_with_response_format() -> None: async def run_streaming(): async for _ in client.get_response( - stream=True, messages=messages, options={"response_format": OutputStruct} + stream=True, + messages=messages, + options={"response_format": OutputStruct}, ): pass @@ -2210,6 +2241,45 @@ def test_prepare_content_for_openai_unsupported_content() -> None: assert result == {} +def test_prepare_content_for_openai_function_result_with_rich_items() -> None: + """Test _prepare_content_for_openai with function_result containing rich items.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + image_content = Content.from_data(data=b"image_bytes", media_type="image/png") + content = Content.from_function_result( + call_id="call_rich", + result=[Content.from_text("Result text"), image_content], + ) + + result = client._prepare_content_for_openai("user", content, {}) # type: ignore + + assert result["type"] == "function_call_output" + assert result["call_id"] == "call_rich" + # Output should be a list with text and image parts + output = result["output"] + assert isinstance(output, list) + assert len(output) == 2 + assert output[0]["type"] == "input_text" + assert output[0]["text"] == "Result text" + assert output[1]["type"] == "input_image" + + +def test_prepare_content_for_openai_function_result_without_items() -> None: + """Test _prepare_content_for_openai with plain string function_result.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + content = Content.from_function_result( + call_id="call_plain", + result="Simple result", + ) + + result = client._prepare_content_for_openai("user", content, {}) # type: ignore + + assert result["type"] == "function_call_output" + assert result["call_id"] == "call_plain" + assert result["output"] == "Simple result" + + def test_parse_chunk_from_openai_code_interpreter() -> None: """Test _parse_chunk_from_openai with code_interpreter_call.""" client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") @@ -2726,7 +2796,10 @@ async def test_instructions_sent_first_turn_then_skipped_for_continuation() -> N await client.get_response( messages=[Message(role="user", text="Tell me a joke")], - options={"instructions": "Reply in uppercase.", "conversation_id": "resp_123"}, + options={ + "instructions": "Reply in uppercase.", + "conversation_id": "resp_123", + }, ) second_input_messages = mock_create.call_args.kwargs["input"] @@ -2736,7 +2809,9 @@ async def test_instructions_sent_first_turn_then_skipped_for_continuation() -> N @pytest.mark.parametrize("conversation_id", ["resp_456", "conv_abc123"]) -async def test_instructions_not_repeated_for_continuation_ids(conversation_id: str) -> None: +async def test_instructions_not_repeated_for_continuation_ids( + conversation_id: str, +) -> None: client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") mock_response = _create_mock_responses_text_response(response_id="resp_456") @@ -2837,7 +2912,12 @@ async def get_api_key() -> str: "temperature_c": {"type": "number"}, "advisory": {"type": "string"}, }, - "required": ["location", "conditions", "temperature_c", "advisory"], + "required": [ + "location", + "conditions", + "temperature_c", + "advisory", + ], "additionalProperties": False, }, }, @@ -2962,7 +3042,12 @@ async def test_integration_web_search() -> None: user_location={"country": "US", "city": "Seattle"}, ) content = { - "messages": [Message(role="user", text="What is the current weather? Do not ask for my current location.")], + "messages": [ + Message( + role="user", + text="What is the current weather? Do not ask for my current location.", + ) + ], "options": { "tool_choice": "auto", "tools": [web_search_tool_with_location], @@ -3053,7 +3138,41 @@ async def test_integration_streaming_file_search() -> None: assert "75" in full_message -# region Background Response / ContinuationToken Tests +@pytest.mark.flaky +@pytest.mark.integration +@skip_if_openai_integration_tests_disabled +async def test_integration_tool_rich_content_image() -> None: + """Integration test: a tool returns an image and the model describes it.""" + image_path = Path(__file__).parent.parent / "assets" / "sample_image.jpg" + image_bytes = image_path.read_bytes() + + @tool(approval_mode="never_require") + def get_test_image() -> Content: + """Return a test image for analysis.""" + return Content.from_data(data=image_bytes, media_type="image/jpeg") + + client = OpenAIResponsesClient() + client.function_invocation_configuration["max_iterations"] = 2 + + for streaming in [False, True]: + messages = [ + Message( + role="user", + text="Call the get_test_image tool and describe what you see.", + ) + ] + options: dict[str, Any] = {"tools": [get_test_image], "tool_choice": "auto"} + + if streaming: + response = await client.get_response(messages=messages, stream=True, options=options).get_final_response() + else: + response = await client.get_response(messages=messages, options=options) + + assert response is not None + assert isinstance(response, ChatResponse) + assert response.text is not None + assert len(response.text) > 0 + assert "house" in response.text.lower(), f"Model did not describe the house image. Response: {response.text}" def test_continuation_token_json_serializable() -> None: diff --git a/python/packages/ollama/agent_framework_ollama/_chat_client.py b/python/packages/ollama/agent_framework_ollama/_chat_client.py index e31c1971da..9f5ee77496 100644 --- a/python/packages/ollama/agent_framework_ollama/_chat_client.py +++ b/python/packages/ollama/agent_framework_ollama/_chat_client.py @@ -500,11 +500,16 @@ def _format_assistant_message(self, message: Message) -> list[OllamaMessage]: def _format_tool_message(self, message: Message) -> list[OllamaMessage]: # Ollama does not support multiple tool results in a single message, so we create a separate - return [ - OllamaMessage(role="tool", content=str(item.result), tool_name=item.call_id) - for item in message.contents - if item.type == "function_result" - ] + messages: list[OllamaMessage] = [] + for item in message.contents: + if item.type == "function_result": + if item.items: + logger.warning( + "Ollama does not support rich content (images, audio) in tool results. " + "Rich content items will be omitted." + ) + messages.append(OllamaMessage(role="tool", content=str(item.result), tool_name=item.call_id)) + return messages def _parse_contents_from_ollama(self, response: OllamaChatResponse) -> list[Content]: contents: list[Content] = []