microsoft · giles17 · Mar 12, 2026 · Feb 26, 2026 · Feb 26, 2026 · Feb 26, 2026
diff --git a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py
@@ -713,12 +713,44 @@ def _prepare_message_for_anthropic(self, message: Message) -> dict[str, Any]:
                         "input": content.parse_arguments(),
                     })
                 case "function_result":
-                    a_content.append({
-                        "type": "tool_result",
-                        "tool_use_id": content.call_id,
-                        "content": content.result if content.result is not None else "",
-                        "is_error": content.exception is not None,
-                    })
+                    if content.items:
+                        # Rich content: build array with text + image blocks
+                        tool_content: list[dict[str, Any]] = []
+                        if content.result:
+                            tool_content.append({"type": "text", "text": content.result})
+                        for item in content.items:
+                            if item.type == "data" and item.has_top_level_media_type("image"):
+                                tool_content.append({
+                                    "type": "image",
+                                    "source": {
+                                        "data": _get_data_bytes_as_str(item),  # type: ignore[attr-defined]
+                                        "media_type": item.media_type,
+                                        "type": "base64",
+                                    },
+                                })
+                            elif item.type == "uri" and item.has_top_level_media_type("image"):
+                                tool_content.append({
+                                    "type": "image",
+                                    "source": {"type": "url", "url": item.uri},
+                                })
+                            else:
+                                logger.debug(
+                                    "Ignoring unsupported rich content media type in tool result: %s",
+                                    item.media_type,
+                                )
+                        a_content.append({
+                            "type": "tool_result",
+                            "tool_use_id": content.call_id,
+                            "content": tool_content,
+                            "is_error": content.exception is not None,
+                        })
+                    else:
+                        a_content.append({
+                            "type": "tool_result",
+                            "tool_use_id": content.call_id,
+                            "content": content.result if content.result is not None else "",
+                            "is_error": content.exception is not None,
+                        })
                 case "mcp_server_tool_call":
                     mcp_call: dict[str, Any] = {
                         "type": "mcp_tool_use",

diff --git a/python/packages/anthropic/tests/test_anthropic_client.py b/python/packages/anthropic/tests/test_anthropic_client.py
diff --git a/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py b/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py
@@ -1391,6 +1391,11 @@ def _prepare_tool_outputs_for_azure_ai(
                 call_id = run_and_call_ids[1]
 
                 if content.type == "function_result":
+                    if content.items:
+                        logger.warning(
+                            "Azure AI Agents does not support rich content (images, audio) in tool results. "
+                            "Rich content items will be omitted."
+                        )
                     if tool_outputs is None:
                         tool_outputs = []
                     tool_outputs.append(

diff --git a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py
@@ -503,10 +503,16 @@ def _convert_content_to_bedrock_block(self, content: Content) -> dict[str, Any]
                     }
                 }
             case "function_result":
+                tool_result_blocks = self._convert_tool_result_to_blocks(content.result)
+                if content.items:
+                    logger.warning(
+                        "Bedrock does not support rich content (images, audio) in tool results. "
+                        "Rich content items will be omitted."
+                    )
                 tool_result_block = {
                     "toolResult": {
                         "toolUseId": content.call_id,
-                        "content": self._convert_tool_result_to_blocks(content.result),
+                        "content": tool_result_blocks,
                         "status": "error" if content.exception else "success",
                     }
                 }
@@ -528,6 +534,8 @@ def _convert_content_to_bedrock_block(self, content: Content) -> dict[str, Any]
 
     def _convert_tool_result_to_blocks(self, result: Any) -> list[dict[str, Any]]:
         prepared_result = result if isinstance(result, str) else FunctionTool.parse_result(result)
+        if not isinstance(prepared_result, str):
+            return [{"text": str(prepared_result)}]
         try:
             parsed_result = json.loads(prepared_result)
         except json.JSONDecodeError:

diff --git a/python/packages/core/agent_framework/_mcp.py b/python/packages/core/agent_framework/_mcp.py
@@ -142,38 +142,44 @@ def _parse_message_from_mcp(
 
 def _parse_tool_result_from_mcp(
     mcp_type: types.CallToolResult,
-) -> str:
-    """Parse an MCP CallToolResult directly into a string representation.
+) -> str | list[Content]:
+    """Parse an MCP CallToolResult into a string or rich content list.
 
-    Converts each content item in the MCP result to its string form and combines them.
-    This skips the intermediate Content object step for tool results.
+    Converts each content item in the MCP result to its appropriate form.
+    Text-only results are returned as strings. When the result contains
+    image or audio content, returns a list of Content objects so the
+    framework can forward the rich media to the model.
 
     Args:
         mcp_type: The MCP CallToolResult object to convert.
 
     Returns:
-        A string representation of the tool result — either plain text or serialized JSON.
+        A string for text-only results, or a list of Content for rich media results.
     """
     import json
 
-    parts: list[str] = []
+    text_parts: list[str] = []
+    rich_items: list[Content] = []
     for item in mcp_type.content:
         match item:
             case types.TextContent():
-                parts.append(item.text)
-            case types.ImageContent() | types.AudioContent():
-                parts.append(
-                    json.dumps(
-                        {
-                            "type": "image" if isinstance(item, types.ImageContent) else "audio",
-                            "data": item.data,
-                            "mimeType": item.mimeType,
-                        },
-                        default=str,
+                text_parts.append(item.text)
+            case types.ImageContent():
+                rich_items.append(
+                    Content.from_uri(
+                        uri=f"data:{item.mimeType};base64,{item.data}",
+                        media_type=item.mimeType,
+                    )
+                )
+            case types.AudioContent():
+                rich_items.append(
+                    Content.from_uri(
+                        uri=f"data:{item.mimeType};base64,{item.data}",
+                        media_type=item.mimeType,
                     )
                 )
             case types.ResourceLink():
-                parts.append(
+                text_parts.append(
                     json.dumps(
                         {
                             "type": "resource_link",
@@ -186,9 +192,9 @@ def _parse_tool_result_from_mcp(
             case types.EmbeddedResource():
                 match item.resource:
                     case types.TextResourceContents():
-                        parts.append(item.resource.text)
+                        text_parts.append(item.resource.text)
                     case types.BlobResourceContents():
-                        parts.append(
+                        text_parts.append(
                             json.dumps(
                                 {
                                     "type": "blob",
@@ -199,12 +205,29 @@ def _parse_tool_result_from_mcp(
                             )
                         )
             case _:
-                parts.append(str(item))
-    if not parts:
+                text_parts.append(str(item))
+
+    if rich_items:
+        # Return rich content list preserving original order
+        result: list[Content] = []
+        text_idx = 0
+        rich_idx = 0
+        for item in mcp_type.content:
+            match item:
+                case types.ImageContent() | types.AudioContent():
+                    result.append(rich_items[rich_idx])
+                    rich_idx += 1
+                case _:
+                    if text_idx < len(text_parts):
+                        result.append(Content.from_text(text_parts[text_idx]))
+                        text_idx += 1
+        return result
+
+    if not text_parts:
         return ""
-    if len(parts) == 1:
-        return parts[0]
-    return json.dumps(parts, default=str)
+    if len(text_parts) == 1:
+        return text_parts[0]
+    return json.dumps(text_parts, default=str)
 
 
 def _parse_content_from_mcp(
@@ -425,7 +448,7 @@ def __init__(
         approval_mode: (Literal["always_require", "never_require"] | MCPSpecificApproval | None) = None,
         allowed_tools: Collection[str] | None = None,
         load_tools: bool = True,
-        parse_tool_results: Callable[[types.CallToolResult], str] | None = None,
+        parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None,
         load_prompts: bool = True,
         parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None,
         session: ClientSession | None = None,
@@ -850,7 +873,7 @@ async def _ensure_connected(self) -> None:
                     inner_exception=ex,
                 ) from ex
 
-    async def call_tool(self, tool_name: str, **kwargs: Any) -> str:
+    async def call_tool(self, tool_name: str, **kwargs: Any) -> str | list[Content]:
         """Call a tool with the given arguments.
 
         Args:
@@ -860,7 +883,7 @@ async def call_tool(self, tool_name: str, **kwargs: Any) -> str:
             kwargs: Arguments to pass to the tool.
 
         Returns:
-            A string representation of the tool result — either plain text or serialized JSON.
+            A string for text-only results, or a list of Content for rich media results.
 
         Raises:
             ToolExecutionException: If the MCP server is not connected, tools are not loaded,
@@ -1053,7 +1076,7 @@ def __init__(
         command: str,
         *,
         load_tools: bool = True,
-        parse_tool_results: Callable[[types.CallToolResult], str] | None = None,
+        parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None,
         load_prompts: bool = True,
         parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None,
         request_timeout: int | None = None,
@@ -1178,7 +1201,7 @@ def __init__(
         url: str,
         *,
         load_tools: bool = True,
-        parse_tool_results: Callable[[types.CallToolResult], str] | None = None,
+        parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None,
         load_prompts: bool = True,
         parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None,
         request_timeout: int | None = None,
@@ -1297,7 +1320,7 @@ def __init__(
         url: str,
         *,
         load_tools: bool = True,
-        parse_tool_results: Callable[[types.CallToolResult], str] | None = None,
+        parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None,
         load_prompts: bool = True,
         parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None,
         request_timeout: int | None = None,

diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py
@@ -244,7 +244,7 @@ def __init__(
         additional_properties: dict[str, Any] | None = None,
         func: Callable[..., Any] | None = None,
         input_model: type[BaseModel] | Mapping[str, Any] | None = None,
-        result_parser: Callable[[Any], str] | None = None,
+        result_parser: Callable[[Any], str | list[Content]] | None = None,
         **kwargs: Any,
     ) -> None:
         """Initialize the FunctionTool.
@@ -443,19 +443,19 @@ async def invoke(
         *,
         arguments: BaseModel | Mapping[str, Any] | None = None,
         **kwargs: Any,
-    ) -> str:
+    ) -> str | list[Content]:
         """Run the AI function with the provided arguments as a Pydantic model.
 
         The raw return value of the wrapped function is automatically parsed into a ``str``
-        (either plain text or serialized JSON) using :meth:`parse_result` or the custom
-        ``result_parser`` if one was provided.
+        (either plain text or serialized JSON) or a ``list[Content]`` (for rich content like
+        images) using :meth:`parse_result` or the custom ``result_parser`` if one was provided.
 
         Keyword Args:
             arguments: A mapping or model instance containing the arguments for the function.
             kwargs: Keyword arguments to pass to the function, will not be used if ``arguments`` is provided.
 
         Returns:
-            The parsed result as a string — either plain text or serialized JSON.
+            The parsed result as a string, or a list of Content items for rich results.
 
         Raises:
             TypeError: If arguments is not mapping-like or fails schema checks.
@@ -561,8 +561,9 @@ async def invoke(
                     parsed = str(result)
                 logger.info(f"Function {self.name} succeeded.")
                 if OBSERVABILITY_SETTINGS.SENSITIVE_DATA_ENABLED:  # type: ignore[name-defined]
-                    span.set_attribute(OtelAttr.TOOL_RESULT, parsed)
-                    logger.debug(f"Function result: {parsed}")
+                    result_str = parsed if isinstance(parsed, str) else str(parsed)
+                    span.set_attribute(OtelAttr.TOOL_RESULT, result_str)
+                    logger.debug(f"Function result: {result_str}")
                 return parsed
             finally:
                 duration = (end_time_stamp or perf_counter()) - start_time_stamp
@@ -614,10 +615,13 @@ def _make_dumpable(value: Any) -> Any:
         return value
 
     @staticmethod
-    def parse_result(result: Any) -> str:
-        """Convert a raw function return value to a string representation.
+    def parse_result(result: Any) -> str | list[Content]:
+        """Convert a raw function return value to a string or rich content list.
+
+        Returns a ``str`` for text-only results, or a ``list[Content]`` when the
+        function produced rich content (images, audio, files) that should be
+        forwarded to the model as visual/multi-modal input.
 
-        The return value is always a ``str`` — either plain text or serialized JSON.
         This is called automatically by :meth:`invoke` before returning the result,
         ensuring that the result stored in ``Content.from_function_result`` is
         already in a form that can be passed directly to LLM APIs.
@@ -626,12 +630,22 @@ def parse_result(result: Any) -> str:
             result: The raw return value from the wrapped function.
 
         Returns:
-            A string representation of the result, either plain text or serialized JSON.
+            A string representation, or a list of Content items for rich results.
         """
+        from ._types import Content
+
         if result is None:
             return ""
         if isinstance(result, str):
             return result
+        # Preserve rich Content (images, audio, files) instead of serializing to JSON
+        if isinstance(result, Content):
+            if result.type in ("data", "uri"):
+                return [result]
+            if result.type == "text" and result.text:
+                return result.text
+        if isinstance(result, list) and any(isinstance(item, Content) for item in result):
+            return [item if isinstance(item, Content) else Content.from_text(str(item)) for item in result]
         dumpable = FunctionTool._make_dumpable(result)
         if isinstance(dumpable, str):
             return dumpable
@@ -1086,7 +1100,7 @@ def tool(
     max_invocations: int | None = None,
     max_invocation_exceptions: int | None = None,
     additional_properties: dict[str, Any] | None = None,
-    result_parser: Callable[[Any], str] | None = None,
+    result_parser: Callable[[Any], str | list[Content]] | None = None,
 ) -> FunctionTool: ...
 
 
@@ -1102,7 +1116,7 @@ def tool(
     max_invocations: int | None = None,
     max_invocation_exceptions: int | None = None,
     additional_properties: dict[str, Any] | None = None,
-    result_parser: Callable[[Any], str] | None = None,
+    result_parser: Callable[[Any], str | list[Content]] | None = None,
 ) -> Callable[[Callable[..., Any]], FunctionTool]: ...
 
 
@@ -1117,7 +1131,7 @@ def tool(
     max_invocations: int | None = None,
     max_invocation_exceptions: int | None = None,
     additional_properties: dict[str, Any] | None = None,
-    result_parser: Callable[[Any], str] | None = None,
+    result_parser: Callable[[Any], str | list[Content]] | None = None,
 ) -> FunctionTool | Callable[[Callable[..., Any]], FunctionTool]:
     """Decorate a function to turn it into a FunctionTool that can be passed to models and executed automatically.