From 10043b8a63217495cbe0f0f72dcff6f381058bb5 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 4 May 2026 17:21:03 +0200 Subject: [PATCH 1/4] refactor(foundry_hosting): build FoundryHostedAgentHistoryProvider on azure.ai.agentserver SDK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rebuilds the Foundry hosted-agent history provider on top of ``azure.ai.agentserver``'s ``FoundryStorageProvider`` instead of the in-house ``_HttpStorageBackend``. Splits the monolithic ``_responses.py`` into focused modules: - ``_history_provider.py`` — new ``FoundryHostedAgentHistoryProvider`` that talks to the SDK's ``FoundryStorageProvider``, threads ``response_id`` / ``previous_response_id`` through ``ContextVar``s via ``bind_request_context``, and lifts host-bound isolation keys (``x-agent-{user,chat}-isolation-key``) from the optional ``agent_framework_hosting`` package into a provider-local ``IsolationContext`` so the storage layer carries the correct partition keys without channels having to know about them. - ``_shared.py`` — extracts all SDK ``Item`` / ``OutputItem`` ↔ framework ``Message`` conversion helpers into one place so both ``_responses.py`` and the new history provider can share them. Restores ``_convert_file_data`` for inline ``input_file`` payloads, and the hosted-MCP routing for ``custom_tool_call_output`` items whose ``call_id`` carries the ``mcp_*`` prefix. - ``_ids.py`` — shared id helpers. - ``_responses.py`` — shrinks ~700 lines, re-exports converters for back-compat with existing tests. - ``tests/test_history_provider.py`` — exercises the new provider against a fake SDK backend; the host-isolation test is gated on the optional ``agent_framework_hosting`` import. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../__init__.py | 21 +- .../_history_provider.py | 758 ++++++++++ .../agent_framework_foundry_hosting/_ids.py | 72 + .../_responses.py | 868 +---------- .../_shared.py | 1287 +++++++++++++++++ .../tests/test_history_provider.py | 969 +++++++++++++ 6 files changed, 3172 insertions(+), 803 deletions(-) create mode 100644 python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py create mode 100644 python/packages/foundry_hosting/agent_framework_foundry_hosting/_ids.py create mode 100644 python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py create mode 100644 python/packages/foundry_hosting/tests/test_history_provider.py diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/__init__.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/__init__.py index 81e8430783..691353a0e1 100644 --- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/__init__.py +++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/__init__.py @@ -2,6 +2,16 @@ import importlib.metadata +from ._history_provider import ( + FoundryHostedAgentHistoryProvider, + bind_request_context, + get_current_request_context, +) +from ._ids import ( + foundry_item_id, + foundry_response_id, + foundry_response_id_factory, +) from ._invocations import InvocationsHostServer from ._responses import ResponsesHostServer @@ -10,4 +20,13 @@ except importlib.metadata.PackageNotFoundError: __version__ = "0.0.0" -__all__ = ["InvocationsHostServer", "ResponsesHostServer"] +__all__ = [ + "FoundryHostedAgentHistoryProvider", + "InvocationsHostServer", + "ResponsesHostServer", + "bind_request_context", + "foundry_item_id", + "foundry_response_id", + "foundry_response_id_factory", + "get_current_request_context", +] diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py 
b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py new file mode 100644 index 0000000000..a558979631 --- /dev/null +++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py @@ -0,0 +1,758 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Foundry Hosted Agent history provider. + +A standalone :class:`agent_framework.HistoryProvider` implementation that +sources conversation history from the Foundry Hosted Agent storage backend. + +Transport is delegated to the SDK's +:class:`azure.ai.agentserver.responses.FoundryStorageProvider` (when running +inside a Foundry Hosted Agent container) or +:class:`azure.ai.agentserver.responses.InMemoryResponseProvider` (for local +development). Both implement the same read/write surface +(``get_history_item_ids`` / ``get_items`` / ``create_response``), so this +provider's persistence logic stays backend-agnostic. + +Allowed dependencies (deliberately narrow): + +* :mod:`agent_framework` (core, for ``HistoryProvider`` / ``Message``) +* :mod:`azure.ai.agentserver.responses` (for the storage backends, + ``IsolationContext`` typing, and ``OutputItem`` deserialization) +* :mod:`azure.core.credentials_async` (typing of token credentials) + +It MUST NOT depend on any ``agent_framework_hosting*`` package at module +import time. (The host's isolation contextvar is consulted lazily via an +``import`` inside :func:`_host_isolation` so the dependency stays soft.) + +Environment variables read: + +* ``FOUNDRY_HOSTING_ENVIRONMENT`` — non-empty marks "running inside Foundry" + and selects the SDK-backed storage transport. +* ``FOUNDRY_PROJECT_ENDPOINT`` — base URL of the Foundry project; required + when running hosted unless an explicit ``endpoint=`` is supplied. +* ``FOUNDRY_AGENT_NAME`` / ``FOUNDRY_AGENT_VERSION`` — stamped onto the + ``agent_reference`` field of every persisted response envelope. 
+* ``FOUNDRY_AGENT_SESSION_ID`` — used as a chain anchor when the channel + did not bind a per-request ``previous_response_id``. +* ``MODEL_DEPLOYMENT_NAME`` / ``AZURE_AI_MODEL_DEPLOYMENT_NAME`` — model + field stamped on the persisted envelope (must match a real deployment). + +Local fallback: when ``FOUNDRY_HOSTING_ENVIRONMENT`` is unset, the provider +transparently falls back to :class:`InMemoryResponseProvider` so the same +agent code runs in dev. +""" + +from __future__ import annotations + +import contextlib +import logging +import os +import time +from contextlib import contextmanager +from contextvars import ContextVar +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, ClassVar + +from agent_framework import HistoryProvider, Message +from azure.ai.agentserver.responses import ( + FoundryStorageProvider, + FoundryStorageSettings, + InMemoryResponseProvider, + IsolationContext, +) +from azure.ai.agentserver.responses._id_generator import IdGenerator +from azure.ai.agentserver.responses.models import OutputItem, ResponseObject +from azure.ai.agentserver.responses.store._foundry_errors import ( # pyright: ignore[reportPrivateUsage] + FoundryBadRequestError, + FoundryResourceNotFoundError, +) + +from ._shared import ( + _messages_to_output_items, # pyright: ignore[reportPrivateUsage] + _output_items_to_messages, # pyright: ignore[reportPrivateUsage] +) + +if TYPE_CHECKING: + from collections.abc import Iterator, Sequence + + from azure.core.credentials_async import AsyncTokenCredential + +logger = logging.getLogger(__name__) + +# Environment variable names — re-declared (not imported) so this module +# stays decoupled from the private ``azure.ai.agentserver.core._config`` +# constants while still matching them exactly. +_ENV_FOUNDRY_HOSTING_ENVIRONMENT = "FOUNDRY_HOSTING_ENVIRONMENT" +_ENV_FOUNDRY_PROJECT_ENDPOINT = "FOUNDRY_PROJECT_ENDPOINT" + +# Per-request isolation context. 
+# The owning Channel is expected to set this
+# from the inbound request (e.g. user / tenant headers) for the duration of
+# an ``agent.run(...)`` call. When unset, requests are made without
+# isolation headers (matches how ``ResponseContext`` behaves with no
+# ``IsolationContext``).
+_isolation_var: ContextVar[IsolationContext | None] = ContextVar(
+    "agent_framework_foundry_hosting_isolation",
+    default=None,
+)
+
+
+def set_current_isolation(isolation: IsolationContext | None) -> Any:
+    """Set the per-request isolation context for downstream history calls.
+
+    Channels that drive an agent backed by :class:`FoundryHostedAgentHistoryProvider`
+    should call this before invoking ``agent.run(...)`` and reset the token
+    afterwards.
+
+    Args:
+        isolation: The isolation context to associate with the current
+            ``contextvars`` context, or ``None`` to clear it.
+
+    Returns:
+        The token produced by ``ContextVar.set``; pass it to
+        :func:`reset_current_isolation` to restore the previous value.
+    """
+    return _isolation_var.set(isolation)
+
+
+def reset_current_isolation(token: Any) -> None:
+    """Restore a previously-saved isolation context.
+
+    Args:
+        token: A token returned by :func:`set_current_isolation`.
+    """
+    _isolation_var.reset(token)
+
+
+def get_current_isolation() -> IsolationContext | None:
+    """Return the isolation context bound to the current async context, if any.
+
+    Returns:
+        The :class:`IsolationContext` for the current request, or ``None``
+        when no channel has set one.
+    """
+    return _isolation_var.get()
+
+
+@dataclass(frozen=True)
+class _RequestContext:
+    """Per-request anchors the host binds before invoking the agent.
+
+    ``response_id`` is the id this provider's :meth:`save_messages` call
+    will write under, so the channel and the storage backend agree on
+    one stable handle per turn (the channel surfaces the same id on the
+    response envelope, the next turn arrives with this value as
+    ``previous_response_id`` and the chain walks).
+
+    ``previous_response_id`` is the prior turn's anchor (``None`` on
+    first turn). Used to seed ``history_item_ids`` on the new write so
+    the storage chain stays connected, and to load history without
+    needing to know the channel's session minting convention.
+
+    Per-request Foundry isolation keys (the
+    ``x-agent-{user,chat}-isolation-key`` headers) are *not* carried
+    here; the host's own ASGI middleware lifts them off every inbound
+    HTTP request into a contextvar
+    (:func:`agent_framework_hosting.get_current_isolation_keys`) which
+    this provider consults at storage-call time. Keeping the headers
+    out of the per-request bind means channels never have to import
+    Foundry-specific types and the host owns the (intentional) coupling
+    to those two well-known headers.
+
+    The dataclass is frozen, so a bound context cannot be mutated in
+    place; a new binding requires a new instance.
+    """
+
+    # Id the current turn is written under.
+    response_id: str
+    # Id of the prior turn, or ``None`` when there is none.
+    previous_response_id: str | None
+
+
+# Holds the active ``_RequestContext``. It is ``None`` by default and is
+# only populated for the duration of a ``bind_request_context`` block.
+_request_var: ContextVar[_RequestContext | None] = ContextVar(
+    "agent_framework_foundry_hosting_request",
+    default=None,
+)
+
+
+@contextmanager
+def bind_request_context(
+    *,
+    response_id: str,
+    previous_response_id: str | None = None,
+    **_unused: Any,
+) -> "Iterator[None]":
+    """Bind the per-request response-chain anchors for this provider.
+
+    Intended for the host (or any caller orchestrating an
+    ``agent.run(...)``) to call immediately before invocation, so the
+    provider's :meth:`save_messages` writes under a known, stable
+    ``response_id`` (the same one the channel surfaces to the client)
+    and walks ``previous_response_id`` for history continuity. Unknown
+    keyword arguments are accepted and ignored so the host can extend
+    the ``ChannelRequest.attributes`` contract without breaking existing
+    providers. Foundry isolation keys flow through a separate
+    host-installed contextvar; see the class docstring on
+    :class:`_RequestContext`.
+
+    The binding is scoped to the current ``contextvars.Context``, so
+    concurrent requests in the same process do not interfere.
+
+    Args:
+        response_id: Id the current turn will be persisted under.
+        previous_response_id: Id of the prior turn, or ``None``.
+        **_unused: Extra keyword arguments; accepted and discarded.
+
+    Yields:
+        ``None``. The previous binding is restored when the ``with``
+        block exits, including when it exits via an exception.
+    """
+    token = _request_var.set(
+        _RequestContext(
+            response_id=response_id,
+            previous_response_id=previous_response_id,
+        )
+    )
+    try:
+        yield
+    finally:
+        # Always restore the prior value so nested or failed bindings
+        # do not leak into the surrounding context.
+        _request_var.reset(token)
+
+
+def get_current_request_context() -> _RequestContext | None:
+    """Return the per-request response chain anchors, if bound.
+
+    Returns:
+        The :class:`_RequestContext` set by :func:`bind_request_context`,
+        or ``None`` when called outside such a block.
+    """
+    return _request_var.get()
+
+
+def _host_isolation() -> "IsolationContext | None":
+    """Lift the host-bound isolation contextvar into our local type.
+
+    The host installs an ASGI middleware that reads
+    ``x-agent-{user,chat}-isolation-key`` off every inbound HTTP request
+    and stores them in a generic ``IsolationKeys`` slot on a contextvar
+    we import from :mod:`agent_framework_hosting`. We translate it into
+    our :class:`IsolationContext` shape on demand so the provider stays
+    in charge of the storage-side type while the host stays free of any
+    Foundry-specific dependencies.
+
+    Returns:
+        An :class:`IsolationContext` built from the host's ``user_key``
+        and ``chat_key``, or ``None`` when :mod:`agent_framework_hosting`
+        cannot be imported, no keys are bound, or the bound keys report
+        ``is_empty``.
+    """
+    # Soft dep: ``agent_framework_hosting`` may not be installed (this
+    # provider is also usable standalone). The whole block is wrapped in
+    # ``# pyright: ignore`` so the optional import does not block type
+    # checking when the package isn't on sys.path; when it is, pyright
+    # picks up the real types automatically.
+    try:
+        from agent_framework_hosting import (  # pyright: ignore[reportMissingImports]
+            get_current_isolation_keys,  # pyright: ignore[reportUnknownVariableType]
+        )
+    except ImportError:  # pragma: no cover - hosting is a soft dep
+        return None
+    keys = get_current_isolation_keys()  # pyright: ignore[reportUnknownVariableType]
+    if keys is None or keys.is_empty:  # pyright: ignore[reportUnknownMemberType]
+        return None
+    return IsolationContext(
+        user_key=keys.user_key,  # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType]
+        chat_key=keys.chat_key,  # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType]
+    )
+
+
+# Type alias for the storage backend surface this provider depends on.
+# Both ``FoundryStorageProvider`` and ``InMemoryResponseProvider`` from
+# ``azure.ai.agentserver.responses`` expose the same
+# ``get_history_item_ids`` / ``get_items`` / ``create_response`` methods.
+_StorageBackend = "FoundryStorageProvider | InMemoryResponseProvider"
+
+
+class FoundryHostedAgentHistoryProvider(HistoryProvider):
+    """``HistoryProvider`` backed by Foundry Hosted Agent storage.
+
+    Wraps :class:`azure.ai.agentserver.responses.FoundryStorageProvider`
+    when running inside a Foundry Hosted Agent container, or
+    :class:`InMemoryResponseProvider` for local development. The
+    selection is driven by the ``FOUNDRY_HOSTING_ENVIRONMENT``
+    environment variable.
+
+    ``session_id`` semantics: the value passed to :meth:`get_messages`
+    and :meth:`save_messages` is treated as the Responses
+    ``previous_response_id`` (or ``conversation_id``) whose chain to
+    load. When omitted (and no host-bound chain anchor is set),
+    :meth:`get_messages` returns an empty list (a fresh conversation).
+    """
+
+    DEFAULT_SOURCE_ID: ClassVar[str] = "foundry_hosted_agent"
+
+    def __init__(
+        self,
+        *,
+        credential: "AsyncTokenCredential | None" = None,
+        endpoint: str | None = None,
+        history_limit: int = 100,
+        source_id: str = DEFAULT_SOURCE_ID,
+        load_messages: bool = True,
+        store_inputs: bool = True,
+        store_context_messages: bool = False,
+        store_context_from: set[str] | None = None,
+        store_outputs: bool = True,
+    ) -> None:
+        """Initialize the provider.
+
+        Args:
+            credential: Async token credential used to authenticate against
+                the Foundry storage API. Required when running hosted
+                (``FOUNDRY_HOSTING_ENVIRONMENT`` is set). Ignored in
+                local-mode (the in-memory backend needs no auth).
+            endpoint: Foundry project endpoint URL. Defaults to the value
+                of the ``FOUNDRY_PROJECT_ENDPOINT`` environment variable.
+                Required when running hosted.
+            history_limit: Maximum number of history items to fetch per
+                ``get_messages`` call. Mirrors the agent-server runtime's
+                ``ResponseContext._history_limit``. Default ``100``.
+            source_id: Unique identifier for this provider instance, as
+                required by ``HistoryProvider``.
+            load_messages: Whether to load messages before invocation.
+                Default ``True``.
+            store_inputs: Whether to mirror input messages into Foundry
+                storage. Default ``True`` — the Foundry Hosted Agents
+                runtime does not persist Responses turns automatically, so
+                without this the chain would never be visible to subsequent
+                requests. Set ``False`` only if you know an external writer
+                is populating storage on your behalf.
+            store_context_messages: Whether to mirror context-provider
+                messages. Default ``False``.
+            store_context_from: If set, only mirror context messages from
+                these source IDs.
+            store_outputs: Whether to mirror response messages into Foundry
+                storage. Default ``True`` for the same reason as
+                ``store_inputs``.
+        """
+        super().__init__(
+            source_id=source_id,
+            load_messages=load_messages,
+            store_inputs=store_inputs,
+            store_context_messages=store_context_messages,
+            store_context_from=store_context_from,
+            store_outputs=store_outputs,
+        )
+
+        self._history_limit = history_limit
+        self._credential = credential
+        # An empty endpoint (argument or env var) is normalised to ``None``
+        # so ``_resolve_backend`` can use a single falsy check.
+        self._endpoint = endpoint or os.environ.get(_ENV_FOUNDRY_PROJECT_ENDPOINT) or None
+        # Built lazily by ``_resolve_backend``; cleared again by ``aclose``.
+        self._backend: FoundryStorageProvider | InMemoryResponseProvider | None = None
+
+    @staticmethod
+    def is_hosted_environment() -> bool:
+        """Return ``True`` when running inside a Foundry Hosted Agent container.
+
+        Detection uses the ``FOUNDRY_HOSTING_ENVIRONMENT`` environment
+        variable, the same signal :class:`ResponsesAgentServerHost` uses to
+        switch between hosted and local storage backends.
+        """
+        return bool(os.environ.get(_ENV_FOUNDRY_HOSTING_ENVIRONMENT))
+
+    def _resolve_backend(self) -> "FoundryStorageProvider | InMemoryResponseProvider":
+        """Return the storage backend, constructing it lazily on first use.
+
+        * If ``FOUNDRY_HOSTING_ENVIRONMENT`` is set, build a
+          :class:`FoundryStorageProvider` (requires ``credential`` and a
+          resolved ``endpoint``).
+        * Otherwise, fall back to a process-local
+          :class:`InMemoryResponseProvider` so dev/local runs work without
+          additional configuration.
+
+        Raises:
+            RuntimeError: When running hosted without a credential or
+                without a resolved endpoint.
+        """
+        if self._backend is not None:
+            return self._backend
+
+        if self.is_hosted_environment():
+            if self._credential is None:
+                raise RuntimeError(
+                    "FoundryHostedAgentHistoryProvider requires an async credential when running "
+                    "inside a Foundry Hosted Agent container. Pass credential=... ."
+                )
+            if not self._endpoint:
+                raise RuntimeError(
+                    "FoundryHostedAgentHistoryProvider needs a Foundry project endpoint. Pass "
+                    "endpoint=... or set the FOUNDRY_PROJECT_ENDPOINT environment variable."
+                )
+            self._backend = FoundryStorageProvider(
+                credential=self._credential,
+                settings=FoundryStorageSettings.from_endpoint(self._endpoint),
+            )
+            logger.debug(
+                "FoundryHostedAgentHistoryProvider using FoundryStorageProvider against %s",
+                self._endpoint,
+            )
+            return self._backend
+
+        logger.info(
+            "FOUNDRY_HOSTING_ENVIRONMENT is unset — FoundryHostedAgentHistoryProvider falling "
+            "back to InMemoryResponseProvider for local development.",
+        )
+        self._backend = InMemoryResponseProvider()
+        return self._backend
+
+    async def aclose(self) -> None:
+        """Release storage resources held by this provider.
+
+        Safe to call multiple times. Closes the lazily-constructed
+        backend if one was created. ``InMemoryResponseProvider`` has no
+        ``aclose`` and is closed implicitly on garbage collection.
+        """
+        if self._backend is None:
+            return
+        # Look the method up dynamically: only some backends define it.
+        aclose = getattr(self._backend, "aclose", None)
+        if aclose is not None:
+            await aclose()
+        self._backend = None
+
+    async def get_messages(
+        self,
+        session_id: str | None,
+        *,
+        state: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> list[Message]:
+        """Load conversation history for the given Foundry response chain.
+
+        Args:
+            session_id: The Responses ``previous_response_id`` /
+                ``conversation_id`` to anchor history on. Only consulted
+                when no host-bound ``previous_response_id`` is available,
+                and only when it starts with ``caresp_`` or ``resp_``.
+            state: Unused — kept for ``HistoryProvider`` compatibility.
+            **kwargs: Extensibility hook; ``isolation`` may be supplied
+                explicitly to override the contextvar.
+
+        Returns:
+            The conversation history materialised as a list of
+            :class:`agent_framework.Message`, oldest-first. An empty list
+            is returned when no anchor can be resolved, when storage
+            rejects the anchor (400 / 404), or when the chain has no
+            items.
+
+        Notes:
+            History anchoring follows the Foundry response-id chain. The
+            preferred anchor is the per-request ``previous_response_id``
+            bound by the host via :func:`bind_request_context` — that's
+            the prior turn's resp id, written by *this* provider's
+            previous :meth:`save_messages` call, so the chain is
+            guaranteed walkable. When unbound (e.g. local dev calling
+            the provider directly), we fall back to the ``session_id``
+            argument as long as it's ``resp_*``-shaped; opaque tokens
+            (such as chat-isolation-key values) are skipped because the
+            storage backend rejects them with HTTP 400 "Malformed
+            identifier". The last fallback is the
+            ``FOUNDRY_AGENT_SESSION_ID`` environment variable, under the
+            same prefix rule.
+        """
+        bound = get_current_request_context()
+        # Prefer the host-bound previous_response_id over the session_id
+        # the framework feeds in: the bound value is the id we ourselves
+        # wrote on the previous turn, so we know it's storage-valid.
+        anchor = bound.previous_response_id if bound is not None else None
+        if anchor is None and session_id and session_id.startswith(("caresp_", "resp_")):
+            anchor = session_id
+        if anchor is None:
+            # The Foundry Hosted Agent runtime stamps the previous turn's
+            # response id into ``FOUNDRY_AGENT_SESSION_ID`` for the
+            # following turn's container, so we can walk back from it
+            # directly without keeping any cross-request state ourselves.
+            env_session = os.environ.get("FOUNDRY_AGENT_SESSION_ID") or None
+            if env_session and env_session.startswith(("caresp_", "resp_")):
+                anchor = env_session
+        if anchor is None:
+            # No walkable anchor → fresh conversation, nothing to load.
+            return []
+
+        # Precedence: explicit kwarg, then host-bound keys, then the
+        # provider-local contextvar.
+        isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation()
+        backend = self._resolve_backend()
+
+        try:
+            item_ids = await backend.get_history_item_ids(
+                anchor,
+                None,
+                self._history_limit,
+                isolation=isolation,
+            )
+        except (FoundryBadRequestError, FoundryResourceNotFoundError) as err:
+            # 400 / 404 here means the anchor isn't storage-valid — treat
+            # it as an empty history rather than failing the whole request.
+            logger.debug(
+                "get_messages: anchor %r rejected by storage (%s); returning empty history",
+                anchor,
+                type(err).__name__,
+            )
+            return []
+        if not item_ids:
+            return []
+
+        items = await backend.get_items(item_ids, isolation=isolation)
+        # ``get_items`` may return ``None`` placeholders for missing IDs.
+        resolved = [item for item in items if item is not None]
+        return _output_items_to_messages(resolved)
+
+    async def save_messages(
+        self,
+        session_id: str | None,
+        messages: "Sequence[Message]",
+        *,
+        state: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Persist messages for ``session_id`` into Foundry storage.
+
+        Unlike the standalone ``azure.ai.agentserver`` runtime — which
+        owns response orchestration end-to-end and writes turns
+        authoritatively — the Agent Framework hosting stack treats
+        ``HistoryProvider`` as the *only* persistence path. Without this
+        method actively writing, a deployed hosted agent would silently
+        drop every turn.
+
+        Strategy:
+
+        * Use the host-bound ``response_id`` as the envelope id (mints
+          a fresh ``caresp_*`` id when unbound, e.g. local dev).
+        * Anchor the new write to the previous turn via
+          ``previous_response_id``, walking the prior turn's history
+          item ids forward so the full transcript stays visible.
+        * Split items by role: ``"message"`` (user/system inputs) into
+          ``input_items``, everything else (assistant outputs, tool
+          calls, reasoning, ...) into ``response.output``.
+
+        The write is best-effort. Nothing is written when ``messages`` is
+        empty, or when no request context is bound and ``session_id`` is
+        empty. Any exception raised by the backend's ``create_response``
+        is logged with its traceback and swallowed, so a storage failure
+        does not propagate to the caller.
+
+        Args:
+            session_id: The Responses ``previous_response_id`` /
+                ``conversation_id`` the messages belong to.
+            messages: The messages selected for persistence by the base
+                ``HistoryProvider`` after-run hook.
+            state: Unused — kept for ``HistoryProvider`` compatibility.
+            **kwargs: Extensibility hook; ``isolation`` may be supplied
+                explicitly to override the contextvar.
+        """
+        if not messages:
+            return
+
+        bound = get_current_request_context()
+        # Prefer the host-bound response_id so the channel envelope and
+        # the storage write agree on a single id per turn — which is
+        # what makes the next turn's ``previous_response_id`` walkable.
+        # Without a binding (e.g. local dev calling ``save_messages``
+        # directly), fall back to a fresh Foundry-format response id.
+        # Free-form ``resp_*`` ids carry no embedded partition key
+        # and the storage backend rejects writes with a server error;
+        # ``IdGenerator.new_response_id()`` mints a ``caresp_*`` id with
+        # the partition-key segment the backend expects. The chain
+        # walks only when ``session_id`` is itself a ``caresp_*``-shaped
+        # value (i.e. a previous response id), matching the prefix the
+        # ``ResponsesChannel`` factory uses.
+        if bound is not None:
+            response_id = bound.response_id
+            previous_response_id = bound.previous_response_id
+        else:
+            if not session_id:
+                # No binding and no session id: skip the write entirely.
+                return
+            response_id = IdGenerator.new_response_id()
+            previous_response_id = session_id if session_id.startswith(("caresp_", "resp_")) else None
+
+        # Foundry session-bound containers: when ``FOUNDRY_AGENT_SESSION_ID``
+        # is set the runtime stamps it to the previous turn's response id
+        # so each new container can chain back to it directly. We don't
+        # need to maintain any cross-request map ourselves.
+        env_session = os.environ.get("FOUNDRY_AGENT_SESSION_ID") or None
+        if previous_response_id is None and env_session and env_session.startswith(("caresp_", "resp_")):
+            previous_response_id = env_session
+
+        isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation()
+        logger.debug(
+            "save_messages: response_id=%r previous_response_id=%r isolation=%s",
+            response_id,
+            previous_response_id,
+            # Log only whether isolation is present, never the key values.
+            "set" if isolation else "unset",
+        )
+        backend = self._resolve_backend()
+
+        # The agentserver runtime puts INBOUND items (user/system messages
+        # the request sent in) in the envelope's ``input_items`` axis and
+        # OUTBOUND items (assistant outputs, tool calls, reasoning) in
+        # ``response.output``. See
+        # ``_resolve_input_items_for_persistence`` (orchestrator.py:61) +
+        # ``_extract_response_snapshot_from_events`` in
+        # ``azure.ai.agentserver.responses``: ``input_items`` comes from
+        # ``ctx.input_items`` (request inputs only); ``response.output``
+        # is populated from the lifecycle event stream.
+        #
+        # Putting everything in ``input_items`` with ``response.output: []``
+        # is a schema violation that the storage backend rejects with an
+        # opaque HTTP 500. Split by role to mirror the runtime.
+        all_items = _messages_to_output_items(list(messages), id_prefix=response_id)
+
+        # Re-stamp every item id via ``IdGenerator`` so each carries a
+        # Foundry-format ``{type-prefix}_{partition-key}{entropy}``
+        # identifier, with the response_id as the partition-key hint
+        # (co-locates each item with the response record). Free-form
+        # ``{response_id}_itm_N`` ids are rejected by the storage
+        # backend with an opaque HTTP 500 because the partition-key
+        # extractor cannot parse them. ``IdGenerator.new_item_id``
+        # dispatches by *Item* (input) type and returns ``None`` for
+        # our *OutputItem* (storage) instances, so we dispatch by the
+        # ``type`` discriminator string instead.
+        ITEM_ID_FACTORY: dict[str, Any] = {
+            "message": IdGenerator.new_message_item_id,
+            "output_message": IdGenerator.new_output_message_item_id,
+            "function_call": IdGenerator.new_function_call_item_id,
+            "function_call_output": IdGenerator.new_function_call_output_item_id,
+            "reasoning": IdGenerator.new_reasoning_item_id,
+            "file_search_call": IdGenerator.new_file_search_call_item_id,
+            "web_search_call": IdGenerator.new_web_search_call_item_id,
+            "image_generation_call": IdGenerator.new_image_gen_call_item_id,
+            "code_interpreter_call": IdGenerator.new_code_interpreter_call_item_id,
+            "computer_call": IdGenerator.new_computer_call_item_id,
+            "computer_call_output": IdGenerator.new_computer_call_output_item_id,
+            "local_shell_call": IdGenerator.new_local_shell_call_item_id,
+            "local_shell_call_output": IdGenerator.new_local_shell_call_output_item_id,
+            "mcp_call": IdGenerator.new_mcp_call_item_id,
+            "mcp_list_tools": IdGenerator.new_mcp_list_tools_item_id,
+            "mcp_approval_request": IdGenerator.new_mcp_approval_request_item_id,
+            "mcp_approval_response": IdGenerator.new_mcp_approval_response_item_id,
+            "custom_tool_call": IdGenerator.new_custom_tool_call_item_id,
+            "custom_tool_call_output": IdGenerator.new_custom_tool_call_output_item_id,
+        }
+        for item in all_items:
+            factory = ITEM_ID_FACTORY.get(getattr(item, "type", "") or "")
+            if factory is None:
+                # Unknown item type: keep whatever id the converter assigned.
+                continue
+            new_id = factory(response_id)
+            # Items that do not accept an ``id`` assignment are left as-is.
+            with contextlib.suppress(AttributeError, TypeError):
+                item.id = new_id  # type: ignore[attr-defined]
+
+        input_items: list[Any] = []
+        output_items: list[Any] = []
+        for item in all_items:
+            item_type = getattr(item, "type", None)
+            if item_type == "message":
+                input_items.append(item)
+            else:
+                # ``output_message``, tool calls, reasoning, etc. all
+                # belong to the response output stream.
+                output_items.append(item)
+
+        # Walk the previous response's history chain so the new write
+        # carries the full transcript forward. Without this, each turn
+        # would only see the messages saved on that very turn.
+        history_item_ids: list[str] | None = None
+        if previous_response_id is not None:
+            try:
+                history_item_ids = await backend.get_history_item_ids(
+                    previous_response_id,
+                    None,
+                    self._history_limit,
+                    isolation=isolation,
+                )
+            except (FoundryBadRequestError, FoundryResourceNotFoundError) as err:
+                # Don't let history fetch failures torpedo the write —
+                # we still want to persist the new turn even if the
+                # chain seed is unreachable for some reason.
+                logger.warning(
+                    "save_messages: failed to walk previous_response_id=%r (%s); writing new turn without history seed",
+                    previous_response_id,
+                    type(err).__name__,
+                )
+
+        # Mirror what the agentserver runtime serialises onto the wire
+        # (see ``_extract_response_snapshot_from_events`` +
+        # ``strip_nulls`` in
+        # ``azure.ai.agentserver.responses.streaming._helpers``):
+        #
+        # * ``agent_reference`` (Required on the response envelope) —
+        #   built from ``FOUNDRY_AGENT_NAME`` / ``FOUNDRY_AGENT_VERSION``,
+        #   which the hosted platform sets per-deploy (sentinel fallback
+        #   for local dev so the envelope stays well-formed).
+        # * ``agent_session_id`` (S-038) — forcibly stamped by the
+        #   runtime; sourced from ``FOUNDRY_AGENT_SESSION_ID``.
+        # * ``conversation`` is intentionally omitted: the (user, chat)
+        #   isolation headers are the Foundry storage partition key,
+        #   and the chat-isolation-key value is opaque (the API
+        #   returns "Malformed identifier"/HTTP 400 if used as a
+        #   body-level ``conversation_id``).
+        # * Per-item ``response_id`` / ``agent_reference`` are NOT
+        #   stamped here — those B20/B21 defaults only apply to items
+        #   inside ``response.output_item.added/done`` *events* (see
+        #   ``_coerce_handler_event``); items inside ``input_items``
+        #   and ``response.output`` go through ``to_output_item`` which
+        #   never sets these fields, and the storage validator returns
+        #   HTTP 400 ``invalid_payload`` when extras leak in.
+        agent_name = os.environ.get("FOUNDRY_AGENT_NAME") or "agent-framework-host"
+        agent_version = os.environ.get("FOUNDRY_AGENT_VERSION") or None
+        agent_reference: dict[str, Any] = {"type": "agent_reference", "name": agent_name}
+        if agent_version:
+            agent_reference["version"] = agent_version
+
+        agent_session_id = os.environ.get("FOUNDRY_AGENT_SESSION_ID") or None
+        # ``model`` must be a real deployed model name — the storage
+        # validator rejects arbitrary strings. Pull it from the
+        # platform-provided ``MODEL_DEPLOYMENT_NAME`` (set in agent.yaml)
+        # and fall back to ``AZURE_AI_MODEL_DEPLOYMENT_NAME`` for local
+        # dev. When neither is set we omit the field entirely (it is
+        # ``Optional[str]`` per the ResponseObject schema).
+        model_deployment = (
+            os.environ.get("MODEL_DEPLOYMENT_NAME") or os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME") or None
+        )
+
+        # Build the wire payload to match exactly what the agentserver
+        # runtime emits via ``_extract_response_snapshot_from_events``
+        # for a synthetic ``status=completed`` snapshot:
+        #
+        #   {id, object, output, created_at, [model], agent_reference,
+        #    status, completed_at, [agent_session_id]}
+        #
+        # ``previous_response_id`` is appended when chaining; the runtime
+        # threads it through the same code path.
+        now = int(time.time())
+        response_body: dict[str, Any] = {
+            "id": response_id,
+            # SDK mirror: ``streaming/_helpers.py:244`` always stamps
+            # ``response_id`` alongside ``id`` on the snapshot before it
+            # reaches ``serialize_create_request``.
+            "response_id": response_id,
+            "object": "response",
+            # S-040 auto-stamp: the orchestrator (``_orchestrator.py:1706``)
+            # echoes ``background`` from the request to every response
+            # envelope; storage rejects payloads that omit it.
+            "background": False,
+            # ``ResponseObject`` schema (``_models.py:13995``) declares
+            # ``parallel_tool_calls: bool`` as REQUIRED. The SDK's synthetic
+            # fallback path (``_build_events``) never sets it because it's
+            # only invoked for failure recovery; real handler events carry
+            # it through. Storage rejects payloads that omit it.
+            "parallel_tool_calls": False,
+            # Same story for ``instructions`` (``_models.py:13989``) —
+            # required ``str | list[Item]`` field.
+            "instructions": "",
+            "output": [item.as_dict() for item in output_items],
+            "created_at": now,
+            "agent_reference": agent_reference,
+            "status": "completed",
+            "completed_at": now,
+        }
+        if model_deployment is not None:
+            response_body["model"] = model_deployment
+        if agent_session_id is not None:
+            response_body["agent_session_id"] = agent_session_id
+        if previous_response_id is not None:
+            response_body["previous_response_id"] = previous_response_id
+        response = ResponseObject(response_body)
+
+        try:
+            await backend.create_response(
+                response,
+                input_items=input_items,
+                history_item_ids=history_item_ids,
+                isolation=isolation,
+            )
+        except Exception as exc:
+            # Deliberate best-effort boundary: a failed history write is
+            # logged with its traceback but does not fail the caller.
+            err_body = getattr(exc, "response_body", None)
+            logger.exception(
+                "FoundryHostedAgentHistoryProvider.save_messages: backend rejected "
+                "%d message(s) (response_id=%s, previous_response_id=%s, error_body=%s).",
+                len(messages),
+                response_id,
+                previous_response_id,
+                err_body,
+            )
+            return
+        logger.debug(
+            "FoundryHostedAgentHistoryProvider.save_messages: persisted %d message(s) "
+            "(response_id=%s, previous_response_id=%s).",
+            len(messages),
+            response_id,
+            previous_response_id,
+        )
+
+
+# Re-export ``OutputItem`` for callers that want to construct test items
+# without reaching into the SDK's ``models`` namespace directly.
+__all__ = [ + "FoundryHostedAgentHistoryProvider", + "OutputItem", + "bind_request_context", + "get_current_isolation", + "get_current_request_context", + "reset_current_isolation", + "set_current_isolation", +] diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_ids.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_ids.py new file mode 100644 index 0000000000..588231d073 --- /dev/null +++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_ids.py @@ -0,0 +1,72 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Foundry-storage-compatible identifier helpers. + +The Foundry hosted-agent storage backend partitions records by extracting +an embedded partition-key segment from every record/item id. The id +format is ``{prefix}_{18charPartitionKey}{32charEntropy}`` (or a 48-char +legacy body). Free-form ids such as ``resp_<uuid>`` carry no valid +partition key and the storage API rejects writes with an opaque +``HTTP 500 server_error``. + +These helpers wrap :class:`azure.ai.agentserver.responses._id_generator.IdGenerator` +so callers (e.g. the ``ResponsesChannel.response_id_factory`` argument +or :class:`FoundryHostedAgentHistoryProvider.save_messages`) can mint +ids that the storage backend accepts without leaking the SDK import +path into user code. +""" + +from __future__ import annotations + +from typing import Any + +from azure.ai.agentserver.responses._id_generator import IdGenerator + +__all__ = [ + "foundry_item_id", + "foundry_response_id", + "foundry_response_id_factory", +] + + +def foundry_response_id(previous_response_id: str | None = None) -> str: + """Mint a Foundry-storage-compatible response id (``caresp_*``). + + Args: + previous_response_id: When supplied (and shaped like a Foundry + id with an embedded partition key), the new id co-locates + with the chain by reusing that partition key. 
The storage + backend rejects chained writes whose new record sits in a + different partition than the prior one. + + Returns: + A new id of the form ``caresp_<18charPartitionKey><32charEntropy>``. + """ + return IdGenerator.new_response_id(previous_response_id or "") + + +def foundry_response_id_factory() -> "Any": + """Return a callable suitable for ``ResponsesChannel(response_id_factory=...)``. + + The returned callable accepts an optional ``previous_response_id`` + hint which the channel passes for chained turns so the new id + inherits the prior turn's partition key (Foundry storage requirement). + """ + return foundry_response_id + + +def foundry_item_id(item: "Any", response_id: str | None = None) -> str | None: + """Mint a Foundry-storage-compatible item id for *item*. + + Dispatches via :meth:`IdGenerator.new_item_id` so the id picks up + the right type prefix (``msg`` / ``om`` / ``fc`` / ``rs`` / ...). + When ``response_id`` is supplied it acts as a partition-key hint so + every item written under one response co-locates with the response + record (Foundry storage requirement). + + Returns: + A new id of the form ``{type-prefix}_<18charPartitionKey><32charEntropy>``, + or ``None`` when *item* is an unrecognised / reference-only type + (mirrors the SDK helper's contract). 
+ """ + return IdGenerator.new_item_id(item, response_id) diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_responses.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_responses.py index 64b50f236a..186bd30df2 100644 --- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_responses.py +++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_responses.py @@ -3,11 +3,9 @@ from __future__ import annotations import asyncio -import base64 -import json import logging import os -from collections.abc import AsyncIterable, AsyncIterator, Generator, Mapping, Sequence +from collections.abc import AsyncIterable, AsyncIterator, Generator from typing import cast from agent_framework import ( @@ -16,7 +14,6 @@ ContextProvider, FileCheckpointStorage, HistoryProvider, - Message, RawAgent, SupportsAgentRun, WorkflowAgent, @@ -26,76 +23,9 @@ ResponseEventStream, ResponseProviderProtocol, ResponsesServerOptions, + models, ) from azure.ai.agentserver.responses.hosting import ResponsesAgentServerHost -from azure.ai.agentserver.responses.models import ( - ApplyPatchToolCallItemParam, - ApplyPatchToolCallOutputItemParam, - ComputerCallOutputItemParam, - ComputerScreenshotContent, - CreateResponse, - FunctionCallOutputItemParam, - FunctionShellAction, - FunctionShellCallItemParam, - FunctionShellCallOutputContent, - FunctionShellCallOutputExitOutcome, - FunctionShellCallOutputItemParam, - Item, - ItemCodeInterpreterToolCall, - ItemComputerToolCall, - ItemCustomToolCall, - ItemCustomToolCallOutput, - ItemFileSearchToolCall, - ItemFunctionToolCall, - ItemImageGenToolCall, - ItemLocalShellToolCall, - ItemLocalShellToolCallOutput, - ItemMcpApprovalRequest, - ItemMcpToolCall, - ItemMessage, - ItemOutputMessage, - ItemReasoningItem, - ItemWebSearchToolCall, - LocalEnvironmentResource, - MCPApprovalResponse, - MessageContent, - MessageContentInputFileContent, - MessageContentInputImageContent, - 
MessageContentInputTextContent, - MessageContentOutputTextContent, - MessageContentReasoningTextContent, - MessageContentRefusalContent, - OAuthConsentRequestOutputItem, - OutputItem, - OutputItemApplyPatchToolCall, - OutputItemApplyPatchToolCallOutput, - OutputItemCodeInterpreterToolCall, - OutputItemComputerToolCall, - OutputItemComputerToolCallOutputResource, - OutputItemCustomToolCall, - OutputItemCustomToolCallOutput, - OutputItemFileSearchToolCall, - OutputItemFunctionShellCall, - OutputItemFunctionShellCallOutput, - OutputItemFunctionToolCall, - OutputItemImageGenToolCall, - OutputItemLocalShellToolCall, - OutputItemLocalShellToolCallOutput, - OutputItemMcpApprovalRequest, - OutputItemMcpApprovalResponseResource, - OutputItemMcpToolCall, - OutputItemMessage, - OutputItemOutputMessage, - OutputItemReasoningItem, - OutputItemWebSearchToolCall, - OutputMessageContent, - OutputMessageContentOutputTextContent, - OutputMessageContentRefusalContent, - ResponseStreamEvent, - StructuredOutputsOutputItem, - SummaryTextContent, - TextContent, -) from azure.ai.agentserver.responses.streaming._builders import ( OutputItemFunctionCallBuilder, OutputItemMcpCallBuilder, @@ -106,6 +36,39 @@ ) from typing_extensions import Any +from ._shared import ( + _arguments_to_str, # pyright: ignore[reportPrivateUsage] + _convert_message_content, # pyright: ignore[reportPrivateUsage] + _convert_output_message_content, # pyright: ignore[reportPrivateUsage] + _item_to_message, # pyright: ignore[reportPrivateUsage] + _items_to_messages, # pyright: ignore[reportPrivateUsage] + _output_item_to_message, # pyright: ignore[reportPrivateUsage] + _output_items_to_messages, # pyright: ignore[reportPrivateUsage] +) + +# Re-export the conversion helpers under their historical names so existing +# tests (which import them from this module) keep working — the canonical +# definitions now live in :mod:`._shared`. 
+__all__ = ( + "_arguments_to_str", + "_convert_message_content", + "_convert_output_message_content", + "_item_to_message", + "_items_to_messages", + "_output_item_to_message", + "_output_items_to_messages", +) + +# Local aliases for the agent-server SDK types this module touches at the +# Python type-annotation layer. Using ``models.X`` everywhere would work but +# would noisily clutter type-only positions where the alias adds no value. +CreateResponse = models.CreateResponse +ResponseStreamEvent = models.ResponseStreamEvent +FunctionShellAction = models.FunctionShellAction +FunctionShellCallOutputContent = models.FunctionShellCallOutputContent +FunctionShellCallOutputExitOutcome = models.FunctionShellCallOutputExitOutcome +LocalEnvironmentResource = models.LocalEnvironmentResource + logger = logging.getLogger(__name__) @@ -272,86 +235,50 @@ async def _handle_inner_workflow( if not isinstance(self._agent, WorkflowAgent): raise RuntimeError("Agent is not a workflow agent.") - # Determine the latest checkpoint (if any) so we can resume the - # workflow's prior state for this turn. The directory is keyed by - # the inbound context id (conversation_id when set, otherwise - # previous_response_id). Multi-turn declarative workflows need the - # workflow's internal state (e.g. Conversation.messages, - # intermediate Local.* variables) to survive across user turns; - # the only place that state lives is the workflow checkpoint, so - # on every turn we restore the latest checkpoint and feed the new - # input back into the start executor as a continuation rather than - # a fresh run. 
- latest_checkpoint_id: str | None = None - restore_storage: FileCheckpointStorage | None = None + # Restore from the latest checkpoint if available, otherwise start with an empty history if context_id is not None: - restore_storage = FileCheckpointStorage(os.path.join(self._checkpoint_storage_path, context_id)) - latest_checkpoint = await restore_storage.get_latest(workflow_name=self._agent.workflow.name) + checkpoint_storage = FileCheckpointStorage(os.path.join(self._checkpoint_storage_path, context_id)) + latest_checkpoint = await checkpoint_storage.get_latest(workflow_name=self._agent.workflow.name) if latest_checkpoint is not None: - latest_checkpoint_id = latest_checkpoint.checkpoint_id - - # Storage that will receive checkpoints written during this turn. - # When the caller chains with previous_response_id, the next turn - # will reference the current response_id as its previous_response_id, - # so new checkpoints must land under the current response_id (or the - # conversation_id when set). When conversation_id is set, this - # matches restore_storage; when only previous_response_id was - # supplied, restore_storage points at the *prior* response's - # directory and write_storage points at the *current* response's. - write_context_id = context.conversation_id or context.response_id - write_storage = FileCheckpointStorage(os.path.join(self._checkpoint_storage_path, write_context_id)) - - # Multi-turn pattern: when we have a prior checkpoint, restore it - # first (drive the workflow back to idle with prior state intact), - # then make a separate call that delivers the new user input. This - # depends on Workflow.run preserving shared state across calls. The - # restore-only call may yield events from any pending in-flight - # work in the checkpoint; we consume those internally here so they - # don't surface to the response stream as duplicates. 
- # - # If the restored checkpoint had pending request_info events, the - # restore-only call replays them through - # ``WorkflowAgent._convert_workflow_event_to_agent_response_updates`` - # and populates ``self._agent.pending_requests``. That is the correct - # state: those requests are genuinely outstanding, and the next - # ``run(input_messages, ...)`` call may contain ``function_call_output`` - # items (carried as FunctionResult/FunctionApprovalResponse content) - # that fulfill them via :meth:`WorkflowAgent._process_pending_requests`. - if latest_checkpoint_id is not None: - if is_streaming_request: - async for _ in self._agent.run( - stream=True, - checkpoint_id=latest_checkpoint_id, - checkpoint_storage=restore_storage, - ): - pass - else: - await self._agent.run( - stream=False, - checkpoint_id=latest_checkpoint_id, - checkpoint_storage=restore_storage, - ) + if not is_streaming_request: + _ = await self._agent.run( + stream=False, + checkpoint_id=latest_checkpoint.checkpoint_id, + checkpoint_storage=checkpoint_storage, + ) + else: + # Consume the streaming or the invocation will result in a no-op + async for _ in self._agent.run( + stream=True, + checkpoint_id=latest_checkpoint.checkpoint_id, + checkpoint_storage=checkpoint_storage, + ): + pass # Now run the agent with the latest input response_event_stream = ResponseEventStream(response_id=context.response_id, model=request.model) + # Create a new checkpoint storage for this response based on the following rules: + # - If no previous response ID or conversation ID is provided, + # create a new checkpoint storage for this response + # - If a previous response ID is provided, create a new checkpoint storage for this response + # - If a conversation ID is provided, reuse the existing checkpoint storage for the conversation + context_id = context.conversation_id or context.response_id + checkpoint_storage = FileCheckpointStorage(os.path.join(self._checkpoint_storage_path, context_id)) + yield 
response_event_stream.emit_created() yield response_event_stream.emit_in_progress() if not is_streaming_request: - # Run the agent in non-streaming mode with the new user input. - response = await self._agent.run( - input_messages, - stream=False, - checkpoint_storage=write_storage, - ) + # Run the agent in non-streaming mode + response = await self._agent.run(input_messages, stream=False, checkpoint_storage=checkpoint_storage) for message in response.messages: for content in message.contents: async for item in _to_outputs(response_event_stream, content): yield item - await self._delete_not_latest_checkpoints(write_storage, self._agent.workflow.name) + await self._delete_not_latest_checkpoints(checkpoint_storage, self._agent.workflow.name) yield response_event_stream.emit_completed() return @@ -359,12 +286,8 @@ async def _handle_inner_workflow( # lazily created on matching content, closed when a different type arrives. tracker = _OutputItemTracker(response_event_stream) - # Run the workflow agent in streaming mode with the new user input. 
- async for update in self._agent.run( - input_messages, - stream=True, - checkpoint_storage=write_storage, - ): + # Run the workflow agent in streaming mode + async for update in self._agent.run(input_messages, stream=True, checkpoint_storage=checkpoint_storage): for content in update.contents: for event in tracker.handle(content): yield event @@ -377,7 +300,7 @@ async def _handle_inner_workflow( for event in tracker.close(): yield event - await self._delete_not_latest_checkpoints(write_storage, self._agent.workflow.name) + await self._delete_not_latest_checkpoints(checkpoint_storage, self._agent.workflow.name) yield response_event_stream.emit_completed() @staticmethod @@ -578,665 +501,6 @@ def _to_chat_options(request: CreateResponse) -> tuple[ChatOptions, bool]: # endregion -# region Input Message Conversion - - -def _items_to_messages(input_items: Sequence[Item]) -> list[Message]: - """Converts a sequence of input items to a list of Messages, one per item. - - Args: - input_items: The input items to convert. - - Returns: - A list of Messages, one per supported input item. - """ - messages: list[Message] = [] - for item in input_items: - messages.append(_item_to_message(item)) - return messages - - -def _item_to_message(item: Item) -> Message: - """Converts an Item to a Message. - - Args: - item: The Item to convert. - - Returns: - The converted Message. - - Raises: - ValueError: If the Item type is not supported. 
- """ - if item.type == "message": - msg = cast(ItemMessage, item) - if isinstance(msg.content, str): - return Message(role=msg.role, contents=[Content.from_text(msg.content)]) - return Message(role=msg.role, contents=[_convert_message_content(part) for part in msg.content]) - - if item.type == "output_message": - output_msg = cast(ItemOutputMessage, item) - return Message( - role=output_msg.role, contents=[_convert_output_message_content(part) for part in output_msg.content] - ) - - if item.type == "function_call": - fc = cast(ItemFunctionToolCall, item) - return Message( - role="assistant", - contents=[Content.from_function_call(fc.call_id, fc.name, arguments=fc.arguments)], - ) - - if item.type == "function_call_output": - fco = cast(FunctionCallOutputItemParam, item) - output = fco.output if isinstance(fco.output, str) else str(fco.output) - return Message( - role="tool", - contents=[Content.from_function_result(fco.call_id, result=output)], - ) - - if item.type == "reasoning": - reasoning = cast(ItemReasoningItem, item) - reason_contents: list[Content] = [] - if reasoning.summary: - for summary in reasoning.summary: - reason_contents.append(Content.from_text(summary.text)) - return Message(role="assistant", contents=reason_contents) - - if item.type == "mcp_call": - mcp = cast(ItemMcpToolCall, item) - return Message( - role="assistant", - contents=[ - Content.from_mcp_server_tool_call( - mcp.id, - mcp.name, - server_name=mcp.server_label, - arguments=mcp.arguments, - ) - ], - ) - - if item.type == "mcp_approval_request": - mcp_req = cast(ItemMcpApprovalRequest, item) - mcp_call_content = Content.from_mcp_server_tool_call( - mcp_req.id, - mcp_req.name, - server_name=mcp_req.server_label, - arguments=mcp_req.arguments, - ) - return Message( - role="assistant", - contents=[Content.from_function_approval_request(mcp_req.id, mcp_call_content)], - ) - - if item.type == "mcp_approval_response": - mcp_resp = cast(MCPApprovalResponse, item) - placeholder_content = 
Content.from_function_call(mcp_resp.approval_request_id, "mcp_approval") - return Message( - role="user", - contents=[ - Content.from_function_approval_response( - mcp_resp.approve, mcp_resp.approval_request_id, placeholder_content - ) - ], - ) - - if item.type == "code_interpreter_call": - ci = cast(ItemCodeInterpreterToolCall, item) - return Message( - role="assistant", - contents=[Content.from_code_interpreter_tool_call(call_id=ci.id)], - ) - - if item.type == "image_generation_call": - ig = cast(ItemImageGenToolCall, item) - return Message( - role="assistant", - contents=[Content.from_image_generation_tool_call(image_id=ig.id)], - ) - - if item.type == "shell_call": - sc = cast(FunctionShellCallItemParam, item) - return Message( - role="assistant", - contents=[ - Content.from_shell_tool_call( - call_id=sc.call_id, - commands=sc.action.commands, - status=str(sc.status), - ) - ], - ) - - if item.type == "shell_call_output": - sco = cast(FunctionShellCallOutputItemParam, item) - outputs = [ - Content.from_shell_command_output( - stdout=out.stdout or "", - stderr=out.stderr or "", - exit_code=getattr(out.outcome, "exit_code", None) if hasattr(out, "outcome") else None, - ) - for out in (sco.output or []) - ] - return Message( - role="tool", - contents=[ - Content.from_shell_tool_result( - call_id=sco.call_id, - outputs=outputs, - max_output_length=sco.max_output_length, - ) - ], - ) - - if item.type == "local_shell_call": - lsc = cast(ItemLocalShellToolCall, item) - commands = lsc.action.command if hasattr(lsc.action, "command") and lsc.action.command else [] - return Message( - role="assistant", - contents=[ - Content.from_shell_tool_call( - call_id=lsc.call_id, - commands=commands, - status=str(lsc.status), - ) - ], - ) - - if item.type == "local_shell_call_output": - lsco = cast(ItemLocalShellToolCallOutput, item) - return Message( - role="tool", - contents=[ - Content.from_shell_tool_result( - call_id=lsco.id, - 
outputs=[Content.from_shell_command_output(stdout=lsco.output)], - ) - ], - ) - - if item.type == "file_search_call": - fs = cast(ItemFileSearchToolCall, item) - return Message( - role="assistant", - contents=[ - Content.from_function_call( - fs.id, - "file_search", - arguments=json.dumps({"queries": fs.queries}), - ) - ], - ) - - if item.type == "web_search_call": - ws = cast(ItemWebSearchToolCall, item) - return Message( - role="assistant", - contents=[Content.from_function_call(ws.id, "web_search")], - ) - - if item.type == "computer_call": - cc = cast(ItemComputerToolCall, item) - return Message( - role="assistant", - contents=[ - Content.from_function_call( - cc.call_id, - "computer_use", - arguments=str(cc.action), - ) - ], - ) - - if item.type == "computer_call_output": - cco = cast(ComputerCallOutputItemParam, item) - return Message( - role="tool", - contents=[Content.from_function_result(cco.call_id, result=str(cco.output))], - ) - - if item.type == "custom_tool_call": - ct = cast(ItemCustomToolCall, item) - return Message( - role="assistant", - contents=[Content.from_function_call(ct.call_id, ct.name, arguments=ct.input)], - ) - - if item.type == "custom_tool_call_output": - cto = cast(ItemCustomToolCallOutput, item) - output = cto.output if isinstance(cto.output, str) else str(cto.output) - # Hosted-MCP results land here because the host writes them via - # `aoutput_item_custom_tool_call_output` (see `_to_outputs` for - # `mcp_server_tool_result`). The persisted `call_id` keeps its - # `mcp_*` prefix; on read, route those back to a hosted-MCP result - # Content so the chat-client serialize layer can coalesce them - # onto a single `mcp_call` input item with `output` populated. - # Issue #5546. 
- if cto.call_id and cto.call_id.startswith("mcp_"): - return Message( - role="tool", - contents=[Content.from_mcp_server_tool_result(call_id=cto.call_id, output=output)], - ) - return Message( - role="tool", - contents=[Content.from_function_result(cto.call_id, result=output)], - ) - - if item.type == "apply_patch_call": - ap = cast(ApplyPatchToolCallItemParam, item) - return Message( - role="assistant", - contents=[ - Content.from_function_call( - ap.call_id, - "apply_patch", - arguments=str(ap.operation), - ) - ], - ) - - if item.type == "apply_patch_call_output": - apo = cast(ApplyPatchToolCallOutputItemParam, item) - return Message( - role="tool", - contents=[Content.from_function_result(apo.call_id, result=apo.output or "")], - ) - - raise ValueError(f"Unsupported Item type: {item.type}") - - -def _output_items_to_messages(history: Sequence[OutputItem]) -> list[Message]: - """Converts a sequence of OutputItem objects to a list of Message objects. - - Args: - history (Sequence[OutputItem]): The sequence of OutputItem objects to convert. - - Returns: - list[Message]: The list of Message objects. - """ - messages: list[Message] = [] - for item in history: - messages.append(_output_item_to_message(item)) - return messages - - -def _output_item_to_message(item: OutputItem) -> Message: - """Converts an OutputItem to a Message. - - Args: - item (OutputItem): The OutputItem to convert. - - Returns: - Message: The converted Message. - - Raises: - ValueError: If the OutputItem type is not supported. 
- """ - if item.type == "output_message": - output_msg = cast(OutputItemOutputMessage, item) - return Message( - role=output_msg.role, contents=[_convert_output_message_content(part) for part in output_msg.content] - ) - - if item.type == "message": - msg = cast(OutputItemMessage, item) - return Message(role=msg.role, contents=[_convert_message_content(part) for part in msg.content]) - - if item.type == "function_call": - fc = cast(OutputItemFunctionToolCall, item) - return Message( - role="assistant", - contents=[Content.from_function_call(fc.call_id, fc.name, arguments=fc.arguments)], - ) - - if item.type == "function_call_output": - fco = cast(FunctionCallOutputItemParam, item) - output = fco.output if isinstance(fco.output, str) else str(fco.output) - return Message( - role="tool", - contents=[Content.from_function_result(fco.call_id, result=output)], - ) - - if item.type == "reasoning": - reasoning = cast(OutputItemReasoningItem, item) - contents: list[Content] = [] - if reasoning.summary: - for summary in reasoning.summary: - contents.append(Content.from_text(summary.text)) - return Message(role="assistant", contents=contents) - - if item.type == "mcp_call": - mcp = cast(OutputItemMcpToolCall, item) - return Message( - role="assistant", - contents=[ - Content.from_mcp_server_tool_call( - mcp.id, - mcp.name, - server_name=mcp.server_label, - arguments=mcp.arguments, - ) - ], - ) - - if item.type == "mcp_approval_request": - mcp_req = cast(OutputItemMcpApprovalRequest, item) - mcp_call_content = Content.from_mcp_server_tool_call( - mcp_req.id, - mcp_req.name, - server_name=mcp_req.server_label, - arguments=mcp_req.arguments, - ) - return Message( - role="assistant", - contents=[Content.from_function_approval_request(mcp_req.id, mcp_call_content)], - ) - - if item.type == "mcp_approval_response": - mcp_resp = cast(OutputItemMcpApprovalResponseResource, item) - # Build a placeholder function_call Content since the original call details are not available - 
placeholder_content = Content.from_function_call(mcp_resp.approval_request_id, "mcp_approval") - return Message( - role="user", - contents=[Content.from_function_approval_response(mcp_resp.approve, mcp_resp.id, placeholder_content)], - ) - - if item.type == "code_interpreter_call": - ci = cast(OutputItemCodeInterpreterToolCall, item) - return Message( - role="assistant", - contents=[Content.from_code_interpreter_tool_call(call_id=ci.id)], - ) - - if item.type == "image_generation_call": - ig = cast(OutputItemImageGenToolCall, item) - return Message( - role="assistant", - contents=[Content.from_image_generation_tool_call(image_id=ig.id)], - ) - - if item.type == "shell_call": - sc = cast(OutputItemFunctionShellCall, item) - return Message( - role="assistant", - contents=[ - Content.from_shell_tool_call( - call_id=sc.call_id, - commands=sc.action.commands, - status=str(sc.status), - ) - ], - ) - - if item.type == "shell_call_output": - sco = cast(OutputItemFunctionShellCallOutput, item) - outputs = [ - Content.from_shell_command_output( - stdout=out.stdout or "", - stderr=out.stderr or "", - exit_code=getattr(out.outcome, "exit_code", None) if hasattr(out, "outcome") else None, - ) - for out in (sco.output or []) - ] - return Message( - role="tool", - contents=[ - Content.from_shell_tool_result( - call_id=sco.call_id, - outputs=outputs, - max_output_length=sco.max_output_length, - ) - ], - ) - - if item.type == "local_shell_call": - lsc = cast(OutputItemLocalShellToolCall, item) - commands = lsc.action.command if hasattr(lsc.action, "command") and lsc.action.command else [] - return Message( - role="assistant", - contents=[ - Content.from_shell_tool_call( - call_id=lsc.call_id, - commands=commands, - status=str(lsc.status), - ) - ], - ) - - if item.type == "local_shell_call_output": - lsco = cast(OutputItemLocalShellToolCallOutput, item) - return Message( - role="tool", - contents=[ - Content.from_shell_tool_result( - call_id=lsco.id, - 
outputs=[Content.from_shell_command_output(stdout=lsco.output)], - ) - ], - ) - - if item.type == "file_search_call": - fs = cast(OutputItemFileSearchToolCall, item) - return Message( - role="assistant", - contents=[ - Content.from_function_call( - fs.id, - "file_search", - arguments=json.dumps({"queries": fs.queries}), - ) - ], - ) - - if item.type == "web_search_call": - ws = cast(OutputItemWebSearchToolCall, item) - return Message( - role="assistant", - contents=[Content.from_function_call(ws.id, "web_search")], - ) - - if item.type == "computer_call": - cc = cast(OutputItemComputerToolCall, item) - return Message( - role="assistant", - contents=[ - Content.from_function_call( - cc.call_id, - "computer_use", - arguments=str(cc.action), - ) - ], - ) - - if item.type == "computer_call_output": - cco = cast(OutputItemComputerToolCallOutputResource, item) - return Message( - role="tool", - contents=[Content.from_function_result(cco.call_id, result=str(cco.output))], - ) - - if item.type == "custom_tool_call": - ct = cast(OutputItemCustomToolCall, item) - return Message( - role="assistant", - contents=[Content.from_function_call(ct.call_id, ct.name, arguments=ct.input)], - ) - - if item.type == "custom_tool_call_output": - cto = cast(OutputItemCustomToolCallOutput, item) - output = cto.output if isinstance(cto.output, str) else str(cto.output) - # Hosted-MCP results land here because the host writes them via - # `aoutput_item_custom_tool_call_output`. Route `mcp_*` call_ids - # back to a hosted-MCP result Content so the chat-client serialize - # layer can coalesce onto the matching `mcp_call` input item. - # Issue #5546. 
- if cto.call_id and cto.call_id.startswith("mcp_"): - return Message( - role="tool", - contents=[Content.from_mcp_server_tool_result(call_id=cto.call_id, output=output)], - ) - return Message( - role="tool", - contents=[Content.from_function_result(cto.call_id, result=output)], - ) - - if item.type == "apply_patch_call": - ap = cast(OutputItemApplyPatchToolCall, item) - return Message( - role="assistant", - contents=[ - Content.from_function_call( - ap.call_id, - "apply_patch", - arguments=str(ap.operation), - ) - ], - ) - - if item.type == "apply_patch_call_output": - apo = cast(OutputItemApplyPatchToolCallOutput, item) - return Message( - role="tool", - contents=[Content.from_function_result(apo.call_id, result=apo.output or "")], - ) - - if item.type == "oauth_consent_request": - oauth = cast(OAuthConsentRequestOutputItem, item) - return Message( - role="assistant", - contents=[Content.from_oauth_consent_request(oauth.consent_link)], - ) - - if item.type == "structured_outputs": - so = cast(StructuredOutputsOutputItem, item) - text = json.dumps(so.output) if not isinstance(so.output, str) else so.output - return Message(role="assistant", contents=[Content.from_text(text)]) - - raise ValueError(f"Unsupported OutputItem type: {item.type}") - - -def _convert_output_message_content(content: OutputMessageContent) -> Content: - """Converts an OutputMessageContent to a Content object. - - Args: - content (OutputMessageContent): The OutputMessageContent to convert. - - Returns: - Content: The converted Content object. - - Raises: - ValueError: If the OutputMessageContent type is not supported. 
- """ - if content.type == "output_text": - text_content = cast(OutputMessageContentOutputTextContent, content) - return Content.from_text(text_content.text) - if content.type == "refusal": - refusal_content = cast(OutputMessageContentRefusalContent, content) - return Content.from_text(refusal_content.refusal) - - raise ValueError(f"Unsupported OutputMessageContent type: {content.type}") - - -def _convert_file_data(data_uri: str, filename: str | None = None) -> Content: - """Convert a file_data data URI to a Content object. - - For text/* MIME types, decodes the base64 content and returns it as text. - For other types, returns a URI-based Content with the filename preserved. - """ - # Parse data URI: data:;base64, - if data_uri.startswith("data:") and ";base64," in data_uri: - header, encoded = data_uri.split(";base64,", 1) - media_type = header[len("data:") :] - if media_type.startswith("text/"): - try: - decoded_text = base64.b64decode(encoded).decode("utf-8") - except (ValueError, UnicodeDecodeError): - logger.warning( - "Failed to decode text/* file_data as UTF-8, falling through to URI passthrough.", - exc_info=True, - ) - else: - prefix = f"[File: {filename}]\n" if filename else "" - return Content.from_text(f"{prefix}{decoded_text}") - additional_properties = {"filename": filename} if filename else None - return Content.from_uri(data_uri, additional_properties=additional_properties) - - -def _convert_message_content(content: MessageContent) -> Content: - """Converts a MessageContent to a Content object. - - Args: - content (MessageContent): The MessageContent to convert. - - Returns: - Content: The converted Content object. - - Raises: - ValueError: If the MessageContent type is not supported. 
- """ - if content.type == "input_text": - input_text = cast(MessageContentInputTextContent, content) - return Content.from_text(input_text.text) - if content.type == "output_text": - output_text = cast(MessageContentOutputTextContent, content) - return Content.from_text(output_text.text) - if content.type == "text": - text = cast(TextContent, content) - return Content.from_text(text.text) - if content.type == "summary_text": - summary = cast(SummaryTextContent, content) - return Content.from_text(summary.text) - if content.type == "refusal": - refusal = cast(MessageContentRefusalContent, content) - return Content.from_text(refusal.refusal) - if content.type == "reasoning_text": - reasoning = cast(MessageContentReasoningTextContent, content) - return Content.from_text_reasoning(text=reasoning.text) - if content.type == "input_image": - image = cast(MessageContentInputImageContent, content) - if image.image_url: - if image.image_url.startswith("data:"): - return Content.from_uri(image.image_url) - return Content.from_uri(image.image_url, media_type="image/*") - if image.file_id: - return Content.from_hosted_file(image.file_id) - if content.type == "input_file": - file = cast(MessageContentInputFileContent, content) - if file.file_url: - return Content.from_uri(file.file_url) - if file.file_id: - return Content.from_hosted_file(file.file_id, name=file.filename) - if file.file_data: - return _convert_file_data(file.file_data, file.filename) - if content.type == "computer_screenshot": - screenshot = cast(ComputerScreenshotContent, content) - return Content.from_uri(screenshot.image_url) - - raise ValueError(f"Unsupported MessageContent type: {content.type}") - - -# endregion - -# region Output Item Conversion - - -def _arguments_to_str(arguments: str | Mapping[str, Any] | None) -> str: - """Convert arguments to a JSON string. - - Args: - arguments: The arguments to convert, can be a string, mapping, or None. - - Returns: - The arguments as a JSON string. 
- """ - if arguments is None: - return "" - if isinstance(arguments, str): - return arguments - return json.dumps(arguments) - - async def _to_outputs(stream: ResponseEventStream, content: Content) -> AsyncIterator[ResponseStreamEvent]: """Converts a Content object to an async sequence of ResponseStreamEvent objects. diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py new file mode 100644 index 0000000000..890dd7bcfe --- /dev/null +++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py @@ -0,0 +1,1287 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Shared transformation helpers between the agent-server data model and Agent Framework. + +This module is the single home for *pure-data* conversions between the +:mod:`azure.ai.agentserver.responses.models` SDK shapes (``Item``, +``OutputItem``, ``MessageContent``, …) and the Agent Framework public types +(:class:`agent_framework.Message`, :class:`agent_framework.Content`, …). + +Why this lives in one module +---------------------------- +* The :mod:`._responses` channel adapter and the + :class:`._history_provider.FoundryHostedAgentHistoryProvider` both need the + exact same OutputItem→Message conversion. Keeping it in one place means we + only have **one** ``isinstance(item.type, ...)`` dispatch table to keep up + to date when the agent-server SDK grows new item kinds. If you spot a + ``type`` value that this module raises ``ValueError`` for, that is the place + to add support — and **both** consumers benefit immediately. +* The whole module references the agent-server SDK through a single + ``from azure.ai.agentserver.responses import models`` import. Looking at the + ``models.X`` references makes it obvious which generated types we already + consume and which ones (e.g. 
``models.A2AToolCall``, + ``models.AzureFunctionToolCall``, …) are not yet wired into + :func:`_output_item_to_message`. + +``additional_properties`` round-trip +------------------------------------ +Both the SDK models and :class:`agent_framework.Message` carry an extensible +extras bag — the agent-server models are +:class:`collections.abc.MutableMapping` instances that round-trip *any* key +through their JSON serialisation, and ``Message`` (and ``Content``) expose a +public ``additional_properties: dict[str, Any]`` slot. + +To preserve channel-specific extras across a load/save cycle: + +* On **load** (SDK model → Message) :func:`_collect_unknown_keys` extracts + every key on the source model that is **not** part of its declared schema + (per ``_attr_to_rest_field``) and stashes it on + ``Message.additional_properties["foundry"]`` (and per-content the same + bag is attached onto ``Content.additional_properties["foundry"]``). The + bag is only attached when at least one extra key is present, so messages + that didn't have extras stay byte-equal to the previous behaviour. +* On **save** (Message → SDK model) :func:`_inject_extras` writes any + previously stashed bag back as direct keys on the SDK model — Foundry + storage will round-trip them as opaque JSON. + +This means an app can stash channel-specific bookkeeping (delivery +fingerprints, `hosting` envelope from the host, AG-UI ``client_state`` +snapshots, …) under a known top-level key and rely on it surviving a +write/read cycle through the Foundry response store. +""" + +from __future__ import annotations + +import base64 +import json +import logging +from collections.abc import Mapping, Sequence +from typing import Any, cast + +from agent_framework import Content, Message +from azure.ai.agentserver.responses import models + +logger = logging.getLogger(__name__) + +# Top-level key under which round-tripped SDK extras live on +# ``Message.additional_properties`` and ``Content.additional_properties``. 
+# Stable on purpose: write-paths look it up by name to re-inject extras into +# outbound SDK models. +EXTRAS_KEY = "foundry" + +# Sub-key (under ``additional_properties[EXTRAS_KEY]``) that stores a +# verbatim snapshot of the original SDK ``OutputItem`` mapping captured at +# read time. The write path re-emits the SDK item from this snapshot when +# present, giving lossless audit/replay semantics: every declared field +# (item id, type discriminator, content array, status, …) AND every undeclared +# extra Foundry handed us survive the AF round-trip. Without this, a +# message synthesised back from ``Message.text`` alone would discard the +# original item shape. +RAW_KEY = "__raw__" + +# Top-level key on the SDK ``OutputItem`` mapping under which we round-trip +# *every* :class:`agent_framework.Message` ``additional_properties`` namespace +# **other than** :data:`EXTRAS_KEY` (the foundry-internal namespace, handled +# separately by :func:`_inject_extras`). +# +# Why a single container key instead of writing each namespace as a top-level +# extra on the SDK item: Foundry's storage backend round-trips arbitrary +# unknown keys, but on **load** :func:`_collect_unknown_keys` cannot tell +# which unknowns were AF-written namespaces (``hosting``, ``agui_state``, +# ...) vs Foundry-runtime additions. Funnelling AF namespaces under a single +# sentinel key removes that ambiguity: anything inside ``agent_framework`` +# is restored under its original namespace; anything else stays under +# :data:`EXTRAS_KEY` (preserving today's behaviour for Foundry-side extras). +# +# Concretely, this is the mechanism that gives the Hosting spec's +# ``Message.additional_properties["hosting"]`` envelope (channel / +# identity / response_target / initial-write ``deliveries[]``) durable +# round-trip semantics through the Foundry response store — see +# ``docs/specs/002-python-hosting-channels.md`` §"Channel metadata +# persisted onto stored messages". 
def _collect_unknown_keys(model: Mapping[str, Any]) -> dict[str, Any]:
    """Return the keys present on an SDK model that are not part of its declared schema.

    Declared fields are discovered through the class-level
    ``_attr_to_rest_field`` map of ``type(model)``. Every key of that map is
    treated as declared, and so is the wire name advertised by each field
    descriptor: the mapping view of a model is keyed by what is stored on the
    instance, so a declared field whose attribute name differs from its wire
    name must not be misreported as an undeclared extra.

    Args:
        model: A model instance (or any mapping) to inspect.

    Returns:
        A new ``dict`` containing only the keys on ``model`` that are not
        declared. Empty when ``model`` is not a mapping or carries only
        declared fields.
    """
    if not isinstance(model, Mapping):
        return {}

    declared: Mapping[str, Any] = getattr(type(model), "_attr_to_rest_field", None) or {}
    known: set[str] = set(declared)
    for field in declared.values():
        # NOTE(review): assumes the generated field descriptors expose their
        # wire name as ``_rest_name`` — confirm against the SDK's model base.
        try:
            rest_name = getattr(field, "_rest_name", None)
        except ValueError:
            # A descriptor whose wire name is not resolved yet contributes
            # nothing beyond its attribute name, which is already in ``known``.
            continue
        if isinstance(rest_name, str):
            known.add(rest_name)

    return {key: value for key, value in model.items() if key not in known}
def _inject_extras(model: Any, source: Mapping[str, Any] | None) -> Any:
    """Inject previously-stashed extras back onto an outbound SDK model.

    Each entry of ``source`` is written onto ``model`` with item assignment,
    except for two kinds of key that are always skipped:

    * :data:`RAW_KEY` — the raw-snapshot sentinel lives only inside
      ``additional_properties`` and is never written back as a storage field.
    * Any declared field of the model — extras are never allowed to overwrite
      the schema-defined contract. A field counts as declared when it appears
      in ``type(model)._attr_to_rest_field`` either by attribute name or by
      the wire name its descriptor advertises.

    Args:
        model: The SDK model instance to enrich. Must support item assignment.
        source: The extras bag previously read from
            ``Message.additional_properties[EXTRAS_KEY]`` (or any equivalent).
            ``None`` and empty mappings are treated as "nothing to inject".

    Returns:
        The same ``model`` instance (returned for fluent chaining).
    """
    if not source:
        return model

    # The declared-field set depends only on the model's class, so build it
    # once up front instead of once per extra.
    model_type: Any = type(model)  # pyright: ignore[reportUnknownVariableType]
    declared: Mapping[str, Any] = getattr(model_type, "_attr_to_rest_field", None) or {}
    known: set[str] = set(declared)
    for field in declared.values():
        # NOTE(review): assumes the generated field descriptors expose their
        # wire name as ``_rest_name`` — confirm against the SDK's model base.
        try:
            rest_name = getattr(field, "_rest_name", None)
        except ValueError:
            # Wire name not resolved yet; the attribute name is already known.
            continue
        if isinstance(rest_name, str):
            known.add(rest_name)

    for key, value in source.items():
        if key == RAW_KEY or key in known:
            continue
        model[key] = value
    return model
def _convert_message_content(content: models.MessageContent) -> Content:
    """Convert an SDK ``MessageContent`` (input-side) into a framework ``Content``.

    Dispatches on ``content.type`` and handles: ``input_text``,
    ``output_text``, ``text``, ``summary_text``, ``refusal``,
    ``reasoning_text``, ``input_image``, ``input_file`` and
    ``computer_screenshot``. Every produced ``Content`` is passed through
    :func:`_attach_content_extras` together with the source node.

    Args:
        content: The SDK content node to convert.

    Returns:
        The corresponding :class:`agent_framework.Content`.

    Raises:
        ValueError: If the SDK content ``type`` is not handled here, or if an
            ``input_image`` / ``input_file`` node carries none of the payload
            fields this adapter reads (URL, file id, inline data).
    """
    if content.type == "input_text":
        return _attach_content_extras(
            Content.from_text(cast(models.MessageContentInputTextContent, content).text), content
        )
    if content.type == "output_text":
        return _attach_content_extras(
            Content.from_text(cast(models.MessageContentOutputTextContent, content).text), content
        )
    if content.type == "text":
        return _attach_content_extras(Content.from_text(cast(models.TextContent, content).text), content)
    if content.type == "summary_text":
        return _attach_content_extras(Content.from_text(cast(models.SummaryTextContent, content).text), content)
    if content.type == "refusal":
        return _attach_content_extras(
            Content.from_text(cast(models.MessageContentRefusalContent, content).refusal), content
        )
    if content.type == "reasoning_text":
        return _attach_content_extras(
            Content.from_text_reasoning(text=cast(models.MessageContentReasoningTextContent, content).text),
            content,
        )
    if content.type == "input_image":
        image = cast(models.MessageContentInputImageContent, content)
        if image.image_url:
            if image.image_url.startswith("data:"):
                # Data URIs already embed their media type; pass them through unchanged.
                return _attach_content_extras(Content.from_uri(image.image_url), content)
            # A plain URL carries no media type of its own, so tag it as an
            # image explicitly rather than leaving the media type unset.
            return _attach_content_extras(Content.from_uri(image.image_url, media_type="image/*"), content)
        if image.file_id:
            return _attach_content_extras(Content.from_hosted_file(image.file_id), content)
        # No usable payload on the node: fall through to the ValueError below.
    if content.type == "input_file":
        file = cast(models.MessageContentInputFileContent, content)
        if file.file_url:
            return _attach_content_extras(Content.from_uri(file.file_url), content)
        if file.file_id:
            return _attach_content_extras(Content.from_hosted_file(file.file_id, name=file.filename), content)
        if file.file_data:
            return _attach_content_extras(_convert_file_data(file.file_data, file.filename), content)
        # No usable payload on the node: fall through to the ValueError below.
    if content.type == "computer_screenshot":
        return _attach_content_extras(
            Content.from_uri(cast(models.ComputerScreenshotContent, content).image_url), content
        )

    raise ValueError(f"Unsupported MessageContent type: {content.type}")
def _items_to_messages(input_items: Sequence[models.Item]) -> list[Message]:
    """Turn input ``Item`` SDK objects into framework ``Message`` objects.

    Each input item is handed to :func:`_item_to_message` and yields exactly
    one message; any fan-out is left to the caller.

    Args:
        input_items: The input items to convert.

    Returns:
        A new list of messages, in the same order as ``input_items``.
    """
    messages: list[Message] = []
    for entry in input_items:
        messages.append(_item_to_message(entry))
    return messages
+ """ + if item.type == "message": + msg = cast(models.ItemMessage, item) + if isinstance(msg.content, str): + message = Message(role=msg.role, contents=[Content.from_text(msg.content)]) + else: + message = Message(role=msg.role, contents=[_convert_message_content(part) for part in msg.content]) + return _attach_extras(message, item) + + if item.type == "output_message": + output_msg = cast(models.ItemOutputMessage, item) + return _attach_extras( + Message( + role=output_msg.role, + contents=[_convert_output_message_content(part) for part in output_msg.content], + ), + item, + ) + + if item.type == "function_call": + fc = cast(models.ItemFunctionToolCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[Content.from_function_call(fc.call_id, fc.name, arguments=fc.arguments)], + ), + item, + ) + + if item.type == "function_call_output": + fco = cast(models.FunctionCallOutputItemParam, item) + output = fco.output if isinstance(fco.output, str) else str(fco.output) + return _attach_extras( + Message(role="tool", contents=[Content.from_function_result(fco.call_id, result=output)]), + item, + ) + + if item.type == "reasoning": + reasoning = cast(models.ItemReasoningItem, item) + reason_contents: list[Content] = [] + if reasoning.summary: + for summary in reasoning.summary: + reason_contents.append(Content.from_text(summary.text)) + return _attach_extras(Message(role="assistant", contents=reason_contents), item) + + if item.type == "mcp_call": + mcp = cast(models.ItemMcpToolCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[ + Content.from_mcp_server_tool_call( + mcp.id, + mcp.name, + server_name=mcp.server_label, + arguments=mcp.arguments, + ) + ], + ), + item, + ) + + if item.type == "mcp_approval_request": + mcp_req = cast(models.ItemMcpApprovalRequest, item) + mcp_call_content = Content.from_mcp_server_tool_call( + mcp_req.id, + mcp_req.name, + server_name=mcp_req.server_label, + arguments=mcp_req.arguments, + ) + 
return _attach_extras( + Message( + role="assistant", + contents=[Content.from_function_approval_request(mcp_req.id, mcp_call_content)], + ), + item, + ) + + if item.type == "mcp_approval_response": + mcp_resp = cast(models.MCPApprovalResponse, item) + placeholder_content = Content.from_function_call(mcp_resp.approval_request_id, "mcp_approval") + return _attach_extras( + Message( + role="user", + contents=[ + Content.from_function_approval_response( + mcp_resp.approve, mcp_resp.approval_request_id, placeholder_content + ) + ], + ), + item, + ) + + if item.type == "code_interpreter_call": + ci = cast(models.ItemCodeInterpreterToolCall, item) + return _attach_extras( + Message(role="assistant", contents=[Content.from_code_interpreter_tool_call(call_id=ci.id)]), + item, + ) + + if item.type == "image_generation_call": + ig = cast(models.ItemImageGenToolCall, item) + return _attach_extras( + Message(role="assistant", contents=[Content.from_image_generation_tool_call(image_id=ig.id)]), + item, + ) + + if item.type == "shell_call": + sc = cast(models.FunctionShellCallItemParam, item) + return _attach_extras( + Message( + role="assistant", + contents=[ + Content.from_shell_tool_call( + call_id=sc.call_id, + commands=sc.action.commands, + status=str(sc.status), + ) + ], + ), + item, + ) + + if item.type == "shell_call_output": + sco = cast(models.FunctionShellCallOutputItemParam, item) + outputs = [ + Content.from_shell_command_output( + stdout=out.stdout or "", + stderr=out.stderr or "", + exit_code=getattr(out.outcome, "exit_code", None) if hasattr(out, "outcome") else None, + ) + for out in (sco.output or []) + ] + return _attach_extras( + Message( + role="tool", + contents=[ + Content.from_shell_tool_result( + call_id=sco.call_id, + outputs=outputs, + max_output_length=sco.max_output_length, + ) + ], + ), + item, + ) + + if item.type == "local_shell_call": + lsc = cast(models.ItemLocalShellToolCall, item) + commands = lsc.action.command if hasattr(lsc.action, 
"command") and lsc.action.command else [] + return _attach_extras( + Message( + role="assistant", + contents=[ + Content.from_shell_tool_call( + call_id=lsc.call_id, + commands=commands, + status=str(lsc.status), + ) + ], + ), + item, + ) + + if item.type == "local_shell_call_output": + lsco = cast(models.ItemLocalShellToolCallOutput, item) + return _attach_extras( + Message( + role="tool", + contents=[ + Content.from_shell_tool_result( + call_id=lsco.id, + outputs=[Content.from_shell_command_output(stdout=lsco.output)], + ) + ], + ), + item, + ) + + if item.type == "file_search_call": + fs = cast(models.ItemFileSearchToolCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[ + Content.from_function_call( + fs.id, + "file_search", + arguments=json.dumps({"queries": fs.queries}), + ) + ], + ), + item, + ) + + if item.type == "web_search_call": + ws = cast(models.ItemWebSearchToolCall, item) + return _attach_extras( + Message(role="assistant", contents=[Content.from_function_call(ws.id, "web_search")]), + item, + ) + + if item.type == "computer_call": + cc = cast(models.ItemComputerToolCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[ + Content.from_function_call( + cc.call_id, + "computer_use", + arguments=str(cc.action), + ) + ], + ), + item, + ) + + if item.type == "computer_call_output": + cco = cast(models.ComputerCallOutputItemParam, item) + return _attach_extras( + Message(role="tool", contents=[Content.from_function_result(cco.call_id, result=str(cco.output))]), + item, + ) + + if item.type == "custom_tool_call": + ct = cast(models.ItemCustomToolCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[Content.from_function_call(ct.call_id, ct.name, arguments=ct.input)], + ), + item, + ) + + if item.type == "custom_tool_call_output": + cto = cast(models.ItemCustomToolCallOutput, item) + output = cto.output if isinstance(cto.output, str) else str(cto.output) + # Hosted-MCP results 
def _output_items_to_messages(history: Sequence[models.OutputItem]) -> list[Message]:
    """Turn stored ``OutputItem`` SDK objects into framework ``Message`` objects.

    Each item is converted with :func:`_output_item_to_message`; the result
    keeps the order of ``history``.

    Args:
        history: The output items to convert, oldest-first.

    Returns:
        A new list of messages, one per item, in the same order.
    """
    converted: list[Message] = []
    converted.extend(_output_item_to_message(stored) for stored in history)
    return converted
+ """ + if item.type == "output_message": + output_msg = cast(models.OutputItemOutputMessage, item) + return _attach_extras( + Message( + role=output_msg.role, + contents=[_convert_output_message_content(part) for part in output_msg.content], + ), + item, + ) + + if item.type == "message": + msg = cast(models.OutputItemMessage, item) + return _attach_extras( + Message(role=msg.role, contents=[_convert_message_content(part) for part in msg.content]), + item, + ) + + if item.type == "function_call": + fc = cast(models.OutputItemFunctionToolCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[Content.from_function_call(fc.call_id, fc.name, arguments=fc.arguments)], + ), + item, + ) + + if item.type == "function_call_output": + fco = cast(models.FunctionCallOutputItemParam, item) + output = fco.output if isinstance(fco.output, str) else str(fco.output) + return _attach_extras( + Message(role="tool", contents=[Content.from_function_result(fco.call_id, result=output)]), + item, + ) + + if item.type == "reasoning": + reasoning = cast(models.OutputItemReasoningItem, item) + contents: list[Content] = [] + if reasoning.summary: + for summary in reasoning.summary: + contents.append(Content.from_text(summary.text)) + return _attach_extras(Message(role="assistant", contents=contents), item) + + if item.type == "mcp_call": + mcp = cast(models.OutputItemMcpToolCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[ + Content.from_mcp_server_tool_call( + mcp.id, + mcp.name, + server_name=mcp.server_label, + arguments=mcp.arguments, + ) + ], + ), + item, + ) + + if item.type == "mcp_approval_request": + mcp_req = cast(models.OutputItemMcpApprovalRequest, item) + mcp_call_content = Content.from_mcp_server_tool_call( + mcp_req.id, + mcp_req.name, + server_name=mcp_req.server_label, + arguments=mcp_req.arguments, + ) + return _attach_extras( + Message( + role="assistant", + contents=[Content.from_function_approval_request(mcp_req.id, 
mcp_call_content)], + ), + item, + ) + + if item.type == "mcp_approval_response": + mcp_resp = cast(models.OutputItemMcpApprovalResponseResource, item) + # Build a placeholder function_call Content since the original call details are not available here. + placeholder_content = Content.from_function_call(mcp_resp.approval_request_id, "mcp_approval") + return _attach_extras( + Message( + role="user", + contents=[Content.from_function_approval_response(mcp_resp.approve, mcp_resp.id, placeholder_content)], + ), + item, + ) + + if item.type == "code_interpreter_call": + ci = cast(models.OutputItemCodeInterpreterToolCall, item) + return _attach_extras( + Message(role="assistant", contents=[Content.from_code_interpreter_tool_call(call_id=ci.id)]), + item, + ) + + if item.type == "image_generation_call": + ig = cast(models.OutputItemImageGenToolCall, item) + return _attach_extras( + Message(role="assistant", contents=[Content.from_image_generation_tool_call(image_id=ig.id)]), + item, + ) + + if item.type == "shell_call": + sc = cast(models.OutputItemFunctionShellCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[ + Content.from_shell_tool_call( + call_id=sc.call_id, + commands=sc.action.commands, + status=str(sc.status), + ) + ], + ), + item, + ) + + if item.type == "shell_call_output": + sco = cast(models.OutputItemFunctionShellCallOutput, item) + outputs = [ + Content.from_shell_command_output( + stdout=out.stdout or "", + stderr=out.stderr or "", + exit_code=getattr(out.outcome, "exit_code", None) if hasattr(out, "outcome") else None, + ) + for out in (sco.output or []) + ] + return _attach_extras( + Message( + role="tool", + contents=[ + Content.from_shell_tool_result( + call_id=sco.call_id, + outputs=outputs, + max_output_length=sco.max_output_length, + ) + ], + ), + item, + ) + + if item.type == "local_shell_call": + lsc = cast(models.OutputItemLocalShellToolCall, item) + commands = lsc.action.command if hasattr(lsc.action, "command") and 
lsc.action.command else [] + return _attach_extras( + Message( + role="assistant", + contents=[ + Content.from_shell_tool_call( + call_id=lsc.call_id, + commands=commands, + status=str(lsc.status), + ) + ], + ), + item, + ) + + if item.type == "local_shell_call_output": + lsco = cast(models.OutputItemLocalShellToolCallOutput, item) + return _attach_extras( + Message( + role="tool", + contents=[ + Content.from_shell_tool_result( + call_id=lsco.id, + outputs=[Content.from_shell_command_output(stdout=lsco.output)], + ) + ], + ), + item, + ) + + if item.type == "file_search_call": + fs = cast(models.OutputItemFileSearchToolCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[ + Content.from_function_call( + fs.id, + "file_search", + arguments=json.dumps({"queries": fs.queries}), + ) + ], + ), + item, + ) + + if item.type == "web_search_call": + ws = cast(models.OutputItemWebSearchToolCall, item) + return _attach_extras( + Message(role="assistant", contents=[Content.from_function_call(ws.id, "web_search")]), + item, + ) + + if item.type == "computer_call": + cc = cast(models.OutputItemComputerToolCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[ + Content.from_function_call( + cc.call_id, + "computer_use", + arguments=str(cc.action), + ) + ], + ), + item, + ) + + if item.type == "computer_call_output": + cco = cast(models.OutputItemComputerToolCallOutputResource, item) + return _attach_extras( + Message(role="tool", contents=[Content.from_function_result(cco.call_id, result=str(cco.output))]), + item, + ) + + if item.type == "custom_tool_call": + ct = cast(models.OutputItemCustomToolCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[Content.from_function_call(ct.call_id, ct.name, arguments=ct.input)], + ), + item, + ) + + if item.type == "custom_tool_call_output": + cto = cast(models.OutputItemCustomToolCallOutput, item) + output = cto.output if isinstance(cto.output, str) else 
str(cto.output) + # Hosted-MCP results land here because the host writes them via + # ``aoutput_item_custom_tool_call_output``. Route ``mcp_*`` + # call_ids back to a hosted-MCP result Content so the chat-client + # serialize layer can coalesce onto the matching ``mcp_call`` + # input item. Issue #5546. + if cto.call_id and cto.call_id.startswith("mcp_"): + return _attach_extras( + Message( + role="tool", + contents=[Content.from_mcp_server_tool_result(call_id=cto.call_id, output=output)], + ), + item, + ) + return _attach_extras( + Message(role="tool", contents=[Content.from_function_result(cto.call_id, result=output)]), + item, + ) + + if item.type == "apply_patch_call": + ap = cast(models.OutputItemApplyPatchToolCall, item) + return _attach_extras( + Message( + role="assistant", + contents=[ + Content.from_function_call( + ap.call_id, + "apply_patch", + arguments=str(ap.operation), + ) + ], + ), + item, + ) + + if item.type == "apply_patch_call_output": + apo = cast(models.OutputItemApplyPatchToolCallOutput, item) + return _attach_extras( + Message(role="tool", contents=[Content.from_function_result(apo.call_id, result=apo.output or "")]), + item, + ) + + if item.type == "oauth_consent_request": + oauth = cast(models.OAuthConsentRequestOutputItem, item) + return _attach_extras( + Message(role="assistant", contents=[Content.from_oauth_consent_request(oauth.consent_link)]), + item, + ) + + if item.type == "structured_outputs": + so = cast(models.StructuredOutputsOutputItem, item) + text = json.dumps(so.output) if not isinstance(so.output, str) else so.output + return _attach_extras(Message(role="assistant", contents=[Content.from_text(text)]), item) + + raise ValueError(f"Unsupported OutputItem type: {item.type}") + + +# endregion + + +# region AF Message → SDK OutputItem (write path) + + +def _message_text(message: Message) -> str: + """Collapse a :class:`Message` into a single text blob. 
+ + The Foundry storage write path only persists the user-visible text — the + same compression the Responses runtime applies on its own write side. We + walk ``contents`` rather than relying on ``Message.text`` so we get a + consistent ordering and can drop non-text parts cleanly. + """ + chunks: list[str] = [] + for content in message.contents: + text = getattr(content, "text", None) + if isinstance(text, str) and text: + chunks.append(text) + if chunks: + return "".join(chunks) + # Fallback: surface ``Message.text`` if the framework knows how to + # render the contents (covers structured contents that synthesise text). + return message.text or "" + + +def _message_to_output_item(message: Message, item_id: str) -> models.OutputItem: + """Convert a single :class:`Message` to a Foundry SDK :class:`OutputItem`. + + Two-tier strategy: + + 1. **Lossless replay** — if the message carries a previously-captured + raw SDK snapshot under ``additional_properties[EXTRAS_KEY][RAW_KEY]`` + (set by :func:`_capture_raw` on the read path), rebuild the SDK + item from that snapshot via the model registry's discriminator + dispatch (:meth:`models.OutputItem._deserialize`). The snapshot's + ``id`` is rewritten to ``item_id`` so each write turn gets a + unique storage row, but every other declared field — content + variants (citations, reasoning, tool calls, function results, + …) AND any undeclared extras Foundry layered on top — survives + intact. This is the auditable round-trip the Foundry storage + backend relies on. + + 2. **Synthesise from text** — for messages constructed in user code + (no raw snapshot), fall back to the text-only path. ``assistant`` + maps to :class:`OutputItemOutputMessage` (output_text content, + ``status="completed"``); anything else maps to + :class:`OutputItemMessage` with the role normalised onto the + enum's three accepted values (``user`` / ``system`` / + ``developer`` — ``tool`` collapses to ``user`` because the + discriminator forbids it). 
+ + In both branches: + + * ``additional_properties[EXTRAS_KEY]`` extras other than the raw + snapshot are layered onto the emitted model via + :func:`_inject_extras` so message-level Foundry annotations + round-trip. + * **Every other ``additional_properties`` namespace** (notably the + Hosting spec's ``hosting`` envelope — channel, identity, + response_target, initial-write ``deliveries[]`` — plus any future + AF namespaces) is funneled into a single + :data:`AF_EXTRAS_KEY` container key on the SDK item via + :func:`_inject_af_extras`. Foundry storage round-trips that key + as opaque JSON, and :func:`_attach_extras` peels each sub-key + back onto its original namespace on load. This is what makes the + audit/replay envelope from the Hosting spec durable across + Foundry-storage save/load cycles. + """ + extras_raw: Any = (message.additional_properties or {}).get(EXTRAS_KEY) or {} + extras: dict[str, Any] = dict(cast("Mapping[str, Any]", extras_raw)) if isinstance(extras_raw, Mapping) else {} + raw_snapshot: Any = extras.get(RAW_KEY) + af_extras = _collect_af_extras(message) + + if isinstance(raw_snapshot, Mapping): + # ``_deserialize`` does discriminator dispatch and tolerates + # extras-bearing mappings; bypassing it (constructing the + # concrete class directly) would lose the discriminator wiring + # and break round-trip for tool-call / reasoning / ... variants. + snapshot: dict[str, Any] = dict(cast("Mapping[str, Any]", raw_snapshot)) + snapshot["id"] = item_id + deserialize = cast(Any, models.OutputItem)._deserialize + item = cast("models.OutputItem", deserialize(snapshot, [])) + return cast( + "models.OutputItem", + _inject_af_extras(_inject_extras(item, extras), af_extras), + ) + + text = _message_text(message) + # ``Message.role`` is an unconstrained ``str | enum`` slot — the + # framework keeps whatever the constructor was handed (str literals + # round-trip as ``str``; converters that pass the SDK's + # ``MessageRole`` enum store the enum). 
Normalise to the enum's + # ``value`` (or the bare string) so we don't end up writing + # ``"MessageRole.USER"`` to storage. + role_str = getattr(message.role, "value", message.role) + + # Construct via the mapping overload — the SDK's keyword overload tags + # ``content`` with the abstract base type and rejects our concrete list. + if role_str == "assistant": + item = models.OutputItemOutputMessage({ + "id": item_id, + "type": "output_message", + "role": "assistant", + "status": "completed", + "content": [ + {"type": "output_text", "text": text, "annotations": [], "logprobs": []}, + ], + }) + else: + # OutputItemMessage's role enum admits "user" / "system" / + # "developer". Anything outside that set (e.g. "tool") collapses to + # "user" so we don't crash on the SDK's discriminator validation. + role_value = role_str if role_str in ("user", "system", "developer") else "user" + item = models.OutputItemMessage({ + "id": item_id, + "type": "message", + "role": role_value, + "status": "completed", + "content": [ + {"type": "input_text", "text": text}, + ], + }) + return cast("models.OutputItem", _inject_af_extras(_inject_extras(item, extras), af_extras)) + + +def _messages_to_output_items(messages: Sequence[Message], *, id_prefix: str) -> list[models.OutputItem]: + """Convert a batch of messages to Foundry SDK items with stable IDs. + + Each message gets a deterministic id of the form ``{id_prefix}_itm_{i}``. + Callers (typically :meth:`FoundryHostedAgentHistoryProvider.save_messages`) + derive ``id_prefix`` from the response id they're persisting under so + the per-item ids are unique across a conversation. 
+ """ + return [_message_to_output_item(msg, f"{id_prefix}_itm_{i}") for i, msg in enumerate(messages)] + + +# endregion diff --git a/python/packages/foundry_hosting/tests/test_history_provider.py b/python/packages/foundry_hosting/tests/test_history_provider.py new file mode 100644 index 0000000000..6b6c4c42db --- /dev/null +++ b/python/packages/foundry_hosting/tests/test_history_provider.py @@ -0,0 +1,969 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Unit tests for FoundryHostedAgentHistoryProvider.""" + +from __future__ import annotations + +import os +import time +from collections.abc import Iterable +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +import pytest +from agent_framework import Content, HistoryProvider, Message +from azure.ai.agentserver.responses import ( + FoundryStorageProvider, + InMemoryResponseProvider, + IsolationContext, +) +from azure.ai.agentserver.responses.models import ( + OutputItem, + OutputItemOutputMessage, + OutputMessageContentOutputTextContent, +) +from azure.ai.agentserver.responses.store._foundry_errors import ( # pyright: ignore[reportPrivateUsage] + FoundryBadRequestError, +) + +from agent_framework_foundry_hosting import FoundryHostedAgentHistoryProvider +from agent_framework_foundry_hosting._history_provider import ( # pyright: ignore[reportPrivateUsage] + get_current_isolation, + reset_current_isolation, + set_current_isolation, +) + + +def _with_backend(prov: FoundryHostedAgentHistoryProvider, backend: Any) -> FoundryHostedAgentHistoryProvider: + """Inject a fake backend into ``prov`` so ``_resolve_backend`` returns it. + + Replaces the old ``backend=`` constructor parameter that was removed + when the dual-backend model was collapsed onto ``FoundryStorageProvider``. 
+ """ + prov._backend = backend # pyright: ignore[reportPrivateUsage] + return prov + + +# region Helpers + + +def _make_text_item(item_id: str, text: str) -> OutputItemOutputMessage: + return OutputItemOutputMessage( + id=item_id, + type="output_message", + role="assistant", + status="completed", + content=[OutputMessageContentOutputTextContent(type="output_text", text=text, annotations=[])], + ) + + +def _make_fake_backend( + *, + history_ids: list[str] | None = None, + items: list[OutputItem | None] | None = None, +) -> MagicMock: + """Build a MagicMock matching the _StorageBackend protocol.""" + backend = MagicMock() + + async def _ids(*args: Any, **kwargs: Any) -> list[str]: + return list(history_ids or []) + + async def _items(item_ids: Iterable[str], *, isolation: IsolationContext | None = None) -> list[OutputItem | None]: + return list(items or []) + + backend.get_history_item_ids = AsyncMock(side_effect=_ids) + backend.get_items = AsyncMock(side_effect=_items) + backend.create_response = AsyncMock() + return backend + + +class _FakeAccessToken: + def __init__(self, token: str, *, expires_in: float = 3600.0) -> None: + self.token = token + self.expires_on = int(time.time() + expires_in) + + +class _FakeCredential: + """Minimal AsyncTokenCredential stand-in.""" + + def __init__(self, *, token: str = "fake-token", expires_in: float = 3600.0) -> None: + self._token = token + self._expires_in = expires_in + self.calls: list[tuple[str, ...]] = [] + + async def get_token(self, *scopes: str) -> _FakeAccessToken: + self.calls.append(scopes) + return _FakeAccessToken(self._token, expires_in=self._expires_in) + + +# region Construction + + +class TestConstruction: + """Constructor + class-level invariants.""" + + def test_defaults(self) -> None: + prov = _with_backend(FoundryHostedAgentHistoryProvider(), _make_fake_backend()) + assert isinstance(prov, HistoryProvider) + assert prov.source_id == FoundryHostedAgentHistoryProvider.DEFAULT_SOURCE_ID + assert 
prov.store_inputs is True + assert prov.store_outputs is True + assert prov.load_messages is True + + def test_is_hosted_environment_reads_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False) + assert FoundryHostedAgentHistoryProvider.is_hosted_environment() is False + monkeypatch.setenv("FOUNDRY_HOSTING_ENVIRONMENT", "1") + assert FoundryHostedAgentHistoryProvider.is_hosted_environment() is True + + def test_endpoint_falls_back_to_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FOUNDRY_PROJECT_ENDPOINT", "https://example.foundry.azure.com") + prov = _with_backend(FoundryHostedAgentHistoryProvider(), _make_fake_backend()) + assert prov._endpoint == "https://example.foundry.azure.com" # pyright: ignore[reportPrivateUsage] + + +# region Backend resolution + + +class TestBackendResolution: + """Lazy backend construction + local fallback.""" + + def test_uses_explicit_backend(self) -> None: + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + assert prov._resolve_backend() is backend # pyright: ignore[reportPrivateUsage] + + def test_local_fallback_when_not_hosted(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False) + prov = FoundryHostedAgentHistoryProvider() + resolved = prov._resolve_backend() # pyright: ignore[reportPrivateUsage] + assert isinstance(resolved, InMemoryResponseProvider) + # Cached on subsequent calls. 
+ assert prov._resolve_backend() is resolved # pyright: ignore[reportPrivateUsage] + + def test_hosted_without_credential_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FOUNDRY_HOSTING_ENVIRONMENT", "1") + monkeypatch.setenv("FOUNDRY_PROJECT_ENDPOINT", "https://x.foundry.azure.com") + prov = FoundryHostedAgentHistoryProvider() + with pytest.raises(RuntimeError, match="requires an async credential"): + prov._resolve_backend() # pyright: ignore[reportPrivateUsage] + + def test_hosted_without_endpoint_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FOUNDRY_HOSTING_ENVIRONMENT", "1") + monkeypatch.delenv("FOUNDRY_PROJECT_ENDPOINT", raising=False) + prov = FoundryHostedAgentHistoryProvider(credential=_FakeCredential()) # type: ignore[arg-type] + with pytest.raises(RuntimeError, match="needs a Foundry project endpoint"): + prov._resolve_backend() # pyright: ignore[reportPrivateUsage] + + def test_hosted_builds_http_backend(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FOUNDRY_HOSTING_ENVIRONMENT", "1") + monkeypatch.setenv("FOUNDRY_PROJECT_ENDPOINT", "https://x.foundry.azure.com") + prov = FoundryHostedAgentHistoryProvider(credential=_FakeCredential()) # type: ignore[arg-type] + resolved = prov._resolve_backend() # pyright: ignore[reportPrivateUsage] + assert isinstance(resolved, FoundryStorageProvider) + + +# region get_messages + + +class TestGetMessages: + async def test_no_session_id_returns_empty(self) -> None: + backend = _make_fake_backend(history_ids=["x"], items=[_make_text_item("x", "hi")]) + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + assert await prov.get_messages(None) == [] + assert await prov.get_messages("") == [] + backend.get_history_item_ids.assert_not_called() + + async def test_no_history_returns_empty(self) -> None: + backend = _make_fake_backend(history_ids=[]) + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + assert 
await prov.get_messages("resp_123") == [] + backend.get_items.assert_not_called() + + async def test_loads_and_converts(self) -> None: + items: list[OutputItem | None] = [_make_text_item("itm_1", "hello"), _make_text_item("itm_2", "world")] + backend = _make_fake_backend(history_ids=["itm_1", "itm_2"], items=items) + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + + messages = await prov.get_messages("resp_123") + assert len(messages) == 2 + assert all(isinstance(m, Message) for m in messages) + assert messages[0].text == "hello" + assert messages[1].text == "world" + + backend.get_history_item_ids.assert_awaited_once() + call = backend.get_history_item_ids.await_args + assert call.args[0] == "resp_123" + assert call.args[1] is None # conversation_id + assert call.args[2] == 100 # default history_limit + + async def test_drops_missing_items(self) -> None: + backend = _make_fake_backend( + history_ids=["a", "b", "c"], + items=[_make_text_item("a", "first"), None, _make_text_item("c", "third")], + ) + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + messages = await prov.get_messages("resp_x") + assert [m.text for m in messages] == ["first", "third"] + + async def test_history_limit_propagates(self) -> None: + backend = _make_fake_backend(history_ids=[]) + prov = _with_backend(FoundryHostedAgentHistoryProvider(history_limit=7), backend) + # ``resp_*``-shaped session anchors directly; we expect a single + # backend call carrying the configured limit. + await prov.get_messages("resp_s") + assert backend.get_history_item_ids.await_count == 1 + assert backend.get_history_item_ids.await_args.args[2] == 7 + + async def test_non_resp_session_skips_storage_probe(self) -> None: + """Non-``resp_*`` session ids (e.g. opaque chat-isolation keys) + are not valid storage anchors — the provider must skip the + backend probe entirely so we don't hit "Malformed identifier" + HTTP 400s, returning an empty history instead. 
+ """ + backend = _make_fake_backend(history_ids=[]) + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + messages = await prov.get_messages("5leZSsJ3m1UtB-JW3m3iowFd5_zqP30SE0MmGUEkcGQ") + assert messages == [] + backend.get_history_item_ids.assert_not_awaited() + + async def test_resp_probe_tolerates_400(self) -> None: + """A 400 on the storage probe must not abort ``get_messages`` — + the provider falls through to an empty history.""" + backend = _make_fake_backend() + backend.get_history_item_ids.side_effect = FoundryBadRequestError("malformed", response_body=None) + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + messages = await prov.get_messages("resp_x") + assert messages == [] + + +# region IsolationContext + + +class TestIsolationContext: + async def test_explicit_isolation_kwarg_wins(self) -> None: + backend = _make_fake_backend(history_ids=[]) + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + explicit = IsolationContext(user_key="u-explicit", chat_key="c-explicit") + await prov.get_messages("resp_s", isolation=explicit) + assert backend.get_history_item_ids.await_args.kwargs["isolation"] is explicit + + async def test_contextvar_picked_up(self) -> None: + backend = _make_fake_backend(history_ids=["a"], items=[_make_text_item("a", "x")]) + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + ctx = IsolationContext(user_key="u-1", chat_key="c-1") + token = set_current_isolation(ctx) + try: + assert get_current_isolation() is ctx + await prov.get_messages("resp_s") + finally: + reset_current_isolation(token) + assert backend.get_history_item_ids.await_args.kwargs["isolation"] is ctx + assert backend.get_items.await_args.kwargs["isolation"] is ctx + + async def test_no_isolation_when_unset(self) -> None: + backend = _make_fake_backend(history_ids=[]) + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + await prov.get_messages("resp_s") + assert 
backend.get_history_item_ids.await_args.kwargs["isolation"] is None + + async def test_host_isolation_keys_picked_up(self) -> None: + """The host's ASGI middleware lifts the + ``x-agent-{user,chat}-isolation-key`` headers into a contextvar + exposed by ``agent_framework_hosting``. The provider lifts that + into its own ``IsolationContext`` so the storage call carries + the platform partition keys without channels having to forward + anything (or even know the headers exist).""" + pytest.importorskip("agent_framework_hosting") + from agent_framework_hosting import ( + IsolationKeys, + reset_current_isolation_keys, + set_current_isolation_keys, + ) + + backend = _make_fake_backend(history_ids=["a"], items=[_make_text_item("a", "x")]) + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + token = set_current_isolation_keys(IsolationKeys(user_key="u-3", chat_key="c-3")) + try: + await prov.get_messages("resp_s") + finally: + reset_current_isolation_keys(token) + applied = backend.get_history_item_ids.await_args.kwargs["isolation"] + assert applied is not None + assert applied.user_key == "u-3" + assert applied.chat_key == "c-3" + + +# region save_messages + + +class TestSaveMessages: + async def test_save_messages_writes_to_backend_when_bound(self) -> None: + """``save_messages`` writes a ``create_response`` envelope using + the host-bound response_id when present. + + The host's ``_bind_request_context`` plumbs the channel-minted + ``response_id`` (and prior turn's ``previous_response_id``) into + the provider via :func:`bind_request_context`, so the channel + envelope and the storage write share a single id per turn — + which is what makes the next turn's ``previous_response_id`` + walkable. 
+ """ + from agent_framework_foundry_hosting import bind_request_context + + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + msg = Message(role="assistant", contents=[Content.from_text("hello")]) + with bind_request_context(response_id="resp_bound_1", previous_response_id=None): + await prov.save_messages("session-x", [msg]) + + backend.create_response.assert_awaited_once() + call = backend.create_response.await_args + response = call.args[0] + assert response.id == "resp_bound_1" + # Conversation is intentionally omitted — Foundry isolation + # headers handle partitioning; cross-turn chaining is via the + # response-id chain only. + assert response.conversation is None + # Assistant outputs go on ``response.output``, not ``input_items`` + # — mirrors the agentserver runtime split (see + # ``_resolve_input_items_for_persistence``). + assert call.kwargs["input_items"] == [] + output = response.output or [] + assert len(output) == 1 + assert output[0]["type"] == "output_message" + + async def test_save_messages_falls_back_to_session_id_when_unbound(self) -> None: + """Without a host binding (e.g. local dev), ``save_messages`` + mints a fresh ``resp_*`` envelope and only chains when the + ``session_id`` is itself ``resp_*``-shaped.""" + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + msg = Message(role="user", contents=[Content.from_text("hi")]) + await prov.save_messages("resp_prev", [msg]) + + backend.create_response.assert_awaited_once() + call = backend.create_response.await_args + response = call.args[0] + assert response.id.startswith("caresp_") + # Provider walked the prior chain to seed history_item_ids; the + # fake backend returns ``[]`` so this stays empty but the call + # was made. 
+ assert backend.get_history_item_ids.await_count == 1 + assert backend.get_history_item_ids.await_args.args[0] == "resp_prev" + + async def test_save_messages_empty_short_circuits(self) -> None: + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + await prov.save_messages("s", []) + backend.create_response.assert_not_called() + + async def test_save_messages_no_session_short_circuits(self) -> None: + """No session id and no host binding → nothing to anchor against, + skip the write.""" + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + await prov.save_messages(None, [Message(role="user", contents=[Content.from_text("hi")])]) + backend.create_response.assert_not_called() + + async def test_save_messages_swallows_backend_errors(self) -> None: + """Persistence is best-effort — backend failures must NOT propagate. + + A successful agent turn that hits a transient storage error + (RBAC propagation lag, throttling, …) should still return a 2xx + to the caller; we only log so operators can spot systematic + failures. + """ + backend = _make_fake_backend() + backend.create_response.side_effect = RuntimeError("simulated 500 from storage") + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + # Must not raise. + await prov.save_messages("resp_session_x", [Message(role="user", contents=[Content.from_text("hi")])]) + backend.create_response.assert_awaited_once() + + async def test_save_then_get_round_trip_via_in_memory_backend(self) -> None: + """End-to-end save→get round-trip through ``InMemoryResponseProvider``. + + Mirrors the host-bound multi-turn flow: turn 1 binds a fresh + response id; turn 2 binds a new response id with the prior id + as ``previous_response_id``. ``get_messages`` on turn 2 is + called with the prior anchor and must return both turns. 
+ """ + from agent_framework_foundry_hosting import bind_request_context + + backend = InMemoryResponseProvider() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + + with bind_request_context(response_id="resp_turn1", previous_response_id=None): + await prov.save_messages( + "resp_turn1", + [Message(role="user", contents=[Content.from_text("ping")])], + ) + + with bind_request_context(response_id="resp_turn2", previous_response_id="resp_turn1"): + history = await prov.get_messages("resp_turn1") + assert [m.text for m in history] == ["ping"] + await prov.save_messages( + "resp_turn2", + [Message(role="assistant", contents=[Content.from_text("pong")])], + ) + + # Final read for turn 3: walking turn 2 must reveal both turns. + with bind_request_context(response_id="resp_turn3", previous_response_id="resp_turn2"): + messages = await prov.get_messages("resp_turn2") + assert [m.text for m in messages] == ["ping", "pong"] + roles = [getattr(m.role, "value", m.role) for m in messages] + assert roles == ["user", "assistant"] + + +# region aclose + + +class TestAclose: + async def test_closes_backend_with_aclose(self) -> None: + # Provider always closes whatever backend is currently bound; + # the dual-mode (external vs owned) distinction was dropped + # along with the ``backend=`` constructor param. 
+ backend = _make_fake_backend() + backend.aclose = AsyncMock() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + prov._resolve_backend() # pyright: ignore[reportPrivateUsage] + await prov.aclose() + backend.aclose.assert_awaited_once() + + async def test_aclose_idempotent(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False) + prov = FoundryHostedAgentHistoryProvider() + prov._resolve_backend() # pyright: ignore[reportPrivateUsage] + await prov.aclose() + await prov.aclose() # idempotent — second call is a no-op + + +# region Shared module re-exports + + +class TestSharedReExports: + """`_responses.py` must re-export the conversion helpers so tests and + downstream code that historically imported them keep working.""" + + def test_responses_re_exports_helpers(self) -> None: + # All of these used to live in ``_responses``; after the + # refactor they live in ``_shared`` but are re-exported. + from agent_framework_foundry_hosting import ( + _responses, # pyright: ignore[reportPrivateUsage] + _shared, # pyright: ignore[reportPrivateUsage] + ) + + for name in ( + "_arguments_to_str", + "_convert_message_content", + "_convert_output_message_content", + "_item_to_message", + "_items_to_messages", + "_output_item_to_message", + "_output_items_to_messages", + ): + assert getattr(_responses, name) is getattr(_shared, name), ( + f"{name} should be re-exported from _responses for backwards compat" + ) + + +# region Full AF ↔ Foundry round-trip via InMemoryResponseProvider + + +class TestAfFoundryRoundTrip: + """Round-trip two AF :class:`Message` instances through the Foundry SDK + types and back via the real :class:`InMemoryResponseProvider` backend. + + This is the same backend the provider uses in its local-fallback path + (i.e. 
the one that runs whenever ``FOUNDRY_HOSTING_ENVIRONMENT`` is + unset), so this test gives us coverage of the + "AF → Foundry SDK shape → storage → Foundry SDK shape → AF" pipeline + using exactly the production conversion code in :mod:`._shared`. + """ + + @staticmethod + def _af_message(text: str, item_id: str) -> tuple[Message, OutputItem]: + """Build an AF ``Message`` and the matching Foundry ``OutputItem``. + + Both messages are assistant ``output_message`` items because that's + the only OutputItem variant we round-trip through here — this test + exercises the conversion path, not every input/output shape. + """ + from agent_framework import Content + + af_message = Message(role="assistant", contents=[Content.from_text(text)]) + foundry_item = OutputItemOutputMessage( + id=item_id, + type="output_message", + role="assistant", + status="completed", + content=[OutputMessageContentOutputTextContent(type="output_text", text=text, annotations=[])], + ) + return af_message, foundry_item + + async def test_two_messages_round_trip_through_in_memory_backend(self) -> None: + from azure.ai.agentserver.responses.models import ResponseObject + + # 1. Start from two AF Messages (the "outside world" shape). + original_first, foundry_first = self._af_message("First message: 2 + 2 equals 4.", "itm_1") + original_second, foundry_second = self._af_message("Second message: 3 + 5 equals 8.", "itm_2") + + # 2. Hand the Foundry items to the real in-memory storage backend + # via the same ``create_response`` API the agent-server runtime + # uses on every successful turn. Passing them as ``input_items`` + # is enough — the in-memory backend records each item under its + # own id and exposes it via ``get_history_item_ids``. 
+ backend = InMemoryResponseProvider() + response = ResponseObject( + id="resp_round_trip", + object="response", + status="completed", + model="test-model", + created_at=0, + ) + await backend.create_response( + response, + input_items=[foundry_first, foundry_second], + history_item_ids=None, + ) + + # 3. Wire the provider to the seeded backend (no HTTP, no + # credential needed — this exercises the local-mode contract). + provider = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + + # 4. Retrieve via the public API. Internally this fans out: + # backend.get_history_item_ids → backend.get_items + # → ``_output_items_to_messages`` from ``_shared`` → AF Messages. + retrieved = await provider.get_messages("resp_round_trip") + + # 5. Round-trip preserves role + text content for both messages. + assert len(retrieved) == 2 + assert all(isinstance(m, Message) for m in retrieved) + + assert retrieved[0].role == original_first.role + assert retrieved[0].text == original_first.text == "First message: 2 + 2 equals 4." + + assert retrieved[1].role == original_second.role + assert retrieved[1].text == original_second.text == "Second message: 3 + 5 equals 8." + + async def test_additional_properties_round_trip_through_in_memory_backend(self) -> None: + """End-to-end audit/replay verification via the public provider API. + + Seeds the in-memory backend with an :class:`OutputItemOutputMessage` + carrying: + + * a non-default item id; + * declared content fields (``output_text`` with annotations); + * a non-default ``status``; + * an arbitrary, undeclared top-level key + (``"audit_trace_id": "..."``) — i.e. the kind of opaque field + Foundry might layer on for audit/replay; + * an undeclared key on a content child + (``"vendor_metadata": {...}``). 
+ + Reads the items back through ``get_messages`` (which captures the + :data:`RAW_KEY` snapshot), then writes them via ``save_messages`` + (which re-emits via the snapshot), then reads again and asserts + every field above survives the storage → AF → storage hop. Without + the raw-snapshot path, the second read would see synthesised + text-only items with newly-minted ids and lose every audit field. + """ + from azure.ai.agentserver.responses.models import ResponseObject + + from agent_framework_foundry_hosting._shared import EXTRAS_KEY, RAW_KEY # pyright: ignore[reportPrivateUsage] + + backend = InMemoryResponseProvider() + original_id = "itm_audit_001" + seed_item = OutputItemOutputMessage( + id=original_id, + type="output_message", + role="assistant", + status="completed", + content=[ + OutputMessageContentOutputTextContent( + type="output_text", + text="The final answer is 42.", + annotations=[], + ) + ], + ) + # Layer audit fields onto the SDK model directly — these are the + # "extras" that pyright would warn about but the runtime + # round-trips faithfully via as_dict(). + seed_item["audit_trace_id"] = "trace-abc-123" + seed_item.content[0]["vendor_metadata"] = {"score": 0.97, "model": "gpt-x"} + + seed_response = ResponseObject( + id="resp_audit", + object="response", + status="completed", + model="test-model", + created_at=0, + ) + await backend.create_response(seed_response, input_items=[seed_item], history_item_ids=None) + + provider = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + + # 1. Read back — provider stamps the RAW_KEY snapshot onto the + # AF Message's additional_properties. + first_read = await provider.get_messages("resp_audit") + assert len(first_read) == 1 + msg = first_read[0] + raw = msg.additional_properties[EXTRAS_KEY][RAW_KEY] + assert raw["id"] == original_id + assert raw["type"] == "output_message" + assert raw["audit_trace_id"] == "trace-abc-123" + assert raw["content"][0]["text"] == "The final answer is 42." 
+ assert raw["content"][0]["vendor_metadata"] == {"score": 0.97, "model": "gpt-x"} + + # 2. Write back — this is where the snapshot-driven write path + # matters: save_messages mints a new response_id but must + # re-emit the SDK item from the captured raw shape. + from agent_framework_foundry_hosting import bind_request_context + + with bind_request_context(response_id="resp_audit_replay", previous_response_id="resp_audit"): + await provider.save_messages("resp_audit_replay", [msg]) + + # 3. Inspect what was stored. We walk the new response id and + # expect to see the prior history seeded plus the replayed + # message — proof the snapshot survived storage→AF→storage. + item_ids = await backend.get_history_item_ids( + previous_response_id="resp_audit_replay", conversation_id=None, limit=20 + ) + assert len(item_ids) >= 1 + stored_items = await backend.get_items(item_ids) + # Find the replayed item (its content text matches). + replay = next( + dict(it) + for it in stored_items + if it is not None + and dict(it).get("type") == "output_message" + and dict(it).get("audit_trace_id") == "trace-abc-123" + and dict(it).get("id") != original_id + ) + stored_dict = replay + assert stored_dict["type"] == "output_message" + assert stored_dict["status"] == "completed" + assert stored_dict["audit_trace_id"] == "trace-abc-123" + assert stored_dict["content"][0]["text"] == "The final answer is 42." + assert stored_dict["content"][0]["vendor_metadata"] == {"score": 0.97, "model": "gpt-x"} + # The replay item id is regenerated per write turn (caller + # supplies it), so it must NOT equal the original — that's how + # we know the snapshot path didn't naively echo back the seed. + assert stored_dict["id"] != original_id + + # 4. Final read confirms the entire chain is observable through + # the public AF surface. Walking the new response id returns + # both the seeded prior item and the replayed one. 
+ second_read = await provider.get_messages("resp_audit_replay") + assert len(second_read) >= 1 + # Find the replayed message (matches the seed text + audit field). + replayed_msg = next( + m + for m in second_read + if EXTRAS_KEY in m.additional_properties + and m.additional_properties[EXTRAS_KEY].get(RAW_KEY, {}).get("audit_trace_id") == "trace-abc-123" + ) + replayed_raw = replayed_msg.additional_properties[EXTRAS_KEY][RAW_KEY] + assert replayed_raw["content"][0]["vendor_metadata"] == {"score": 0.97, "model": "gpt-x"} + + +# region Integration tests against a real Foundry project +# +# Required environment variables: +# +# * ``FOUNDRY_PROJECT_ENDPOINT`` — base URL of a real Foundry project, +# e.g. ``https://my-proj.services.ai.azure.com``. +# * Azure auth (any one of): +# - ``az login`` (recommended for local dev) +# - ``AZURE_CLIENT_ID`` + ``AZURE_CLIENT_SECRET`` + ``AZURE_TENANT_ID`` +# - Managed identity when on Azure +# The identity needs at least the ``Azure AI User`` role on the project. +# +# Optional (enables the seeded-history test): +# +# * ``FOUNDRY_HOSTING_PREVIOUS_RESPONSE_ID`` — a real response id with attached items. +# * ``FOUNDRY_HOSTING_CONVERSATION_ID`` — alternative. +# * ``FOUNDRY_HOSTING_USER_ISOLATION_KEY`` / +# ``FOUNDRY_HOSTING_CHAT_ISOLATION_KEY`` — set if your project enforces isolation. +# +# Run with: ``uv run pytest -m integration packages/foundry_hosting/tests/test_history_provider.py`` + + +_FOUNDRY_PROJECT_ENDPOINT = os.getenv("FOUNDRY_PROJECT_ENDPOINT", "") + +_skip_if_no_foundry_endpoint = pytest.mark.skipif( + not _FOUNDRY_PROJECT_ENDPOINT or _FOUNDRY_PROJECT_ENDPOINT == "https://test-project.services.ai.azure.com/", + reason=( + "FOUNDRY_PROJECT_ENDPOINT not set to a real Foundry project; " + "skipping FoundryHostedAgentHistoryProvider integration tests." 
+ ), +) + + +def _isolation_from_env() -> IsolationContext | None: + user_key = os.getenv("FOUNDRY_HOSTING_USER_ISOLATION_KEY") + chat_key = os.getenv("FOUNDRY_HOSTING_CHAT_ISOLATION_KEY") + if not user_key and not chat_key: + return None + return IsolationContext(user_key=user_key, chat_key=chat_key) + + +@pytest.fixture +async def _live_credential() -> object: + """Yield a :class:`AzureCliCredential` and close it afterwards.""" + # Imported lazily so collection still works in environments without + # ``azure-identity`` available (e.g. minimal CI matrices). + from azure.identity.aio import AzureCliCredential + + cred = AzureCliCredential() + try: + yield cred + finally: + await cred.close() + + +class TestLiveFoundryStorage: + """End-to-end tests against a real Foundry project's storage HTTP API. + + These tests are gated behind ``@pytest.mark.integration`` so the + default ``pytest -m 'not integration'`` run skips them; they are + additionally skipped unless ``FOUNDRY_PROJECT_ENDPOINT`` points at a + real project. + """ + + @pytest.mark.flaky + @pytest.mark.integration + @_skip_if_no_foundry_endpoint + async def test_get_messages_unknown_response_id_returns_empty(self, _live_credential: object) -> None: + """A brand-new previous_response_id should yield an empty history. + + The native HTTP backend treats a 404 from the storage ``item_ids`` + endpoint as "no prior history" rather than raising, so a freshly + bootstrapped client never crashes on its first request. This test + proves that contract end-to-end against the live service. 
+ """ + isolation = _isolation_from_env() + provider = FoundryHostedAgentHistoryProvider( + endpoint=_FOUNDRY_PROJECT_ENDPOINT, + credential=_live_credential, # type: ignore[arg-type] + ) + try: + messages = await provider.get_messages( + "resp_does_not_exist_integration_smoke", + isolation=isolation, + ) + finally: + await provider.aclose() + + assert messages == [] + + @pytest.mark.flaky + @pytest.mark.integration + @_skip_if_no_foundry_endpoint + @pytest.mark.skipif( + not os.getenv("FOUNDRY_HOSTING_PREVIOUS_RESPONSE_ID") and not os.getenv("FOUNDRY_HOSTING_CONVERSATION_ID"), + reason=( + "Set FOUNDRY_HOSTING_PREVIOUS_RESPONSE_ID or " + "FOUNDRY_HOSTING_CONVERSATION_ID to a real seeded conversation to " + "enable this test." + ), + ) + async def test_get_messages_returns_real_history(self, _live_credential: object) -> None: + """When pointed at a real seeded conversation we should get Messages back.""" + previous_response_id = os.getenv("FOUNDRY_HOSTING_PREVIOUS_RESPONSE_ID") or "" + conversation_id = os.getenv("FOUNDRY_HOSTING_CONVERSATION_ID") + isolation = _isolation_from_env() + + provider = FoundryHostedAgentHistoryProvider( + endpoint=_FOUNDRY_PROJECT_ENDPOINT, + credential=_live_credential, # type: ignore[arg-type] + history_limit=20, + ) + try: + # ``get_messages`` is keyed on ``session_id`` (== previous_response_id) + # so we pass that as the primary lookup; conversation_id is the + # fallback when only a conversation id is configured. 
+ messages = await provider.get_messages( + previous_response_id or (conversation_id or ""), + isolation=isolation, + ) + finally: + await provider.aclose() + + assert isinstance(messages, list) + assert messages, "Expected at least one message in the seeded history" + assert all(isinstance(m, Message) for m in messages) + + @pytest.mark.flaky + @pytest.mark.integration + @_skip_if_no_foundry_endpoint + async def test_invoke_then_read_and_write_with_isolation(self, _live_credential: object) -> None: + """Invoke a deployed Foundry hosted agent, then round-trip via storage. + + This test exercises the realistic, fully-permissioned path: + + 1. Use :class:`FoundryAgent` to invoke the deployed + ``agent-framework-hosting-sample`` (version 10) hosted agent + with an explicit ``isolation_key``. The Foundry runtime + creates the response + history items inside the storage + backend on the user's behalf. + 2. Read the resulting history back through our own native HTTP + :class:`FoundryHostedAgentHistoryProvider` using the matching + :class:`IsolationContext`. This is the production read path + that DevUI / external clients use to render conversation + transcripts. + 3. Best-effort: try to APPEND two more items to the same + response via :class:`FoundryStorageProvider` write API. The + storage write path is normally callable only from inside the + agent-server container's runtime identity (Foundry strips + the user's bearer token at the runtime boundary), so a 403 + here is expected for ordinary user principals; we skip the + write-side assertions in that case rather than failing. 
+ """ + from agent_framework_foundry import FoundryAgent + from azure.ai.agentserver.responses import ( + FoundryStorageProvider, + FoundryStorageSettings, + ) + from azure.ai.agentserver.responses.store._foundry_errors import ( # pyright: ignore[reportPrivateImportUsage] + FoundryApiError, + ) + + # Per-run-unique isolation key keeps each test run in its own + # tenant partition so concurrent runs (CI matrix, retries) don't + # collide. + isolation_key = f"af-hosting-roundtrip-{int(time.time())}" + isolation = IsolationContext(user_key=isolation_key, chat_key=isolation_key) + + # 1. Invoke the deployed hosted agent. + agent = FoundryAgent( + project_endpoint=_FOUNDRY_PROJECT_ENDPOINT, + agent_name="agent-framework-hosting-sample", + agent_version="10", + credential=_live_credential, # type: ignore[arg-type] + allow_preview=True, + default_options={"isolation_key": isolation_key}, + ) + # ``create_session()`` makes a fresh local session with no + # ``service_session_id`` set; the FoundryAgent's + # ``_prepare_run_context`` will lazily call + # ``project_client.beta.agents.create_session`` under our + # isolation key on first run. + session = agent.create_session() + prompt = "Please reply with exactly: 'Round-trip ack.'" + result = await agent.run(prompt, session=session) + + assert result.text, "FoundryAgent.run returned an empty response" + response_id = result.response_id + assert isinstance(response_id, str) and response_id, "Expected a non-empty response_id from FoundryAgent.run" + + # 2. Read history back via the native HTTP provider with the + # same isolation context. Try both the response_id and the + # service_session_id Foundry created on our behalf — depending + # on the runtime's storage layout, history may be anchored to + # either. 
+ service_session_id = session.service_session_id + candidates = [c for c in (response_id, service_session_id) if c] + + reader = FoundryHostedAgentHistoryProvider( + endpoint=_FOUNDRY_PROJECT_ENDPOINT, + credential=_live_credential, # type: ignore[arg-type] + history_limit=20, + ) + try: + messages_after_invoke: list[Message] = [] + for cand in candidates: + msgs = await reader.get_messages(cand, isolation=isolation) + if msgs: + messages_after_invoke = msgs + break + finally: + await reader.aclose() + + # The read path returning a well-typed list (possibly empty if + # Foundry compacts items out of the response chain we queried) + # is enough to confirm the isolation header path works end-to-end. + assert all(isinstance(m, Message) for m in messages_after_invoke) + + # If we got messages back, every one should carry the lossless + # raw-snapshot under additional_properties[EXTRAS_KEY][RAW_KEY] — + # this is what guarantees audit/replay round-trip through the + # storage backend. Without it, a write-back would synthesise a + # text-only item and lose every audit field. + if messages_after_invoke: + from agent_framework_foundry_hosting._shared import ( # pyright: ignore[reportPrivateUsage] + EXTRAS_KEY, + RAW_KEY, + ) + + for m in messages_after_invoke: + extras = m.additional_properties.get(EXTRAS_KEY) or {} + assert RAW_KEY in extras, f"Live read message missing raw snapshot: {m!r}" + raw = extras[RAW_KEY] + # Snapshot must carry the discriminator + id — the two + # fields save_messages relies on to rebuild the SDK item. + assert isinstance(raw, dict) + assert "type" in raw and "id" in raw + + # 3. Best-effort write: create a fresh response under the same + # isolation key carrying two known items, then read it back + # via the native HTTP provider. Skip the write-side + # assertions if Foundry rejects the call with 403 (expected + # when the runtime is the only authorised writer). 
+ from azure.ai.agentserver.responses.models import ResponseObject + + write_response_id = f"resp_af_write_{int(time.time())}" + _, foundry_first = TestAfFoundryRoundTrip._af_message( + "Appended message 1: 2 + 2 equals 4.", f"{write_response_id}_itm_1" + ) + _, foundry_second = TestAfFoundryRoundTrip._af_message( + "Appended message 2: 3 + 5 equals 8.", f"{write_response_id}_itm_2" + ) + + write_succeeded = False + writer = FoundryStorageProvider( + credential=_live_credential, # type: ignore[arg-type] + settings=FoundryStorageSettings.from_endpoint(_FOUNDRY_PROJECT_ENDPOINT), + ) + try: + await writer.create_response( + ResponseObject( + id=write_response_id, + object="response", + status="completed", + model="agent", + created_at=int(time.time()), + ), + input_items=[foundry_first, foundry_second], + history_item_ids=None, + isolation=isolation, + ) + write_succeeded = True + except FoundryApiError as exc: + if "403" not in str(exc): + raise + # Foundry strips the user bearer token at the runtime + # boundary, so external principals can't write directly to + # storage. The container's MSI is the authorised writer. + pytest.skip("Foundry rejected external storage write with 403 (expected outside container).") + finally: + await writer.aclose() + + # Re-read and verify our two appended items now show up. + if not write_succeeded: # pragma: no cover — defensive; pytest.skip already raised + return + reader2 = FoundryHostedAgentHistoryProvider( + endpoint=_FOUNDRY_PROJECT_ENDPOINT, + credential=_live_credential, # type: ignore[arg-type] + history_limit=20, + ) + try: + messages_after_write = await reader2.get_messages(write_response_id, isolation=isolation) + finally: + await reader2.aclose() + + appended_texts = {m.text for m in messages_after_write} + assert "Appended message 1: 2 + 2 equals 4." in appended_texts + assert "Appended message 2: 3 + 5 equals 8." 
in appended_texts From 73bf9a5bc4ee12e6a119a76f5a6f0e6e18033fce Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Tue, 5 May 2026 13:43:40 +0200 Subject: [PATCH 2/4] feat(foundry_hosting): add local_storage_root for file-based dev history Adds an optional `local_storage_root: str | Path | None` parameter to `FoundryHostedAgentHistoryProvider`. When set and the provider is running outside a Foundry Hosted Agent container, conversations are persisted to JSONL files via `agent_framework.FileHistoryProvider` laid out as: {root}/{user_key or '~none'}/{chat_key or '~none'}/{session_id}.jsonl Hosted mode (FOUNDRY_HOSTING_ENVIRONMENT set) ignores the option with a one-time INFO log so Foundry storage always wins on the platform. The in-memory fallback is unchanged when the option is omitted. Path safety: isolation segments are validated against the same character allowlist FileHistoryProvider uses for session-id stems and base64-url-encoded with a reserved "~iso-" prefix when unsafe. "~none" sentinel for missing keys can never collide with a real isolation key (real keys starting with "~" are encoded). The resolved target dir is also re-checked to be inside the configured root. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../_history_provider.py | 225 ++++++++++++++++-- .../tests/test_history_provider.py | 115 +++++++++ 2 files changed, 323 insertions(+), 17 deletions(-) diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py index a558979631..8427e9c557 100644 --- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py +++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py @@ -39,7 +39,10 @@ Local fallback: when ``FOUNDRY_HOSTING_ENVIRONMENT`` is unset, the provider transparently falls back to :class:`InMemoryResponseProvider` so the same -agent code runs in dev. +agent code runs in dev. Pass ``local_storage_root`` to use a persistent +file-based store instead of in-memory; histories are then laid out as +``{root}/{user_key or "~none"}/{chat_key or "~none"}/{session_id}.jsonl`` +via :class:`agent_framework.FileHistoryProvider`. """ from __future__ import annotations @@ -48,12 +51,14 @@ import logging import os import time +from base64 import urlsafe_b64encode from contextlib import contextmanager from contextvars import ContextVar from dataclasses import dataclass +from pathlib import Path from typing import TYPE_CHECKING, Any, ClassVar -from agent_framework import HistoryProvider, Message +from agent_framework import FileHistoryProvider, HistoryProvider, Message from azure.ai.agentserver.responses import ( FoundryStorageProvider, FoundryStorageSettings, @@ -175,7 +180,7 @@ def bind_request_context( response_id: str, previous_response_id: str | None = None, **_unused: Any, -) -> "Iterator[None]": +) -> Iterator[None]: """Bind the per-request response-chain anchors for this provider. 
Intended for the host (or any caller orchestrating an @@ -209,7 +214,7 @@ def get_current_request_context() -> _RequestContext | None: return _request_var.get() -def _host_isolation() -> "IsolationContext | None": +def _host_isolation() -> IsolationContext | None: """Lift the host-bound isolation contextvar into our local type. The host installs an ASGI middleware that reads @@ -247,6 +252,62 @@ def _host_isolation() -> "IsolationContext | None": _StorageBackend = "FoundryStorageProvider | InMemoryResponseProvider" +# Sentinel directory name used in place of a missing ``user_key`` / +# ``chat_key`` when laying out file-based local history. The tilde +# prefix is reserved (``_is_safe_isolation_segment`` rejects keys that +# start with one) so a real isolation key can never collide with the +# sentinel after sanitisation. +_ISOLATION_NONE_MARKER = "~none" +_ISOLATION_ENCODED_PREFIX = "~iso-" + +# Windows reserved file/directory stems. Mirrors +# ``FileHistoryProvider._WINDOWS_RESERVED_FILE_STEMS`` so the directory +# layer enforces the same portability constraints the file layer does. +_WINDOWS_RESERVED_STEMS = frozenset({ + "CON", + "PRN", + "AUX", + "NUL", + *(f"COM{i}" for i in range(1, 10)), + *(f"LPT{i}" for i in range(1, 10)), +}) + + +def _is_safe_isolation_segment(value: str) -> bool: + """Return whether ``value`` is safe to use directly as a directory name. + + Rules mirror :meth:`FileHistoryProvider._is_literal_session_file_stem_safe`, + with the additional rule that a leading tilde is reserved for our + sentinel/encoded prefixes so real keys can never collide with them. 
+ """ + if ( + not value + or value.startswith((".", "~")) + or value.endswith((" ", ".")) + or value.upper() in _WINDOWS_RESERVED_STEMS + ): + return False + if any(ord(character) < 32 for character in value): + return False + return all(character.isalnum() or character in "._-" for character in value) + + +def _encode_isolation_segment(value: str | None) -> str: + """Encode an isolation key into a filesystem-safe directory name. + + * ``None`` / empty → ``"~none"`` sentinel. + * Already-safe values pass through unchanged. + * Anything else is base64-url-encoded and prefixed with ``"~iso-"`` + so it is unambiguous and never collides with a real (safe) key. + """ + if value is None or value == "": + return _ISOLATION_NONE_MARKER + if _is_safe_isolation_segment(value): + return value + encoded = urlsafe_b64encode(value.encode("utf-8")).decode("ascii").rstrip("=") + return f"{_ISOLATION_ENCODED_PREFIX}{encoded}" + + class FoundryHostedAgentHistoryProvider(HistoryProvider): """``HistoryProvider`` backed by Foundry Hosted Agent storage. @@ -256,11 +317,25 @@ class FoundryHostedAgentHistoryProvider(HistoryProvider): selection is driven by the ``FOUNDRY_HOSTING_ENVIRONMENT`` environment variable. - ``session_id`` semantics: the value passed to :meth:`get_messages` - and :meth:`save_messages` is treated as the Responses - ``previous_response_id`` (or ``conversation_id``) whose chain to - load. When omitted (and no host-bound chain anchor is set), - :meth:`get_messages` returns an empty list (a fresh conversation). + For local runs that need to *persist* history across process + restarts, pass ``local_storage_root``: the provider then writes + each conversation to + ``{root}/{user_key or "~none"}/{chat_key or "~none"}/{session_id}.jsonl`` + via :class:`agent_framework.FileHistoryProvider`. 
The Foundry + response-chain semantics (``previous_response_id`` walking, + ``caresp_*`` id stamping, ``ResponseObject`` envelopes) are + bypassed in file mode — the on-disk format is plain JSONL of + :class:`Message` payloads, identical to ``FileHistoryProvider`` + standalone usage. ``local_storage_root`` is ignored when running + hosted (Foundry storage always wins). + + ``session_id`` semantics: in hosted / in-memory mode the value + passed to :meth:`get_messages` and :meth:`save_messages` is treated + as the Responses ``previous_response_id`` (or ``conversation_id``) + whose chain to load. When omitted (and no host-bound chain anchor + is set), :meth:`get_messages` returns an empty list (a fresh + conversation). In file mode ``session_id`` is used as the literal + filename stem (``FileHistoryProvider`` sanitises unsafe values). """ DEFAULT_SOURCE_ID: ClassVar[str] = "foundry_hosted_agent" @@ -268,7 +343,7 @@ class FoundryHostedAgentHistoryProvider(HistoryProvider): def __init__( self, *, - credential: "AsyncTokenCredential | None" = None, + credential: AsyncTokenCredential | None = None, endpoint: str | None = None, history_limit: int = 100, source_id: str = DEFAULT_SOURCE_ID, @@ -277,6 +352,7 @@ def __init__( store_context_messages: bool = False, store_context_from: set[str] | None = None, store_outputs: bool = True, + local_storage_root: str | Path | None = None, ) -> None: """Initialize the provider. @@ -284,13 +360,15 @@ def __init__( credential: Async token credential used to authenticate against the Foundry storage API. Required when running hosted (``FOUNDRY_HOSTING_ENVIRONMENT`` is set). Ignored in - local-mode (the in-memory backend needs no auth). + local-mode (the in-memory / file backends need no auth). endpoint: Foundry project endpoint URL. Defaults to the value of the ``FOUNDRY_PROJECT_ENDPOINT`` environment variable. Required when running hosted. history_limit: Maximum number of history items to fetch per ``get_messages`` call. 
Mirrors the agent-server runtime's ``ResponseContext._history_limit``. Default ``100``. + Ignored in file mode (``FileHistoryProvider`` returns the + full session file each call). source_id: Unique identifier for this provider instance, as required by ``HistoryProvider``. load_messages: Whether to load messages before invocation. @@ -308,6 +386,13 @@ def __init__( store_outputs: Whether to mirror response messages into Foundry storage. Default ``True`` for the same reason as ``store_inputs``. + local_storage_root: When set, *and* the provider is running + outside a Foundry Hosted Agent container, persist history + to JSONL files under + ``{root}/{user_key or "~none"}/{chat_key or "~none"}/{session_id}.jsonl`` + instead of using the in-memory backend. Ignored when + hosted (with a one-time INFO log). Defaults to ``None`` + (in-memory local fallback). """ super().__init__( source_id=source_id, @@ -323,6 +408,17 @@ def __init__( self._endpoint = endpoint or os.environ.get(_ENV_FOUNDRY_PROJECT_ENDPOINT) or None self._backend: FoundryStorageProvider | InMemoryResponseProvider | None = None + self._local_storage_root: Path | None = ( + Path(local_storage_root).resolve() if local_storage_root is not None else None + ) + # Cache one ``FileHistoryProvider`` per (user_key, chat_key) + # tuple. Bounded by the number of distinct isolation scopes the + # process sees; cleared on ``aclose``. + self._file_providers: dict[tuple[str, str], FileHistoryProvider] = {} + self._hosted_local_root_warned = False + if self._local_storage_root is not None and self.is_hosted_environment(): + self._warn_hosted_local_root_ignored() + @staticmethod def is_hosted_environment() -> bool: """Return ``True`` when running inside a Foundry Hosted Agent container. 
@@ -333,7 +429,7 @@ def is_hosted_environment() -> bool: """ return bool(os.environ.get(_ENV_FOUNDRY_HOSTING_ENVIRONMENT)) - def _resolve_backend(self) -> "FoundryStorageProvider | InMemoryResponseProvider": + def _resolve_backend(self) -> FoundryStorageProvider | InMemoryResponseProvider: """Return the storage backend, constructing it lazily on first use. * If ``FOUNDRY_HOSTING_ENVIRONMENT`` is set, build a @@ -378,9 +474,12 @@ async def aclose(self) -> None: """Release storage resources held by this provider. Safe to call multiple times. Closes the lazily-constructed - backend if one was created. ``InMemoryResponseProvider`` has no - ``aclose`` and is closed implicitly on garbage collection. + backend if one was created and drops any cached file-history + providers. ``InMemoryResponseProvider`` and + ``FileHistoryProvider`` have no ``aclose`` and are closed + implicitly on garbage collection. """ + self._file_providers.clear() if self._backend is None: return aclose = getattr(self._backend, "aclose", None) @@ -388,6 +487,75 @@ async def aclose(self) -> None: await aclose() self._backend = None + def _warn_hosted_local_root_ignored(self) -> None: + """Log (once) that ``local_storage_root`` is being ignored under hosted mode.""" + if self._hosted_local_root_warned: + return + self._hosted_local_root_warned = True + logger.info( + "FoundryHostedAgentHistoryProvider ignored local_storage_root=%s because " + "FOUNDRY_HOSTING_ENVIRONMENT is set; Foundry storage takes precedence " + "when hosted.", + self._local_storage_root, + ) + + def _resolve_local_file_provider( + self, + isolation: IsolationContext | None, + ) -> FileHistoryProvider | None: + """Return a ``FileHistoryProvider`` for the current isolation, or ``None``. + + Returns ``None`` when ``local_storage_root`` is unset *or* the + provider is running in hosted mode (in which case Foundry + storage handles persistence). 
Otherwise builds — and caches — + one provider per (user_key, chat_key) tuple, rooted at the + sanitised ``{root}/{user_segment}/{chat_segment}`` directory. + + Raises: + ValueError: If the resolved isolation directory escapes + ``local_storage_root`` (defence in depth — the + sanitisation should already prevent this). + """ + if self._local_storage_root is None: + return None + if self.is_hosted_environment(): + self._warn_hosted_local_root_ignored() + return None + + user_key = isolation.user_key if isolation is not None else None + chat_key = isolation.chat_key if isolation is not None else None + cache_key = (user_key or "", chat_key or "") + cached = self._file_providers.get(cache_key) + if cached is not None: + return cached + + user_segment = _encode_isolation_segment(user_key) + chat_segment = _encode_isolation_segment(chat_key) + target_dir = (self._local_storage_root / user_segment / chat_segment).resolve() + if not target_dir.is_relative_to(self._local_storage_root): + raise ValueError( + "Isolation segments resolved outside of local_storage_root: " + f"user_key={user_key!r} chat_key={chat_key!r}" + ) + + provider = FileHistoryProvider( + target_dir, + source_id=f"{self.source_id}__file__{user_segment}__{chat_segment}", + load_messages=self.load_messages, + store_inputs=self.store_inputs, + store_context_messages=self.store_context_messages, + store_context_from=self.store_context_from, + store_outputs=self.store_outputs, + ) + self._file_providers[cache_key] = provider + logger.debug( + "FoundryHostedAgentHistoryProvider created file backend for isolation (user=%s, chat=%s) at %s", + user_key, + chat_key, + target_dir, + ) + return provider + async def get_messages( self, session_id: str | None, @@ -421,7 +589,18 @@ async def get_messages( (such as chat-isolation-key values) are skipped because the storage backend rejects them with HTTP 400 "Malformed identifier". 
+ + When ``local_storage_root`` is configured (and the provider + is running outside a Foundry Hosted Agent container), this + method instead delegates to a per-isolation + :class:`FileHistoryProvider` and ``session_id`` is used as + the literal file stem. """ + isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation() + file_provider = self._resolve_local_file_provider(isolation) + if file_provider is not None: + return await file_provider.get_messages(session_id, state=state, **kwargs) + bound = get_current_request_context() # Prefer the host-bound previous_response_id over the session_id # the framework feeds in: the bound value is the id we ourselves @@ -441,7 +620,6 @@ async def get_messages( # No walkable anchor → fresh conversation, nothing to load. return [] - isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation() backend = self._resolve_backend() try: @@ -471,7 +649,7 @@ async def get_messages( async def save_messages( self, session_id: str | None, - messages: "Sequence[Message]", + messages: Sequence[Message], *, state: dict[str, Any] | None = None, **kwargs: Any, @@ -504,10 +682,24 @@ async def save_messages( state: Unused — kept for ``HistoryProvider`` compatibility. **kwargs: Extensibility hook; ``isolation`` may be supplied explicitly to override the contextvar. + + Notes: + When ``local_storage_root`` is configured (and the provider + is running outside a Foundry Hosted Agent container), this + method instead delegates to a per-isolation + :class:`FileHistoryProvider` and ``session_id`` is used as + the literal file stem. The Foundry response-chain stamping + described above is bypassed entirely in that mode. 
""" if not messages: return + isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation() + file_provider = self._resolve_local_file_provider(isolation) + if file_provider is not None: + await file_provider.save_messages(session_id, messages, state=state, **kwargs) + return + bound = get_current_request_context() # Prefer the host-bound response_id so the channel envelope and # the storage write agree on a single id per turn — which is @@ -538,7 +730,6 @@ async def save_messages( if previous_response_id is None and env_session and env_session.startswith(("caresp_", "resp_")): previous_response_id = env_session - isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation() logger.debug( "save_messages: response_id=%r previous_response_id=%r isolation=%s", response_id, diff --git a/python/packages/foundry_hosting/tests/test_history_provider.py b/python/packages/foundry_hosting/tests/test_history_provider.py index 6b6c4c42db..763c912a41 100644 --- a/python/packages/foundry_hosting/tests/test_history_provider.py +++ b/python/packages/foundry_hosting/tests/test_history_provider.py @@ -439,6 +439,121 @@ async def test_aclose_idempotent(self, monkeypatch: pytest.MonkeyPatch) -> None: await prov.aclose() # idempotent — second call is a no-op +# region Local file storage option + + +class TestLocalFileStorage: + """`local_storage_root` swaps the in-memory local fallback for a + per-isolation :class:`FileHistoryProvider` so dev runs persist + across process restarts.""" + + async def test_unset_keeps_in_memory_fallback(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None: + monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False) + prov = FoundryHostedAgentHistoryProvider() + assert prov._resolve_local_file_provider(None) is None # pyright: ignore[reportPrivateUsage] + assert isinstance( + prov._resolve_backend(), # pyright: ignore[reportPrivateUsage] + InMemoryResponseProvider, + ) + + async def 
test_creates_per_isolation_provider(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None: + monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False) + prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path) + iso = IsolationContext(user_key="alice", chat_key="chat-1") + + fp = prov._resolve_local_file_provider(iso) # pyright: ignore[reportPrivateUsage] + assert fp is not None + # Cached on subsequent calls for the same (user, chat). + assert prov._resolve_local_file_provider(iso) is fp # pyright: ignore[reportPrivateUsage] + # Different isolation → different provider rooted at a different dir. + other = prov._resolve_local_file_provider( # pyright: ignore[reportPrivateUsage] + IsolationContext(user_key="bob", chat_key="chat-1"), + ) + assert other is not None and other is not fp + assert fp.storage_path != other.storage_path + assert fp.storage_path == (tmp_path / "alice" / "chat-1").resolve() + + async def test_missing_isolation_uses_sentinel_dir(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None: + monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False) + prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path) + fp = prov._resolve_local_file_provider(None) # pyright: ignore[reportPrivateUsage] + assert fp is not None + assert fp.storage_path == (tmp_path / "~none" / "~none").resolve() + + async def test_unsafe_isolation_segments_are_encoded(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None: + monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False) + prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path) + iso = IsolationContext(user_key="../escape", chat_key="ok-chat") + fp = prov._resolve_local_file_provider(iso) # pyright: ignore[reportPrivateUsage] + assert fp is not None + # Encoded segment never contains a ``/`` and never escapes the root. 
+ assert fp.storage_path.is_relative_to(tmp_path.resolve()) + assert "../" not in str(fp.storage_path) + # Encoded segments use the reserved ``~iso-`` prefix. + parts = fp.storage_path.relative_to(tmp_path.resolve()).parts + assert parts[0].startswith("~iso-") + assert parts[1] == "ok-chat" + + async def test_hosted_mode_ignores_local_storage_root( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any, caplog: pytest.LogCaptureFixture + ) -> None: + monkeypatch.setenv("FOUNDRY_HOSTING_ENVIRONMENT", "1") + with caplog.at_level("INFO", logger="agent_framework_foundry_hosting._history_provider"): + prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path) + # File provider is never resolved when hosted. + assert prov._resolve_local_file_provider(None) is None # pyright: ignore[reportPrivateUsage] + assert any("ignored local_storage_root" in record.message for record in caplog.records) + + async def test_get_and_save_round_trip_via_file(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None: + monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False) + prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path) + iso = IsolationContext(user_key="alice", chat_key="chat-1") + + msgs = [ + Message(role="user", contents=["hello"]), + Message(role="assistant", contents=["hi back"]), + ] + await prov.save_messages("conv-1", msgs, isolation=iso) + + # File exists at the expected nested path with session_id as stem. + expected_path = tmp_path / "alice" / "chat-1" / "conv-1.jsonl" + assert expected_path.exists() + # Two JSONL records (one per message). + assert len([line for line in expected_path.read_text().splitlines() if line.strip()]) == 2 + + loaded = await prov.get_messages("conv-1", isolation=iso) + assert [m.text for m in loaded] == ["hello", "hi back"] + + # Different isolation → different file → independent history. 
+ bob_loaded = await prov.get_messages( + "conv-1", + isolation=IsolationContext(user_key="bob", chat_key="chat-1"), + ) + assert bob_loaded == [] + + async def test_session_id_with_special_chars_is_sanitised_by_file_provider( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any + ) -> None: + # The wrapper passes ``session_id`` through unchanged; the + # delegate ``FileHistoryProvider`` is responsible for sanitising + # it. This test just confirms the delegation works for a + # non-trivial id without raising. + monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False) + prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path) + msgs = [Message(role="user", contents=["hi"])] + await prov.save_messages("conv:with:colons", msgs) + loaded = await prov.get_messages("conv:with:colons") + assert [m.text for m in loaded] == ["hi"] + + async def test_aclose_clears_file_provider_cache(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None: + monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False) + prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path) + prov._resolve_local_file_provider(IsolationContext(user_key="alice")) # pyright: ignore[reportPrivateUsage] + assert prov._file_providers # pyright: ignore[reportPrivateUsage] + await prov.aclose() + assert not prov._file_providers # pyright: ignore[reportPrivateUsage] + + # region Shared module re-exports From 20cbc3e29701b07dd88a1d8f8eaecb3fc5e5a42b Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Wed, 6 May 2026 15:43:50 +0200 Subject: [PATCH 3/4] fix(foundry_hosting): address PR-1 review comments - _shared.py:_capture_raw narrows `except Exception` to `except TypeError` and emits a WARNING with traceback so the lossy fallback to a synthesized round-trip is observable. Mirrors the reviewer suggestion. 
- _history_provider.py:save_messages narrows `except Exception` to
  `except FoundryStorageError` so only storage-validation failures
  (4xx/5xx, opaque server errors) are swallowed. Network / TLS / auth /
  payload-builder bugs propagate so the caller can retry / alert. Adds
  an instance-level `failed_writes` counter that operators can poll for
  silent-drop visibility.
- _history_provider.py id-stamping loop: drops the
  `contextlib.suppress(AttributeError, TypeError)` around
  `item.id = new_id` so SDK contract changes surface in the test suite
  instead of silently corrupting the chain (the storage backend rejects
  the entire `create_response` with HTTP 500 when synthetic prefix-based
  ids leak through). `import contextlib` removed.
- tests:
  * Unit-cover `foundry_response_id` / `foundry_response_id_factory` /
    `foundry_item_id` so SDK `IdGenerator` contract changes are caught
    locally.
  * Cover the `save_messages` wire payload: required-by-storage fields
    (`background`, `parallel_tool_calls`, `instructions`,
    `agent_reference`), env-var-driven stamping (`FOUNDRY_AGENT_NAME` /
    `FOUNDRY_AGENT_VERSION` / `FOUNDRY_AGENT_SESSION_ID` /
    `MODEL_DEPLOYMENT_NAME` with `AZURE_AI_MODEL_DEPLOYMENT_NAME`
    fallback), and the rule that `model` / `agent_session_id` /
    `agent_reference.version` are omitted (not stamped to `None`) when
    their env vars are unset.
  * Cover the `FOUNDRY_AGENT_SESSION_ID` last-resort chain anchor on
    both the get and save paths, including the prefix gate that blocks
    non-`caresp_*`/`resp_*` values from reaching storage, and the
    precedence rule that a host binding wins over the env.
  * Replace the old `test_save_messages_swallows_backend_errors` with
    two tests asserting the new contract: storage errors are swallowed
    and bump `failed_writes`; everything else propagates and leaves the
    counter at zero.

141 unit tests pass; mypy + pyright + ruff clean.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../_history_provider.py | 43 +- .../_shared.py | 18 +- .../tests/test_history_provider.py | 377 +++++++++++++++++- 3 files changed, 420 insertions(+), 18 deletions(-) diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py index 8427e9c557..e6c6b09876 100644 --- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py +++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py @@ -47,7 +47,6 @@ from __future__ import annotations -import contextlib import logging import os import time @@ -70,6 +69,7 @@ from azure.ai.agentserver.responses.store._foundry_errors import ( # pyright: ignore[reportPrivateUsage] FoundryBadRequestError, FoundryResourceNotFoundError, + FoundryStorageError, ) from ._shared import ( @@ -419,6 +419,12 @@ def __init__( if self._local_storage_root is not None and self.is_hosted_environment(): self._warn_hosted_local_root_ignored() + # Observability: number of ``save_messages`` calls dropped by + # :class:`FoundryStorageError` from ``backend.create_response``. + # Operators / health probes can read this attribute directly to + # detect silent persistence loss; never decremented. + self.failed_writes: int = 0 + @staticmethod def is_hosted_environment() -> bool: """Return ``True`` when running inside a Foundry Hosted Agent container. @@ -789,8 +795,16 @@ async def save_messages( if factory is None: continue new_id = factory(response_id) - with contextlib.suppress(AttributeError, TypeError): - item.id = new_id # type: ignore[attr-defined] + # Plain attribute assignment — the SDK ``OutputItem`` models + # are ``MutableMapping``s with ``__setattr__`` wired to dict + # set, so this is expected to succeed for every type listed + # above. 
The previous ``contextlib.suppress`` masked SDK + # contract changes (next save would silently retain the + # synthetic prefix-based id and the storage backend would + # reject the entire ``create_response`` with HTTP 500). + # Letting it raise surfaces those breakages to the test + # suite instead. + item.id = new_id # type: ignore[attr-defined] input_items: list[Any] = [] output_items: list[Any] = [] @@ -916,15 +930,32 @@ async def save_messages( history_item_ids=history_item_ids, isolation=isolation, ) - except Exception as exc: + except FoundryStorageError as exc: + # Storage-validation failures (4xx ``invalid_payload`` / + # ``not_found``, opaque 5xx) are best-effort losses: the + # caller's run already produced output and we don't want to + # crash the whole turn over a chain-write the user can't + # recover from. They are still observable: every drop bumps + # ``failed_writes`` (operators can poll it / surface in + # health probes) and the full traceback + ``response_body`` + # is logged. + # + # Network / TLS / DNS errors, expired-credential 401/403s, + # and bugs in the wire-payload builder above (e.g. a + # required-field regression) deliberately propagate so they + # surface to the caller and trigger retry / alerting paths + # instead of being silently dropped here. 
+ self.failed_writes += 1 err_body = getattr(exc, "response_body", None) logger.exception( - "FoundryHostedAgentHistoryProvider.save_messages: backend rejected " - "%d message(s) (response_id=%s, previous_response_id=%s, error_body=%s).", + "FoundryHostedAgentHistoryProvider.save_messages: storage rejected " + "%d message(s) (response_id=%s, previous_response_id=%s, error_body=%s, " + "failed_writes=%d).", len(messages), response_id, previous_response_id, err_body, + self.failed_writes, ) return logger.debug( diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py index 890dd7bcfe..4b3d3c4dd3 100644 --- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py +++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py @@ -212,14 +212,22 @@ def _capture_raw(message: Message, item: Mapping[str, Any]) -> Message: (citations, reasoning, tool results, …) and any extras Foundry layered on top of the declared schema. - A best-effort ``dict(...)`` is used so failure to snapshot (e.g. a - non-mapping subclass surfacing in the future) degrades gracefully to - the lossy-but-functional synthesise-from-text path rather than - crashing the read. + Narrow ``TypeError`` is the only swallowed failure (matches the + ``Mapping`` contract precondition); ``MemoryError`` and other + ``Exception`` subclasses propagate so genuine bugs aren't masked. + A WARNING with ``exc_info`` is logged so the lossy fallback is + observable downstream — without it a regression in the SDK schema + silently drops citations / reasoning / tool-result extras on every + round-tripped message and there is no breadcrumb pointing here. 
""" try: raw = dict(item) - except Exception: + except TypeError: + logger.warning( + "_capture_raw: SDK item %r is not mapping-like; round-tripping without raw snapshot", + type(item).__name__, + exc_info=True, + ) return message message.additional_properties.setdefault(EXTRAS_KEY, {})[RAW_KEY] = raw return message diff --git a/python/packages/foundry_hosting/tests/test_history_provider.py b/python/packages/foundry_hosting/tests/test_history_provider.py index 763c912a41..a1d372c486 100644 --- a/python/packages/foundry_hosting/tests/test_history_provider.py +++ b/python/packages/foundry_hosting/tests/test_history_provider.py @@ -366,20 +366,42 @@ async def test_save_messages_no_session_short_circuits(self) -> None: await prov.save_messages(None, [Message(role="user", contents=[Content.from_text("hi")])]) backend.create_response.assert_not_called() - async def test_save_messages_swallows_backend_errors(self) -> None: - """Persistence is best-effort — backend failures must NOT propagate. + async def test_save_messages_swallows_storage_errors(self) -> None: + """Persistence is best-effort for *Foundry storage* failures. - A successful agent turn that hits a transient storage error - (RBAC propagation lag, throttling, …) should still return a 2xx - to the caller; we only log so operators can spot systematic - failures. + Storage-validation rejections, opaque 5xx, etc. should be + swallowed (the agent run already produced output and the + caller can't recover from a chain-write failure mid-stream). + Counter is bumped for observability. """ backend = _make_fake_backend() - backend.create_response.side_effect = RuntimeError("simulated 500 from storage") + backend.create_response.side_effect = FoundryBadRequestError( + "simulated invalid_payload", + response_body={"error": {"code": "invalid_payload"}}, + ) prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) # Must not raise. 
await prov.save_messages("resp_session_x", [Message(role="user", contents=[Content.from_text("hi")])]) backend.create_response.assert_awaited_once() + assert prov.failed_writes == 1 + + async def test_save_messages_propagates_non_storage_errors(self) -> None: + """Network / auth / payload-builder bugs MUST surface to the caller. + + Anything that's not a ``FoundryStorageError`` — connection + resets, expired credential 401/403s, ``AttributeError`` from a + regression in the wire-payload builder — propagates so the + caller can retry / alert. Counter is NOT bumped for these. + """ + backend = _make_fake_backend() + backend.create_response.side_effect = ConnectionError("simulated network failure") + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + with pytest.raises(ConnectionError, match="simulated network failure"): + await prov.save_messages( + "resp_session_x", + [Message(role="user", contents=[Content.from_text("hi")])], + ) + assert prov.failed_writes == 0 async def test_save_then_get_round_trip_via_in_memory_backend(self) -> None: """End-to-end save→get round-trip through ``InMemoryResponseProvider``. @@ -554,6 +576,347 @@ async def test_aclose_clears_file_provider_cache(self, monkeypatch: pytest.Monke assert not prov._file_providers # pyright: ignore[reportPrivateUsage] +# region Foundry id helpers (`_ids.py`) + + +class TestFoundryIdHelpers: + """Cover the public ``_ids`` re-exports so SDK ``IdGenerator`` + contract changes surface in unit tests rather than as opaque + HTTP 500 ``server_error`` from Foundry storage at runtime.""" + + def test_foundry_response_id_carries_partition_key(self) -> None: + """A minted ``caresp_*`` id must embed an 18-char partition key. + + Free-form ``resp_`` ids carry no parseable partition key + and Foundry storage rejects writes with HTTP 500. 
+ """ + from agent_framework_foundry_hosting import foundry_response_id + + new_id = foundry_response_id() + assert new_id.startswith("caresp_") + # ``caresp_`` (7) + 18-char partition key + 32-char entropy = 57. + # The legacy 48-char body variant is also accepted by storage, + # so just check the lower bound. + assert len(new_id) >= 7 + 18 + 32 - 8 + + def test_foundry_response_id_reuses_previous_partition_key(self) -> None: + """Chained writes co-locate by reusing the prior partition key. + + Foundry storage rejects chained writes whose new record sits in + a different partition than the prior one. Passing a ``caresp_*`` + ``previous_response_id`` should produce a new id whose partition + segment matches. + """ + from agent_framework_foundry_hosting import foundry_response_id + + prior = foundry_response_id() + # Partition key = 18 chars after the ``caresp_`` prefix. + prior_partition = prior[len("caresp_") : len("caresp_") + 18] + chained = foundry_response_id(prior) + assert chained.startswith("caresp_") + assert chained != prior + assert chained[len("caresp_") : len("caresp_") + 18] == prior_partition + + def test_foundry_response_id_factory_returns_callable(self) -> None: + """The factory wrapper used by ``ResponsesChannel`` must + delegate to :func:`foundry_response_id` so chained turns can + seed the partition key from ``previous_response_id``.""" + from agent_framework_foundry_hosting import ( + foundry_response_id, + foundry_response_id_factory, + ) + + factory = foundry_response_id_factory() + assert factory is foundry_response_id + + def test_foundry_item_id_for_known_input_type(self) -> None: + """Recognised ``Item`` types get a typed prefix and a + partition-key hint matching the response id when supplied.""" + from azure.ai.agentserver.responses.models import ( + ItemMessage, + MessageContentInputTextContent, + ) + + from agent_framework_foundry_hosting import foundry_item_id, foundry_response_id + + response_id = foundry_response_id() + partition = 
response_id[len("caresp_") : len("caresp_") + 18] + item = ItemMessage( + type="message", + role="user", + content=[MessageContentInputTextContent(type="input_text", text="hi")], + ) + new_id = foundry_item_id(item, response_id) + assert new_id is not None + # ``msg_*`` is what ``IdGenerator.new_message_item_id`` mints. + assert new_id.startswith("msg_") + assert partition in new_id + + def test_foundry_item_id_returns_none_for_unknown_type(self) -> None: + """Reference-only / unrecognised types must return ``None`` + per the SDK helper's contract — callers (e.g. + ``save_messages``'s id-stamping loop) skip these so storage + only receives ids it can parse.""" + from agent_framework_foundry_hosting import foundry_item_id + + class _UnknownItem: + pass + + assert foundry_item_id(_UnknownItem()) is None + + +# region Wire payload stamping (`save_messages`) + + +class TestSaveMessagesWirePayload: + """Storage rejects ``create_response`` payloads that omit fields + flagged as REQUIRED in ``ResponseObject`` (``parallel_tool_calls``, + ``instructions``, ``background``) or that leak extras the validator + refuses (``conversation``, ``model=None``, …). Any regression that + drops one of these silently breaks every hosted deploy with an + opaque 4xx; cover them here so the test suite catches it first.""" + + async def test_envelope_includes_required_storage_fields( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """``background``, ``parallel_tool_calls``, ``instructions``, + and ``agent_reference`` MUST be present on every stamped + envelope; storage returns HTTP 400 ``invalid_payload`` if any + of them is missing.""" + from agent_framework_foundry_hosting import bind_request_context + + # Strip env so the defaults are exercised cleanly. 
+ for var in ( + "FOUNDRY_AGENT_NAME", + "FOUNDRY_AGENT_VERSION", + "FOUNDRY_AGENT_SESSION_ID", + "MODEL_DEPLOYMENT_NAME", + "AZURE_AI_MODEL_DEPLOYMENT_NAME", + ): + monkeypatch.delenv(var, raising=False) + + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + with bind_request_context(response_id="resp_envelope_1", previous_response_id=None): + await prov.save_messages( + "session-x", + [Message(role="assistant", contents=[Content.from_text("hi")])], + ) + + backend.create_response.assert_awaited_once() + response = backend.create_response.await_args.args[0] + body = response.as_dict() + + # Required-by-storage fields. + assert body["background"] is False + assert body["parallel_tool_calls"] is False + assert body["instructions"] == "" + assert body["agent_reference"] == { + "type": "agent_reference", + "name": "agent-framework-host", + } + + async def test_envelope_omits_optional_fields_when_env_unset( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """``model``, ``agent_session_id``, and the ``version`` slot of + ``agent_reference`` are omitted (NOT stamped as ``None``) when + their env vars are unset — storage rejects ``model: null``.""" + from agent_framework_foundry_hosting import bind_request_context + + for var in ( + "FOUNDRY_AGENT_NAME", + "FOUNDRY_AGENT_VERSION", + "FOUNDRY_AGENT_SESSION_ID", + "MODEL_DEPLOYMENT_NAME", + "AZURE_AI_MODEL_DEPLOYMENT_NAME", + ): + monkeypatch.delenv(var, raising=False) + + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + with bind_request_context(response_id="resp_omit_1", previous_response_id=None): + await prov.save_messages( + "session-x", + [Message(role="assistant", contents=[Content.from_text("hi")])], + ) + + body = backend.create_response.await_args.args[0].as_dict() + # Either entirely absent or explicitly None — assert the field + # was NOT stamped to a non-None value. 
+ assert body.get("model") is None + assert body.get("agent_session_id") is None + # ``version`` slot inside agent_reference is omitted entirely + # (the key is absent, not set to None) when the env var is unset. + assert "version" not in body["agent_reference"] + + async def test_envelope_picks_up_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None: + """When the platform-set env vars are present they MUST land on + the envelope: ``FOUNDRY_AGENT_NAME`` / ``FOUNDRY_AGENT_VERSION`` + feed ``agent_reference``, ``FOUNDRY_AGENT_SESSION_ID`` feeds + ``agent_session_id``, and ``MODEL_DEPLOYMENT_NAME`` feeds + ``model``.""" + from agent_framework_foundry_hosting import bind_request_context + + monkeypatch.setenv("FOUNDRY_AGENT_NAME", "concierge") + monkeypatch.setenv("FOUNDRY_AGENT_VERSION", "v3") + monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "caresp_envsessionABCDEF") + monkeypatch.setenv("MODEL_DEPLOYMENT_NAME", "gpt-4o-mini-prod") + monkeypatch.delenv("AZURE_AI_MODEL_DEPLOYMENT_NAME", raising=False) + + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + with bind_request_context(response_id="resp_env_1", previous_response_id=None): + await prov.save_messages( + "session-x", + [Message(role="assistant", contents=[Content.from_text("hi")])], + ) + + body = backend.create_response.await_args.args[0].as_dict() + assert body["agent_reference"] == { + "type": "agent_reference", + "name": "concierge", + "version": "v3", + } + assert body["agent_session_id"] == "caresp_envsessionABCDEF" + assert body["model"] == "gpt-4o-mini-prod" + + async def test_envelope_falls_back_to_local_dev_model_var( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Local dev sets ``AZURE_AI_MODEL_DEPLOYMENT_NAME`` rather than + the platform-only ``MODEL_DEPLOYMENT_NAME``; the latter wins + when both are present, the former fills in when only it is.""" + from agent_framework_foundry_hosting import bind_request_context + + 
monkeypatch.delenv("MODEL_DEPLOYMENT_NAME", raising=False) + monkeypatch.setenv("AZURE_AI_MODEL_DEPLOYMENT_NAME", "gpt-4o-mini-dev") + for var in ("FOUNDRY_AGENT_NAME", "FOUNDRY_AGENT_VERSION", "FOUNDRY_AGENT_SESSION_ID"): + monkeypatch.delenv(var, raising=False) + + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + with bind_request_context(response_id="resp_devmodel_1", previous_response_id=None): + await prov.save_messages( + "session-x", + [Message(role="assistant", contents=[Content.from_text("hi")])], + ) + + body = backend.create_response.await_args.args[0].as_dict() + assert body["model"] == "gpt-4o-mini-dev" + + +# region FOUNDRY_AGENT_SESSION_ID chain anchor + + +class TestFoundryAgentSessionIdAnchor: + """The Foundry runtime stamps the previous turn's response id into + ``FOUNDRY_AGENT_SESSION_ID`` for the next turn's container so each + new container can chain back without us keeping any cross-request + state. A regression that moves the lookup, mistypes the prefix + check, or stops gating on ``caresp_*``/``resp_*`` would silently + make hosted multi-turn conversations forget every prior turn.""" + + async def test_get_messages_uses_env_anchor_when_unbound( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """No host binding, ``session_id`` is opaque (not ``caresp_*``): + ``get_messages`` must fall back to ``FOUNDRY_AGENT_SESSION_ID`` + and walk from there.""" + for var in ("MODEL_DEPLOYMENT_NAME", "AZURE_AI_MODEL_DEPLOYMENT_NAME"): + monkeypatch.delenv(var, raising=False) + monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "caresp_envanchor1") + + backend = _make_fake_backend( + history_ids=["msg_envanchor_1"], + items=[_make_text_item("msg_envanchor_1", "from-env-anchor")], + ) + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + + # Opaque session_id — no host binding either. Without the env + # fallback this would return [] without making any backend call. 
+ messages = await prov.get_messages("opaque-session") + + assert [m.text for m in messages] == ["from-env-anchor"] + assert backend.get_history_item_ids.await_args.args[0] == "caresp_envanchor1" + + async def test_get_messages_ignores_non_caresp_env_anchor( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Defence in depth: if the runtime ever stamps a non-``caresp_*`` + value into the env var (or it leaks from another source), we + must NOT pass it to storage — the partition-key extractor + would reject it with HTTP 500.""" + monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "garbage-not-an-id") + + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + messages = await prov.get_messages("opaque-session") + + assert messages == [] + backend.get_history_item_ids.assert_not_called() + + async def test_save_messages_uses_env_anchor_when_unbound( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """When no host binding supplies a previous_response_id, the + env anchor must be used so the new write chains correctly.""" + for var in ( + "FOUNDRY_AGENT_NAME", + "FOUNDRY_AGENT_VERSION", + "MODEL_DEPLOYMENT_NAME", + "AZURE_AI_MODEL_DEPLOYMENT_NAME", + ): + monkeypatch.delenv(var, raising=False) + monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "caresp_envchain1") + + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + # Opaque session_id, no host binding → without the env anchor + # the prior chain wouldn't be walked. + await prov.save_messages( + "opaque-session", + [Message(role="assistant", contents=[Content.from_text("hi")])], + ) + + # Provider walked the prior chain via the env anchor. 
+ assert backend.get_history_item_ids.await_args.args[0] == "caresp_envchain1" + + async def test_save_messages_env_anchor_skipped_when_host_bound( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """A host-bound previous_response_id wins over the env anchor; + the binding is the authoritative chain seed for the request.""" + from agent_framework_foundry_hosting import bind_request_context + + for var in ( + "FOUNDRY_AGENT_NAME", + "FOUNDRY_AGENT_VERSION", + "MODEL_DEPLOYMENT_NAME", + "AZURE_AI_MODEL_DEPLOYMENT_NAME", + ): + monkeypatch.delenv(var, raising=False) + monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "caresp_envignored") + + backend = _make_fake_backend() + prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) + with bind_request_context(response_id="resp_bound_2", previous_response_id="caresp_boundprev"): + await prov.save_messages( + "session-x", + [Message(role="assistant", contents=[Content.from_text("hi")])], + ) + + # Host binding wins; the env anchor is ignored. + assert backend.get_history_item_ids.await_args.args[0] == "caresp_boundprev" + + # region Shared module re-exports From 93dc1454913d51534da0593d3807c2a4d93d3a7f Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Thu, 7 May 2026 16:06:40 +0200 Subject: [PATCH 4/4] fix(foundry_hosting): address PR-1 round-2 review comments - Hosted detection now delegates to AgentConfig.from_env().is_hosted so a future Foundry SDK rename of FOUNDRY_HOSTING_ENVIRONMENT propagates automatically; drop the local _ENV_FOUNDRY_HOSTING_ENVIRONMENT constant. - Drop the FOUNDRY_AGENT_SESSION_ID fallback in both get_messages and save_messages: per the SDK it identifies the *container instance*, not the conversation, so chaining off it would silently merge unrelated conversations across container restarts. 
The host-bound previous_response_id (set by ResponsesChannel) is the only authoritative anchor; the env value is still stamped into the persisted envelope's agent_session_id for operator correlation. - Update module docstring + replace TestFoundryAgentSessionIdAnchor with assertions for the new contract (env var ignored as anchor, still stamped onto persisted envelope, host binding wins). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../_history_provider.py | 61 ++++++++++------- .../tests/test_history_provider.py | 66 ++++++++----------- 2 files changed, 63 insertions(+), 64 deletions(-) diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py index e6c6b09876..06a9edcd7d 100644 --- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py +++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py @@ -27,16 +27,24 @@ Environment variables read: * ``FOUNDRY_HOSTING_ENVIRONMENT`` — non-empty marks "running inside Foundry" - and selects the SDK-backed storage transport. + and selects the SDK-backed storage transport. Detection is delegated to + :class:`azure.ai.agentserver.core.AgentConfig` so a future SDK rename + propagates without touching this module. * ``FOUNDRY_PROJECT_ENDPOINT`` — base URL of the Foundry project; required when running hosted unless an explicit ``endpoint=`` is supplied. * ``FOUNDRY_AGENT_NAME`` / ``FOUNDRY_AGENT_VERSION`` — stamped onto the ``agent_reference`` field of every persisted response envelope. -* ``FOUNDRY_AGENT_SESSION_ID`` — used as a chain anchor when the channel - did not bind a per-request ``previous_response_id``. * ``MODEL_DEPLOYMENT_NAME`` / ``AZURE_AI_MODEL_DEPLOYMENT_NAME`` — model field stamped on the persisted envelope (must match a real deployment). 
+Note on ``FOUNDRY_AGENT_SESSION_ID``: this env var identifies the +*container instance*, not the conversation, so it is **not** consulted as +a fallback ``previous_response_id``. The host-bound +``previous_response_id`` (set by :class:`ResponsesChannel` from the +request envelope) is the authoritative anchor. The value is still +persisted into the ``agent_session_id`` envelope field for operator +correlation only. + Local fallback: when ``FOUNDRY_HOSTING_ENVIRONMENT`` is unset, the provider transparently falls back to :class:`InMemoryResponseProvider` so the same agent code runs in dev. Pass ``local_storage_root`` to use a persistent @@ -58,6 +66,7 @@ from typing import TYPE_CHECKING, Any, ClassVar from agent_framework import FileHistoryProvider, HistoryProvider, Message +from azure.ai.agentserver.core import AgentConfig from azure.ai.agentserver.responses import ( FoundryStorageProvider, FoundryStorageSettings, @@ -84,10 +93,10 @@ logger = logging.getLogger(__name__) -# Environment variable names — re-declared (not imported) so this module +# Environment variable name — re-declared (not imported) so this module # stays decoupled from the private ``azure.ai.agentserver.core._config`` -# constants while still matching them exactly. -_ENV_FOUNDRY_HOSTING_ENVIRONMENT = "FOUNDRY_HOSTING_ENVIRONMENT" +# constants while still matching exactly. Hosted-vs-local detection is +# delegated to :class:`AgentConfig` so a future SDK rename propagates. _ENV_FOUNDRY_PROJECT_ENDPOINT = "FOUNDRY_PROJECT_ENDPOINT" # Per-request isolation context. The owning Channel is expected to set this @@ -429,11 +438,13 @@ def __init__( def is_hosted_environment() -> bool: """Return ``True`` when running inside a Foundry Hosted Agent container. - Detection uses the ``FOUNDRY_HOSTING_ENVIRONMENT`` environment - variable, the same signal :class:`ResponsesAgentServerHost` uses to - switch between hosted and local storage backends. 
+ Delegates to :meth:`azure.ai.agentserver.core.AgentConfig.from_env` + so the detection rule stays in lockstep with the Foundry SDK; if + the platform ever renames the underlying signal (today + ``FOUNDRY_HOSTING_ENVIRONMENT``) the SDK update is picked up + automatically without a code change here. """ - return bool(os.environ.get(_ENV_FOUNDRY_HOSTING_ENVIRONMENT)) + return AgentConfig.from_env().is_hosted def _resolve_backend(self) -> FoundryStorageProvider | InMemoryResponseProvider: """Return the storage backend, constructing it lazily on first use. @@ -614,16 +625,15 @@ async def get_messages( anchor = bound.previous_response_id if bound is not None else None if anchor is None and session_id and session_id.startswith(("caresp_", "resp_")): anchor = session_id - if anchor is None: - # The Foundry Hosted Agent runtime stamps the previous turn's - # response id into ``FOUNDRY_AGENT_SESSION_ID`` for the - # following turn's container, so we can walk back from it - # directly without keeping any cross-request state ourselves. - env_session = os.environ.get("FOUNDRY_AGENT_SESSION_ID") or None - if env_session and env_session.startswith(("caresp_", "resp_")): - anchor = env_session if anchor is None: # No walkable anchor → fresh conversation, nothing to load. + # Note: we intentionally do NOT fall back to + # ``FOUNDRY_AGENT_SESSION_ID`` — per the Foundry SDK that env + # var identifies the *container instance*, not the + # conversation, so it doesn't yield a walkable response-id + # chain. The host-bound ``previous_response_id`` (set by + # ``ResponsesChannel`` from the request envelope) is the + # authoritative anchor. 
return [] backend = self._resolve_backend() @@ -728,13 +738,14 @@ async def save_messages( response_id = IdGenerator.new_response_id() previous_response_id = session_id if session_id.startswith(("caresp_", "resp_")) else None - # Foundry session-bound containers: when ``FOUNDRY_AGENT_SESSION_ID`` - # is set the runtime stamps it to the previous turn's response id - # so each new container can chain back to it directly. We don't - # need to maintain any cross-request map ourselves. - env_session = os.environ.get("FOUNDRY_AGENT_SESSION_ID") or None - if previous_response_id is None and env_session and env_session.startswith(("caresp_", "resp_")): - previous_response_id = env_session + # Note: we intentionally do NOT consult ``FOUNDRY_AGENT_SESSION_ID`` + # as a fallback ``previous_response_id`` here. Per the Foundry SDK + # that env var identifies the *container instance*, not the + # conversation, so chaining off it produces an unwalkable history. + # The host-bound ``previous_response_id`` (set by + # ``ResponsesChannel`` from the request envelope) is the only + # authoritative anchor; if it's missing the new turn is the start + # of a fresh chain. logger.debug( "save_messages: response_id=%r previous_response_id=%r isolation=%s", diff --git a/python/packages/foundry_hosting/tests/test_history_provider.py b/python/packages/foundry_hosting/tests/test_history_provider.py index a1d372c486..cfdbeccacb 100644 --- a/python/packages/foundry_hosting/tests/test_history_provider.py +++ b/python/packages/foundry_hosting/tests/test_history_provider.py @@ -813,20 +813,21 @@ async def test_envelope_falls_back_to_local_dev_model_var( class TestFoundryAgentSessionIdAnchor: - """The Foundry runtime stamps the previous turn's response id into - ``FOUNDRY_AGENT_SESSION_ID`` for the next turn's container so each - new container can chain back without us keeping any cross-request - state. 
A regression that moves the lookup, mistypes the prefix - check, or stops gating on ``caresp_*``/``resp_*`` would silently - make hosted multi-turn conversations forget every prior turn.""" - - async def test_get_messages_uses_env_anchor_when_unbound( + """``FOUNDRY_AGENT_SESSION_ID`` identifies the *container instance*, + not the conversation (per the Foundry SDK), so it MUST NOT be used + as a fallback ``previous_response_id`` for chain walking. The host- + bound ``previous_response_id`` (set by ``ResponsesChannel`` from the + request envelope) is the only authoritative anchor; any code that + re-introduces an env-based fallback would silently merge unrelated + conversations across container restarts.""" + + async def test_get_messages_ignores_env_session_anchor_when_unbound( self, monkeypatch: pytest.MonkeyPatch, ) -> None: - """No host binding, ``session_id`` is opaque (not ``caresp_*``): - ``get_messages`` must fall back to ``FOUNDRY_AGENT_SESSION_ID`` - and walk from there.""" + """No host binding, opaque ``session_id`` and a populated + ``FOUNDRY_AGENT_SESSION_ID``: ``get_messages`` must return ``[]`` + and never call the backend (no walkable conversation anchor).""" for var in ("MODEL_DEPLOYMENT_NAME", "AZURE_AI_MODEL_DEPLOYMENT_NAME"): monkeypatch.delenv(var, raising=False) monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "caresp_envanchor1") @@ -837,36 +838,18 @@ async def test_get_messages_uses_env_anchor_when_unbound( ) prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) - # Opaque session_id — no host binding either. Without the env - # fallback this would return [] without making any backend call. 
- messages = await prov.get_messages("opaque-session") - - assert [m.text for m in messages] == ["from-env-anchor"] - assert backend.get_history_item_ids.await_args.args[0] == "caresp_envanchor1" - - async def test_get_messages_ignores_non_caresp_env_anchor( - self, - monkeypatch: pytest.MonkeyPatch, - ) -> None: - """Defence in depth: if the runtime ever stamps a non-``caresp_*`` - value into the env var (or it leaks from another source), we - must NOT pass it to storage — the partition-key extractor - would reject it with HTTP 500.""" - monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "garbage-not-an-id") - - backend = _make_fake_backend() - prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) messages = await prov.get_messages("opaque-session") assert messages == [] backend.get_history_item_ids.assert_not_called() - async def test_save_messages_uses_env_anchor_when_unbound( + async def test_save_messages_ignores_env_session_anchor_when_unbound( self, monkeypatch: pytest.MonkeyPatch, ) -> None: - """When no host binding supplies a previous_response_id, the - env anchor must be used so the new write chains correctly.""" + """When no host binding supplies a ``previous_response_id`` and + ``session_id`` is opaque, the env var must NOT be consulted as a + fallback; the new turn writes without a prior chain seed.""" for var in ( "FOUNDRY_AGENT_NAME", "FOUNDRY_AGENT_VERSION", @@ -878,21 +861,26 @@ async def test_save_messages_uses_env_anchor_when_unbound( backend = _make_fake_backend() prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend) - # Opaque session_id, no host binding → without the env anchor - # the prior chain wouldn't be walked. + # Opaque session_id, no host binding → save proceeds without + # walking any chain (no get_history_item_ids call). await prov.save_messages( "opaque-session", [Message(role="assistant", contents=[Content.from_text("hi")])], ) - # Provider walked the prior chain via the env anchor. 
- assert backend.get_history_item_ids.await_args.args[0] == "caresp_envchain1" + backend.get_history_item_ids.assert_not_called() + # The persisted envelope still stamps the env value into + # ``agent_session_id`` for operator correlation (see the + # docstring on the module): only the chain anchor is gated. + backend.create_response.assert_awaited_once() + wire_payload = backend.create_response.await_args.args[0].as_dict() + assert wire_payload["agent_session_id"] == "caresp_envchain1" async def test_save_messages_env_anchor_skipped_when_host_bound( self, monkeypatch: pytest.MonkeyPatch, ) -> None: - """A host-bound previous_response_id wins over the env anchor; + """A host-bound ``previous_response_id`` wins over any env value; the binding is the authoritative chain seed for the request.""" from agent_framework_foundry_hosting import bind_request_context @@ -913,7 +901,7 @@ async def test_save_messages_env_anchor_skipped_when_host_bound( [Message(role="assistant", contents=[Content.from_text("hi")])], ) - # Host binding wins; the env anchor is ignored. + # Host binding wins; the env anchor is ignored for chaining. assert backend.get_history_item_ids.await_args.args[0] == "caresp_boundprev"