From 10043b8a63217495cbe0f0f72dcff6f381058bb5 Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Mon, 4 May 2026 17:21:03 +0200
Subject: [PATCH 1/4] refactor(foundry_hosting): build
 FoundryHostedAgentHistoryProvider on azure.ai.agentserver SDK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rebuilds the Foundry hosted-agent history provider on top of
``azure.ai.agentserver``'s ``FoundryStorageProvider`` instead of the
in-house ``_HttpStorageBackend``. Splits the monolithic ``_responses.py``
into focused modules:

- ``_history_provider.py`` — new ``FoundryHostedAgentHistoryProvider``
  that talks to the SDK's ``FoundryStorageProvider``, threads
  ``response_id`` / ``previous_response_id`` through ``ContextVar``s via
  ``bind_request_context``, and lifts host-bound isolation keys
  (``x-agent-{user,chat}-isolation-key``) from the optional
  ``agent_framework_hosting`` package into a provider-local
  ``IsolationContext`` so the storage layer carries the correct
  partition keys without channels having to know about them.
- ``_shared.py`` — extracts all SDK ``Item`` / ``OutputItem`` ↔
  framework ``Message`` conversion helpers into one place so both
  ``_responses.py`` and the new history provider can share them.
  Restores ``_convert_file_data`` for inline ``input_file`` payloads,
  and the hosted-MCP routing for ``custom_tool_call_output`` items
  whose ``call_id`` carries the ``mcp_*`` prefix.
- ``_ids.py`` — shared id helpers.
- ``_responses.py`` — shrinks by ~700 lines and re-exports converters
  for back-compat with existing tests.
- ``tests/test_history_provider.py`` — exercises the new provider
  against a fake SDK backend; the host-isolation test is gated on the
  optional ``agent_framework_hosting`` import.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../__init__.py                               |   21 +-
 .../_history_provider.py                      |  758 ++++++++++
 .../agent_framework_foundry_hosting/_ids.py   |   72 +
 .../_responses.py                             |  868 +----------
 .../_shared.py                                | 1287 +++++++++++++++++
 .../tests/test_history_provider.py            |  969 +++++++++++++
 6 files changed, 3172 insertions(+), 803 deletions(-)
 create mode 100644 python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
 create mode 100644 python/packages/foundry_hosting/agent_framework_foundry_hosting/_ids.py
 create mode 100644 python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py
 create mode 100644 python/packages/foundry_hosting/tests/test_history_provider.py

diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/__init__.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/__init__.py
index 81e8430783..691353a0e1 100644
--- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/__init__.py
+++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/__init__.py
@@ -2,6 +2,16 @@
 
 import importlib.metadata
 
+from ._history_provider import (
+    FoundryHostedAgentHistoryProvider,
+    bind_request_context,
+    get_current_request_context,
+)
+from ._ids import (
+    foundry_item_id,
+    foundry_response_id,
+    foundry_response_id_factory,
+)
 from ._invocations import InvocationsHostServer
 from ._responses import ResponsesHostServer
 
@@ -10,4 +20,13 @@
 except importlib.metadata.PackageNotFoundError:
     __version__ = "0.0.0"
 
-__all__ = ["InvocationsHostServer", "ResponsesHostServer"]
+__all__ = [
+    "FoundryHostedAgentHistoryProvider",
+    "InvocationsHostServer",
+    "ResponsesHostServer",
+    "bind_request_context",
+    "foundry_item_id",
+    "foundry_response_id",
+    "foundry_response_id_factory",
+    "get_current_request_context",
+]
diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
new file mode 100644
index 0000000000..a558979631
--- /dev/null
+++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
@@ -0,0 +1,758 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+"""Foundry Hosted Agent history provider.
+
+A standalone :class:`agent_framework.HistoryProvider` implementation that
+sources conversation history from the Foundry Hosted Agent storage backend.
+
+Transport is delegated to the SDK's
+:class:`azure.ai.agentserver.responses.FoundryStorageProvider` (when running
+inside a Foundry Hosted Agent container) or
+:class:`azure.ai.agentserver.responses.InMemoryResponseProvider` (for local
+development). Both implement the same read/write surface
+(``get_history_item_ids`` / ``get_items`` / ``create_response``), so this
+provider's persistence logic stays backend-agnostic.
+
+Allowed dependencies (deliberately narrow):
+
+* :mod:`agent_framework` (core, for ``HistoryProvider`` / ``Message``)
+* :mod:`azure.ai.agentserver.responses` (for the storage backends,
+  ``IsolationContext`` typing, and ``OutputItem`` deserialization)
+* :mod:`azure.core.credentials_async` (typing of token credentials)
+
+It MUST NOT depend on any ``agent_framework_hosting*`` package at module
+import time. (The host's isolation contextvar is consulted lazily via an
+``import`` inside :func:`_host_isolation` so the dependency stays soft.)
+
+Environment variables read:
+
+* ``FOUNDRY_HOSTING_ENVIRONMENT`` — non-empty marks "running inside Foundry"
+  and selects the SDK-backed storage transport.
+* ``FOUNDRY_PROJECT_ENDPOINT`` — base URL of the Foundry project; required
+  when running hosted unless an explicit ``endpoint=`` is supplied.
+* ``FOUNDRY_AGENT_NAME`` / ``FOUNDRY_AGENT_VERSION`` — stamped onto the
+  ``agent_reference`` field of every persisted response envelope.
+* ``FOUNDRY_AGENT_SESSION_ID`` — used as a chain anchor when the channel
+  did not bind a per-request ``previous_response_id``.
+* ``MODEL_DEPLOYMENT_NAME`` / ``AZURE_AI_MODEL_DEPLOYMENT_NAME`` — model
+  field stamped on the persisted envelope (must match a real deployment).
+
+Local fallback: when ``FOUNDRY_HOSTING_ENVIRONMENT`` is unset, the provider
+transparently falls back to :class:`InMemoryResponseProvider` so the same
+agent code runs in dev.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import logging
+import os
+import time
+from contextlib import contextmanager
+from contextvars import ContextVar
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any, ClassVar
+
+from agent_framework import HistoryProvider, Message
+from azure.ai.agentserver.responses import (
+    FoundryStorageProvider,
+    FoundryStorageSettings,
+    InMemoryResponseProvider,
+    IsolationContext,
+)
+from azure.ai.agentserver.responses._id_generator import IdGenerator
+from azure.ai.agentserver.responses.models import OutputItem, ResponseObject
+from azure.ai.agentserver.responses.store._foundry_errors import (  # pyright: ignore[reportPrivateUsage]
+    FoundryBadRequestError,
+    FoundryResourceNotFoundError,
+)
+
+from ._shared import (
+    _messages_to_output_items,  # pyright: ignore[reportPrivateUsage]
+    _output_items_to_messages,  # pyright: ignore[reportPrivateUsage]
+)
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator, Sequence
+
+    from azure.core.credentials_async import AsyncTokenCredential
+
+logger = logging.getLogger(__name__)
+
+# Environment variable names — re-declared (not imported) so this module
+# stays decoupled from the private ``azure.ai.agentserver.core._config``
+# constants while still matching them exactly.
+_ENV_FOUNDRY_HOSTING_ENVIRONMENT = "FOUNDRY_HOSTING_ENVIRONMENT"
+_ENV_FOUNDRY_PROJECT_ENDPOINT = "FOUNDRY_PROJECT_ENDPOINT"
+
+# Per-request isolation context.  The owning Channel is expected to set this
+# from the inbound request (e.g. user / tenant headers) for the duration of
+# an ``agent.run(...)`` call.  When unset, requests are made without
+# isolation headers (matches how ``ResponseContext`` behaves with no
+# ``IsolationContext``).
+_isolation_var: ContextVar[IsolationContext | None] = ContextVar(
+    "agent_framework_foundry_hosting_isolation",
+    default=None,
+)
+
+
+def set_current_isolation(isolation: IsolationContext | None) -> Any:
+    """Set the per-request isolation context for downstream history calls.
+
+    Channels that drive an agent backed by :class:`FoundryHostedAgentHistoryProvider`
+    should call this before invoking ``agent.run(...)`` and reset the token
+    afterwards.
+
+    Args:
+        isolation: The isolation context to associate with the current
+            ``contextvars`` context, or ``None`` to clear it.
+
+    Returns:
+        A token suitable for :func:`reset_current_isolation` that restores
+        the previous value.
+    """
+    return _isolation_var.set(isolation)
+
+
+def reset_current_isolation(token: Any) -> None:
+    """Restore a previously-saved isolation context.
+
+    Args:
+        token: A token returned by :func:`set_current_isolation`.
+    """
+    _isolation_var.reset(token)
+
+
+def get_current_isolation() -> IsolationContext | None:
+    """Return the isolation context bound to the current async context, if any.
+
+    Returns:
+        The :class:`IsolationContext` for the current request, or ``None``
+        when no channel has set one.
+    """
+    return _isolation_var.get()
+
+
+@dataclass(frozen=True)
+class _RequestContext:
+    """Per-request anchors the host binds before invoking the agent.
+
+    ``response_id`` is the id this provider's :meth:`save_messages` call
+    will write under, so the channel and the storage backend agree on
+    one stable handle per turn (the channel surfaces the same id on the
+    response envelope, the next turn arrives with this value as
+    ``previous_response_id`` and the chain walks).
+
+    ``previous_response_id`` is the prior turn's anchor (``None`` on
+    first turn). Used to seed ``history_item_ids`` on the new write so
+    the storage chain stays connected, and to load history without
+    needing to know the channel's session minting convention.
+
+    Per-request Foundry isolation keys (the
+    ``x-agent-{user,chat}-isolation-key`` headers) are *not* carried
+    here; the host's own ASGI middleware lifts them off every inbound
+    HTTP request into a contextvar
+    (:func:`agent_framework_hosting.get_current_isolation_keys`) which
+    this provider consults at storage-call time. Keeping the headers
+    out of the per-request bind means channels never have to import
+    Foundry-specific types and the host owns the (intentional) coupling
+    to those two well-known headers.
+    """
+
+    response_id: str
+    previous_response_id: str | None
+
+
+_request_var: ContextVar[_RequestContext | None] = ContextVar(
+    "agent_framework_foundry_hosting_request",
+    default=None,
+)
+
+
+@contextmanager
+def bind_request_context(
+    *,
+    response_id: str,
+    previous_response_id: str | None = None,
+    **_unused: Any,
+) -> "Iterator[None]":
+    """Bind the per-request response-chain anchors for this provider.
+
+    Intended for the host (or any caller orchestrating an
+    ``agent.run(...)``) to call immediately before invocation, so the
+    provider's :meth:`save_messages` writes under a known, stable
+    ``response_id`` (the same one the channel surfaces to the client)
+    and walks ``previous_response_id`` for history continuity. Unknown
+    keyword arguments are accepted and ignored so the host can extend
+    the ``ChannelRequest.attributes`` contract without breaking existing
+    providers. Foundry isolation keys flow through a separate
+    host-installed contextvar; see the class docstring on
+    :class:`_RequestContext`.
+
+    The binding is scoped to the current ``contextvars.Context``, so
+    concurrent requests in the same process do not interfere.
+    """
+    token = _request_var.set(
+        _RequestContext(
+            response_id=response_id,
+            previous_response_id=previous_response_id,
+        )
+    )
+    try:
+        yield
+    finally:
+        _request_var.reset(token)
+
+
+def get_current_request_context() -> _RequestContext | None:
+    """Return the per-request response chain anchors, if bound."""
+    return _request_var.get()
+
+
+def _host_isolation() -> "IsolationContext | None":
+    """Lift the host-bound isolation contextvar into our local type.
+
+    The host installs an ASGI middleware that reads
+    ``x-agent-{user,chat}-isolation-key`` off every inbound HTTP request
+    and stores them in a generic ``IsolationKeys`` slot on a contextvar
+    we import from :mod:`agent_framework_hosting`. We translate it into
+    our :class:`IsolationContext` shape on demand so the host stays free
+    of Foundry-specific dependencies. Returns ``None`` when that package
+    is not installed or no (non-empty) keys are bound.
+    """
+    # Soft dep: ``agent_framework_hosting`` may not be installed (this
+    # provider is also usable standalone). Each line touching the
+    # optional import carries its own ``# pyright: ignore`` so type
+    # checking still passes when the package isn't on sys.path; when it
+    # is, pyright picks up the real types automatically.
+    try:
+        from agent_framework_hosting import (  # pyright: ignore[reportMissingImports]
+            get_current_isolation_keys,  # pyright: ignore[reportUnknownVariableType]
+        )
+    except ImportError:  # pragma: no cover - hosting is a soft dep
+        return None
+    keys = get_current_isolation_keys()  # pyright: ignore[reportUnknownVariableType]
+    if keys is None or keys.is_empty:  # pyright: ignore[reportUnknownMemberType]
+        return None
+    return IsolationContext(
+        user_key=keys.user_key,  # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType]
+        chat_key=keys.chat_key,  # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType]
+    )
+
+
+# Type alias for the storage backend surface this provider depends on.
+# Both ``FoundryStorageProvider`` and ``InMemoryResponseProvider`` from
+# ``azure.ai.agentserver.responses`` expose the same
+# ``get_history_item_ids`` / ``get_items`` / ``create_response`` methods.
+_StorageBackend = "FoundryStorageProvider | InMemoryResponseProvider"
+
+
+class FoundryHostedAgentHistoryProvider(HistoryProvider):
+    """``HistoryProvider`` backed by Foundry Hosted Agent storage.
+
+    Wraps :class:`azure.ai.agentserver.responses.FoundryStorageProvider`
+    when running inside a Foundry Hosted Agent container, or
+    :class:`InMemoryResponseProvider` for local development. The
+    selection is driven by the ``FOUNDRY_HOSTING_ENVIRONMENT``
+    environment variable.
+
+    ``session_id`` semantics: the value passed to :meth:`get_messages`
+    and :meth:`save_messages` is treated as the Responses
+    ``previous_response_id`` (or ``conversation_id``) whose chain to
+    load. When omitted (and no host-bound chain anchor is set),
+    :meth:`get_messages` returns an empty list (a fresh conversation).
+    """
+
+    DEFAULT_SOURCE_ID: ClassVar[str] = "foundry_hosted_agent"
+
+    def __init__(
+        self,
+        *,
+        credential: "AsyncTokenCredential | None" = None,
+        endpoint: str | None = None,
+        history_limit: int = 100,
+        source_id: str = DEFAULT_SOURCE_ID,
+        load_messages: bool = True,
+        store_inputs: bool = True,
+        store_context_messages: bool = False,
+        store_context_from: set[str] | None = None,
+        store_outputs: bool = True,
+    ) -> None:
+        """Initialize the provider.
+
+        Args:
+            credential: Async token credential used to authenticate against
+                the Foundry storage API. Required when running hosted
+                (``FOUNDRY_HOSTING_ENVIRONMENT`` is set). Ignored in
+                local-mode (the in-memory backend needs no auth).
+            endpoint: Foundry project endpoint URL. Defaults to the value
+                of the ``FOUNDRY_PROJECT_ENDPOINT`` environment variable.
+                Required when running hosted.
+            history_limit: Maximum number of history items to fetch per
+                ``get_messages`` call. Mirrors the agent-server runtime's
+                ``ResponseContext._history_limit``. Default ``100``.
+            source_id: Unique identifier for this provider instance, as
+                required by ``HistoryProvider``.
+            load_messages: Whether to load messages before invocation.
+                Default ``True``.
+            store_inputs: Whether to mirror input messages into Foundry
+                storage. Default ``True`` — the Foundry Hosted Agents
+                runtime does not persist Responses turns automatically, so
+                without this the chain would never be visible to subsequent
+                requests. Set ``False`` only if you know an external writer
+                is populating storage on your behalf.
+            store_context_messages: Whether to mirror context-provider
+                messages. Default ``False``.
+            store_context_from: If set, only mirror context messages from
+                these source IDs.
+            store_outputs: Whether to mirror response messages into Foundry
+                storage. Default ``True`` for the same reason as
+                ``store_inputs``.
+        """
+        super().__init__(
+            source_id=source_id,
+            load_messages=load_messages,
+            store_inputs=store_inputs,
+            store_context_messages=store_context_messages,
+            store_context_from=store_context_from,
+            store_outputs=store_outputs,
+        )
+
+        self._history_limit = history_limit
+        self._credential = credential
+        self._endpoint = endpoint or os.environ.get(_ENV_FOUNDRY_PROJECT_ENDPOINT) or None
+        self._backend: FoundryStorageProvider | InMemoryResponseProvider | None = None
+
+    @staticmethod
+    def is_hosted_environment() -> bool:
+        """Return ``True`` when running inside a Foundry Hosted Agent container.
+
+        Detection uses the ``FOUNDRY_HOSTING_ENVIRONMENT`` environment
+        variable, the same signal :class:`ResponsesAgentServerHost` uses to
+        switch between hosted and local storage backends.
+        """
+        return bool(os.environ.get(_ENV_FOUNDRY_HOSTING_ENVIRONMENT))
+
+    def _resolve_backend(self) -> "FoundryStorageProvider | InMemoryResponseProvider":
+        """Return the storage backend, constructing it lazily on first use.
+
+        * If ``FOUNDRY_HOSTING_ENVIRONMENT`` is set, build a
+          :class:`FoundryStorageProvider` (requires ``credential`` and a
+          resolved ``endpoint``).
+        * Otherwise, fall back to a process-local
+          :class:`InMemoryResponseProvider` so dev/local runs work without
+          additional configuration.
+        """
+        if self._backend is not None:
+            return self._backend
+
+        if self.is_hosted_environment():
+            if self._credential is None:
+                raise RuntimeError(
+                    "FoundryHostedAgentHistoryProvider requires an async credential when running "
+                    "inside a Foundry Hosted Agent container. Pass credential=... ."
+                )
+            if not self._endpoint:
+                raise RuntimeError(
+                    "FoundryHostedAgentHistoryProvider needs a Foundry project endpoint. Pass "
+                    "endpoint=... or set the FOUNDRY_PROJECT_ENDPOINT environment variable."
+                )
+            self._backend = FoundryStorageProvider(
+                credential=self._credential,
+                settings=FoundryStorageSettings.from_endpoint(self._endpoint),
+            )
+            logger.debug(
+                "FoundryHostedAgentHistoryProvider using FoundryStorageProvider against %s",
+                self._endpoint,
+            )
+            return self._backend
+
+        logger.info(
+            "FOUNDRY_HOSTING_ENVIRONMENT is unset — FoundryHostedAgentHistoryProvider falling "
+            "back to InMemoryResponseProvider for local development.",
+        )
+        self._backend = InMemoryResponseProvider()
+        return self._backend
+
+    async def aclose(self) -> None:
+        """Release storage resources held by this provider.
+
+        Safe to call multiple times. Closes the lazily-constructed
+        backend if one was created. ``InMemoryResponseProvider`` has no
+        ``aclose`` and is closed implicitly on garbage collection.
+        """
+        if self._backend is None:
+            return
+        aclose = getattr(self._backend, "aclose", None)
+        if aclose is not None:
+            await aclose()
+        self._backend = None
+
+    async def get_messages(
+        self,
+        session_id: str | None,
+        *,
+        state: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> list[Message]:
+        """Load conversation history for the given Foundry response chain.
+
+        Args:
+            session_id: Fallback anchor, used only when no bound
+                ``previous_response_id`` exists and the value starts with
+                ``caresp_`` / ``resp_``; any other value is ignored.
+            state: Unused — kept for ``HistoryProvider`` compatibility.
+            **kwargs: Extensibility hook; ``isolation`` may be supplied
+                explicitly to override the contextvar.
+
+        Returns:
+            History as :class:`agent_framework.Message` objects, oldest-first;
+            ``[]`` if no anchor resolves, storage rejects it, or it is empty.
+
+        Notes:
+            History anchoring follows the Foundry response-id chain. The
+            preferred anchor is the per-request ``previous_response_id``
+            bound by the host via :func:`bind_request_context` — that's
+            the prior turn's resp id, written by *this* provider's
+            previous :meth:`save_messages` call, so the chain is
+            guaranteed walkable. When none is bound, we fall back to
+            ``session_id`` and then ``FOUNDRY_AGENT_SESSION_ID``, each
+            only if ``caresp_*`` / ``resp_*``-shaped; opaque tokens
+            (such as chat-isolation-key values) are skipped because the
+            storage backend rejects them with HTTP 400 "Malformed
+            identifier".
+        """
+        bound = get_current_request_context()
+        # Prefer the host-bound previous_response_id over the session_id
+        # the framework feeds in: the bound value is the id we ourselves
+        # wrote on the previous turn, so we know it's storage-valid.
+        anchor = bound.previous_response_id if bound is not None else None
+        if anchor is None and session_id and session_id.startswith(("caresp_", "resp_")):
+            anchor = session_id
+        if anchor is None:
+            # The Foundry Hosted Agent runtime stamps the previous turn's
+            # response id into ``FOUNDRY_AGENT_SESSION_ID`` for the
+            # following turn's container, so we can walk back from it
+            # directly without keeping any cross-request state ourselves.
+            env_session = os.environ.get("FOUNDRY_AGENT_SESSION_ID") or None
+            if env_session and env_session.startswith(("caresp_", "resp_")):
+                anchor = env_session
+        if anchor is None:
+            # No walkable anchor → fresh conversation, nothing to load.
+            return []
+
+        isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation()
+        backend = self._resolve_backend()
+
+        try:
+            item_ids = await backend.get_history_item_ids(
+                anchor,
+                None,
+                self._history_limit,
+                isolation=isolation,
+            )
+        except (FoundryBadRequestError, FoundryResourceNotFoundError) as err:
+            # 400 / 404 here means the anchor isn't storage-valid — treat
+            # it as an empty history rather than failing the whole request.
+            logger.debug(
+                "get_messages: anchor %r rejected by storage (%s); returning empty history",
+                anchor,
+                type(err).__name__,
+            )
+            return []
+        if not item_ids:
+            return []
+
+        items = await backend.get_items(item_ids, isolation=isolation)
+        # ``get_items`` may return ``None`` placeholders for missing IDs.
+        resolved = [item for item in items if item is not None]
+        return _output_items_to_messages(resolved)
+
+    async def save_messages(
+        self,
+        session_id: str | None,
+        messages: "Sequence[Message]",
+        *,
+        state: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Persist messages for ``session_id`` into Foundry storage.
+
+        Unlike the standalone ``azure.ai.agentserver`` runtime — which
+        owns response orchestration end-to-end and writes turns
+        authoritatively — the Agent Framework hosting stack treats
+        ``HistoryProvider`` as the *only* persistence path. Without this
+        method actively writing, a deployed hosted agent would silently
+        drop every turn.
+
+        Strategy:
+
+        * Use the host-bound ``response_id`` as the envelope id (mints
+          a fresh ``caresp_*`` id when unbound, e.g. local dev).
+        * Anchor the new write to the previous turn via
+          ``previous_response_id``, walking the prior turn's history
+          item ids forward so the full transcript stays visible.
+        * Split items by ``type``: ``"message"`` (user/system inputs) into
+          ``input_items``, everything else (assistant outputs, tool
+          calls, reasoning, ...) into ``response.output``.
+
+        Args:
+            session_id: The Responses ``previous_response_id`` /
+                ``conversation_id`` the messages belong to.
+            messages: The messages selected for persistence by the base
+                ``HistoryProvider`` after-run hook.
+            state: Unused — kept for ``HistoryProvider`` compatibility.
+            **kwargs: Extensibility hook; ``isolation`` may be supplied
+                explicitly to override the contextvar.
+        """
+        if not messages:
+            return
+
+        bound = get_current_request_context()
+        # Prefer the host-bound response_id so the channel envelope and
+        # the storage write agree on a single id per turn — which is
+        # what makes the next turn's ``previous_response_id`` walkable.
+        # Without a binding (e.g. local dev calling ``save_messages``
+        # directly), fall back to a fresh Foundry-format response id.
+        # Free-form ``resp_<uuid>`` ids carry no embedded partition key
+        # and the storage backend rejects writes with a server error;
+        # ``IdGenerator.new_response_id()`` mints a ``caresp_*`` id with
+        # the partition-key segment the backend expects. The chain
+        # walks only when ``session_id`` is itself ``caresp_*``- or
+        # ``resp_*``-shaped (i.e. a previous response id); ``caresp_`` is
+        # the prefix the ``ResponsesChannel`` factory uses.
+        if bound is not None:
+            response_id = bound.response_id
+            previous_response_id = bound.previous_response_id
+        else:
+            if not session_id:
+                return
+            response_id = IdGenerator.new_response_id()
+            previous_response_id = session_id if session_id.startswith(("caresp_", "resp_")) else None
+
+        # Foundry session-bound containers: when ``FOUNDRY_AGENT_SESSION_ID``
+        # is set the runtime stamps it to the previous turn's response id
+        # so each new container can chain back to it directly. We don't
+        # need to maintain any cross-request map ourselves.
+        env_session = os.environ.get("FOUNDRY_AGENT_SESSION_ID") or None
+        if previous_response_id is None and env_session and env_session.startswith(("caresp_", "resp_")):
+            previous_response_id = env_session
+
+        isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation()
+        logger.debug(
+            "save_messages: response_id=%r previous_response_id=%r isolation=%s",
+            response_id,
+            previous_response_id,
+            "<set>" if isolation else "<None>",
+        )
+        backend = self._resolve_backend()
+
+        # The agentserver runtime puts INBOUND items (user/system messages
+        # the request sent in) in the envelope's ``input_items`` axis and
+        # OUTBOUND items (assistant outputs, tool calls, reasoning) in
+        # ``response.output``. See
+        # ``_resolve_input_items_for_persistence`` (orchestrator.py:61) +
+        # ``_extract_response_snapshot_from_events`` in
+        # ``azure.ai.agentserver.responses``: ``input_items`` comes from
+        # ``ctx.input_items`` (request inputs only); ``response.output``
+        # is populated from the lifecycle event stream.
+        #
+        # Putting everything in ``input_items`` with ``response.output: []``
+        # is a schema violation that the storage backend rejects with an
+        # opaque HTTP 500. Split by item ``type`` to mirror the runtime.
+        all_items = _messages_to_output_items(list(messages), id_prefix=response_id)
+
+        # Re-stamp every item id via ``IdGenerator`` so each carries a
+        # Foundry-format ``{type-prefix}_<partitionKey><entropy>``
+        # identifier, with the response_id as the partition-key hint
+        # (co-locates each item with the response record). Free-form
+        # ``{response_id}_itm_N`` ids are rejected by the storage
+        # backend with an opaque HTTP 500 because the partition-key
+        # extractor cannot parse them. ``IdGenerator.new_item_id``
+        # dispatches by *Item* (input) type and returns ``None`` for
+        # our *OutputItem* (storage) instances, so we dispatch by the
+        # ``type`` discriminator string instead.
+        ITEM_ID_FACTORY: dict[str, Any] = {
+            "message": IdGenerator.new_message_item_id,
+            "output_message": IdGenerator.new_output_message_item_id,
+            "function_call": IdGenerator.new_function_call_item_id,
+            "function_call_output": IdGenerator.new_function_call_output_item_id,
+            "reasoning": IdGenerator.new_reasoning_item_id,
+            "file_search_call": IdGenerator.new_file_search_call_item_id,
+            "web_search_call": IdGenerator.new_web_search_call_item_id,
+            "image_generation_call": IdGenerator.new_image_gen_call_item_id,
+            "code_interpreter_call": IdGenerator.new_code_interpreter_call_item_id,
+            "computer_call": IdGenerator.new_computer_call_item_id,
+            "computer_call_output": IdGenerator.new_computer_call_output_item_id,
+            "local_shell_call": IdGenerator.new_local_shell_call_item_id,
+            "local_shell_call_output": IdGenerator.new_local_shell_call_output_item_id,
+            "mcp_call": IdGenerator.new_mcp_call_item_id,
+            "mcp_list_tools": IdGenerator.new_mcp_list_tools_item_id,
+            "mcp_approval_request": IdGenerator.new_mcp_approval_request_item_id,
+            "mcp_approval_response": IdGenerator.new_mcp_approval_response_item_id,
+            "custom_tool_call": IdGenerator.new_custom_tool_call_item_id,
+            "custom_tool_call_output": IdGenerator.new_custom_tool_call_output_item_id,
+        }
+        for item in all_items:
+            factory = ITEM_ID_FACTORY.get(getattr(item, "type", "") or "")
+            if factory is None:
+                continue
+            new_id = factory(response_id)
+            with contextlib.suppress(AttributeError, TypeError):
+                item.id = new_id  # type: ignore[attr-defined]
+
+        input_items: list[Any] = []
+        output_items: list[Any] = []
+        for item in all_items:
+            item_type = getattr(item, "type", None)
+            if item_type == "message":
+                input_items.append(item)
+            else:
+                # ``output_message``, tool calls, reasoning, etc. all
+                # belong to the response output stream.
+                output_items.append(item)
+
+        # Walk the previous response's history chain so the new write
+        # carries the full transcript forward. Without this, each turn
+        # would only see the messages saved on that very turn.
+        history_item_ids: list[str] | None = None
+        if previous_response_id is not None:
+            try:
+                history_item_ids = await backend.get_history_item_ids(
+                    previous_response_id,
+                    None,
+                    self._history_limit,
+                    isolation=isolation,
+                )
+            except (FoundryBadRequestError, FoundryResourceNotFoundError) as err:
+                # Don't let history fetch failures torpedo the write —
+                # we still want to persist the new turn even if the
+                # chain seed is unreachable for some reason.
+                logger.warning(
+                    "save_messages: failed to walk previous_response_id=%r (%s); writing new turn without history seed",
+                    previous_response_id,
+                    type(err).__name__,
+                )
+
+        # Mirror what the agentserver runtime serialises onto the wire
+        # (see ``_extract_response_snapshot_from_events`` +
+        # ``strip_nulls`` in
+        # ``azure.ai.agentserver.responses.streaming._helpers``):
+        #
+        # * ``agent_reference`` (Required on the response envelope) —
+        #   built from ``FOUNDRY_AGENT_NAME`` / ``FOUNDRY_AGENT_VERSION``,
+        #   which the hosted platform sets per-deploy (sentinel fallback
+        #   for local dev so the envelope stays well-formed).
+        # * ``agent_session_id`` (S-038) — forcibly stamped by the
+        #   runtime; sourced from ``FOUNDRY_AGENT_SESSION_ID``.
+        # * ``conversation`` is intentionally omitted: the (user, chat)
+        #   isolation headers are the Foundry storage partition key,
+        #   and the chat-isolation-key value is opaque (the API
+        #   returns "Malformed identifier"/HTTP 400 if used as a
+        #   body-level ``conversation_id``).
+        # * Per-item ``response_id`` / ``agent_reference`` are NOT
+        #   stamped here — those B20/B21 defaults only apply to items
+        #   inside ``response.output_item.added/done`` *events* (see
+        #   ``_coerce_handler_event``); items inside ``input_items``
+        #   and ``response.output`` go through ``to_output_item`` which
+        #   never sets these fields, and the storage validator returns
+        #   HTTP 400 ``invalid_payload`` when extras leak in.
+        agent_name = os.environ.get("FOUNDRY_AGENT_NAME") or "agent-framework-host"
+        agent_version = os.environ.get("FOUNDRY_AGENT_VERSION") or None
+        agent_reference: dict[str, Any] = {"type": "agent_reference", "name": agent_name}
+        if agent_version:
+            agent_reference["version"] = agent_version
+
+        agent_session_id = os.environ.get("FOUNDRY_AGENT_SESSION_ID") or None
+        # ``model`` must be a real deployed model name — the storage
+        # validator rejects arbitrary strings. Pull it from the
+        # platform-provided ``MODEL_DEPLOYMENT_NAME`` (set in agent.yaml)
+        # and fall back to ``AZURE_AI_MODEL_DEPLOYMENT_NAME`` for local
+        # dev. When neither is set we omit the field entirely (it is
+        # ``Optional[str]`` per the ResponseObject schema).
+        model_deployment = (
+            os.environ.get("MODEL_DEPLOYMENT_NAME") or os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME") or None
+        )
+
+        # Build the wire payload to match exactly what the agentserver
+        # runtime emits via ``_extract_response_snapshot_from_events``
+        # for a synthetic ``status=completed`` snapshot:
+        #
+        #   {id, object, output, created_at, [model], agent_reference,
+        #    status, completed_at, [agent_session_id]}
+        #
+        # ``previous_response_id`` is appended when chaining; the runtime
+        # threads it through the same code path.
+        now = int(time.time())
+        response_body: dict[str, Any] = {
+            "id": response_id,
+            # SDK mirror: ``streaming/_helpers.py:244`` always stamps
+            # ``response_id`` alongside ``id`` on the snapshot before it
+            # reaches ``serialize_create_request``.
+            "response_id": response_id,
+            "object": "response",
+            # S-040 auto-stamp: the orchestrator (``_orchestrator.py:1706``)
+            # echoes ``background`` from the request to every response
+            # envelope; storage rejects payloads that omit it.
+            "background": False,
+            # ``ResponseObject`` schema (``_models.py:13995``) declares
+            # ``parallel_tool_calls: bool`` as REQUIRED. The SDK's synthetic
+            # fallback path (``_build_events``) never sets it because it's
+            # only invoked for failure recovery; real handler events carry
+            # it through. Storage rejects payloads that omit it.
+            "parallel_tool_calls": False,
+            # Same story for ``instructions`` (``_models.py:13989``) —
+            # required ``str | list[Item]`` field.
+            "instructions": "",
+            "output": [item.as_dict() for item in output_items],
+            "created_at": now,
+            "agent_reference": agent_reference,
+            "status": "completed",
+            "completed_at": now,
+        }
+        if model_deployment is not None:
+            response_body["model"] = model_deployment
+        if agent_session_id is not None:
+            response_body["agent_session_id"] = agent_session_id
+        if previous_response_id is not None:
+            response_body["previous_response_id"] = previous_response_id
+        response = ResponseObject(response_body)
+
+        try:
+            await backend.create_response(
+                response,
+                input_items=input_items,
+                history_item_ids=history_item_ids,
+                isolation=isolation,
+            )
+        except Exception as exc:
+            err_body = getattr(exc, "response_body", None)
+            logger.exception(
+                "FoundryHostedAgentHistoryProvider.save_messages: backend rejected "
+                "%d message(s) (response_id=%s, previous_response_id=%s, error_body=%s).",
+                len(messages),
+                response_id,
+                previous_response_id,
+                err_body,
+            )
+            return
+        logger.debug(
+            "FoundryHostedAgentHistoryProvider.save_messages: persisted %d message(s) "
+            "(response_id=%s, previous_response_id=%s).",
+            len(messages),
+            response_id,
+            previous_response_id,
+        )
+
+
+# Re-export ``OutputItem`` for callers that want to construct test items
+# without reaching into the SDK's ``models`` namespace directly.
+__all__ = [
+    "FoundryHostedAgentHistoryProvider",
+    "OutputItem",
+    "bind_request_context",
+    "get_current_isolation",
+    "get_current_request_context",
+    "reset_current_isolation",
+    "set_current_isolation",
+]
diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_ids.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_ids.py
new file mode 100644
index 0000000000..588231d073
--- /dev/null
+++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_ids.py
@@ -0,0 +1,72 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+"""Foundry-storage-compatible identifier helpers.
+
+The Foundry hosted-agent storage backend partitions records by extracting
+an embedded partition-key segment from every record/item id. The id
+format is ``{prefix}_{18charPartitionKey}{32charEntropy}`` (or a 48-char
+legacy body).  Free-form ids such as ``resp_<uuid hex>`` carry no valid
+partition key and the storage API rejects writes with an opaque
+``HTTP 500 server_error``.
+
+These helpers wrap :class:`azure.ai.agentserver.responses._id_generator.IdGenerator`
+so callers (e.g. the ``ResponsesChannel.response_id_factory`` argument
+or :meth:`FoundryHostedAgentHistoryProvider.save_messages`) can mint
+ids that the storage backend accepts without leaking the SDK import
+path into user code.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from azure.ai.agentserver.responses._id_generator import IdGenerator
+
+__all__ = [
+    "foundry_item_id",
+    "foundry_response_id",
+    "foundry_response_id_factory",
+]
+
+
+def foundry_response_id(previous_response_id: str | None = None) -> str:
+    """Create a new ``caresp_*`` response id accepted by Foundry storage.
+
+    Args:
+        previous_response_id: Id of the prior turn in the chain, if any.
+            It is forwarded to the SDK generator as a partition-key hint
+            so the new record co-locates with the chain it extends;
+            ``None`` and ``""`` are both passed on as an empty hint.
+
+    Returns:
+        A new id of the form ``caresp_<18charPartitionKey><32charEntropy>``.
+    """
+    partition_hint = previous_response_id if previous_response_id else ""
+    return IdGenerator.new_response_id(partition_hint)
+
+
+def foundry_response_id_factory() -> Any:
+    """Return the id factory for ``ResponsesChannel(response_id_factory=...)``.
+
+    The result is :func:`foundry_response_id` itself: a callable taking an
+    optional ``previous_response_id`` hint, which the channel passes for
+    chained turns so the new id inherits the prior turn's partition key.
+    """
+    return foundry_response_id
+
+
+def foundry_item_id(item: Any, response_id: str | None = None) -> str | None:
+    """Mint a Foundry-storage-compatible item id for *item*.
+
+    Delegates to :meth:`IdGenerator.new_item_id`, which picks the type
+    prefix (``msg`` / ``om`` / ``fc`` / ``rs`` / ...) from *item*. The
+    optional ``response_id`` is a partition-key hint so items written
+    under one response co-locate with the response record.
+
+    Returns:
+        A new id of the form ``{type-prefix}_<partitionKey><entropy>``,
+        or ``None`` when *item* is an unrecognised / reference-only type.
+        NOTE: the history provider reports ``None`` for SDK *OutputItem*
+        (storage) instances too, so it dispatches on ``item.type`` instead.
+    """
+    return IdGenerator.new_item_id(item, response_id)
diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_responses.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_responses.py
index 64b50f236a..186bd30df2 100644
--- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_responses.py
+++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_responses.py
@@ -3,11 +3,9 @@
 from __future__ import annotations
 
 import asyncio
-import base64
-import json
 import logging
 import os
-from collections.abc import AsyncIterable, AsyncIterator, Generator, Mapping, Sequence
+from collections.abc import AsyncIterable, AsyncIterator, Generator
 from typing import cast
 
 from agent_framework import (
@@ -16,7 +14,6 @@
     ContextProvider,
     FileCheckpointStorage,
     HistoryProvider,
-    Message,
     RawAgent,
     SupportsAgentRun,
     WorkflowAgent,
@@ -26,76 +23,9 @@
     ResponseEventStream,
     ResponseProviderProtocol,
     ResponsesServerOptions,
+    models,
 )
 from azure.ai.agentserver.responses.hosting import ResponsesAgentServerHost
-from azure.ai.agentserver.responses.models import (
-    ApplyPatchToolCallItemParam,
-    ApplyPatchToolCallOutputItemParam,
-    ComputerCallOutputItemParam,
-    ComputerScreenshotContent,
-    CreateResponse,
-    FunctionCallOutputItemParam,
-    FunctionShellAction,
-    FunctionShellCallItemParam,
-    FunctionShellCallOutputContent,
-    FunctionShellCallOutputExitOutcome,
-    FunctionShellCallOutputItemParam,
-    Item,
-    ItemCodeInterpreterToolCall,
-    ItemComputerToolCall,
-    ItemCustomToolCall,
-    ItemCustomToolCallOutput,
-    ItemFileSearchToolCall,
-    ItemFunctionToolCall,
-    ItemImageGenToolCall,
-    ItemLocalShellToolCall,
-    ItemLocalShellToolCallOutput,
-    ItemMcpApprovalRequest,
-    ItemMcpToolCall,
-    ItemMessage,
-    ItemOutputMessage,
-    ItemReasoningItem,
-    ItemWebSearchToolCall,
-    LocalEnvironmentResource,
-    MCPApprovalResponse,
-    MessageContent,
-    MessageContentInputFileContent,
-    MessageContentInputImageContent,
-    MessageContentInputTextContent,
-    MessageContentOutputTextContent,
-    MessageContentReasoningTextContent,
-    MessageContentRefusalContent,
-    OAuthConsentRequestOutputItem,
-    OutputItem,
-    OutputItemApplyPatchToolCall,
-    OutputItemApplyPatchToolCallOutput,
-    OutputItemCodeInterpreterToolCall,
-    OutputItemComputerToolCall,
-    OutputItemComputerToolCallOutputResource,
-    OutputItemCustomToolCall,
-    OutputItemCustomToolCallOutput,
-    OutputItemFileSearchToolCall,
-    OutputItemFunctionShellCall,
-    OutputItemFunctionShellCallOutput,
-    OutputItemFunctionToolCall,
-    OutputItemImageGenToolCall,
-    OutputItemLocalShellToolCall,
-    OutputItemLocalShellToolCallOutput,
-    OutputItemMcpApprovalRequest,
-    OutputItemMcpApprovalResponseResource,
-    OutputItemMcpToolCall,
-    OutputItemMessage,
-    OutputItemOutputMessage,
-    OutputItemReasoningItem,
-    OutputItemWebSearchToolCall,
-    OutputMessageContent,
-    OutputMessageContentOutputTextContent,
-    OutputMessageContentRefusalContent,
-    ResponseStreamEvent,
-    StructuredOutputsOutputItem,
-    SummaryTextContent,
-    TextContent,
-)
 from azure.ai.agentserver.responses.streaming._builders import (
     OutputItemFunctionCallBuilder,
     OutputItemMcpCallBuilder,
@@ -106,6 +36,39 @@
 )
 from typing_extensions import Any
 
+from ._shared import (
+    _arguments_to_str,  # pyright: ignore[reportPrivateUsage]
+    _convert_message_content,  # pyright: ignore[reportPrivateUsage]
+    _convert_output_message_content,  # pyright: ignore[reportPrivateUsage]
+    _item_to_message,  # pyright: ignore[reportPrivateUsage]
+    _items_to_messages,  # pyright: ignore[reportPrivateUsage]
+    _output_item_to_message,  # pyright: ignore[reportPrivateUsage]
+    _output_items_to_messages,  # pyright: ignore[reportPrivateUsage]
+)
+
+# Re-export the conversion helpers under their historical names so existing
+# tests (which import them from this module) keep working — the canonical
+# definitions now live in :mod:`._shared`.
+__all__ = (
+    "_arguments_to_str",
+    "_convert_message_content",
+    "_convert_output_message_content",
+    "_item_to_message",
+    "_items_to_messages",
+    "_output_item_to_message",
+    "_output_items_to_messages",
+)
+
+# Local aliases for the agent-server SDK types this module touches at the
+# Python type-annotation layer. Using ``models.X`` everywhere would work but
+# would noisily clutter type-only positions where the alias adds no value.
+CreateResponse = models.CreateResponse
+ResponseStreamEvent = models.ResponseStreamEvent
+FunctionShellAction = models.FunctionShellAction
+FunctionShellCallOutputContent = models.FunctionShellCallOutputContent
+FunctionShellCallOutputExitOutcome = models.FunctionShellCallOutputExitOutcome
+LocalEnvironmentResource = models.LocalEnvironmentResource
+
 logger = logging.getLogger(__name__)
 
 
@@ -272,86 +235,50 @@ async def _handle_inner_workflow(
         if not isinstance(self._agent, WorkflowAgent):
             raise RuntimeError("Agent is not a workflow agent.")
 
-        # Determine the latest checkpoint (if any) so we can resume the
-        # workflow's prior state for this turn. The directory is keyed by
-        # the inbound context id (conversation_id when set, otherwise
-        # previous_response_id). Multi-turn declarative workflows need the
-        # workflow's internal state (e.g. Conversation.messages,
-        # intermediate Local.* variables) to survive across user turns;
-        # the only place that state lives is the workflow checkpoint, so
-        # on every turn we restore the latest checkpoint and feed the new
-        # input back into the start executor as a continuation rather than
-        # a fresh run.
-        latest_checkpoint_id: str | None = None
-        restore_storage: FileCheckpointStorage | None = None
+        # Restore from the latest checkpoint if available, otherwise start with an empty history
         if context_id is not None:
-            restore_storage = FileCheckpointStorage(os.path.join(self._checkpoint_storage_path, context_id))
-            latest_checkpoint = await restore_storage.get_latest(workflow_name=self._agent.workflow.name)
+            checkpoint_storage = FileCheckpointStorage(os.path.join(self._checkpoint_storage_path, context_id))
+            latest_checkpoint = await checkpoint_storage.get_latest(workflow_name=self._agent.workflow.name)
             if latest_checkpoint is not None:
-                latest_checkpoint_id = latest_checkpoint.checkpoint_id
-
-        # Storage that will receive checkpoints written during this turn.
-        # When the caller chains with previous_response_id, the next turn
-        # will reference the current response_id as its previous_response_id,
-        # so new checkpoints must land under the current response_id (or the
-        # conversation_id when set). When conversation_id is set, this
-        # matches restore_storage; when only previous_response_id was
-        # supplied, restore_storage points at the *prior* response's
-        # directory and write_storage points at the *current* response's.
-        write_context_id = context.conversation_id or context.response_id
-        write_storage = FileCheckpointStorage(os.path.join(self._checkpoint_storage_path, write_context_id))
-
-        # Multi-turn pattern: when we have a prior checkpoint, restore it
-        # first (drive the workflow back to idle with prior state intact),
-        # then make a separate call that delivers the new user input. This
-        # depends on Workflow.run preserving shared state across calls. The
-        # restore-only call may yield events from any pending in-flight
-        # work in the checkpoint; we consume those internally here so they
-        # don't surface to the response stream as duplicates.
-        #
-        # If the restored checkpoint had pending request_info events, the
-        # restore-only call replays them through
-        # ``WorkflowAgent._convert_workflow_event_to_agent_response_updates``
-        # and populates ``self._agent.pending_requests``. That is the correct
-        # state: those requests are genuinely outstanding, and the next
-        # ``run(input_messages, ...)`` call may contain ``function_call_output``
-        # items (carried as FunctionResult/FunctionApprovalResponse content)
-        # that fulfill them via :meth:`WorkflowAgent._process_pending_requests`.
-        if latest_checkpoint_id is not None:
-            if is_streaming_request:
-                async for _ in self._agent.run(
-                    stream=True,
-                    checkpoint_id=latest_checkpoint_id,
-                    checkpoint_storage=restore_storage,
-                ):
-                    pass
-            else:
-                await self._agent.run(
-                    stream=False,
-                    checkpoint_id=latest_checkpoint_id,
-                    checkpoint_storage=restore_storage,
-                )
+                if not is_streaming_request:
+                    _ = await self._agent.run(
+                        stream=False,
+                        checkpoint_id=latest_checkpoint.checkpoint_id,
+                        checkpoint_storage=checkpoint_storage,
+                    )
+                else:
+                    # Consume the stream; otherwise the invocation is a no-op
+                    async for _ in self._agent.run(
+                        stream=True,
+                        checkpoint_id=latest_checkpoint.checkpoint_id,
+                        checkpoint_storage=checkpoint_storage,
+                    ):
+                        pass
 
         # Now run the agent with the latest input
         response_event_stream = ResponseEventStream(response_id=context.response_id, model=request.model)
 
+        # Create a new checkpoint storage for this response based on the following rules:
+        # - If no previous response ID or conversation ID is provided,
+        #   create a new checkpoint storage for this response
+        # - If a previous response ID is provided, create a new checkpoint storage for this response
+        # - If a conversation ID is provided, reuse the existing checkpoint storage for the conversation
+        context_id = context.conversation_id or context.response_id
+        checkpoint_storage = FileCheckpointStorage(os.path.join(self._checkpoint_storage_path, context_id))
+
         yield response_event_stream.emit_created()
         yield response_event_stream.emit_in_progress()
 
         if not is_streaming_request:
-            # Run the agent in non-streaming mode with the new user input.
-            response = await self._agent.run(
-                input_messages,
-                stream=False,
-                checkpoint_storage=write_storage,
-            )
+            # Run the agent in non-streaming mode
+            response = await self._agent.run(input_messages, stream=False, checkpoint_storage=checkpoint_storage)
 
             for message in response.messages:
                 for content in message.contents:
                     async for item in _to_outputs(response_event_stream, content):
                         yield item
 
-            await self._delete_not_latest_checkpoints(write_storage, self._agent.workflow.name)
+            await self._delete_not_latest_checkpoints(checkpoint_storage, self._agent.workflow.name)
             yield response_event_stream.emit_completed()
             return
 
@@ -359,12 +286,8 @@ async def _handle_inner_workflow(
         # lazily created on matching content, closed when a different type arrives.
         tracker = _OutputItemTracker(response_event_stream)
 
-        # Run the workflow agent in streaming mode with the new user input.
-        async for update in self._agent.run(
-            input_messages,
-            stream=True,
-            checkpoint_storage=write_storage,
-        ):
+        # Run the workflow agent in streaming mode
+        async for update in self._agent.run(input_messages, stream=True, checkpoint_storage=checkpoint_storage):
             for content in update.contents:
                 for event in tracker.handle(content):
                     yield event
@@ -377,7 +300,7 @@ async def _handle_inner_workflow(
         for event in tracker.close():
             yield event
 
-        await self._delete_not_latest_checkpoints(write_storage, self._agent.workflow.name)
+        await self._delete_not_latest_checkpoints(checkpoint_storage, self._agent.workflow.name)
         yield response_event_stream.emit_completed()
 
     @staticmethod
@@ -578,665 +501,6 @@ def _to_chat_options(request: CreateResponse) -> tuple[ChatOptions, bool]:
 # endregion
 
 
-# region Input Message Conversion
-
-
-def _items_to_messages(input_items: Sequence[Item]) -> list[Message]:
-    """Converts a sequence of input items to a list of Messages, one per item.
-
-    Args:
-        input_items: The input items to convert.
-
-    Returns:
-        A list of Messages, one per supported input item.
-    """
-    messages: list[Message] = []
-    for item in input_items:
-        messages.append(_item_to_message(item))
-    return messages
-
-
-def _item_to_message(item: Item) -> Message:
-    """Converts an Item to a Message.
-
-    Args:
-        item: The Item to convert.
-
-    Returns:
-        The converted Message.
-
-    Raises:
-        ValueError: If the Item type is not supported.
-    """
-    if item.type == "message":
-        msg = cast(ItemMessage, item)
-        if isinstance(msg.content, str):
-            return Message(role=msg.role, contents=[Content.from_text(msg.content)])
-        return Message(role=msg.role, contents=[_convert_message_content(part) for part in msg.content])
-
-    if item.type == "output_message":
-        output_msg = cast(ItemOutputMessage, item)
-        return Message(
-            role=output_msg.role, contents=[_convert_output_message_content(part) for part in output_msg.content]
-        )
-
-    if item.type == "function_call":
-        fc = cast(ItemFunctionToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[Content.from_function_call(fc.call_id, fc.name, arguments=fc.arguments)],
-        )
-
-    if item.type == "function_call_output":
-        fco = cast(FunctionCallOutputItemParam, item)
-        output = fco.output if isinstance(fco.output, str) else str(fco.output)
-        return Message(
-            role="tool",
-            contents=[Content.from_function_result(fco.call_id, result=output)],
-        )
-
-    if item.type == "reasoning":
-        reasoning = cast(ItemReasoningItem, item)
-        reason_contents: list[Content] = []
-        if reasoning.summary:
-            for summary in reasoning.summary:
-                reason_contents.append(Content.from_text(summary.text))
-        return Message(role="assistant", contents=reason_contents)
-
-    if item.type == "mcp_call":
-        mcp = cast(ItemMcpToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_mcp_server_tool_call(
-                    mcp.id,
-                    mcp.name,
-                    server_name=mcp.server_label,
-                    arguments=mcp.arguments,
-                )
-            ],
-        )
-
-    if item.type == "mcp_approval_request":
-        mcp_req = cast(ItemMcpApprovalRequest, item)
-        mcp_call_content = Content.from_mcp_server_tool_call(
-            mcp_req.id,
-            mcp_req.name,
-            server_name=mcp_req.server_label,
-            arguments=mcp_req.arguments,
-        )
-        return Message(
-            role="assistant",
-            contents=[Content.from_function_approval_request(mcp_req.id, mcp_call_content)],
-        )
-
-    if item.type == "mcp_approval_response":
-        mcp_resp = cast(MCPApprovalResponse, item)
-        placeholder_content = Content.from_function_call(mcp_resp.approval_request_id, "mcp_approval")
-        return Message(
-            role="user",
-            contents=[
-                Content.from_function_approval_response(
-                    mcp_resp.approve, mcp_resp.approval_request_id, placeholder_content
-                )
-            ],
-        )
-
-    if item.type == "code_interpreter_call":
-        ci = cast(ItemCodeInterpreterToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[Content.from_code_interpreter_tool_call(call_id=ci.id)],
-        )
-
-    if item.type == "image_generation_call":
-        ig = cast(ItemImageGenToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[Content.from_image_generation_tool_call(image_id=ig.id)],
-        )
-
-    if item.type == "shell_call":
-        sc = cast(FunctionShellCallItemParam, item)
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_shell_tool_call(
-                    call_id=sc.call_id,
-                    commands=sc.action.commands,
-                    status=str(sc.status),
-                )
-            ],
-        )
-
-    if item.type == "shell_call_output":
-        sco = cast(FunctionShellCallOutputItemParam, item)
-        outputs = [
-            Content.from_shell_command_output(
-                stdout=out.stdout or "",
-                stderr=out.stderr or "",
-                exit_code=getattr(out.outcome, "exit_code", None) if hasattr(out, "outcome") else None,
-            )
-            for out in (sco.output or [])
-        ]
-        return Message(
-            role="tool",
-            contents=[
-                Content.from_shell_tool_result(
-                    call_id=sco.call_id,
-                    outputs=outputs,
-                    max_output_length=sco.max_output_length,
-                )
-            ],
-        )
-
-    if item.type == "local_shell_call":
-        lsc = cast(ItemLocalShellToolCall, item)
-        commands = lsc.action.command if hasattr(lsc.action, "command") and lsc.action.command else []
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_shell_tool_call(
-                    call_id=lsc.call_id,
-                    commands=commands,
-                    status=str(lsc.status),
-                )
-            ],
-        )
-
-    if item.type == "local_shell_call_output":
-        lsco = cast(ItemLocalShellToolCallOutput, item)
-        return Message(
-            role="tool",
-            contents=[
-                Content.from_shell_tool_result(
-                    call_id=lsco.id,
-                    outputs=[Content.from_shell_command_output(stdout=lsco.output)],
-                )
-            ],
-        )
-
-    if item.type == "file_search_call":
-        fs = cast(ItemFileSearchToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_function_call(
-                    fs.id,
-                    "file_search",
-                    arguments=json.dumps({"queries": fs.queries}),
-                )
-            ],
-        )
-
-    if item.type == "web_search_call":
-        ws = cast(ItemWebSearchToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[Content.from_function_call(ws.id, "web_search")],
-        )
-
-    if item.type == "computer_call":
-        cc = cast(ItemComputerToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_function_call(
-                    cc.call_id,
-                    "computer_use",
-                    arguments=str(cc.action),
-                )
-            ],
-        )
-
-    if item.type == "computer_call_output":
-        cco = cast(ComputerCallOutputItemParam, item)
-        return Message(
-            role="tool",
-            contents=[Content.from_function_result(cco.call_id, result=str(cco.output))],
-        )
-
-    if item.type == "custom_tool_call":
-        ct = cast(ItemCustomToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[Content.from_function_call(ct.call_id, ct.name, arguments=ct.input)],
-        )
-
-    if item.type == "custom_tool_call_output":
-        cto = cast(ItemCustomToolCallOutput, item)
-        output = cto.output if isinstance(cto.output, str) else str(cto.output)
-        # Hosted-MCP results land here because the host writes them via
-        # `aoutput_item_custom_tool_call_output` (see `_to_outputs` for
-        # `mcp_server_tool_result`). The persisted `call_id` keeps its
-        # `mcp_*` prefix; on read, route those back to a hosted-MCP result
-        # Content so the chat-client serialize layer can coalesce them
-        # onto a single `mcp_call` input item with `output` populated.
-        # Issue #5546.
-        if cto.call_id and cto.call_id.startswith("mcp_"):
-            return Message(
-                role="tool",
-                contents=[Content.from_mcp_server_tool_result(call_id=cto.call_id, output=output)],
-            )
-        return Message(
-            role="tool",
-            contents=[Content.from_function_result(cto.call_id, result=output)],
-        )
-
-    if item.type == "apply_patch_call":
-        ap = cast(ApplyPatchToolCallItemParam, item)
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_function_call(
-                    ap.call_id,
-                    "apply_patch",
-                    arguments=str(ap.operation),
-                )
-            ],
-        )
-
-    if item.type == "apply_patch_call_output":
-        apo = cast(ApplyPatchToolCallOutputItemParam, item)
-        return Message(
-            role="tool",
-            contents=[Content.from_function_result(apo.call_id, result=apo.output or "")],
-        )
-
-    raise ValueError(f"Unsupported Item type: {item.type}")
-
-
-def _output_items_to_messages(history: Sequence[OutputItem]) -> list[Message]:
-    """Converts a sequence of OutputItem objects to a list of Message objects.
-
-    Args:
-        history (Sequence[OutputItem]): The sequence of OutputItem objects to convert.
-
-    Returns:
-        list[Message]: The list of Message objects.
-    """
-    messages: list[Message] = []
-    for item in history:
-        messages.append(_output_item_to_message(item))
-    return messages
-
-
-def _output_item_to_message(item: OutputItem) -> Message:
-    """Converts an OutputItem to a Message.
-
-    Args:
-        item (OutputItem): The OutputItem to convert.
-
-    Returns:
-        Message: The converted Message.
-
-    Raises:
-        ValueError: If the OutputItem type is not supported.
-    """
-    if item.type == "output_message":
-        output_msg = cast(OutputItemOutputMessage, item)
-        return Message(
-            role=output_msg.role, contents=[_convert_output_message_content(part) for part in output_msg.content]
-        )
-
-    if item.type == "message":
-        msg = cast(OutputItemMessage, item)
-        return Message(role=msg.role, contents=[_convert_message_content(part) for part in msg.content])
-
-    if item.type == "function_call":
-        fc = cast(OutputItemFunctionToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[Content.from_function_call(fc.call_id, fc.name, arguments=fc.arguments)],
-        )
-
-    if item.type == "function_call_output":
-        fco = cast(FunctionCallOutputItemParam, item)
-        output = fco.output if isinstance(fco.output, str) else str(fco.output)
-        return Message(
-            role="tool",
-            contents=[Content.from_function_result(fco.call_id, result=output)],
-        )
-
-    if item.type == "reasoning":
-        reasoning = cast(OutputItemReasoningItem, item)
-        contents: list[Content] = []
-        if reasoning.summary:
-            for summary in reasoning.summary:
-                contents.append(Content.from_text(summary.text))
-        return Message(role="assistant", contents=contents)
-
-    if item.type == "mcp_call":
-        mcp = cast(OutputItemMcpToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_mcp_server_tool_call(
-                    mcp.id,
-                    mcp.name,
-                    server_name=mcp.server_label,
-                    arguments=mcp.arguments,
-                )
-            ],
-        )
-
-    if item.type == "mcp_approval_request":
-        mcp_req = cast(OutputItemMcpApprovalRequest, item)
-        mcp_call_content = Content.from_mcp_server_tool_call(
-            mcp_req.id,
-            mcp_req.name,
-            server_name=mcp_req.server_label,
-            arguments=mcp_req.arguments,
-        )
-        return Message(
-            role="assistant",
-            contents=[Content.from_function_approval_request(mcp_req.id, mcp_call_content)],
-        )
-
-    if item.type == "mcp_approval_response":
-        mcp_resp = cast(OutputItemMcpApprovalResponseResource, item)
-        # Build a placeholder function_call Content since the original call details are not available
-        placeholder_content = Content.from_function_call(mcp_resp.approval_request_id, "mcp_approval")
-        return Message(
-            role="user",
-            contents=[Content.from_function_approval_response(mcp_resp.approve, mcp_resp.id, placeholder_content)],
-        )
-
-    if item.type == "code_interpreter_call":
-        ci = cast(OutputItemCodeInterpreterToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[Content.from_code_interpreter_tool_call(call_id=ci.id)],
-        )
-
-    if item.type == "image_generation_call":
-        ig = cast(OutputItemImageGenToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[Content.from_image_generation_tool_call(image_id=ig.id)],
-        )
-
-    if item.type == "shell_call":
-        sc = cast(OutputItemFunctionShellCall, item)
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_shell_tool_call(
-                    call_id=sc.call_id,
-                    commands=sc.action.commands,
-                    status=str(sc.status),
-                )
-            ],
-        )
-
-    if item.type == "shell_call_output":
-        sco = cast(OutputItemFunctionShellCallOutput, item)
-        outputs = [
-            Content.from_shell_command_output(
-                stdout=out.stdout or "",
-                stderr=out.stderr or "",
-                exit_code=getattr(out.outcome, "exit_code", None) if hasattr(out, "outcome") else None,
-            )
-            for out in (sco.output or [])
-        ]
-        return Message(
-            role="tool",
-            contents=[
-                Content.from_shell_tool_result(
-                    call_id=sco.call_id,
-                    outputs=outputs,
-                    max_output_length=sco.max_output_length,
-                )
-            ],
-        )
-
-    if item.type == "local_shell_call":
-        lsc = cast(OutputItemLocalShellToolCall, item)
-        commands = lsc.action.command if hasattr(lsc.action, "command") and lsc.action.command else []
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_shell_tool_call(
-                    call_id=lsc.call_id,
-                    commands=commands,
-                    status=str(lsc.status),
-                )
-            ],
-        )
-
-    if item.type == "local_shell_call_output":
-        lsco = cast(OutputItemLocalShellToolCallOutput, item)
-        return Message(
-            role="tool",
-            contents=[
-                Content.from_shell_tool_result(
-                    call_id=lsco.id,
-                    outputs=[Content.from_shell_command_output(stdout=lsco.output)],
-                )
-            ],
-        )
-
-    if item.type == "file_search_call":
-        fs = cast(OutputItemFileSearchToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_function_call(
-                    fs.id,
-                    "file_search",
-                    arguments=json.dumps({"queries": fs.queries}),
-                )
-            ],
-        )
-
-    if item.type == "web_search_call":
-        ws = cast(OutputItemWebSearchToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[Content.from_function_call(ws.id, "web_search")],
-        )
-
-    if item.type == "computer_call":
-        cc = cast(OutputItemComputerToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_function_call(
-                    cc.call_id,
-                    "computer_use",
-                    arguments=str(cc.action),
-                )
-            ],
-        )
-
-    if item.type == "computer_call_output":
-        cco = cast(OutputItemComputerToolCallOutputResource, item)
-        return Message(
-            role="tool",
-            contents=[Content.from_function_result(cco.call_id, result=str(cco.output))],
-        )
-
-    if item.type == "custom_tool_call":
-        ct = cast(OutputItemCustomToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[Content.from_function_call(ct.call_id, ct.name, arguments=ct.input)],
-        )
-
-    if item.type == "custom_tool_call_output":
-        cto = cast(OutputItemCustomToolCallOutput, item)
-        output = cto.output if isinstance(cto.output, str) else str(cto.output)
-        # Hosted-MCP results land here because the host writes them via
-        # `aoutput_item_custom_tool_call_output`. Route `mcp_*` call_ids
-        # back to a hosted-MCP result Content so the chat-client serialize
-        # layer can coalesce onto the matching `mcp_call` input item.
-        # Issue #5546.
-        if cto.call_id and cto.call_id.startswith("mcp_"):
-            return Message(
-                role="tool",
-                contents=[Content.from_mcp_server_tool_result(call_id=cto.call_id, output=output)],
-            )
-        return Message(
-            role="tool",
-            contents=[Content.from_function_result(cto.call_id, result=output)],
-        )
-
-    if item.type == "apply_patch_call":
-        ap = cast(OutputItemApplyPatchToolCall, item)
-        return Message(
-            role="assistant",
-            contents=[
-                Content.from_function_call(
-                    ap.call_id,
-                    "apply_patch",
-                    arguments=str(ap.operation),
-                )
-            ],
-        )
-
-    if item.type == "apply_patch_call_output":
-        apo = cast(OutputItemApplyPatchToolCallOutput, item)
-        return Message(
-            role="tool",
-            contents=[Content.from_function_result(apo.call_id, result=apo.output or "")],
-        )
-
-    if item.type == "oauth_consent_request":
-        oauth = cast(OAuthConsentRequestOutputItem, item)
-        return Message(
-            role="assistant",
-            contents=[Content.from_oauth_consent_request(oauth.consent_link)],
-        )
-
-    if item.type == "structured_outputs":
-        so = cast(StructuredOutputsOutputItem, item)
-        text = json.dumps(so.output) if not isinstance(so.output, str) else so.output
-        return Message(role="assistant", contents=[Content.from_text(text)])
-
-    raise ValueError(f"Unsupported OutputItem type: {item.type}")
-
-
-def _convert_output_message_content(content: OutputMessageContent) -> Content:
-    """Converts an OutputMessageContent to a Content object.
-
-    Args:
-        content (OutputMessageContent): The OutputMessageContent to convert.
-
-    Returns:
-        Content: The converted Content object.
-
-    Raises:
-        ValueError: If the OutputMessageContent type is not supported.
-    """
-    if content.type == "output_text":
-        text_content = cast(OutputMessageContentOutputTextContent, content)
-        return Content.from_text(text_content.text)
-    if content.type == "refusal":
-        refusal_content = cast(OutputMessageContentRefusalContent, content)
-        return Content.from_text(refusal_content.refusal)
-
-    raise ValueError(f"Unsupported OutputMessageContent type: {content.type}")
-
-
-def _convert_file_data(data_uri: str, filename: str | None = None) -> Content:
-    """Convert a file_data data URI to a Content object.
-
-    For text/* MIME types, decodes the base64 content and returns it as text.
-    For other types, returns a URI-based Content with the filename preserved.
-    """
-    # Parse data URI: data:<media_type>;base64,<data>
-    if data_uri.startswith("data:") and ";base64," in data_uri:
-        header, encoded = data_uri.split(";base64,", 1)
-        media_type = header[len("data:") :]
-        if media_type.startswith("text/"):
-            try:
-                decoded_text = base64.b64decode(encoded).decode("utf-8")
-            except (ValueError, UnicodeDecodeError):
-                logger.warning(
-                    "Failed to decode text/* file_data as UTF-8, falling through to URI passthrough.",
-                    exc_info=True,
-                )
-            else:
-                prefix = f"[File: {filename}]\n" if filename else ""
-                return Content.from_text(f"{prefix}{decoded_text}")
-    additional_properties = {"filename": filename} if filename else None
-    return Content.from_uri(data_uri, additional_properties=additional_properties)
-
-
-def _convert_message_content(content: MessageContent) -> Content:
-    """Converts a MessageContent to a Content object.
-
-    Args:
-        content (MessageContent): The MessageContent to convert.
-
-    Returns:
-        Content: The converted Content object.
-
-    Raises:
-        ValueError: If the MessageContent type is not supported.
-    """
-    if content.type == "input_text":
-        input_text = cast(MessageContentInputTextContent, content)
-        return Content.from_text(input_text.text)
-    if content.type == "output_text":
-        output_text = cast(MessageContentOutputTextContent, content)
-        return Content.from_text(output_text.text)
-    if content.type == "text":
-        text = cast(TextContent, content)
-        return Content.from_text(text.text)
-    if content.type == "summary_text":
-        summary = cast(SummaryTextContent, content)
-        return Content.from_text(summary.text)
-    if content.type == "refusal":
-        refusal = cast(MessageContentRefusalContent, content)
-        return Content.from_text(refusal.refusal)
-    if content.type == "reasoning_text":
-        reasoning = cast(MessageContentReasoningTextContent, content)
-        return Content.from_text_reasoning(text=reasoning.text)
-    if content.type == "input_image":
-        image = cast(MessageContentInputImageContent, content)
-        if image.image_url:
-            if image.image_url.startswith("data:"):
-                return Content.from_uri(image.image_url)
-            return Content.from_uri(image.image_url, media_type="image/*")
-        if image.file_id:
-            return Content.from_hosted_file(image.file_id)
-    if content.type == "input_file":
-        file = cast(MessageContentInputFileContent, content)
-        if file.file_url:
-            return Content.from_uri(file.file_url)
-        if file.file_id:
-            return Content.from_hosted_file(file.file_id, name=file.filename)
-        if file.file_data:
-            return _convert_file_data(file.file_data, file.filename)
-    if content.type == "computer_screenshot":
-        screenshot = cast(ComputerScreenshotContent, content)
-        return Content.from_uri(screenshot.image_url)
-
-    raise ValueError(f"Unsupported MessageContent type: {content.type}")
-
-
-# endregion
-
-# region Output Item Conversion
-
-
-def _arguments_to_str(arguments: str | Mapping[str, Any] | None) -> str:
-    """Convert arguments to a JSON string.
-
-    Args:
-        arguments: The arguments to convert, can be a string, mapping, or None.
-
-    Returns:
-        The arguments as a JSON string.
-    """
-    if arguments is None:
-        return ""
-    if isinstance(arguments, str):
-        return arguments
-    return json.dumps(arguments)
-
-
 async def _to_outputs(stream: ResponseEventStream, content: Content) -> AsyncIterator[ResponseStreamEvent]:
     """Converts a Content object to an async sequence of ResponseStreamEvent objects.
 
diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py
new file mode 100644
index 0000000000..890dd7bcfe
--- /dev/null
+++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py
@@ -0,0 +1,1287 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+"""Shared transformation helpers between the agent-server data model and Agent Framework.
+
+This module is the single home for *pure-data* conversions between the
+:mod:`azure.ai.agentserver.responses.models` SDK shapes (``Item``,
+``OutputItem``, ``MessageContent``, …) and the Agent Framework public types
+(:class:`agent_framework.Message`, :class:`agent_framework.Content`, …).
+
+Why this lives in one module
+----------------------------
+* The :mod:`._responses` channel adapter and the
+  :class:`._history_provider.FoundryHostedAgentHistoryProvider` both need the
+  exact same OutputItem→Message conversion. Keeping it in one place means we
+  only have **one** ``item.type == "..."`` dispatch table to keep up
+  to date when the agent-server SDK grows new item kinds. If you spot a
+  ``type`` value that this module raises ``ValueError`` for, that is the place
+  to add support — and **both** consumers benefit immediately.
+* The whole module references the agent-server SDK through a single
+  ``from azure.ai.agentserver.responses import models`` import. Looking at the
+  ``models.X`` references makes it obvious which generated types we already
+  consume and which ones (e.g. ``models.A2AToolCall``,
+  ``models.AzureFunctionToolCall``, …) are not yet wired into
+  :func:`_output_item_to_message`.
+
+``additional_properties`` round-trip
+------------------------------------
+Both the SDK models and :class:`agent_framework.Message` carry an extensible
+extras bag — the agent-server models are
+:class:`collections.abc.MutableMapping` instances that round-trip *any* key
+through their JSON serialisation, and ``Message`` (and ``Content``) expose a
+public ``additional_properties: dict[str, Any]`` slot.
+
+To preserve channel-specific extras across a load/save cycle:
+
+* On **load** (SDK model → Message) :func:`_collect_unknown_keys` extracts
+  every key on the source model that is **not** part of its declared schema
+  (per ``_attr_to_rest_field``) and stashes it on
+  ``Message.additional_properties["foundry"]`` (and per-content the same
+  bag is attached onto ``Content.additional_properties["foundry"]``). The
+  bag is only attached when at least one extra key is present, so messages
+  that didn't have extras stay byte-equal to the previous behaviour.
+* On **save** (Message → SDK model) :func:`_inject_extras` writes any
+  previously stashed bag back as direct keys on the SDK model — Foundry
+  storage will round-trip them as opaque JSON.
+
+This means an app can stash channel-specific bookkeeping (delivery
+fingerprints, the ``hosting`` envelope from the host, AG-UI ``client_state``
+snapshots, …) under a known top-level key and rely on it surviving a
+write/read cycle through the Foundry response store.
+"""
+
+from __future__ import annotations
+
+import base64
+import json
+import logging
+from collections.abc import Mapping, Sequence
+from typing import Any, cast
+
+from agent_framework import Content, Message
+from azure.ai.agentserver.responses import models
+
+logger = logging.getLogger(__name__)
+
+# Top-level key under which round-tripped SDK extras live on
+# ``Message.additional_properties`` and ``Content.additional_properties``.
+# Stable on purpose: write-paths look it up by name to re-inject extras into
+# outbound SDK models.
+EXTRAS_KEY = "foundry"
+
+# Sub-key (under ``additional_properties[EXTRAS_KEY]``) that stores a
+# verbatim snapshot of the original SDK ``OutputItem`` mapping captured at
+# read time. The write path re-emits the SDK item from this snapshot when
+# present, giving lossless audit/replay semantics: every declared field
+# (item id, type discriminator, content array, status, …) AND every undeclared
+# extra Foundry handed us survive the AF round-trip. Without this, a
+# message synthesised back from ``Message.text`` alone would discard the
+# original item shape.
+RAW_KEY = "__raw__"
+
+# Top-level key on the SDK ``OutputItem`` mapping under which we round-trip
+# *every* :class:`agent_framework.Message` ``additional_properties`` namespace
+# **other than** :data:`EXTRAS_KEY` (the foundry-internal namespace, handled
+# separately by :func:`_inject_extras`).
+#
+# Why a single container key instead of writing each namespace as a top-level
+# extra on the SDK item: Foundry's storage backend round-trips arbitrary
+# unknown keys, but on **load** :func:`_collect_unknown_keys` cannot tell
+# which unknowns were AF-written namespaces (``hosting``, ``agui_state``,
+# ...) vs Foundry-runtime additions. Funnelling AF namespaces under a single
+# sentinel key removes that ambiguity: anything inside ``agent_framework``
+# is restored under its original namespace; anything else stays under
+# :data:`EXTRAS_KEY` (preserving today's behaviour for Foundry-side extras).
+#
+# Concretely, this is the mechanism that gives the Hosting spec's
+# ``Message.additional_properties["hosting"]`` envelope (channel /
+# identity / response_target / initial-write ``deliveries[]``) durable
+# round-trip semantics through the Foundry response store — see
+# ``docs/specs/002-python-hosting-channels.md`` §"Channel metadata
+# persisted onto stored messages".
+AF_EXTRAS_KEY = "agent_framework"
+
+# Re-exports — these helpers are consumed by sibling modules
+# (``_responses.py`` and ``_history_provider.py``); declaring them in
+# ``__all__`` quiets pyright's ``reportUnusedFunction`` for module-private
+# names that are intentionally part of the package-internal API.
+__all__ = (
+    "AF_EXTRAS_KEY",
+    "EXTRAS_KEY",
+    "RAW_KEY",
+    "_arguments_to_str",
+    "_attach_content_extras",
+    "_attach_extras",
+    "_capture_raw",
+    "_collect_af_extras",
+    "_collect_unknown_keys",
+    "_convert_message_content",
+    "_convert_output_message_content",
+    "_inject_af_extras",
+    "_inject_extras",
+    "_item_to_message",
+    "_items_to_messages",
+    "_message_text",
+    "_message_to_output_item",
+    "_messages_to_output_items",
+    "_output_item_to_message",
+    "_output_items_to_messages",
+)
+
+
+# region Extras helpers
+
+
+def _collect_unknown_keys(model: Mapping[str, Any]) -> dict[str, Any]:
+    """Extract the entries of an SDK model that fall outside its declared schema.
+
+    Agent-server SDK models are generated from the Foundry REST contract
+    and behave as :class:`collections.abc.MutableMapping` instances. The
+    class-level ``_attr_to_rest_field`` map lists the declared fields, so
+    every other key found on the instance is treated as an extra — data
+    Foundry stored without the SDK modelling it, typically channel-specific
+    values an earlier write-path placed there via :func:`_inject_extras`.
+    A type without ``_attr_to_rest_field`` declares nothing: all keys count.
+
+    Args:
+        model: The model instance (or arbitrary mapping) to examine.
+
+    Returns:
+        A freshly built ``dict`` holding just the undeclared entries of
+        ``model``; empty for a non-mapping argument or when every key is
+        a declared field.
+    """
+    if isinstance(model, Mapping):
+        declared = set(getattr(type(model), "_attr_to_rest_field", {}).keys())
+        return {name: val for name, val in model.items() if name not in declared}
+    return {}
+
+
+def _attach_extras(message: Message, model: Mapping[str, Any]) -> Message:
+    """Attach SDK extras (if any) to ``message.additional_properties``.
+
+    Two-tier restoration so the Hosting spec's namespaced envelopes
+    (``hosting``, ``agui_state``, …) come back under their **original**
+    keys while Foundry-side extras (anything the runtime layered on the
+    SDK item) stay under the foundry-internal :data:`EXTRAS_KEY`
+    namespace:
+
+    1. When the :data:`AF_EXTRAS_KEY` entry of the unknown-keys bag is a
+       mapping, remove it and merge each sub-key directly onto
+       ``message.additional_properties`` — this is how the inbound
+       ``hosting`` envelope (channel/identity/response_target) and the
+       initial-write ``deliveries[]`` snapshot round-trip through storage.
+    2. Anything remaining (Foundry-runtime extras the SDK doesn't model
+       explicitly, including a non-mapping :data:`AF_EXTRAS_KEY` value) is
+       stashed under ``additional_properties[EXTRAS_KEY]`` for backward
+       compatibility and audit/replay.
+
+    No-op when the model carries no extras — ``additional_properties`` is left
+    alone so callers and tests that compare ``Message`` instances for equality
+    by ``role``/``contents`` only continue to pass.
+
+    Args:
+        message: The message to enrich.
+        model: The SDK model whose extras should be preserved.
+
+    Returns:
+        The same ``message`` instance (returned for fluent chaining).
+    """
+    extras = _collect_unknown_keys(model)
+    if not extras:
+        return message
+    af_extras = extras.get(AF_EXTRAS_KEY)
+    if isinstance(af_extras, Mapping):
+        # Only a mapping is an AF-written container; any other value stays in
+        # ``extras`` so it is preserved under EXTRAS_KEY instead of dropped.
+        del extras[AF_EXTRAS_KEY]
+        # Per-namespace overwrite: a fresh load is the source of truth.
+        message.additional_properties.update(cast("Mapping[str, Any]", af_extras))
+    if extras:
+        message.additional_properties.setdefault(EXTRAS_KEY, {}).update(extras)
+    return message
+
+
+def _capture_raw(message: Message, item: Mapping[str, Any]) -> Message:
+    """Record a shallow copy of the SDK item's mapping on the message for replay.
+
+    The copy is written to
+    ``message.additional_properties[EXTRAS_KEY][RAW_KEY]``, where
+    :func:`_message_to_output_item` can find it on the write side and
+    re-emit the original SDK shape. That is how item ids, content variants
+    (citations, reasoning, tool results, …) and any extras Foundry added
+    beyond the declared schema survive the AF → Foundry-storage round-trip.
+
+    Snapshotting is best-effort: if ``dict(item)`` raises (e.g. a future
+    non-mapping subclass), the message is returned unchanged so the read
+    degrades to the lossy synthesise-from-text path instead of crashing.
+    """
+    try:
+        snapshot = dict(item)
+    except Exception:
+        pass
+    else:
+        message.additional_properties.setdefault(EXTRAS_KEY, {})[RAW_KEY] = snapshot
+    return message
+
+
+def _inject_extras(model: Any, source: Mapping[str, Any] | None) -> Any:
+    """Inject previously-stashed extras back onto an outbound SDK model.
+
+    The SDK models are :class:`collections.abc.MutableMapping`; setting
+    arbitrary keys on them is supported and round-trips through serialisation.
+    Use this when **emitting** SDK shapes (e.g. when ``save_messages`` decides
+    to write back through the Foundry storage API).
+
+    Args:
+        model: The SDK model instance to enrich. Must be mapping-like.
+        source: The extras bag previously read from
+            ``Message.additional_properties[EXTRAS_KEY]`` (or any equivalent).
+            ``None`` is treated as an empty bag. :data:`RAW_KEY` and keys that
+            name a declared field of the model are skipped.
+
+    Returns:
+        The same ``model`` instance (returned for fluent chaining).
+    """
+    if not source:
+        return model
+    # The declared schema depends only on the model's type, so resolve it
+    # once instead of rebuilding the set for every key in ``source``.
+    model_type: Any = type(model)  # pyright: ignore[reportUnknownVariableType]
+    known: set[str] = set(getattr(model_type, "_attr_to_rest_field", {}))
+    for key, value in source.items():
+        # Skip the internal raw-snapshot sentinel (it lives only inside
+        # ``additional_properties``) and any declared field: extras are never
+        # allowed to overwrite the schema-defined contract on the model.
+        if key == RAW_KEY or key in known:
+            continue
+        model[key] = value
+    return model
+
+
+def _collect_af_extras(message: Message) -> dict[str, Any]:
+    """Return the message's ``additional_properties`` minus :data:`EXTRAS_KEY`.
+
+    The remaining namespaces (``hosting``, ``agui_state``, …) are meant to
+    travel through Foundry storage as one opaque container stored under
+    :data:`AF_EXTRAS_KEY` on the SDK item. The foundry-internal namespace is
+    left out because :func:`_inject_extras` handles it separately and its
+    contents (raw snapshots, Foundry runtime extras) don't belong there.
+    """
+    namespaces = dict(message.additional_properties or {})
+    namespaces.pop(EXTRAS_KEY, None)
+    return namespaces
+
+
+def _inject_af_extras(model: Any, source: Mapping[str, Any] | None) -> Any:
+    """Store the AF-side namespaces on the SDK model under :data:`AF_EXTRAS_KEY`.
+
+    Save-side counterpart of the AF-namespace restoration performed by
+    :func:`_attach_extras`. The container key would only clash with a
+    declared schema field if Foundry added an ``agent_framework`` field to
+    its REST contract — in which case the constant is to be renamed. An
+    empty or ``None`` ``source`` leaves the model untouched.
+
+    A non-empty ``source`` is copied into a new ``dict`` and replaces
+    whatever already sits at :data:`AF_EXTRAS_KEY` on the model (e.g. a
+    stale value baked into a raw-snapshot replay), which keeps the
+    in-process :class:`Message` the source of truth at write time. The
+    same ``model`` instance is returned.
+    """
+    if source:
+        model[AF_EXTRAS_KEY] = dict(source)
+    return model
+
+
+# endregion
+
+
+# region Small utilities
+
+
+def _arguments_to_str(arguments: str | Mapping[str, Any] | None) -> str:
+    """Serialise a tool-call ``arguments`` payload into its wire-format string.
+
+    Args:
+        arguments: The payload to convert. A string passes through
+            unchanged, ``None`` yields an empty string, and anything else
+            (normally a mapping) is encoded with :func:`json.dumps`.
+
+    Returns:
+        The string form of ``arguments`` (``""`` when it is ``None``).
+    """
+    if isinstance(arguments, str):
+        return arguments
+    if arguments is not None:
+        return json.dumps(arguments)
+    return ""
+
+
+# endregion
+
+
+# region Content conversion
+
+
+def _convert_file_data(data_uri: str, filename: str | None = None) -> Content:
+    """Turn a ``file_data`` data URI into a :class:`Content`.
+
+    A base64 ``data:text/*`` URI is decoded as UTF-8 and returned as text
+    content, prefixed with ``[File: <name>]`` when ``filename`` is given.
+    Every other input — a different media type, a URI without a
+    ``;base64,`` marker, or a payload that fails to decode (a warning is
+    logged) — is passed through as URI content carrying ``filename`` as
+    an additional property when one is known.
+    """
+    header, marker, payload = data_uri.partition(";base64,")
+    if marker and header.startswith("data:text/"):
+        try:
+            text = base64.b64decode(payload).decode("utf-8")
+        except (ValueError, UnicodeDecodeError):
+            logger.warning(
+                "Failed to decode text/* file_data as UTF-8, falling through to URI passthrough.",
+                exc_info=True,
+            )
+        else:
+            label = f"[File: {filename}]\n" if filename else ""
+            return Content.from_text(f"{label}{text}")
+    extra = {"filename": filename} if filename else None
+    return Content.from_uri(data_uri, additional_properties=extra)
+
+
+def _convert_message_content(content: models.MessageContent) -> Content:
+    """Convert an SDK ``MessageContent`` (input-side) into a framework ``Content``.
+
+    Dispatches on ``content.type``: ``input_text``, ``output_text``, ``text``,
+    ``summary_text``, ``refusal``, ``reasoning_text``, ``input_image``,
+    ``input_file`` and ``computer_screenshot``.
+
+    Args:
+        content: The SDK content node to convert.
+
+    Returns:
+        The corresponding :class:`agent_framework.Content`.
+
+    Raises:
+        ValueError: If ``content.type`` is not handled, or an ``input_image`` /
+            ``input_file`` node carries none of the source fields checked below.
+    """
+    if content.type == "input_text":
+        return _attach_content_extras(
+            Content.from_text(cast(models.MessageContentInputTextContent, content).text), content
+        )
+    if content.type == "output_text":
+        return _attach_content_extras(
+            Content.from_text(cast(models.MessageContentOutputTextContent, content).text), content
+        )
+    if content.type == "text":
+        return _attach_content_extras(Content.from_text(cast(models.TextContent, content).text), content)
+    if content.type == "summary_text":
+        return _attach_content_extras(Content.from_text(cast(models.SummaryTextContent, content).text), content)
+    if content.type == "refusal":
+        return _attach_content_extras(
+            Content.from_text(cast(models.MessageContentRefusalContent, content).refusal), content
+        )
+    if content.type == "reasoning_text":
+        return _attach_content_extras(
+            Content.from_text_reasoning(text=cast(models.MessageContentReasoningTextContent, content).text),
+            content,
+        )
+    if content.type == "input_image":  # with neither field set, control reaches the final raise
+        image = cast(models.MessageContentInputImageContent, content)
+        if image.image_url:
+            return _attach_content_extras(Content.from_uri(image.image_url), content)
+        if image.file_id:
+            return _attach_content_extras(Content.from_hosted_file(image.file_id), content)
+    if content.type == "input_file":  # with no url/id/data set, control reaches the final raise
+        file = cast(models.MessageContentInputFileContent, content)
+        if file.file_url:
+            return _attach_content_extras(Content.from_uri(file.file_url), content)
+        if file.file_id:
+            return _attach_content_extras(Content.from_hosted_file(file.file_id, name=file.filename), content)
+        if file.file_data:
+            return _attach_content_extras(_convert_file_data(file.file_data, file.filename), content)
+    if content.type == "computer_screenshot":
+        return _attach_content_extras(
+            Content.from_uri(cast(models.ComputerScreenshotContent, content).image_url), content
+        )
+
+    raise ValueError(f"Unsupported MessageContent type: {content.type}")
+
+
+def _convert_output_message_content(content: models.OutputMessageContent) -> Content:
+    """Map an assistant-side SDK ``OutputMessageContent`` node to a framework ``Content``.
+
+    Both supported variants become text content: ``output_text`` contributes
+    its ``text`` field and ``refusal`` contributes its ``refusal`` field. The
+    result is passed through :func:`_attach_content_extras` with the source node.
+
+    Args:
+        content: The SDK content node to convert.
+
+    Returns:
+        The text :class:`agent_framework.Content` built from the node.
+
+    Raises:
+        ValueError: If ``content.type`` is neither ``output_text`` nor ``refusal``.
+    """
+    kind = content.type
+    if kind == "output_text":
+        text = cast(models.OutputMessageContentOutputTextContent, content).text
+    elif kind == "refusal":
+        text = cast(models.OutputMessageContentRefusalContent, content).refusal
+    else:
+        raise ValueError(f"Unsupported OutputMessageContent type: {content.type}")
+    return _attach_content_extras(Content.from_text(text), content)
+
+
+def _attach_content_extras(content: Content, model: Mapping[str, Any]) -> Content:
+    """Copy unrecognised SDK content keys onto :attr:`Content.additional_properties`.
+
+    Content-node counterpart of :func:`_attach_extras`. Keys reported by
+    :func:`_collect_unknown_keys` are merged into the ``EXTRAS_KEY`` bag; when
+    there are none, ``additional_properties`` is left untouched.
+
+    Args:
+        content: The framework content to enrich (mutated in place).
+        model: The SDK content node whose extras should be preserved.
+
+    Returns:
+        The same ``content`` instance.
+    """
+    unknown = _collect_unknown_keys(model)
+    if unknown:
+        bag = content.additional_properties.setdefault(EXTRAS_KEY, {})
+        bag.update(unknown)
+    return content
+
+
+# endregion
+
+
+# region Item → Message (input side)
+
+
+def _items_to_messages(input_items: Sequence[models.Item]) -> list[Message]:
+    """Translate input-side SDK ``Item`` objects into framework ``Message`` objects.
+
+    Each item yields exactly one :class:`agent_framework.Message`; splitting a
+    message further is left to the caller.
+
+    Args:
+        input_items: The input items to convert.
+
+    Returns:
+        The converted messages, in the same order as ``input_items``.
+    """
+    return list(map(_item_to_message, input_items))
+
+
+def _item_to_message(item: models.Item) -> Message:
+    """Convert one input ``Item`` and pass the result through :func:`_capture_raw`.
+
+    The conversion is done by :func:`_item_to_message_inner`; :func:`_capture_raw`
+    gets the message and source item so a :data:`RAW_KEY` snapshot can be kept.
+    """
+    converted = _item_to_message_inner(item)
+    return _capture_raw(converted, item)
+
+
+def _item_to_message_inner(item: models.Item) -> Message:
+    """Convert a single input ``Item`` SDK object to a framework ``Message``.
+
+    Dispatches on ``item.type`` through an explicit chain of ``if`` branches
+    (no auto-discovery) so missing variants are easy to spot. To add a kind:
+
+    1. Add an ``if item.type == "...":`` branch that returns early.
+    2. Reference the corresponding ``models.ItemX`` (or
+       ``models.XItemParam``) type via ``cast(...)``.
+    3. Map its fields onto :class:`agent_framework.Content` factory methods.
+    4. Add a matching ``item.type`` branch in
+       :func:`_output_item_to_message_inner` if the kind is also an output.
+
+    Args:
+        item: The SDK item to convert.
+
+    Returns:
+        The converted message; every branch passes it through
+        :func:`_attach_extras` together with the source ``item``.
+
+    Raises:
+        ValueError: If ``item.type`` matches none of the branches below
+            (i.e. the variant is not yet supported by this adapter).
+    """
+    if item.type == "message":
+        msg = cast(models.ItemMessage, item)
+        if isinstance(msg.content, str):
+            message = Message(role=msg.role, contents=[Content.from_text(msg.content)])
+        else:
+            message = Message(role=msg.role, contents=[_convert_message_content(part) for part in msg.content])
+        return _attach_extras(message, item)
+
+    if item.type == "output_message":
+        output_msg = cast(models.ItemOutputMessage, item)
+        return _attach_extras(
+            Message(
+                role=output_msg.role,
+                contents=[_convert_output_message_content(part) for part in output_msg.content],
+            ),
+            item,
+        )
+
+    if item.type == "function_call":
+        fc = cast(models.ItemFunctionToolCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[Content.from_function_call(fc.call_id, fc.name, arguments=fc.arguments)],
+            ),
+            item,
+        )
+
+    if item.type == "function_call_output":
+        fco = cast(models.FunctionCallOutputItemParam, item)
+        output = fco.output if isinstance(fco.output, str) else str(fco.output)  # stringify non-str output
+        return _attach_extras(
+            Message(role="tool", contents=[Content.from_function_result(fco.call_id, result=output)]),
+            item,
+        )
+
+    if item.type == "reasoning":
+        reasoning = cast(models.ItemReasoningItem, item)
+        reason_contents: list[Content] = []
+        if reasoning.summary:
+            for summary in reasoning.summary:
+                reason_contents.append(Content.from_text(summary.text))  # summaries become plain text
+        return _attach_extras(Message(role="assistant", contents=reason_contents), item)
+
+    if item.type == "mcp_call":
+        mcp = cast(models.ItemMcpToolCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_mcp_server_tool_call(
+                        mcp.id,
+                        mcp.name,
+                        server_name=mcp.server_label,
+                        arguments=mcp.arguments,
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "mcp_approval_request":
+        mcp_req = cast(models.ItemMcpApprovalRequest, item)
+        mcp_call_content = Content.from_mcp_server_tool_call(
+            mcp_req.id,
+            mcp_req.name,
+            server_name=mcp_req.server_label,
+            arguments=mcp_req.arguments,
+        )
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[Content.from_function_approval_request(mcp_req.id, mcp_call_content)],
+            ),
+            item,
+        )
+
+    if item.type == "mcp_approval_response":
+        mcp_resp = cast(models.MCPApprovalResponse, item)
+        placeholder_content = Content.from_function_call(mcp_resp.approval_request_id, "mcp_approval")
+        return _attach_extras(
+            Message(
+                role="user",
+                contents=[
+                    Content.from_function_approval_response(
+                        mcp_resp.approve, mcp_resp.approval_request_id, placeholder_content
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "code_interpreter_call":
+        ci = cast(models.ItemCodeInterpreterToolCall, item)
+        return _attach_extras(
+            Message(role="assistant", contents=[Content.from_code_interpreter_tool_call(call_id=ci.id)]),
+            item,
+        )
+
+    if item.type == "image_generation_call":
+        ig = cast(models.ItemImageGenToolCall, item)
+        return _attach_extras(
+            Message(role="assistant", contents=[Content.from_image_generation_tool_call(image_id=ig.id)]),
+            item,
+        )
+
+    if item.type == "shell_call":
+        sc = cast(models.FunctionShellCallItemParam, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_shell_tool_call(
+                        call_id=sc.call_id,
+                        commands=sc.action.commands,
+                        status=str(sc.status),
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "shell_call_output":
+        sco = cast(models.FunctionShellCallOutputItemParam, item)
+        outputs = [
+            Content.from_shell_command_output(
+                stdout=out.stdout or "",
+                stderr=out.stderr or "",
+                exit_code=getattr(out.outcome, "exit_code", None) if hasattr(out, "outcome") else None,
+            )
+            for out in (sco.output or [])
+        ]
+        return _attach_extras(
+            Message(
+                role="tool",
+                contents=[
+                    Content.from_shell_tool_result(
+                        call_id=sco.call_id,
+                        outputs=outputs,
+                        max_output_length=sco.max_output_length,
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "local_shell_call":
+        lsc = cast(models.ItemLocalShellToolCall, item)
+        commands = lsc.action.command if hasattr(lsc.action, "command") and lsc.action.command else []
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_shell_tool_call(
+                        call_id=lsc.call_id,
+                        commands=commands,
+                        status=str(lsc.status),
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "local_shell_call_output":
+        lsco = cast(models.ItemLocalShellToolCallOutput, item)
+        return _attach_extras(
+            Message(
+                role="tool",
+                contents=[
+                    Content.from_shell_tool_result(
+                        call_id=lsco.id,
+                        outputs=[Content.from_shell_command_output(stdout=lsco.output)],
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "file_search_call":
+        fs = cast(models.ItemFileSearchToolCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_function_call(
+                        fs.id,
+                        "file_search",
+                        arguments=json.dumps({"queries": fs.queries}),
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "web_search_call":
+        ws = cast(models.ItemWebSearchToolCall, item)
+        return _attach_extras(
+            Message(role="assistant", contents=[Content.from_function_call(ws.id, "web_search")]),
+            item,
+        )
+
+    if item.type == "computer_call":
+        cc = cast(models.ItemComputerToolCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_function_call(
+                        cc.call_id,
+                        "computer_use",
+                        arguments=str(cc.action),  # plain str() of the action object
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "computer_call_output":
+        cco = cast(models.ComputerCallOutputItemParam, item)
+        return _attach_extras(
+            Message(role="tool", contents=[Content.from_function_result(cco.call_id, result=str(cco.output))]),
+            item,
+        )
+
+    if item.type == "custom_tool_call":
+        ct = cast(models.ItemCustomToolCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[Content.from_function_call(ct.call_id, ct.name, arguments=ct.input)],
+            ),
+            item,
+        )
+
+    if item.type == "custom_tool_call_output":
+        cto = cast(models.ItemCustomToolCallOutput, item)
+        output = cto.output if isinstance(cto.output, str) else str(cto.output)
+        # Hosted-MCP results land here because the host writes them via
+        # ``aoutput_item_custom_tool_call_output`` (see ``_to_outputs`` for
+        # ``mcp_server_tool_result``). The persisted ``call_id`` keeps its
+        # ``mcp_*`` prefix; on read, route those back to a hosted-MCP
+        # result Content so the chat-client serialize layer can coalesce
+        # them onto a single ``mcp_call`` input item with ``output``
+        # populated. Issue #5546.
+        if cto.call_id and cto.call_id.startswith("mcp_"):
+            return _attach_extras(
+                Message(
+                    role="tool",
+                    contents=[Content.from_mcp_server_tool_result(call_id=cto.call_id, output=output)],
+                ),
+                item,
+            )
+        return _attach_extras(
+            Message(role="tool", contents=[Content.from_function_result(cto.call_id, result=output)]),
+            item,
+        )
+
+    if item.type == "apply_patch_call":
+        ap = cast(models.ApplyPatchToolCallItemParam, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_function_call(
+                        ap.call_id,
+                        "apply_patch",
+                        arguments=str(ap.operation),  # plain str() of the operation object
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "apply_patch_call_output":
+        apo = cast(models.ApplyPatchToolCallOutputItemParam, item)
+        return _attach_extras(
+            Message(role="tool", contents=[Content.from_function_result(apo.call_id, result=apo.output or "")]),
+            item,
+        )
+
+    raise ValueError(f"Unsupported Item type: {item.type}")
+
+
+# endregion
+
+
+# region OutputItem → Message (output / history side)
+
+
+def _output_items_to_messages(history: Sequence[models.OutputItem]) -> list[Message]:
+    """Translate stored SDK ``OutputItem`` objects into framework ``Message`` objects.
+
+    Used by :class:`._history_provider.FoundryHostedAgentHistoryProvider` to
+    turn stored Foundry response items into the message history the agent
+    sees on its next turn.
+
+    Args:
+        history: The output items to convert, oldest-first.
+
+    Returns:
+        One message per item, in the same order as ``history``.
+    """
+    return list(map(_output_item_to_message, history))
+
+
+def _output_item_to_message(item: models.OutputItem) -> Message:
+    """Convert one ``OutputItem`` and pass the result through :func:`_capture_raw`.
+
+    The conversion itself is done by :func:`_output_item_to_message_inner`;
+    :func:`_capture_raw` then receives the message and the source item so a
+    :data:`RAW_KEY` snapshot is available to the write path.
+    """
+    converted = _output_item_to_message_inner(item)
+    return _capture_raw(converted, item)
+
+
+def _output_item_to_message_inner(item: models.OutputItem) -> Message:
+    """Convert a single ``OutputItem`` SDK object to a framework ``Message``.
+
+    Variant table — keep in sync with :func:`_item_to_message_inner` when both
+    sides exist for the same item kind. To add a new variant:
+
+    1. Add an ``if item.type == "...":`` branch here that returns early.
+    2. Reference the corresponding ``models.OutputItemX`` type.
+    3. Map its fields to :class:`agent_framework.Content` factory methods.
+
+    Variants currently **missing** from this dispatch (visible by scanning
+    ``models.OutputItem*`` and comparing against the branches below):
+
+    * ``models.OutputItemCompactionBody`` — context compaction summaries
+    * ``models.OutputItemMcpListTools`` — MCP server ``list_tools`` results
+    * ``models.WorkflowActionOutputItem`` — workflow-channel actions
+    * Any tool-call variant produced by Azure-specific tools
+      (Azure Search, Bing Grounding, SharePoint, Fabric, OpenAPI, A2A,
+      browser automation, memory search, …) — the ``models.*ToolCall``
+      / ``models.*ToolCallOutput`` family.
+
+    Args:
+        item: The SDK item to convert.
+
+    Returns:
+        The converted message, with any unknown extras round-tripped under
+        ``message.additional_properties[EXTRAS_KEY]``.
+
+    Raises:
+        ValueError: If the SDK item ``type`` is not yet supported.
+    """
+    if item.type == "output_message":
+        output_msg = cast(models.OutputItemOutputMessage, item)
+        return _attach_extras(
+            Message(
+                role=output_msg.role,
+                contents=[_convert_output_message_content(part) for part in output_msg.content],
+            ),
+            item,
+        )
+
+    if item.type == "message":
+        msg = cast(models.OutputItemMessage, item)
+        return _attach_extras(
+            Message(role=msg.role, contents=[_convert_message_content(part) for part in msg.content]),
+            item,
+        )
+
+    if item.type == "function_call":
+        fc = cast(models.OutputItemFunctionToolCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[Content.from_function_call(fc.call_id, fc.name, arguments=fc.arguments)],
+            ),
+            item,
+        )
+
+    if item.type == "function_call_output":
+        fco = cast(models.FunctionCallOutputItemParam, item)
+        output = fco.output if isinstance(fco.output, str) else str(fco.output)
+        return _attach_extras(
+            Message(role="tool", contents=[Content.from_function_result(fco.call_id, result=output)]),
+            item,
+        )
+
+    if item.type == "reasoning":
+        reasoning = cast(models.OutputItemReasoningItem, item)
+        contents: list[Content] = []
+        if reasoning.summary:
+            for summary in reasoning.summary:
+                contents.append(Content.from_text(summary.text))
+        return _attach_extras(Message(role="assistant", contents=contents), item)
+
+    if item.type == "mcp_call":
+        mcp = cast(models.OutputItemMcpToolCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_mcp_server_tool_call(
+                        mcp.id,
+                        mcp.name,
+                        server_name=mcp.server_label,
+                        arguments=mcp.arguments,
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "mcp_approval_request":
+        mcp_req = cast(models.OutputItemMcpApprovalRequest, item)
+        mcp_call_content = Content.from_mcp_server_tool_call(
+            mcp_req.id,
+            mcp_req.name,
+            server_name=mcp_req.server_label,
+            arguments=mcp_req.arguments,
+        )
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[Content.from_function_approval_request(mcp_req.id, mcp_call_content)],
+            ),
+            item,
+        )
+
+    if item.type == "mcp_approval_response":
+        mcp_resp = cast(models.OutputItemMcpApprovalResponseResource, item)
+        # Build a placeholder function_call Content since the original call details are not available here.
+        request_id = mcp_resp.approval_request_id
+        placeholder_content = Content.from_function_call(request_id, "mcp_approval")
+        # Key the approval by the request id (not this item's own ``id``), matching the input side.
+        approval = Content.from_function_approval_response(mcp_resp.approve, request_id, placeholder_content)
+        return _attach_extras(
+            Message(role="user", contents=[approval]),
+            item,
+        )
+
+    if item.type == "code_interpreter_call":
+        ci = cast(models.OutputItemCodeInterpreterToolCall, item)
+        return _attach_extras(
+            Message(role="assistant", contents=[Content.from_code_interpreter_tool_call(call_id=ci.id)]),
+            item,
+        )
+
+    if item.type == "image_generation_call":
+        ig = cast(models.OutputItemImageGenToolCall, item)
+        return _attach_extras(
+            Message(role="assistant", contents=[Content.from_image_generation_tool_call(image_id=ig.id)]),
+            item,
+        )
+
+    if item.type == "shell_call":
+        sc = cast(models.OutputItemFunctionShellCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_shell_tool_call(
+                        call_id=sc.call_id,
+                        commands=sc.action.commands,
+                        status=str(sc.status),
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "shell_call_output":
+        sco = cast(models.OutputItemFunctionShellCallOutput, item)
+        outputs = [
+            Content.from_shell_command_output(
+                stdout=out.stdout or "",
+                stderr=out.stderr or "",
+                exit_code=getattr(out.outcome, "exit_code", None) if hasattr(out, "outcome") else None,
+            )
+            for out in (sco.output or [])
+        ]
+        return _attach_extras(
+            Message(
+                role="tool",
+                contents=[
+                    Content.from_shell_tool_result(
+                        call_id=sco.call_id,
+                        outputs=outputs,
+                        max_output_length=sco.max_output_length,
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "local_shell_call":
+        lsc = cast(models.OutputItemLocalShellToolCall, item)
+        commands = lsc.action.command if hasattr(lsc.action, "command") and lsc.action.command else []
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_shell_tool_call(
+                        call_id=lsc.call_id,
+                        commands=commands,
+                        status=str(lsc.status),
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "local_shell_call_output":
+        lsco = cast(models.OutputItemLocalShellToolCallOutput, item)
+        return _attach_extras(
+            Message(
+                role="tool",
+                contents=[
+                    Content.from_shell_tool_result(
+                        call_id=lsco.id,
+                        outputs=[Content.from_shell_command_output(stdout=lsco.output)],
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "file_search_call":
+        fs = cast(models.OutputItemFileSearchToolCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_function_call(
+                        fs.id,
+                        "file_search",
+                        arguments=json.dumps({"queries": fs.queries}),
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "web_search_call":
+        ws = cast(models.OutputItemWebSearchToolCall, item)
+        return _attach_extras(
+            Message(role="assistant", contents=[Content.from_function_call(ws.id, "web_search")]),
+            item,
+        )
+
+    if item.type == "computer_call":
+        cc = cast(models.OutputItemComputerToolCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_function_call(
+                        cc.call_id,
+                        "computer_use",
+                        arguments=str(cc.action),
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "computer_call_output":
+        cco = cast(models.OutputItemComputerToolCallOutputResource, item)
+        return _attach_extras(
+            Message(role="tool", contents=[Content.from_function_result(cco.call_id, result=str(cco.output))]),
+            item,
+        )
+
+    if item.type == "custom_tool_call":
+        ct = cast(models.OutputItemCustomToolCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[Content.from_function_call(ct.call_id, ct.name, arguments=ct.input)],
+            ),
+            item,
+        )
+
+    if item.type == "custom_tool_call_output":
+        cto = cast(models.OutputItemCustomToolCallOutput, item)
+        output = cto.output if isinstance(cto.output, str) else str(cto.output)
+        # Hosted-MCP results land here because the host writes them via
+        # ``aoutput_item_custom_tool_call_output``. Route ``mcp_*``
+        # call_ids back to a hosted-MCP result Content so the chat-client
+        # serialize layer can coalesce onto the matching ``mcp_call``
+        # input item. Issue #5546.
+        if cto.call_id and cto.call_id.startswith("mcp_"):
+            return _attach_extras(
+                Message(
+                    role="tool",
+                    contents=[Content.from_mcp_server_tool_result(call_id=cto.call_id, output=output)],
+                ),
+                item,
+            )
+        return _attach_extras(
+            Message(role="tool", contents=[Content.from_function_result(cto.call_id, result=output)]),
+            item,
+        )
+
+    if item.type == "apply_patch_call":
+        ap = cast(models.OutputItemApplyPatchToolCall, item)
+        return _attach_extras(
+            Message(
+                role="assistant",
+                contents=[
+                    Content.from_function_call(
+                        ap.call_id,
+                        "apply_patch",
+                        arguments=str(ap.operation),
+                    )
+                ],
+            ),
+            item,
+        )
+
+    if item.type == "apply_patch_call_output":
+        apo = cast(models.OutputItemApplyPatchToolCallOutput, item)
+        return _attach_extras(
+            Message(role="tool", contents=[Content.from_function_result(apo.call_id, result=apo.output or "")]),
+            item,
+        )
+
+    if item.type == "oauth_consent_request":
+        oauth = cast(models.OAuthConsentRequestOutputItem, item)
+        return _attach_extras(
+            Message(role="assistant", contents=[Content.from_oauth_consent_request(oauth.consent_link)]),
+            item,
+        )
+
+    if item.type == "structured_outputs":
+        so = cast(models.StructuredOutputsOutputItem, item)
+        text = json.dumps(so.output) if not isinstance(so.output, str) else so.output
+        return _attach_extras(Message(role="assistant", contents=[Content.from_text(text)]), item)
+
+    raise ValueError(f"Unsupported OutputItem type: {item.type}")
+
+
+# endregion
+
+
+# region AF Message → SDK OutputItem (write path)
+
+
+def _message_text(message: Message) -> str:
+    """Flatten a :class:`Message` into one text string.
+
+    Concatenates, in order and without a separator, the ``text`` attribute of
+    every content that has a non-empty string there; contents without one are
+    skipped. The Foundry storage write path only persists the user-visible
+    text — the same compression the Responses runtime applies on its own
+    write side.
+
+    If no content contributes text, falls back to ``message.text`` (or ``""``).
+    """
+    texts = (getattr(part, "text", None) for part in message.contents)
+    combined = "".join(t for t in texts if isinstance(t, str) and t)
+    if combined:
+        return combined
+    # Fallback: surface ``Message.text`` if the framework knows how to
+    # render the contents (covers structured contents that synthesise text).
+    return message.text or ""
+
+
+def _message_to_output_item(message: Message, item_id: str) -> models.OutputItem:
+    """Convert a single :class:`Message` to a Foundry SDK :class:`OutputItem`.
+
+    Two-tier strategy:
+
+    1. **Lossless replay** — if the message carries a previously-captured
+       raw SDK snapshot under ``additional_properties[EXTRAS_KEY][RAW_KEY]``
+       (set by :func:`_capture_raw` on the read path), rebuild the SDK
+       item from that snapshot via the model registry's discriminator
+       dispatch (:meth:`models.OutputItem._deserialize`). The snapshot's
+       ``id`` is rewritten to ``item_id`` so each write turn gets a
+       unique storage row, but every other declared field — content
+       variants (citations, reasoning, tool calls, function results,
+       …) AND any undeclared extras Foundry layered on top — survives
+       intact. This is the auditable round-trip the Foundry storage
+       backend relies on.
+
+    2. **Synthesise from text** — for messages constructed in user code
+       (no raw snapshot), fall back to the text-only path. ``assistant``
+       maps to :class:`OutputItemOutputMessage` (output_text content,
+       ``status="completed"``); anything else maps to
+       :class:`OutputItemMessage` with the role normalised onto the
+       enum's three accepted values (``user`` / ``system`` /
+       ``developer`` — ``tool`` collapses to ``user`` because the
+       discriminator forbids it).
+
+    In both branches:
+
+    * ``additional_properties[EXTRAS_KEY]`` extras other than the raw
+      snapshot are layered onto the emitted model via
+      :func:`_inject_extras` so message-level Foundry annotations
+      round-trip.
+    * **Every other ``additional_properties`` namespace** (notably the
+      Hosting spec's ``hosting`` envelope — channel, identity,
+      response_target, initial-write ``deliveries[]`` — plus any future
+      AF namespaces) is funneled into a single
+      :data:`AF_EXTRAS_KEY` container key on the SDK item via
+      :func:`_inject_af_extras`. Foundry storage round-trips that key
+      as opaque JSON, and :func:`_attach_extras` peels each sub-key
+      back onto its original namespace on load. This is what makes the
+      audit/replay envelope from the Hosting spec durable across
+      Foundry-storage save/load cycles.
+    """
+    extras_raw: Any = (message.additional_properties or {}).get(EXTRAS_KEY) or {}  # None/missing props -> {}
+    extras: dict[str, Any] = dict(cast("Mapping[str, Any]", extras_raw)) if isinstance(extras_raw, Mapping) else {}
+    raw_snapshot: Any = extras.get(RAW_KEY)
+    af_extras = _collect_af_extras(message)
+
+    if isinstance(raw_snapshot, Mapping):
+        # ``_deserialize`` does discriminator dispatch and tolerates
+        # extras-bearing mappings; bypassing it (constructing the
+        # concrete class directly) would lose the discriminator wiring
+        # and break round-trip for tool-call / reasoning / ... variants.
+        snapshot: dict[str, Any] = dict(cast("Mapping[str, Any]", raw_snapshot))
+        snapshot["id"] = item_id  # re-key the copy; the message's own snapshot is left untouched
+        deserialize = cast(Any, models.OutputItem)._deserialize  # private SDK hook, hence the Any cast
+        item = cast("models.OutputItem", deserialize(snapshot, []))  # NOTE(review): confirm meaning of the [] arg
+        return cast(
+            "models.OutputItem",
+            _inject_af_extras(_inject_extras(item, extras), af_extras),
+        )
+
+    text = _message_text(message)
+    # ``Message.role`` is an unconstrained ``str | enum`` slot — the
+    # framework keeps whatever the constructor was handed (str literals
+    # round-trip as ``str``; converters that pass the SDK's
+    # ``MessageRole`` enum store the enum). Normalise to the enum's
+    # ``value`` (or the bare string) so we don't end up writing
+    # ``"MessageRole.USER"`` to storage.
+    role_str = getattr(message.role, "value", message.role)
+
+    # Construct via the mapping overload — the SDK's keyword overload tags
+    # ``content`` with the abstract base type and rejects our concrete list.
+    if role_str == "assistant":
+        item = models.OutputItemOutputMessage({
+            "id": item_id,
+            "type": "output_message",
+            "role": "assistant",
+            "status": "completed",
+            "content": [
+                {"type": "output_text", "text": text, "annotations": [], "logprobs": []},
+            ],
+        })
+    else:
+        # OutputItemMessage's role enum admits "user" / "system" /
+        # "developer". Anything outside that set (e.g. "tool") collapses to
+        # "user" so we don't crash on the SDK's discriminator validation.
+        role_value = role_str if role_str in ("user", "system", "developer") else "user"
+        item = models.OutputItemMessage({
+            "id": item_id,
+            "type": "message",
+            "role": role_value,
+            "status": "completed",
+            "content": [
+                {"type": "input_text", "text": text},
+            ],
+        })
+    return cast("models.OutputItem", _inject_af_extras(_inject_extras(item, extras), af_extras))
+
+
+def _messages_to_output_items(messages: Sequence[Message], *, id_prefix: str) -> list[models.OutputItem]:
+    """Convert a batch of messages to Foundry SDK items with stable IDs.
+
+    Each message gets a deterministic id of the form ``{id_prefix}_itm_{i}``,
+    where ``i`` is its zero-based position in ``messages``. Callers (typically
+    :meth:`FoundryHostedAgentHistoryProvider.save_messages`) derive ``id_prefix``
+    from the response id they persist under so item ids are unique across a conversation.
+    """
+    return [_message_to_output_item(msg, f"{id_prefix}_itm_{i}") for i, msg in enumerate(messages)]
+
+
+# endregion
diff --git a/python/packages/foundry_hosting/tests/test_history_provider.py b/python/packages/foundry_hosting/tests/test_history_provider.py
new file mode 100644
index 0000000000..6b6c4c42db
--- /dev/null
+++ b/python/packages/foundry_hosting/tests/test_history_provider.py
@@ -0,0 +1,969 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+"""Unit tests for FoundryHostedAgentHistoryProvider."""
+
+from __future__ import annotations
+
+import os
+import time
+from collections.abc import Iterable
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from agent_framework import Content, HistoryProvider, Message
+from azure.ai.agentserver.responses import (
+    FoundryStorageProvider,
+    InMemoryResponseProvider,
+    IsolationContext,
+)
+from azure.ai.agentserver.responses.models import (
+    OutputItem,
+    OutputItemOutputMessage,
+    OutputMessageContentOutputTextContent,
+)
+from azure.ai.agentserver.responses.store._foundry_errors import (  # pyright: ignore[reportPrivateUsage]
+    FoundryBadRequestError,
+)
+
+from agent_framework_foundry_hosting import FoundryHostedAgentHistoryProvider
+from agent_framework_foundry_hosting._history_provider import (  # pyright: ignore[reportPrivateUsage]
+    get_current_isolation,
+    reset_current_isolation,
+    set_current_isolation,
+)
+
+
+def _with_backend(prov: FoundryHostedAgentHistoryProvider, backend: Any) -> FoundryHostedAgentHistoryProvider:
+    """Inject a fake backend into ``prov`` so ``_resolve_backend`` returns it.
+
+    Returns the same ``prov`` instance so calls can be nested inline. Replaces the old ``backend=``
+    constructor parameter removed when the dual-backend model was collapsed onto ``FoundryStorageProvider``.
+    """
+    prov._backend = backend  # pyright: ignore[reportPrivateUsage]
+    return prov
+
+
+# region Helpers
+
+
+def _make_text_item(item_id: str, text: str) -> OutputItemOutputMessage:  # completed assistant text item
+    return OutputItemOutputMessage(
+        id=item_id,
+        type="output_message",
+        role="assistant",
+        status="completed",
+        content=[OutputMessageContentOutputTextContent(type="output_text", text=text, annotations=[])],
+    )
+
+
+def _make_fake_backend(
+    *,
+    history_ids: list[str] | None = None,
+    items: list[OutputItem | None] | None = None,
+) -> MagicMock:
+    """Build a MagicMock matching the _StorageBackend protocol; reads return ``history_ids`` / ``items``."""
+    backend = MagicMock()
+
+    async def _ids(*args: Any, **kwargs: Any) -> list[str]:  # arguments ignored; fresh copy per call
+        return list(history_ids or [])
+
+    async def _items(item_ids: Iterable[str], *, isolation: IsolationContext | None = None) -> list[OutputItem | None]:
+        return list(items or [])
+
+    backend.get_history_item_ids = AsyncMock(side_effect=_ids)
+    backend.get_items = AsyncMock(side_effect=_items)
+    backend.create_response = AsyncMock()  # writes are only recorded on the mock, never stored
+    return backend
+
+
+class _FakeAccessToken:  # bare token value plus an absolute expiry timestamp
+    def __init__(self, token: str, *, expires_in: float = 3600.0) -> None:
+        self.token = token
+        self.expires_on = int(time.time() + expires_in)  # epoch seconds: now + ``expires_in``
+
+
+class _FakeCredential:
+    """Minimal AsyncTokenCredential stand-in."""
+
+    def __init__(self, *, token: str = "fake-token", expires_in: float = 3600.0) -> None:
+        self._token = token
+        self._expires_in = expires_in
+        self.calls: list[tuple[str, ...]] = []  # scopes of every ``get_token`` call, in order
+
+    async def get_token(self, *scopes: str) -> _FakeAccessToken:
+        self.calls.append(scopes)
+        return _FakeAccessToken(self._token, expires_in=self._expires_in)  # new token object on each call
+
+
+# region Construction
+
+
+class TestConstruction:
+    """Constructor + class-level invariants."""
+
+    def test_defaults(self) -> None:
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), _make_fake_backend())
+        assert isinstance(prov, HistoryProvider)
+        assert prov.source_id == FoundryHostedAgentHistoryProvider.DEFAULT_SOURCE_ID
+        assert prov.store_inputs is True
+        assert prov.store_outputs is True
+        assert prov.load_messages is True
+
+    def test_is_hosted_environment_reads_env(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False)  # unset => not hosted
+        assert FoundryHostedAgentHistoryProvider.is_hosted_environment() is False
+        monkeypatch.setenv("FOUNDRY_HOSTING_ENVIRONMENT", "1")  # "1" => hosted
+        assert FoundryHostedAgentHistoryProvider.is_hosted_environment() is True
+
+    def test_endpoint_falls_back_to_env(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        monkeypatch.setenv("FOUNDRY_PROJECT_ENDPOINT", "https://example.foundry.azure.com")
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), _make_fake_backend())
+        assert prov._endpoint == "https://example.foundry.azure.com"  # pyright: ignore[reportPrivateUsage]
+
+
+# region Backend resolution
+
+
+class TestBackendResolution:
+    """Lazy backend construction + local fallback."""
+
+    def test_uses_explicit_backend(self) -> None:
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        assert prov._resolve_backend() is backend  # pyright: ignore[reportPrivateUsage]
+
+    def test_local_fallback_when_not_hosted(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False)
+        prov = FoundryHostedAgentHistoryProvider()
+        resolved = prov._resolve_backend()  # pyright: ignore[reportPrivateUsage]
+        assert isinstance(resolved, InMemoryResponseProvider)
+        # Cached on subsequent calls.
+        assert prov._resolve_backend() is resolved  # pyright: ignore[reportPrivateUsage]
+
+    def test_hosted_without_credential_raises(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        monkeypatch.setenv("FOUNDRY_HOSTING_ENVIRONMENT", "1")
+        monkeypatch.setenv("FOUNDRY_PROJECT_ENDPOINT", "https://x.foundry.azure.com")
+        prov = FoundryHostedAgentHistoryProvider()  # hosted + endpoint set, but no credential supplied
+        with pytest.raises(RuntimeError, match="requires an async credential"):
+            prov._resolve_backend()  # pyright: ignore[reportPrivateUsage]
+
+    def test_hosted_without_endpoint_raises(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        monkeypatch.setenv("FOUNDRY_HOSTING_ENVIRONMENT", "1")
+        monkeypatch.delenv("FOUNDRY_PROJECT_ENDPOINT", raising=False)
+        prov = FoundryHostedAgentHistoryProvider(credential=_FakeCredential())  # type: ignore[arg-type]
+        with pytest.raises(RuntimeError, match="needs a Foundry project endpoint"):
+            prov._resolve_backend()  # pyright: ignore[reportPrivateUsage]
+
+    def test_hosted_builds_http_backend(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        monkeypatch.setenv("FOUNDRY_HOSTING_ENVIRONMENT", "1")
+        monkeypatch.setenv("FOUNDRY_PROJECT_ENDPOINT", "https://x.foundry.azure.com")
+        prov = FoundryHostedAgentHistoryProvider(credential=_FakeCredential())  # type: ignore[arg-type]
+        resolved = prov._resolve_backend()  # pyright: ignore[reportPrivateUsage]
+        assert isinstance(resolved, FoundryStorageProvider)  # SDK provider, despite "http" in the test name
+
+
+# region get_messages
+
+
+class TestGetMessages:  # read path: session anchor -> history item ids -> items -> Messages
+    async def test_no_session_id_returns_empty(self) -> None:
+        backend = _make_fake_backend(history_ids=["x"], items=[_make_text_item("x", "hi")])
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        assert await prov.get_messages(None) == []
+        assert await prov.get_messages("") == []
+        backend.get_history_item_ids.assert_not_called()  # None and "" both return before touching storage
+
+    async def test_no_history_returns_empty(self) -> None:
+        backend = _make_fake_backend(history_ids=[])
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        assert await prov.get_messages("resp_123") == []
+        backend.get_items.assert_not_called()  # no ids => no item fetch
+
+    async def test_loads_and_converts(self) -> None:
+        items: list[OutputItem | None] = [_make_text_item("itm_1", "hello"), _make_text_item("itm_2", "world")]
+        backend = _make_fake_backend(history_ids=["itm_1", "itm_2"], items=items)
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+
+        messages = await prov.get_messages("resp_123")
+        assert len(messages) == 2
+        assert all(isinstance(m, Message) for m in messages)
+        assert messages[0].text == "hello"
+        assert messages[1].text == "world"
+
+        backend.get_history_item_ids.assert_awaited_once()
+        call = backend.get_history_item_ids.await_args
+        assert call.args[0] == "resp_123"  # session id is passed through as the first positional argument
+        assert call.args[1] is None  # conversation_id
+        assert call.args[2] == 100  # default history_limit
+
+    async def test_drops_missing_items(self) -> None:
+        backend = _make_fake_backend(
+            history_ids=["a", "b", "c"],
+            items=[_make_text_item("a", "first"), None, _make_text_item("c", "third")],  # None = missing item
+        )
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        messages = await prov.get_messages("resp_x")
+        assert [m.text for m in messages] == ["first", "third"]
+
+    async def test_history_limit_propagates(self) -> None:
+        backend = _make_fake_backend(history_ids=[])
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(history_limit=7), backend)
+        # ``resp_*``-shaped session anchors directly; we expect a single
+        # backend call carrying the configured limit.
+        await prov.get_messages("resp_s")
+        assert backend.get_history_item_ids.await_count == 1
+        assert backend.get_history_item_ids.await_args.args[2] == 7
+
+    async def test_non_resp_session_skips_storage_probe(self) -> None:
+        """Non-``resp_*`` session ids (e.g. opaque chat-isolation keys)
+        are not valid storage anchors — the provider must skip the
+        backend probe entirely so we don't hit "Malformed identifier"
+        HTTP 400s, returning an empty history instead.
+        """
+        backend = _make_fake_backend(history_ids=[])
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        messages = await prov.get_messages("5leZSsJ3m1UtB-JW3m3iowFd5_zqP30SE0MmGUEkcGQ")
+        assert messages == []
+        backend.get_history_item_ids.assert_not_awaited()
+
+    async def test_resp_probe_tolerates_400(self) -> None:
+        """A 400 on the storage probe must not abort ``get_messages`` —
+        the provider falls through to an empty history."""
+        backend = _make_fake_backend()
+        backend.get_history_item_ids.side_effect = FoundryBadRequestError("malformed", response_body=None)
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        messages = await prov.get_messages("resp_x")
+        assert messages == []
+
+
+# region IsolationContext
+
+
+class TestIsolationContext:  # which ``isolation`` value reaches the backend calls
+    async def test_explicit_isolation_kwarg_wins(self) -> None:
+        backend = _make_fake_backend(history_ids=[])
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        explicit = IsolationContext(user_key="u-explicit", chat_key="c-explicit")
+        await prov.get_messages("resp_s", isolation=explicit)
+        assert backend.get_history_item_ids.await_args.kwargs["isolation"] is explicit
+
+    async def test_contextvar_picked_up(self) -> None:
+        backend = _make_fake_backend(history_ids=["a"], items=[_make_text_item("a", "x")])
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        ctx = IsolationContext(user_key="u-1", chat_key="c-1")
+        token = set_current_isolation(ctx)
+        try:
+            assert get_current_isolation() is ctx  # sanity check: the binding is visible before the call
+            await prov.get_messages("resp_s")
+        finally:
+            reset_current_isolation(token)  # always unbind so later tests start clean
+        assert backend.get_history_item_ids.await_args.kwargs["isolation"] is ctx
+        assert backend.get_items.await_args.kwargs["isolation"] is ctx
+
+    async def test_no_isolation_when_unset(self) -> None:
+        backend = _make_fake_backend(history_ids=[])
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        await prov.get_messages("resp_s")
+        assert backend.get_history_item_ids.await_args.kwargs["isolation"] is None
+
+    async def test_host_isolation_keys_picked_up(self) -> None:
+        """The host's ASGI middleware lifts the
+        ``x-agent-{user,chat}-isolation-key`` headers into a contextvar
+        exposed by ``agent_framework_hosting``. The provider lifts that
+        into its own ``IsolationContext`` so the storage call carries
+        the platform partition keys without channels having to forward
+        anything (or even know the headers exist)."""
+        pytest.importorskip("agent_framework_hosting")  # optional package; skip the test when it is absent
+        from agent_framework_hosting import (
+            IsolationKeys,
+            reset_current_isolation_keys,
+            set_current_isolation_keys,
+        )
+
+        backend = _make_fake_backend(history_ids=["a"], items=[_make_text_item("a", "x")])
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        token = set_current_isolation_keys(IsolationKeys(user_key="u-3", chat_key="c-3"))
+        try:
+            await prov.get_messages("resp_s")
+        finally:
+            reset_current_isolation_keys(token)
+        applied = backend.get_history_item_ids.await_args.kwargs["isolation"]
+        assert applied is not None
+        assert applied.user_key == "u-3"
+        assert applied.chat_key == "c-3"
+
+
+# region save_messages
+
+
+class TestSaveMessages:  # write path: Messages -> ``create_response`` on the backend
+    async def test_save_messages_writes_to_backend_when_bound(self) -> None:
+        """``save_messages`` writes a ``create_response`` envelope using
+        the host-bound response_id when present.
+
+        The host's ``_bind_request_context`` plumbs the channel-minted
+        ``response_id`` (and prior turn's ``previous_response_id``) into
+        the provider via :func:`bind_request_context`, so the channel
+        envelope and the storage write share a single id per turn —
+        which is what makes the next turn's ``previous_response_id``
+        walkable.
+        """
+        from agent_framework_foundry_hosting import bind_request_context
+
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        msg = Message(role="assistant", contents=[Content.from_text("hello")])
+        with bind_request_context(response_id="resp_bound_1", previous_response_id=None):
+            await prov.save_messages("session-x", [msg])
+
+        backend.create_response.assert_awaited_once()
+        call = backend.create_response.await_args
+        response = call.args[0]
+        assert response.id == "resp_bound_1"
+        # Conversation is intentionally omitted — Foundry isolation
+        # headers handle partitioning; cross-turn chaining is via the
+        # response-id chain only.
+        assert response.conversation is None
+        # Assistant outputs go on ``response.output``, not ``input_items``
+        # — mirrors the agentserver runtime split (see
+        # ``_resolve_input_items_for_persistence``).
+        assert call.kwargs["input_items"] == []
+        output = response.output or []
+        assert len(output) == 1
+        assert output[0]["type"] == "output_message"
+
+    async def test_save_messages_falls_back_to_session_id_when_unbound(self) -> None:
+        """Without a host binding (e.g. local dev), ``save_messages``
+        mints a fresh ``caresp_*`` envelope and only chains when the
+        ``session_id`` is itself ``resp_*``-shaped."""
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        msg = Message(role="user", contents=[Content.from_text("hi")])
+        await prov.save_messages("resp_prev", [msg])
+
+        backend.create_response.assert_awaited_once()
+        call = backend.create_response.await_args
+        response = call.args[0]
+        assert response.id.startswith("caresp_")  # freshly minted id, not the session id itself
+        # Provider walked the prior chain to seed history_item_ids; the
+        # fake backend returns ``[]`` so this stays empty but the call
+        # was made.
+        assert backend.get_history_item_ids.await_count == 1
+        assert backend.get_history_item_ids.await_args.args[0] == "resp_prev"
+
+    async def test_save_messages_empty_short_circuits(self) -> None:
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        await prov.save_messages("s", [])
+        backend.create_response.assert_not_called()
+
+    async def test_save_messages_no_session_short_circuits(self) -> None:
+        """No session id and no host binding → nothing to anchor against,
+        skip the write."""
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        await prov.save_messages(None, [Message(role="user", contents=[Content.from_text("hi")])])
+        backend.create_response.assert_not_called()
+
+    async def test_save_messages_swallows_backend_errors(self) -> None:
+        """Persistence is best-effort — backend failures must NOT propagate.
+
+        A successful agent turn that hits a transient storage error
+        (RBAC propagation lag, throttling, …) should still return a 2xx
+        to the caller; we only log so operators can spot systematic
+        failures.
+        """
+        backend = _make_fake_backend()
+        backend.create_response.side_effect = RuntimeError("simulated 500 from storage")
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        # Must not raise.
+        await prov.save_messages("resp_session_x", [Message(role="user", contents=[Content.from_text("hi")])])
+        backend.create_response.assert_awaited_once()
+
+    async def test_save_then_get_round_trip_via_in_memory_backend(self) -> None:
+        """End-to-end save→get round-trip through ``InMemoryResponseProvider``.
+
+        Mirrors the host-bound multi-turn flow: turn 1 binds a fresh
+        response id; turn 2 binds a new response id with the prior id
+        as ``previous_response_id``. ``get_messages`` on turn 2 is
+        called with the prior anchor and must return both turns.
+        """
+        from agent_framework_foundry_hosting import bind_request_context
+
+        backend = InMemoryResponseProvider()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+
+        with bind_request_context(response_id="resp_turn1", previous_response_id=None):
+            await prov.save_messages(
+                "resp_turn1",
+                [Message(role="user", contents=[Content.from_text("ping")])],
+            )
+
+        with bind_request_context(response_id="resp_turn2", previous_response_id="resp_turn1"):
+            history = await prov.get_messages("resp_turn1")
+            assert [m.text for m in history] == ["ping"]
+            await prov.save_messages(
+                "resp_turn2",
+                [Message(role="assistant", contents=[Content.from_text("pong")])],
+            )
+
+        # Final read for turn 3: walking turn 2 must reveal both turns.
+        with bind_request_context(response_id="resp_turn3", previous_response_id="resp_turn2"):
+            messages = await prov.get_messages("resp_turn2")
+        assert [m.text for m in messages] == ["ping", "pong"]
+        roles = [getattr(m.role, "value", m.role) for m in messages]
+        assert roles == ["user", "assistant"]
+
+
+# region aclose
+
+
+class TestAclose:
+    async def test_closes_backend_with_aclose(self) -> None:
+        # Provider always closes whatever backend is currently bound;
+        # the dual-mode (external vs owned) distinction was dropped
+        # along with the ``backend=`` constructor param.
+        backend = _make_fake_backend()
+        backend.aclose = AsyncMock()  # explicit AsyncMock so the close call can be awaited and asserted
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        prov._resolve_backend()  # pyright: ignore[reportPrivateUsage]
+        await prov.aclose()
+        backend.aclose.assert_awaited_once()
+
+    async def test_aclose_idempotent(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False)
+        prov = FoundryHostedAgentHistoryProvider()  # no injected backend; resolved lazily on the next line
+        prov._resolve_backend()  # pyright: ignore[reportPrivateUsage]
+        await prov.aclose()
+        await prov.aclose()  # idempotent — second call is a no-op
+
+
+# region Shared module re-exports
+
+
+class TestSharedReExports:
+    """`_responses.py` must re-export the conversion helpers so tests and
+    downstream code that historically imported them keep working."""
+
+    def test_responses_re_exports_helpers(self) -> None:
+        # All of these used to live in ``_responses``; after the
+        # refactor they live in ``_shared`` but are re-exported.
+        from agent_framework_foundry_hosting import (
+            _responses,  # pyright: ignore[reportPrivateUsage]
+            _shared,  # pyright: ignore[reportPrivateUsage]
+        )
+
+        for name in (
+            "_arguments_to_str",
+            "_convert_message_content",
+            "_convert_output_message_content",
+            "_item_to_message",
+            "_items_to_messages",
+            "_output_item_to_message",
+            "_output_items_to_messages",
+        ):
+            assert getattr(_responses, name) is getattr(_shared, name), (  # same object, not a copy
+                f"{name} should be re-exported from _responses for backwards compat"
+            )
+
+
+# region Full AF ↔ Foundry round-trip via InMemoryResponseProvider
+
+
+class TestAfFoundryRoundTrip:
+    """Round-trip two AF :class:`Message` instances through the Foundry SDK
+    types and back via the real :class:`InMemoryResponseProvider` backend.
+
+    This is the same backend the provider uses in its local-fallback path
+    (i.e. the one that runs whenever ``FOUNDRY_HOSTING_ENVIRONMENT`` is
+    unset), so this test gives us coverage of the
+    "AF → Foundry SDK shape → storage → Foundry SDK shape → AF" pipeline
+    using exactly the production conversion code in :mod:`._shared`.
+    """
+
+    @staticmethod
+    def _af_message(text: str, item_id: str) -> tuple[Message, OutputItem]:
+        """Build an AF ``Message`` and the matching Foundry ``OutputItem``.
+
+        Both messages are assistant ``output_message`` items because that's
+        the only OutputItem variant we round-trip through here — this test
+        exercises the conversion path, not every input/output shape.
+        """
+        from agent_framework import Content  # NOTE(review): duplicates the module-level ``Content`` import
+
+        af_message = Message(role="assistant", contents=[Content.from_text(text)])
+        foundry_item = OutputItemOutputMessage(
+            id=item_id,
+            type="output_message",
+            role="assistant",
+            status="completed",
+            content=[OutputMessageContentOutputTextContent(type="output_text", text=text, annotations=[])],
+        )
+        return af_message, foundry_item  # same ``text`` in both, so the pair is directly comparable
+
+    async def test_two_messages_round_trip_through_in_memory_backend(self) -> None:
+        from azure.ai.agentserver.responses.models import ResponseObject  # only used to seed the backend
+
+        # 1. Start from two AF Messages (the "outside world" shape).
+        original_first, foundry_first = self._af_message("First message: 2 + 2 equals 4.", "itm_1")
+        original_second, foundry_second = self._af_message("Second message: 3 + 5 equals 8.", "itm_2")
+
+        # 2. Hand the Foundry items to the real in-memory storage backend
+        #    via the same ``create_response`` API the agent-server runtime
+        #    uses on every successful turn. Passing them as ``input_items``
+        #    is enough — the in-memory backend records each item under its
+        #    own id and exposes it via ``get_history_item_ids``.
+        backend = InMemoryResponseProvider()
+        response = ResponseObject(
+            id="resp_round_trip",
+            object="response",
+            status="completed",
+            model="test-model",
+            created_at=0,
+        )
+        await backend.create_response(
+            response,
+            input_items=[foundry_first, foundry_second],
+            history_item_ids=None,
+        )
+
+        # 3. Wire the provider to the seeded backend (no HTTP, no
+        #    credential needed — this exercises the local-mode contract).
+        provider = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+
+        # 4. Retrieve via the public API. Internally this fans out:
+        #    backend.get_history_item_ids → backend.get_items
+        #    → ``_output_items_to_messages`` from ``_shared`` → AF Messages.
+        retrieved = await provider.get_messages("resp_round_trip")
+
+        # 5. Round-trip preserves role + text content for both messages.
+        assert len(retrieved) == 2
+        assert all(isinstance(m, Message) for m in retrieved)
+
+        assert retrieved[0].role == original_first.role
+        assert retrieved[0].text == original_first.text == "First message: 2 + 2 equals 4."
+
+        assert retrieved[1].role == original_second.role
+        assert retrieved[1].text == original_second.text == "Second message: 3 + 5 equals 8."
+
+    async def test_additional_properties_round_trip_through_in_memory_backend(self) -> None:
+        """End-to-end audit/replay verification via the public provider API.
+
+        Seeds the in-memory backend with an :class:`OutputItemOutputMessage`
+        carrying:
+
+        * a non-default item id;
+        * declared content fields (``output_text`` with annotations);
+        * a non-default ``status``;
+        * an arbitrary, undeclared top-level key
+          (``"audit_trace_id": "..."``) — i.e. the kind of opaque field
+          Foundry might layer on for audit/replay;
+        * an undeclared key on a content child
+          (``"vendor_metadata": {...}``).
+
+        Reads the items back through ``get_messages`` (which captures the
+        :data:`RAW_KEY` snapshot), then writes them via ``save_messages``
+        (which re-emits via the snapshot), then reads again and asserts
+        every field above survives the storage → AF → storage hop. Without
+        the raw-snapshot path, the second read would see synthesised
+        text-only items with newly-minted ids and lose every audit field.
+        """
+        from azure.ai.agentserver.responses.models import ResponseObject
+
+        from agent_framework_foundry_hosting._shared import EXTRAS_KEY, RAW_KEY  # pyright: ignore[reportPrivateUsage]
+
+        backend = InMemoryResponseProvider()
+        original_id = "itm_audit_001"
+        seed_item = OutputItemOutputMessage(
+            id=original_id,
+            type="output_message",
+            role="assistant",
+            status="completed",
+            content=[
+                OutputMessageContentOutputTextContent(
+                    type="output_text",
+                    text="The final answer is 42.",
+                    annotations=[],
+                )
+            ],
+        )
+        # Layer audit fields onto the SDK model directly — these are the
+        # "extras" that pyright would warn about but the runtime
+        # round-trips faithfully via as_dict().
+        seed_item["audit_trace_id"] = "trace-abc-123"
+        seed_item.content[0]["vendor_metadata"] = {"score": 0.97, "model": "gpt-x"}
+
+        seed_response = ResponseObject(
+            id="resp_audit",
+            object="response",
+            status="completed",
+            model="test-model",
+            created_at=0,
+        )
+        await backend.create_response(seed_response, input_items=[seed_item], history_item_ids=None)
+
+        provider = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+
+        # 1. Read back — provider stamps the RAW_KEY snapshot onto the
+        #    AF Message's additional_properties.
+        first_read = await provider.get_messages("resp_audit")
+        assert len(first_read) == 1
+        msg = first_read[0]
+        raw = msg.additional_properties[EXTRAS_KEY][RAW_KEY]
+        assert raw["id"] == original_id
+        assert raw["type"] == "output_message"
+        assert raw["audit_trace_id"] == "trace-abc-123"
+        assert raw["content"][0]["text"] == "The final answer is 42."
+        assert raw["content"][0]["vendor_metadata"] == {"score": 0.97, "model": "gpt-x"}
+
+        # 2. Write back — this is where the snapshot-driven write path
+        #    matters: save_messages mints a new response_id but must
+        #    re-emit the SDK item from the captured raw shape.
+        from agent_framework_foundry_hosting import bind_request_context
+
+        with bind_request_context(response_id="resp_audit_replay", previous_response_id="resp_audit"):
+            await provider.save_messages("resp_audit_replay", [msg])
+
+        # 3. Inspect what was stored. We walk the new response id and
+        #    expect to see the prior history seeded plus the replayed
+        #    message — proof the snapshot survived storage→AF→storage.
+        item_ids = await backend.get_history_item_ids(
+            previous_response_id="resp_audit_replay", conversation_id=None, limit=20
+        )
+        assert len(item_ids) >= 1
+        stored_items = await backend.get_items(item_ids)
+        # Find the replayed item (its content text matches).
+        replay = next(
+            dict(it)
+            for it in stored_items
+            if it is not None
+            and dict(it).get("type") == "output_message"
+            and dict(it).get("audit_trace_id") == "trace-abc-123"
+            and dict(it).get("id") != original_id
+        )
+        stored_dict = replay
+        assert stored_dict["type"] == "output_message"
+        assert stored_dict["status"] == "completed"
+        assert stored_dict["audit_trace_id"] == "trace-abc-123"
+        assert stored_dict["content"][0]["text"] == "The final answer is 42."
+        assert stored_dict["content"][0]["vendor_metadata"] == {"score": 0.97, "model": "gpt-x"}
+        # The replay item id is regenerated per write turn (caller
+        # supplies it), so it must NOT equal the original — that's how
+        # we know the snapshot path didn't naively echo back the seed.
+        assert stored_dict["id"] != original_id
+
+        # 4. Final read confirms the entire chain is observable through
+        #    the public AF surface. Walking the new response id returns
+        #    both the seeded prior item and the replayed one.
+        second_read = await provider.get_messages("resp_audit_replay")
+        assert len(second_read) >= 1
+        # Find the replayed message (matches the seed text + audit field).
+        replayed_msg = next(
+            m
+            for m in second_read
+            if EXTRAS_KEY in m.additional_properties
+            and m.additional_properties[EXTRAS_KEY].get(RAW_KEY, {}).get("audit_trace_id") == "trace-abc-123"
+        )
+        replayed_raw = replayed_msg.additional_properties[EXTRAS_KEY][RAW_KEY]
+        assert replayed_raw["content"][0]["vendor_metadata"] == {"score": 0.97, "model": "gpt-x"}
+
+
+# region Integration tests against a real Foundry project
+#
+# Required environment variables:
+#
+# * ``FOUNDRY_PROJECT_ENDPOINT`` — base URL of a real Foundry project,
+#   e.g. ``https://my-proj.services.ai.azure.com``.
+# * Azure auth: the ``_live_credential`` fixture below builds an
+#   ``AzureCliCredential``, so run ``az login`` before the tests. Service
+#   principal env vars (``AZURE_CLIENT_ID`` etc.) and managed identity are
+#   not consulted unless the fixture is switched to another credential.
+#   The identity needs at least the ``Azure AI User`` role on the project.
+#
+# Optional (enables the seeded-history test):
+#
+# * ``FOUNDRY_HOSTING_PREVIOUS_RESPONSE_ID`` — a real response id with attached items.
+# * ``FOUNDRY_HOSTING_CONVERSATION_ID`` — alternative.
+# * ``FOUNDRY_HOSTING_USER_ISOLATION_KEY`` /
+#   ``FOUNDRY_HOSTING_CHAT_ISOLATION_KEY`` — set if your project enforces isolation.
+#
+# Run with: ``uv run pytest -m integration packages/foundry_hosting/tests/test_history_provider.py``
+
+
+_FOUNDRY_PROJECT_ENDPOINT = os.getenv("FOUNDRY_PROJECT_ENDPOINT", "")
+
+_skip_if_no_foundry_endpoint = pytest.mark.skipif(
+    not _FOUNDRY_PROJECT_ENDPOINT or _FOUNDRY_PROJECT_ENDPOINT == "https://test-project.services.ai.azure.com/",
+    reason=(
+        "FOUNDRY_PROJECT_ENDPOINT not set to a real Foundry project; "
+        "skipping FoundryHostedAgentHistoryProvider integration tests."
+    ),
+)
+
+
+def _isolation_from_env() -> IsolationContext | None:
+    """Build the isolation context from the env vars, or ``None`` when neither holds a non-empty value."""
+    user = os.getenv("FOUNDRY_HOSTING_USER_ISOLATION_KEY")
+    chat = os.getenv("FOUNDRY_HOSTING_CHAT_ISOLATION_KEY")
+    has_key = bool(user or chat)
+    return IsolationContext(user_key=user, chat_key=chat) if has_key else None
+
+
+@pytest.fixture
+async def _live_credential() -> object:
+    """Yield an async ``AzureCliCredential`` (requires a prior ``az login``) and close it afterwards."""
+    # Imported lazily so collection still works in environments without
+    # ``azure-identity`` available (e.g. minimal CI matrices).
+    from azure.identity.aio import AzureCliCredential
+
+    cred = AzureCliCredential()
+    try:
+        yield cred
+    finally:
+        await cred.close()
+
+
+class TestLiveFoundryStorage:
+    """End-to-end tests against a real Foundry project's storage HTTP API.
+
+    These tests are gated behind ``@pytest.mark.integration`` so the
+    default ``pytest -m 'not integration'`` run skips them; they are
+    additionally skipped unless ``FOUNDRY_PROJECT_ENDPOINT`` points at a
+    real project.
+    """
+
+    @pytest.mark.flaky
+    @pytest.mark.integration
+    @_skip_if_no_foundry_endpoint
+    async def test_get_messages_unknown_response_id_returns_empty(self, _live_credential: object) -> None:
+        """Looking up a response id that was never stored yields ``[]``.
+
+        The storage backend is expected to treat a 404 from the
+        ``item_ids`` endpoint as "no prior history" rather than raising,
+        so a client's very first request cannot fail on the history
+        load. This test checks that contract against the live service.
+        """
+        unknown_id = "resp_does_not_exist_integration_smoke"
+        history_provider = FoundryHostedAgentHistoryProvider(
+            endpoint=_FOUNDRY_PROJECT_ENDPOINT,
+            credential=_live_credential,  # type: ignore[arg-type]
+        )
+        try:
+            loaded = await history_provider.get_messages(
+                unknown_id,
+                isolation=_isolation_from_env(),
+            )
+        finally:
+            await history_provider.aclose()
+
+        assert loaded == []
+
+    @pytest.mark.flaky
+    @pytest.mark.integration
+    @_skip_if_no_foundry_endpoint
+    @pytest.mark.skipif(
+        not os.getenv("FOUNDRY_HOSTING_PREVIOUS_RESPONSE_ID") and not os.getenv("FOUNDRY_HOSTING_CONVERSATION_ID"),
+        reason=(
+            "Set FOUNDRY_HOSTING_PREVIOUS_RESPONSE_ID or "
+            "FOUNDRY_HOSTING_CONVERSATION_ID to a real seeded conversation to "
+            "enable this test."
+        ),
+    )
+    async def test_get_messages_returns_real_history(self, _live_credential: object) -> None:
+        """A seeded conversation must come back as a non-empty list of ``Message`` objects."""
+        response_anchor = os.getenv("FOUNDRY_HOSTING_PREVIOUS_RESPONSE_ID")
+        conversation_anchor = os.getenv("FOUNDRY_HOSTING_CONVERSATION_ID")
+        # ``get_messages`` is keyed on ``session_id`` (== previous_response_id),
+        # so the response id is the primary lookup key; the conversation id
+        # is the fallback when only a conversation id is configured.
+        session_id = response_anchor or conversation_anchor or ""
+
+        history_provider = FoundryHostedAgentHistoryProvider(
+            endpoint=_FOUNDRY_PROJECT_ENDPOINT,
+            credential=_live_credential,  # type: ignore[arg-type]
+            history_limit=20,
+        )
+        try:
+            history = await history_provider.get_messages(
+                session_id,
+                isolation=_isolation_from_env(),
+            )
+        finally:
+            await history_provider.aclose()
+
+        assert isinstance(history, list)
+        assert history, "Expected at least one message in the seeded history"
+        assert all(isinstance(entry, Message) for entry in history)
+
+    @pytest.mark.flaky
+    @pytest.mark.integration
+    @_skip_if_no_foundry_endpoint
+    async def test_invoke_then_read_and_write_with_isolation(self, _live_credential: object) -> None:
+        """Invoke a deployed Foundry hosted agent, then round-trip via storage.
+
+        This test exercises the realistic, fully-permissioned path:
+
+        1. Use :class:`FoundryAgent` to invoke the deployed
+           ``agent-framework-hosting-sample`` (version 10) hosted agent
+           with an explicit ``isolation_key``. The Foundry runtime
+           creates the response + history items inside the storage
+           backend on the user's behalf.
+        2. Read the resulting history back through our own native HTTP
+           :class:`FoundryHostedAgentHistoryProvider` using the matching
+           :class:`IsolationContext`. This is the production read path
+           that DevUI / external clients use to render conversation
+           transcripts.
+        3. Best-effort: try to WRITE a fresh response carrying two known
+           items under the same isolation key via the
+           :class:`FoundryStorageProvider` write API. The storage write
+           path is normally callable only from inside the agent-server
+           container's runtime identity (Foundry strips the user's bearer
+           token at the runtime boundary), so a 403 here is expected for
+           ordinary user principals; in that case the test is skipped.
+        """
+        from agent_framework_foundry import FoundryAgent
+        from azure.ai.agentserver.responses import (
+            FoundryStorageProvider,
+            FoundryStorageSettings,
+        )
+        from azure.ai.agentserver.responses.store._foundry_errors import (  # pyright: ignore[reportPrivateImportUsage]
+            FoundryApiError,
+        )
+
+        # Per-run isolation key keeps each test run in its own tenant
+        # partition. Nanosecond resolution, because runs started within
+        # the same second (CI matrix, retries) would otherwise collide.
+        isolation_key = f"af-hosting-roundtrip-{time.time_ns()}"
+        isolation = IsolationContext(user_key=isolation_key, chat_key=isolation_key)
+
+        # 1. Invoke the deployed hosted agent.
+        agent = FoundryAgent(
+            project_endpoint=_FOUNDRY_PROJECT_ENDPOINT,
+            agent_name="agent-framework-hosting-sample",
+            agent_version="10",
+            credential=_live_credential,  # type: ignore[arg-type]
+            allow_preview=True,
+            default_options={"isolation_key": isolation_key},
+        )
+        # ``create_session()`` makes a fresh local session with no
+        # ``service_session_id`` set; the FoundryAgent's
+        # ``_prepare_run_context`` will lazily call
+        # ``project_client.beta.agents.create_session`` under our
+        # isolation key on first run.
+        session = agent.create_session()
+        prompt = "Please reply with exactly: 'Round-trip ack.'"
+        result = await agent.run(prompt, session=session)
+
+        assert result.text, "FoundryAgent.run returned an empty response"
+        response_id = result.response_id
+        assert isinstance(response_id, str) and response_id, "Expected a non-empty response_id from FoundryAgent.run"
+
+        # 2. Read history back via the native HTTP provider with the
+        #    same isolation context. Try both the response_id and the
+        #    service_session_id Foundry created on our behalf — depending
+        #    on the runtime's storage layout, history may be anchored to
+        #    either.
+        service_session_id = session.service_session_id
+        candidates = [c for c in (response_id, service_session_id) if c]
+
+        reader = FoundryHostedAgentHistoryProvider(
+            endpoint=_FOUNDRY_PROJECT_ENDPOINT,
+            credential=_live_credential,  # type: ignore[arg-type]
+            history_limit=20,
+        )
+        try:
+            messages_after_invoke: list[Message] = []
+            for cand in candidates:
+                msgs = await reader.get_messages(cand, isolation=isolation)
+                if msgs:
+                    messages_after_invoke = msgs
+                    break
+        finally:
+            await reader.aclose()
+
+        # The read path returning a well-typed list (possibly empty if
+        # Foundry compacts items out of the response chain we queried)
+        # is enough to confirm the isolation header path works end-to-end.
+        assert all(isinstance(m, Message) for m in messages_after_invoke)
+
+        # If we got messages back, every one should carry the lossless
+        # raw-snapshot under additional_properties[EXTRAS_KEY][RAW_KEY] —
+        # this is what guarantees audit/replay round-trip through the
+        # storage backend. Without it, a write-back would synthesise a
+        # text-only item and lose every audit field.
+        if messages_after_invoke:
+            from agent_framework_foundry_hosting._shared import (  # pyright: ignore[reportPrivateUsage]
+                EXTRAS_KEY,
+                RAW_KEY,
+            )
+
+            for m in messages_after_invoke:
+                extras = m.additional_properties.get(EXTRAS_KEY) or {}
+                assert RAW_KEY in extras, f"Live read message missing raw snapshot: {m!r}"
+                raw = extras[RAW_KEY]
+                # Snapshot must carry the discriminator + id — the two
+                # fields save_messages relies on to rebuild the SDK item.
+                assert isinstance(raw, dict)
+                assert "type" in raw and "id" in raw
+
+        # 3. Best-effort write: create a fresh response under the same
+        #    isolation key carrying two known items, then read it back
+        #    via the native HTTP provider. Skip the write-side
+        #    assertions if Foundry rejects the call with 403 (expected
+        #    when the runtime is the only authorised writer).
+        from azure.ai.agentserver.responses.models import ResponseObject
+
+        write_response_id = f"resp_af_write_{int(time.time())}"
+        _, foundry_first = TestAfFoundryRoundTrip._af_message(
+            "Appended message 1: 2 + 2 equals 4.", f"{write_response_id}_itm_1"
+        )
+        _, foundry_second = TestAfFoundryRoundTrip._af_message(
+            "Appended message 2: 3 + 5 equals 8.", f"{write_response_id}_itm_2"
+        )
+
+        write_succeeded = False
+        writer = FoundryStorageProvider(
+            credential=_live_credential,  # type: ignore[arg-type]
+            settings=FoundryStorageSettings.from_endpoint(_FOUNDRY_PROJECT_ENDPOINT),
+        )
+        try:
+            await writer.create_response(
+                ResponseObject(
+                    id=write_response_id,
+                    object="response",
+                    status="completed",
+                    model="agent",
+                    created_at=int(time.time()),
+                ),
+                input_items=[foundry_first, foundry_second],
+                history_item_ids=None,
+                isolation=isolation,
+            )
+            write_succeeded = True
+        except FoundryApiError as exc:
+            if "403" not in str(exc):
+                raise
+            # Foundry strips the user bearer token at the runtime
+            # boundary, so external principals can't write directly to
+            # storage. The container's MSI is the authorised writer.
+            pytest.skip("Foundry rejected external storage write with 403 (expected outside container).")
+        finally:
+            await writer.aclose()
+
+        # Re-read and verify our two appended items now show up.
+        if not write_succeeded:  # pragma: no cover — defensive; pytest.skip already raised
+            return
+        reader2 = FoundryHostedAgentHistoryProvider(
+            endpoint=_FOUNDRY_PROJECT_ENDPOINT,
+            credential=_live_credential,  # type: ignore[arg-type]
+            history_limit=20,
+        )
+        try:
+            messages_after_write = await reader2.get_messages(write_response_id, isolation=isolation)
+        finally:
+            await reader2.aclose()
+
+        appended_texts = {m.text for m in messages_after_write}
+        assert "Appended message 1: 2 + 2 equals 4." in appended_texts
+        assert "Appended message 2: 3 + 5 equals 8." in appended_texts

From 73bf9a5bc4ee12e6a119a76f5a6f0e6e18033fce Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Tue, 5 May 2026 13:43:40 +0200
Subject: [PATCH 2/4] feat(foundry_hosting): add local_storage_root for
 file-based dev history

Adds an optional `local_storage_root: str | Path | None` parameter to
`FoundryHostedAgentHistoryProvider`. When set and the provider is
running outside a Foundry Hosted Agent container, conversations are
persisted to JSONL files via `agent_framework.FileHistoryProvider`
laid out as:

  {root}/{user_key or '~none'}/{chat_key or '~none'}/{session_id}.jsonl

Hosted mode (FOUNDRY_HOSTING_ENVIRONMENT set) ignores the option with a
one-time INFO log so Foundry storage always wins on the platform. The
in-memory fallback is unchanged when the option is omitted.

Path safety: isolation segments are validated against the same character
allowlist FileHistoryProvider uses for session-id stems and
base64-url-encoded with a reserved "~iso-" prefix when unsafe. "~none"
sentinel for missing keys can never collide with a real isolation key
(real keys starting with "~" are encoded). The resolved target dir is
also re-checked to be inside the configured root.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../_history_provider.py                      | 225 ++++++++++++++++--
 .../tests/test_history_provider.py            | 115 +++++++++
 2 files changed, 323 insertions(+), 17 deletions(-)

diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
index a558979631..8427e9c557 100644
--- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
+++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
@@ -39,7 +39,10 @@
 
 Local fallback: when ``FOUNDRY_HOSTING_ENVIRONMENT`` is unset, the provider
 transparently falls back to :class:`InMemoryResponseProvider` so the same
-agent code runs in dev.
+agent code runs in dev. Pass ``local_storage_root`` to use a persistent
+file-based store instead of in-memory; histories are then laid out as
+``{root}/{user_key or "~none"}/{chat_key or "~none"}/{session_id}.jsonl``
+via :class:`agent_framework.FileHistoryProvider`.
 """
 
 from __future__ import annotations
@@ -48,12 +51,14 @@
 import logging
 import os
 import time
+from base64 import urlsafe_b64encode
 from contextlib import contextmanager
 from contextvars import ContextVar
 from dataclasses import dataclass
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, ClassVar
 
-from agent_framework import HistoryProvider, Message
+from agent_framework import FileHistoryProvider, HistoryProvider, Message
 from azure.ai.agentserver.responses import (
     FoundryStorageProvider,
     FoundryStorageSettings,
@@ -175,7 +180,7 @@ def bind_request_context(
     response_id: str,
     previous_response_id: str | None = None,
     **_unused: Any,
-) -> "Iterator[None]":
+) -> Iterator[None]:
     """Bind the per-request response-chain anchors for this provider.
 
     Intended for the host (or any caller orchestrating an
@@ -209,7 +214,7 @@ def get_current_request_context() -> _RequestContext | None:
     return _request_var.get()
 
 
-def _host_isolation() -> "IsolationContext | None":
+def _host_isolation() -> IsolationContext | None:
     """Lift the host-bound isolation contextvar into our local type.
 
     The host installs an ASGI middleware that reads
@@ -247,6 +252,62 @@ def _host_isolation() -> "IsolationContext | None":
 _StorageBackend = "FoundryStorageProvider | InMemoryResponseProvider"
 
 
+# Reserved directory names for file-based local history: the sentinel
+# stands in for a missing ``user_key`` / ``chat_key`` and the prefix
+# marks base64-url-encoded keys. ``_is_safe_isolation_segment`` rejects
+# keys with a leading tilde, so a real isolation key can never collide
+# with either after sanitisation.
+_ISOLATION_NONE_MARKER = "~none"
+_ISOLATION_ENCODED_PREFIX = "~iso-"
+
+# Windows reserved file/directory stems. Mirrors
+# ``FileHistoryProvider._WINDOWS_RESERVED_FILE_STEMS`` so the directory
+# layer enforces the same portability constraints the file layer does.
+_WINDOWS_RESERVED_STEMS = frozenset({
+    "CON",
+    "PRN",
+    "AUX",
+    "NUL",
+    *(f"COM{i}" for i in range(1, 10)),
+    *(f"LPT{i}" for i in range(1, 10)),
+})
+
+
+def _is_safe_isolation_segment(value: str) -> bool:
+    """Return whether ``value`` is safe to use directly as a directory name.
+
+    Rules follow :meth:`FileHistoryProvider._is_literal_session_file_stem_safe`,
+    plus: a leading tilde is reserved for our sentinel/encoded prefixes, and
+    Windows device names are rejected even with an extension (``NUL.txt``).
+    """
+    if (
+        not value
+        or value.startswith((".", "~"))
+        or value.endswith((" ", "."))
+        # Windows resolves ``CON.log`` etc. to the device, so test the stem only.
+        or value.partition(".")[0].upper() in _WINDOWS_RESERVED_STEMS
+    ):
+        return False
+    # The allowlist also excludes control characters, separators and spaces.
+    return all(character.isalnum() or character in "._-" for character in value)
+
+
+def _encode_isolation_segment(value: str | None) -> str:
+    """Map an isolation key onto a directory name that is safe on disk.
+
+    A missing key (``None`` or ``""``) becomes the ``"~none"`` sentinel and
+    a key that already passes ``_is_safe_isolation_segment`` is returned
+    verbatim. Every other key is emitted as ``"~iso-"`` followed by its
+    unpadded URL-safe base64 form, which no verbatim key can equal.
+    """
+    if not value:
+        return _ISOLATION_NONE_MARKER
+    if not _is_safe_isolation_segment(value):
+        token = urlsafe_b64encode(value.encode("utf-8")).rstrip(b"=").decode("ascii")
+        return _ISOLATION_ENCODED_PREFIX + token
+    return value
+
+
 class FoundryHostedAgentHistoryProvider(HistoryProvider):
     """``HistoryProvider`` backed by Foundry Hosted Agent storage.
 
@@ -256,11 +317,25 @@ class FoundryHostedAgentHistoryProvider(HistoryProvider):
     selection is driven by the ``FOUNDRY_HOSTING_ENVIRONMENT``
     environment variable.
 
-    ``session_id`` semantics: the value passed to :meth:`get_messages`
-    and :meth:`save_messages` is treated as the Responses
-    ``previous_response_id`` (or ``conversation_id``) whose chain to
-    load. When omitted (and no host-bound chain anchor is set),
-    :meth:`get_messages` returns an empty list (a fresh conversation).
+    For local runs that need to *persist* history across process
+    restarts, pass ``local_storage_root``: the provider then writes
+    each conversation to
+    ``{root}/{user_key or "~none"}/{chat_key or "~none"}/{session_id}.jsonl``
+    via :class:`agent_framework.FileHistoryProvider`. The Foundry
+    response-chain semantics (``previous_response_id`` walking,
+    ``caresp_*`` id stamping, ``ResponseObject`` envelopes) are
+    bypassed in file mode — the on-disk format is plain JSONL of
+    :class:`Message` payloads, identical to ``FileHistoryProvider``
+    standalone usage. ``local_storage_root`` is ignored when running
+    hosted (Foundry storage always wins).
+
+    ``session_id`` semantics: in hosted / in-memory mode the value
+    passed to :meth:`get_messages` and :meth:`save_messages` is treated
+    as the Responses ``previous_response_id`` (or ``conversation_id``)
+    whose chain to load. When omitted (and no host-bound chain anchor
+    is set), :meth:`get_messages` returns an empty list (a fresh
+    conversation). In file mode ``session_id`` is used as the literal
+    filename stem (``FileHistoryProvider`` sanitises unsafe values).
     """
 
     DEFAULT_SOURCE_ID: ClassVar[str] = "foundry_hosted_agent"
@@ -268,7 +343,7 @@ class FoundryHostedAgentHistoryProvider(HistoryProvider):
     def __init__(
         self,
         *,
-        credential: "AsyncTokenCredential | None" = None,
+        credential: AsyncTokenCredential | None = None,
         endpoint: str | None = None,
         history_limit: int = 100,
         source_id: str = DEFAULT_SOURCE_ID,
@@ -277,6 +352,7 @@ def __init__(
         store_context_messages: bool = False,
         store_context_from: set[str] | None = None,
         store_outputs: bool = True,
+        local_storage_root: str | Path | None = None,
     ) -> None:
         """Initialize the provider.
 
@@ -284,13 +360,15 @@ def __init__(
             credential: Async token credential used to authenticate against
                 the Foundry storage API. Required when running hosted
                 (``FOUNDRY_HOSTING_ENVIRONMENT`` is set). Ignored in
-                local-mode (the in-memory backend needs no auth).
+                local-mode (the in-memory / file backends need no auth).
             endpoint: Foundry project endpoint URL. Defaults to the value
                 of the ``FOUNDRY_PROJECT_ENDPOINT`` environment variable.
                 Required when running hosted.
             history_limit: Maximum number of history items to fetch per
                 ``get_messages`` call. Mirrors the agent-server runtime's
                 ``ResponseContext._history_limit``. Default ``100``.
+                Ignored in file mode (``FileHistoryProvider`` returns the
+                full session file each call).
             source_id: Unique identifier for this provider instance, as
                 required by ``HistoryProvider``.
             load_messages: Whether to load messages before invocation.
@@ -308,6 +386,13 @@ def __init__(
             store_outputs: Whether to mirror response messages into Foundry
                 storage. Default ``True`` for the same reason as
                 ``store_inputs``.
+            local_storage_root: When set, *and* the provider is running
+                outside a Foundry Hosted Agent container, persist history
+                to JSONL files under
+                ``{root}/{user_key or "~none"}/{chat_key or "~none"}/{session_id}.jsonl``
+                instead of using the in-memory backend. Ignored when
+                hosted (with a one-time INFO log). Defaults to ``None``
+                (in-memory local fallback).
         """
         super().__init__(
             source_id=source_id,
@@ -323,6 +408,17 @@ def __init__(
         self._endpoint = endpoint or os.environ.get(_ENV_FOUNDRY_PROJECT_ENDPOINT) or None
         self._backend: FoundryStorageProvider | InMemoryResponseProvider | None = None
 
+        self._local_storage_root: Path | None = (
+            Path(local_storage_root).resolve() if local_storage_root is not None else None
+        )
+        # Cache one ``FileHistoryProvider`` per (user_key, chat_key)
+        # tuple. Bounded by the number of distinct isolation scopes the
+        # process sees; cleared on ``aclose``.
+        self._file_providers: dict[tuple[str, str], FileHistoryProvider] = {}
+        self._hosted_local_root_warned = False
+        if self._local_storage_root is not None and self.is_hosted_environment():
+            self._warn_hosted_local_root_ignored()
+
     @staticmethod
     def is_hosted_environment() -> bool:
         """Return ``True`` when running inside a Foundry Hosted Agent container.
@@ -333,7 +429,7 @@ def is_hosted_environment() -> bool:
         """
         return bool(os.environ.get(_ENV_FOUNDRY_HOSTING_ENVIRONMENT))
 
-    def _resolve_backend(self) -> "FoundryStorageProvider | InMemoryResponseProvider":
+    def _resolve_backend(self) -> FoundryStorageProvider | InMemoryResponseProvider:
         """Return the storage backend, constructing it lazily on first use.
 
         * If ``FOUNDRY_HOSTING_ENVIRONMENT`` is set, build a
@@ -378,9 +474,12 @@ async def aclose(self) -> None:
         """Release storage resources held by this provider.
 
         Safe to call multiple times. Closes the lazily-constructed
-        backend if one was created. ``InMemoryResponseProvider`` has no
-        ``aclose`` and is closed implicitly on garbage collection.
+        backend if one was created and drops any cached file-history
+        providers. ``InMemoryResponseProvider`` and
+        ``FileHistoryProvider`` have no ``aclose`` and are closed
+        implicitly on garbage collection.
         """
+        self._file_providers.clear()
         if self._backend is None:
             return
         aclose = getattr(self._backend, "aclose", None)
@@ -388,6 +487,75 @@ async def aclose(self) -> None:
             await aclose()
         self._backend = None
 
+    def _warn_hosted_local_root_ignored(self) -> None:
+        """Emit the INFO notice about an ignored ``local_storage_root`` at most once per instance."""
+        if not self._hosted_local_root_warned:
+            # Latch the flag before logging so repeat calls become no-ops.
+            self._hosted_local_root_warned = True
+            logger.info(
+                "FoundryHostedAgentHistoryProvider ignored local_storage_root=%s because "
+                "FOUNDRY_HOSTING_ENVIRONMENT is set; Foundry storage takes precedence "
+                "when hosted.",
+                self._local_storage_root,
+            )
+
+    def _resolve_local_file_provider(
+        self,
+        isolation: IsolationContext | None,
+    ) -> FileHistoryProvider | None:
+        """Return a ``FileHistoryProvider`` for the current isolation, or ``None``.
+
+        Returns ``None`` when ``local_storage_root`` is unset *or* the
+        provider is running in hosted mode (in which case Foundry
+        storage handles persistence). Otherwise builds — and caches —
+        one provider per (user_key, chat_key) tuple, rooted at the
+        sanitised ``{root}/{user_segment}/{chat_segment}`` directory.
+
+        Raises:
+            ValueError: If the resolved isolation directory escapes
+                ``local_storage_root`` (defence in depth — the
+                sanitisation should already prevent this).
+        """
+        if self._local_storage_root is None:
+            return None
+        if self.is_hosted_environment():
+            self._warn_hosted_local_root_ignored()
+            return None
+
+        user_key = isolation.user_key if isolation is not None else None
+        chat_key = isolation.chat_key if isolation is not None else None
+        cache_key = (user_key or "", chat_key or "")  # None and "" are the same scope (both map to "~none")
+        cached = self._file_providers.get(cache_key)
+        if cached is not None:
+            return cached
+
+        user_segment = _encode_isolation_segment(user_key)
+        chat_segment = _encode_isolation_segment(chat_key)
+        target_dir = (self._local_storage_root / user_segment / chat_segment).resolve()
+        if not target_dir.is_relative_to(self._local_storage_root):
+            raise ValueError(
+                "Isolation segments resolved outside of local_storage_root: "
+                f"user_key={user_key!r} chat_key={chat_key!r}"
+            )
+
+        provider = FileHistoryProvider(
+            target_dir,
+            source_id=f"{self.source_id}__file__{user_segment}__{chat_segment}",
+            load_messages=self.load_messages,
+            store_inputs=self.store_inputs,
+            store_context_messages=self.store_context_messages,
+            store_context_from=self.store_context_from,
+            store_outputs=self.store_outputs,
+        )
+        self._file_providers[cache_key] = provider
+        logger.debug(
+            "FoundryHostedAgentHistoryProvider created file backend for isolation (user=%s, chat=%s) at %s",
+            user_key,
+            chat_key,
+            target_dir,
+        )
+        return provider
+
     async def get_messages(
         self,
         session_id: str | None,
@@ -421,7 +589,18 @@ async def get_messages(
             (such as chat-isolation-key values) are skipped because the
             storage backend rejects them with HTTP 400 "Malformed
             identifier".
+
+            When ``local_storage_root`` is configured (and the provider
+            is running outside a Foundry Hosted Agent container), this
+            method instead delegates to a per-isolation
+            :class:`FileHistoryProvider` and ``session_id`` is used as
+            the literal file stem.
         """
+        isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation()
+        file_provider = self._resolve_local_file_provider(isolation)
+        if file_provider is not None:
+            return await file_provider.get_messages(session_id, state=state, **kwargs)
+
         bound = get_current_request_context()
         # Prefer the host-bound previous_response_id over the session_id
         # the framework feeds in: the bound value is the id we ourselves
@@ -441,7 +620,6 @@ async def get_messages(
             # No walkable anchor → fresh conversation, nothing to load.
             return []
 
-        isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation()
         backend = self._resolve_backend()
 
         try:
@@ -471,7 +649,7 @@ async def get_messages(
     async def save_messages(
         self,
         session_id: str | None,
-        messages: "Sequence[Message]",
+        messages: Sequence[Message],
         *,
         state: dict[str, Any] | None = None,
         **kwargs: Any,
@@ -504,10 +682,24 @@ async def save_messages(
             state: Unused — kept for ``HistoryProvider`` compatibility.
             **kwargs: Extensibility hook; ``isolation`` may be supplied
                 explicitly to override the contextvar.
+
+        Notes:
+            When ``local_storage_root`` is configured (and the provider
+            is running outside a Foundry Hosted Agent container), this
+            method instead delegates to a per-isolation
+            :class:`FileHistoryProvider` and ``session_id`` is used as
+            the literal file stem. The Foundry response-chain stamping
+            described above is bypassed entirely in that mode.
         """
         if not messages:
             return
 
+        isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation()
+        file_provider = self._resolve_local_file_provider(isolation)
+        if file_provider is not None:
+            await file_provider.save_messages(session_id, messages, state=state, **kwargs)
+            return
+
         bound = get_current_request_context()
         # Prefer the host-bound response_id so the channel envelope and
         # the storage write agree on a single id per turn — which is
@@ -538,7 +730,6 @@ async def save_messages(
         if previous_response_id is None and env_session and env_session.startswith(("caresp_", "resp_")):
             previous_response_id = env_session
 
-        isolation = kwargs.get("isolation") or _host_isolation() or get_current_isolation()
         logger.debug(
             "save_messages: response_id=%r previous_response_id=%r isolation=%s",
             response_id,
diff --git a/python/packages/foundry_hosting/tests/test_history_provider.py b/python/packages/foundry_hosting/tests/test_history_provider.py
index 6b6c4c42db..763c912a41 100644
--- a/python/packages/foundry_hosting/tests/test_history_provider.py
+++ b/python/packages/foundry_hosting/tests/test_history_provider.py
@@ -439,6 +439,121 @@ async def test_aclose_idempotent(self, monkeypatch: pytest.MonkeyPatch) -> None:
         await prov.aclose()  # idempotent — second call is a no-op
 
 
+# region Local file storage option
+
+
+class TestLocalFileStorage:
+    """`local_storage_root` swaps the in-memory local fallback for a
+    per-isolation :class:`FileHistoryProvider` so dev runs persist
+    across process restarts."""
+
+    async def test_unset_keeps_in_memory_fallback(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None:
+        monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False)
+        prov = FoundryHostedAgentHistoryProvider()
+        assert prov._resolve_local_file_provider(None) is None  # pyright: ignore[reportPrivateUsage]
+        assert isinstance(
+            prov._resolve_backend(),  # pyright: ignore[reportPrivateUsage]
+            InMemoryResponseProvider,
+        )
+
+    async def test_creates_per_isolation_provider(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None:
+        monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False)
+        prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path)
+        iso = IsolationContext(user_key="alice", chat_key="chat-1")
+
+        fp = prov._resolve_local_file_provider(iso)  # pyright: ignore[reportPrivateUsage]
+        assert fp is not None
+        # Cached on subsequent calls for the same (user, chat).
+        assert prov._resolve_local_file_provider(iso) is fp  # pyright: ignore[reportPrivateUsage]
+        # Different isolation → different provider rooted at a different dir.
+        other = prov._resolve_local_file_provider(  # pyright: ignore[reportPrivateUsage]
+            IsolationContext(user_key="bob", chat_key="chat-1"),
+        )
+        assert other is not None and other is not fp
+        assert fp.storage_path != other.storage_path
+        assert fp.storage_path == (tmp_path / "alice" / "chat-1").resolve()
+
+    async def test_missing_isolation_uses_sentinel_dir(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None:
+        monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False)
+        prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path)
+        fp = prov._resolve_local_file_provider(None)  # pyright: ignore[reportPrivateUsage]
+        assert fp is not None
+        assert fp.storage_path == (tmp_path / "~none" / "~none").resolve()
+
+    async def test_unsafe_isolation_segments_are_encoded(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None:
+        monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False)
+        prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path)
+        iso = IsolationContext(user_key="../escape", chat_key="ok-chat")
+        fp = prov._resolve_local_file_provider(iso)  # pyright: ignore[reportPrivateUsage]
+        assert fp is not None
+        # Encoded segment never contains a ``/`` and never escapes the root.
+        assert fp.storage_path.is_relative_to(tmp_path.resolve())
+        assert "../" not in str(fp.storage_path)
+        # Encoded segments use the reserved ``~iso-`` prefix.
+        parts = fp.storage_path.relative_to(tmp_path.resolve()).parts
+        assert parts[0].startswith("~iso-")
+        assert parts[1] == "ok-chat"
+
+    async def test_hosted_mode_ignores_local_storage_root(
+        self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any, caplog: pytest.LogCaptureFixture
+    ) -> None:
+        monkeypatch.setenv("FOUNDRY_HOSTING_ENVIRONMENT", "1")
+        with caplog.at_level("INFO", logger="agent_framework_foundry_hosting._history_provider"):
+            prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path)
+            # File provider is never resolved when hosted.
+            assert prov._resolve_local_file_provider(None) is None  # pyright: ignore[reportPrivateUsage]
+        assert any("ignored local_storage_root" in record.message for record in caplog.records)
+
+    async def test_get_and_save_round_trip_via_file(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None:
+        monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False)
+        prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path)
+        iso = IsolationContext(user_key="alice", chat_key="chat-1")
+
+        msgs = [
+            Message(role="user", contents=["hello"]),
+            Message(role="assistant", contents=["hi back"]),
+        ]
+        await prov.save_messages("conv-1", msgs, isolation=iso)
+
+        # File exists at the expected nested path with session_id as stem.
+        expected_path = tmp_path / "alice" / "chat-1" / "conv-1.jsonl"
+        assert expected_path.exists()
+        # Two JSONL records (one per message).
+        assert len([line for line in expected_path.read_text().splitlines() if line.strip()]) == 2
+
+        loaded = await prov.get_messages("conv-1", isolation=iso)
+        assert [m.text for m in loaded] == ["hello", "hi back"]
+
+        # Different isolation → different file → independent history.
+        bob_loaded = await prov.get_messages(
+            "conv-1",
+            isolation=IsolationContext(user_key="bob", chat_key="chat-1"),
+        )
+        assert bob_loaded == []
+
+    async def test_session_id_with_special_chars_is_sanitised_by_file_provider(
+        self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any
+    ) -> None:
+        # The wrapper passes ``session_id`` through unchanged; the
+        # delegate ``FileHistoryProvider`` is responsible for sanitising
+        # it. This test just confirms the delegation works for a
+        # non-trivial id without raising.
+        monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False)
+        prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path)
+        msgs = [Message(role="user", contents=["hi"])]
+        await prov.save_messages("conv:with:colons", msgs)
+        loaded = await prov.get_messages("conv:with:colons")
+        assert [m.text for m in loaded] == ["hi"]
+
+    async def test_aclose_clears_file_provider_cache(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None:
+        monkeypatch.delenv("FOUNDRY_HOSTING_ENVIRONMENT", raising=False)
+        prov = FoundryHostedAgentHistoryProvider(local_storage_root=tmp_path)
+        prov._resolve_local_file_provider(IsolationContext(user_key="alice"))  # pyright: ignore[reportPrivateUsage]
+        assert prov._file_providers  # pyright: ignore[reportPrivateUsage]
+        await prov.aclose()
+        assert not prov._file_providers  # pyright: ignore[reportPrivateUsage]
+
+
 # region Shared module re-exports
 
 

From 20cbc3e29701b07dd88a1d8f8eaecb3fc5e5a42b Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Wed, 6 May 2026 15:43:50 +0200
Subject: [PATCH 3/4] fix(foundry_hosting): address PR-1 review comments

- _shared.py:_capture_raw narrows `except Exception` to `except TypeError`
  and emits a WARNING with traceback so the lossy fallback to a
  synthesized round-trip is observable. Mirrors the reviewer suggestion.

- _history_provider.py:save_messages narrows `except Exception` to
  `except FoundryStorageError` so only storage-validation failures
  (4xx `invalid_payload` / `not_found`, opaque 5xx) are swallowed.
  Network / TLS / DNS errors, expired-credential 401/403s, and
  payload-builder bugs propagate so the caller can retry / alert.
  Adds an instance-level `failed_writes` counter that operators can
  poll for silent-drop visibility.

- _history_provider.py id-stamping loop: drops the
  `contextlib.suppress(AttributeError, TypeError)` around
  `item.id = new_id` so SDK contract changes surface in the test
  suite instead of silently corrupting the chain (the storage backend
  rejects the entire `create_response` with HTTP 500 when synthetic
  prefix-based ids leak through). The now-unused `import contextlib`
  is removed.

- tests:
  * Unit-cover `foundry_response_id` / `foundry_response_id_factory` /
    `foundry_item_id` so SDK `IdGenerator` contract changes are caught
    locally.
  * Cover the `save_messages` wire payload: required-by-storage fields
    (`background`, `parallel_tool_calls`, `instructions`,
    `agent_reference`), env-var-driven stamping (`FOUNDRY_AGENT_NAME` /
    `FOUNDRY_AGENT_VERSION` / `FOUNDRY_AGENT_SESSION_ID` /
    `MODEL_DEPLOYMENT_NAME` with `AZURE_AI_MODEL_DEPLOYMENT_NAME`
    fallback), and the rule that `model` / `agent_session_id` /
    `agent_reference.version` are omitted (not stamped to `None`) when
    their env vars are unset.
  * Cover the `FOUNDRY_AGENT_SESSION_ID` last-resort chain anchor on
    both the get and save paths, including the prefix gate that blocks
    non-`caresp_*`/`resp_*` values from reaching storage, and the
    precedence rule that a host binding wins over the env.
  * Replace the old `test_save_messages_swallows_backend_errors` with
    two tests asserting the new contract: storage errors are swallowed
    and bump `failed_writes`; everything else propagates and leaves the
    counter at zero.

141 unit tests pass; mypy + pyright + ruff clean.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../_history_provider.py                      |  43 +-
 .../_shared.py                                |  18 +-
 .../tests/test_history_provider.py            | 377 +++++++++++++++++-
 3 files changed, 420 insertions(+), 18 deletions(-)

diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
index 8427e9c557..e6c6b09876 100644
--- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
+++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
@@ -47,7 +47,6 @@
 
 from __future__ import annotations
 
-import contextlib
 import logging
 import os
 import time
@@ -70,6 +69,7 @@
 from azure.ai.agentserver.responses.store._foundry_errors import (  # pyright: ignore[reportPrivateUsage]
     FoundryBadRequestError,
     FoundryResourceNotFoundError,
+    FoundryStorageError,
 )
 
 from ._shared import (
@@ -419,6 +419,12 @@ def __init__(
         if self._local_storage_root is not None and self.is_hosted_environment():
             self._warn_hosted_local_root_ignored()
 
+        # Observability: number of ``save_messages`` calls dropped by
+        # :class:`FoundryStorageError` from ``backend.create_response``.
+        # Operators / health probes can read this attribute directly to
+        # detect silent persistence loss; never decremented.
+        self.failed_writes: int = 0
+
     @staticmethod
     def is_hosted_environment() -> bool:
         """Return ``True`` when running inside a Foundry Hosted Agent container.
@@ -789,8 +795,16 @@ async def save_messages(
             if factory is None:
                 continue
             new_id = factory(response_id)
-            with contextlib.suppress(AttributeError, TypeError):
-                item.id = new_id  # type: ignore[attr-defined]
+            # Plain attribute assignment — the SDK ``OutputItem`` models
+            # are ``MutableMapping``s with ``__setattr__`` wired to dict
+            # set, so this is expected to succeed for every type listed
+            # above. The previous ``contextlib.suppress`` masked SDK
+            # contract changes (next save would silently retain the
+            # synthetic prefix-based id and the storage backend would
+            # reject the entire ``create_response`` with HTTP 500).
+            # Letting it raise surfaces those breakages to the test
+            # suite instead.
+            item.id = new_id  # type: ignore[attr-defined]
 
         input_items: list[Any] = []
         output_items: list[Any] = []
@@ -916,15 +930,32 @@ async def save_messages(
                 history_item_ids=history_item_ids,
                 isolation=isolation,
             )
-        except Exception as exc:
+        except FoundryStorageError as exc:
+            # Storage-validation failures (4xx ``invalid_payload`` /
+            # ``not_found``, opaque 5xx) are best-effort losses: the
+            # caller's run already produced output and we don't want to
+            # crash the whole turn over a chain-write the user can't
+            # recover from. They are still observable: every drop bumps
+            # ``failed_writes`` (operators can poll it / surface in
+            # health probes) and the full traceback + ``response_body``
+            # is logged.
+            #
+            # Network / TLS / DNS errors, expired-credential 401/403s,
+            # and bugs in the wire-payload builder above (e.g. a
+            # required-field regression) deliberately propagate so they
+            # surface to the caller and trigger retry / alerting paths
+            # instead of being silently dropped here.
+            self.failed_writes += 1
             err_body = getattr(exc, "response_body", None)
             logger.exception(
-                "FoundryHostedAgentHistoryProvider.save_messages: backend rejected "
-                "%d message(s) (response_id=%s, previous_response_id=%s, error_body=%s).",
+                "FoundryHostedAgentHistoryProvider.save_messages: storage rejected "
+                "%d message(s) (response_id=%s, previous_response_id=%s, error_body=%s, "
+                "failed_writes=%d).",
                 len(messages),
                 response_id,
                 previous_response_id,
                 err_body,
+                self.failed_writes,
             )
             return
         logger.debug(
diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py
index 890dd7bcfe..4b3d3c4dd3 100644
--- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py
+++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_shared.py
@@ -212,14 +212,22 @@ def _capture_raw(message: Message, item: Mapping[str, Any]) -> Message:
     (citations, reasoning, tool results, …) and any extras Foundry
     layered on top of the declared schema.
 
-    A best-effort ``dict(...)`` is used so failure to snapshot (e.g. a
-    non-mapping subclass surfacing in the future) degrades gracefully to
-    the lossy-but-functional synthesise-from-text path rather than
-    crashing the read.
+    Narrow ``TypeError`` is the only swallowed failure (matches the
+    ``Mapping`` contract precondition); ``MemoryError`` and other
+    ``Exception`` subclasses propagate so genuine bugs aren't masked.
+    A WARNING with ``exc_info`` is logged so the lossy fallback is
+    observable downstream — without it a regression in the SDK schema
+    silently drops citations / reasoning / tool-result extras on every
+    round-tripped message and there is no breadcrumb pointing here.
     """
     try:
         raw = dict(item)
-    except Exception:
+    except TypeError:
+        logger.warning(
+            "_capture_raw: SDK item %r is not mapping-like; round-tripping without raw snapshot",
+            type(item).__name__,
+            exc_info=True,
+        )
         return message
     message.additional_properties.setdefault(EXTRAS_KEY, {})[RAW_KEY] = raw
     return message
diff --git a/python/packages/foundry_hosting/tests/test_history_provider.py b/python/packages/foundry_hosting/tests/test_history_provider.py
index 763c912a41..a1d372c486 100644
--- a/python/packages/foundry_hosting/tests/test_history_provider.py
+++ b/python/packages/foundry_hosting/tests/test_history_provider.py
@@ -366,20 +366,42 @@ async def test_save_messages_no_session_short_circuits(self) -> None:
         await prov.save_messages(None, [Message(role="user", contents=[Content.from_text("hi")])])
         backend.create_response.assert_not_called()
 
-    async def test_save_messages_swallows_backend_errors(self) -> None:
-        """Persistence is best-effort — backend failures must NOT propagate.
+    async def test_save_messages_swallows_storage_errors(self) -> None:
+        """Persistence is best-effort for *Foundry storage* failures.
 
-        A successful agent turn that hits a transient storage error
-        (RBAC propagation lag, throttling, …) should still return a 2xx
-        to the caller; we only log so operators can spot systematic
-        failures.
+        Storage-validation rejections, opaque 5xx, etc. should be
+        swallowed (the agent run already produced output and the
+        caller can't recover from a chain-write failure mid-stream).
+        Counter is bumped for observability.
         """
         backend = _make_fake_backend()
-        backend.create_response.side_effect = RuntimeError("simulated 500 from storage")
+        backend.create_response.side_effect = FoundryBadRequestError(
+            "simulated invalid_payload",
+            response_body={"error": {"code": "invalid_payload"}},
+        )
         prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
         # Must not raise.
         await prov.save_messages("resp_session_x", [Message(role="user", contents=[Content.from_text("hi")])])
         backend.create_response.assert_awaited_once()
+        assert prov.failed_writes == 1
+
+    async def test_save_messages_propagates_non_storage_errors(self) -> None:
+        """Network / auth / payload-builder bugs MUST surface to the caller.
+
+        Anything that's not a ``FoundryStorageError`` — connection
+        resets, expired credential 401/403s, ``AttributeError`` from a
+        regression in the wire-payload builder — propagates so the
+        caller can retry / alert. Counter is NOT bumped for these.
+        """
+        backend = _make_fake_backend()
+        backend.create_response.side_effect = ConnectionError("simulated network failure")
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        with pytest.raises(ConnectionError, match="simulated network failure"):
+            await prov.save_messages(
+                "resp_session_x",
+                [Message(role="user", contents=[Content.from_text("hi")])],
+            )
+        assert prov.failed_writes == 0
 
     async def test_save_then_get_round_trip_via_in_memory_backend(self) -> None:
         """End-to-end save→get round-trip through ``InMemoryResponseProvider``.
@@ -554,6 +576,347 @@ async def test_aclose_clears_file_provider_cache(self, monkeypatch: pytest.Monke
         assert not prov._file_providers  # pyright: ignore[reportPrivateUsage]
 
 
+# region Foundry id helpers (`_ids.py`)
+
+
+class TestFoundryIdHelpers:
+    """Cover the public ``_ids`` re-exports so SDK ``IdGenerator``
+    contract changes surface in unit tests rather than as opaque
+    HTTP 500 ``server_error`` from Foundry storage at runtime."""
+
+    def test_foundry_response_id_carries_partition_key(self) -> None:
+        """A minted ``caresp_*`` id must embed an 18-char partition key.
+
+        Free-form ``resp_<uuid>`` ids carry no parseable partition key
+        and Foundry storage rejects writes with HTTP 500.
+        """
+        from agent_framework_foundry_hosting import foundry_response_id
+
+        new_id = foundry_response_id()
+        assert new_id.startswith("caresp_")
+        # ``caresp_`` (7) + 18-char partition key + 32-char entropy = 57.
+        # The legacy 48-char body variant is also accepted by storage,
+        # so just check the lower bound.
+        assert len(new_id) >= 7 + 18 + 32 - 8
+
+    def test_foundry_response_id_reuses_previous_partition_key(self) -> None:
+        """Chained writes co-locate by reusing the prior partition key.
+
+        Foundry storage rejects chained writes whose new record sits in
+        a different partition than the prior one. Passing a ``caresp_*``
+        ``previous_response_id`` should produce a new id whose partition
+        segment matches.
+        """
+        from agent_framework_foundry_hosting import foundry_response_id
+
+        prior = foundry_response_id()
+        # Partition key = 18 chars after the ``caresp_`` prefix.
+        prior_partition = prior[len("caresp_") : len("caresp_") + 18]
+        chained = foundry_response_id(prior)
+        assert chained.startswith("caresp_")
+        assert chained != prior
+        assert chained[len("caresp_") : len("caresp_") + 18] == prior_partition
+
+    def test_foundry_response_id_factory_returns_callable(self) -> None:
+        """The factory wrapper used by ``ResponsesChannel`` must
+        delegate to :func:`foundry_response_id` so chained turns can
+        seed the partition key from ``previous_response_id``."""
+        from agent_framework_foundry_hosting import (
+            foundry_response_id,
+            foundry_response_id_factory,
+        )
+
+        factory = foundry_response_id_factory()
+        assert factory is foundry_response_id
+
+    def test_foundry_item_id_for_known_input_type(self) -> None:
+        """Recognised ``Item`` types get a typed prefix and a
+        partition-key hint matching the response id when supplied."""
+        from azure.ai.agentserver.responses.models import (
+            ItemMessage,
+            MessageContentInputTextContent,
+        )
+
+        from agent_framework_foundry_hosting import foundry_item_id, foundry_response_id
+
+        response_id = foundry_response_id()
+        partition = response_id[len("caresp_") : len("caresp_") + 18]
+        item = ItemMessage(
+            type="message",
+            role="user",
+            content=[MessageContentInputTextContent(type="input_text", text="hi")],
+        )
+        new_id = foundry_item_id(item, response_id)
+        assert new_id is not None
+        # ``msg_*`` is what ``IdGenerator.new_message_item_id`` mints.
+        assert new_id.startswith("msg_")
+        assert partition in new_id
+
+    def test_foundry_item_id_returns_none_for_unknown_type(self) -> None:
+        """Reference-only / unrecognised types must return ``None``
+        per the SDK helper's contract — callers (e.g.
+        ``save_messages``'s id-stamping loop) skip these so storage
+        only receives ids it can parse."""
+        from agent_framework_foundry_hosting import foundry_item_id
+
+        class _UnknownItem:
+            pass
+
+        assert foundry_item_id(_UnknownItem()) is None
+
+
+# region Wire payload stamping (`save_messages`)
+
+
+class TestSaveMessagesWirePayload:
+    """Storage rejects ``create_response`` payloads that omit fields
+    flagged as REQUIRED in ``ResponseObject`` (``parallel_tool_calls``,
+    ``instructions``, ``background``) or that leak extras the validator
+    refuses (``conversation``, ``model=None``, …). Any regression that
+    drops one of these silently breaks every hosted deploy with an
+    opaque 4xx; cover them here so the test suite catches it first."""
+
+    async def test_envelope_includes_required_storage_fields(
+        self,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """``background``, ``parallel_tool_calls``, ``instructions``,
+        and ``agent_reference`` MUST be present on every stamped
+        envelope; storage returns HTTP 400 ``invalid_payload`` if any
+        of them is missing."""
+        from agent_framework_foundry_hosting import bind_request_context
+
+        # Strip env so the defaults are exercised cleanly.
+        for var in (
+            "FOUNDRY_AGENT_NAME",
+            "FOUNDRY_AGENT_VERSION",
+            "FOUNDRY_AGENT_SESSION_ID",
+            "MODEL_DEPLOYMENT_NAME",
+            "AZURE_AI_MODEL_DEPLOYMENT_NAME",
+        ):
+            monkeypatch.delenv(var, raising=False)
+
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        with bind_request_context(response_id="resp_envelope_1", previous_response_id=None):
+            await prov.save_messages(
+                "session-x",
+                [Message(role="assistant", contents=[Content.from_text("hi")])],
+            )
+
+        backend.create_response.assert_awaited_once()
+        response = backend.create_response.await_args.args[0]
+        body = response.as_dict()
+
+        # Required-by-storage fields.
+        assert body["background"] is False
+        assert body["parallel_tool_calls"] is False
+        assert body["instructions"] == ""
+        assert body["agent_reference"] == {
+            "type": "agent_reference",
+            "name": "agent-framework-host",
+        }
+
+    async def test_envelope_omits_optional_fields_when_env_unset(
+        self,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """``model``, ``agent_session_id``, and the ``version`` slot of
+        ``agent_reference`` are omitted (NOT stamped as ``None``) when
+        their env vars are unset — storage rejects ``model: null``."""
+        from agent_framework_foundry_hosting import bind_request_context
+
+        for var in (
+            "FOUNDRY_AGENT_NAME",
+            "FOUNDRY_AGENT_VERSION",
+            "FOUNDRY_AGENT_SESSION_ID",
+            "MODEL_DEPLOYMENT_NAME",
+            "AZURE_AI_MODEL_DEPLOYMENT_NAME",
+        ):
+            monkeypatch.delenv(var, raising=False)
+
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        with bind_request_context(response_id="resp_omit_1", previous_response_id=None):
+            await prov.save_messages(
+                "session-x",
+                [Message(role="assistant", contents=[Content.from_text("hi")])],
+            )
+
+        body = backend.create_response.await_args.args[0].as_dict()
+        # Either entirely absent or explicitly None — assert the field
+        # was NOT stamped to a non-None value.
+        assert body.get("model") is None
+        assert body.get("agent_session_id") is None
+        # ``version`` slot inside agent_reference is omitted entirely
+        # (the key is absent, not set to None) when the env var is unset.
+        assert "version" not in body["agent_reference"]
+
+    async def test_envelope_picks_up_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """When the platform-set env vars are present they MUST land on
+        the envelope: ``FOUNDRY_AGENT_NAME`` / ``FOUNDRY_AGENT_VERSION``
+        feed ``agent_reference``, ``FOUNDRY_AGENT_SESSION_ID`` feeds
+        ``agent_session_id``, and ``MODEL_DEPLOYMENT_NAME`` feeds
+        ``model``."""
+        from agent_framework_foundry_hosting import bind_request_context
+
+        monkeypatch.setenv("FOUNDRY_AGENT_NAME", "concierge")
+        monkeypatch.setenv("FOUNDRY_AGENT_VERSION", "v3")
+        monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "caresp_envsessionABCDEF")
+        monkeypatch.setenv("MODEL_DEPLOYMENT_NAME", "gpt-4o-mini-prod")
+        monkeypatch.delenv("AZURE_AI_MODEL_DEPLOYMENT_NAME", raising=False)
+
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        with bind_request_context(response_id="resp_env_1", previous_response_id=None):
+            await prov.save_messages(
+                "session-x",
+                [Message(role="assistant", contents=[Content.from_text("hi")])],
+            )
+
+        body = backend.create_response.await_args.args[0].as_dict()
+        assert body["agent_reference"] == {
+            "type": "agent_reference",
+            "name": "concierge",
+            "version": "v3",
+        }
+        assert body["agent_session_id"] == "caresp_envsessionABCDEF"
+        assert body["model"] == "gpt-4o-mini-prod"
+
+    async def test_envelope_falls_back_to_local_dev_model_var(
+        self,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Local dev sets ``AZURE_AI_MODEL_DEPLOYMENT_NAME`` rather than
+        the platform-only ``MODEL_DEPLOYMENT_NAME``; the latter wins
+        when both are present, the former fills in when only it is."""
+        from agent_framework_foundry_hosting import bind_request_context
+
+        monkeypatch.delenv("MODEL_DEPLOYMENT_NAME", raising=False)
+        monkeypatch.setenv("AZURE_AI_MODEL_DEPLOYMENT_NAME", "gpt-4o-mini-dev")
+        for var in ("FOUNDRY_AGENT_NAME", "FOUNDRY_AGENT_VERSION", "FOUNDRY_AGENT_SESSION_ID"):
+            monkeypatch.delenv(var, raising=False)
+
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        with bind_request_context(response_id="resp_devmodel_1", previous_response_id=None):
+            await prov.save_messages(
+                "session-x",
+                [Message(role="assistant", contents=[Content.from_text("hi")])],
+            )
+
+        body = backend.create_response.await_args.args[0].as_dict()
+        assert body["model"] == "gpt-4o-mini-dev"
+
+
+# region FOUNDRY_AGENT_SESSION_ID chain anchor
+
+
+class TestFoundryAgentSessionIdAnchor:
+    """The Foundry runtime stamps the previous turn's response id into
+    ``FOUNDRY_AGENT_SESSION_ID`` for the next turn's container so each
+    new container can chain back without us keeping any cross-request
+    state. A regression that moves the lookup, mistypes the prefix
+    check, or stops gating on ``caresp_*``/``resp_*`` would silently
+    make hosted multi-turn conversations forget every prior turn."""
+
+    async def test_get_messages_uses_env_anchor_when_unbound(
+        self,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """No host binding, ``session_id`` is opaque (not ``caresp_*``):
+        ``get_messages`` must fall back to ``FOUNDRY_AGENT_SESSION_ID``
+        and walk from there."""
+        for var in ("MODEL_DEPLOYMENT_NAME", "AZURE_AI_MODEL_DEPLOYMENT_NAME"):
+            monkeypatch.delenv(var, raising=False)
+        monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "caresp_envanchor1")
+
+        backend = _make_fake_backend(
+            history_ids=["msg_envanchor_1"],
+            items=[_make_text_item("msg_envanchor_1", "from-env-anchor")],
+        )
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+
+        # Opaque session_id — no host binding either. Without the env
+        # fallback this would return [] without making any backend call.
+        messages = await prov.get_messages("opaque-session")
+
+        assert [m.text for m in messages] == ["from-env-anchor"]
+        assert backend.get_history_item_ids.await_args.args[0] == "caresp_envanchor1"
+
+    async def test_get_messages_ignores_non_caresp_env_anchor(
+        self,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Defence in depth: if the runtime ever stamps a non-``caresp_*``
+        value into the env var (or it leaks from another source), we
+        must NOT pass it to storage — the partition-key extractor
+        would reject it with HTTP 500."""
+        monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "garbage-not-an-id")
+
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        messages = await prov.get_messages("opaque-session")
+
+        assert messages == []
+        backend.get_history_item_ids.assert_not_called()
+
+    async def test_save_messages_uses_env_anchor_when_unbound(
+        self,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """When no host binding supplies a previous_response_id, the
+        env anchor must be used so the new write chains correctly."""
+        for var in (
+            "FOUNDRY_AGENT_NAME",
+            "FOUNDRY_AGENT_VERSION",
+            "MODEL_DEPLOYMENT_NAME",
+            "AZURE_AI_MODEL_DEPLOYMENT_NAME",
+        ):
+            monkeypatch.delenv(var, raising=False)
+        monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "caresp_envchain1")
+
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        # Opaque session_id, no host binding → without the env anchor
+        # the prior chain wouldn't be walked.
+        await prov.save_messages(
+            "opaque-session",
+            [Message(role="assistant", contents=[Content.from_text("hi")])],
+        )
+
+        # Provider walked the prior chain via the env anchor.
+        assert backend.get_history_item_ids.await_args.args[0] == "caresp_envchain1"
+
+    async def test_save_messages_env_anchor_skipped_when_host_bound(
+        self,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """A host-bound previous_response_id wins over the env anchor;
+        the binding is the authoritative chain seed for the request."""
+        from agent_framework_foundry_hosting import bind_request_context
+
+        for var in (
+            "FOUNDRY_AGENT_NAME",
+            "FOUNDRY_AGENT_VERSION",
+            "MODEL_DEPLOYMENT_NAME",
+            "AZURE_AI_MODEL_DEPLOYMENT_NAME",
+        ):
+            monkeypatch.delenv(var, raising=False)
+        monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "caresp_envignored")
+
+        backend = _make_fake_backend()
+        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
+        with bind_request_context(response_id="resp_bound_2", previous_response_id="caresp_boundprev"):
+            await prov.save_messages(
+                "session-x",
+                [Message(role="assistant", contents=[Content.from_text("hi")])],
+            )
+
+        # Host binding wins; the env anchor is ignored.
+        assert backend.get_history_item_ids.await_args.args[0] == "caresp_boundprev"
+
+
 # region Shared module re-exports
 
 

From 93dc1454913d51534da0593d3807c2a4d93d3a7f Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Thu, 7 May 2026 16:06:40 +0200
Subject: [PATCH 4/4] fix(foundry_hosting): address PR-1 round-2 review
 comments

- Hosted detection now delegates to AgentConfig.from_env().is_hosted so
  a future Foundry SDK rename of FOUNDRY_HOSTING_ENVIRONMENT propagates
  automatically; drop the local _ENV_FOUNDRY_HOSTING_ENVIRONMENT
  constant.
- Drop the FOUNDRY_AGENT_SESSION_ID fallback in both get_messages and
  save_messages: per the SDK it identifies the *container instance*,
  not the conversation, so chaining off it would silently merge
  unrelated conversations across container restarts. The host-bound
  previous_response_id (set by ResponsesChannel) is the only
  authoritative anchor; the env value is still stamped into the
  persisted envelope's agent_session_id for operator correlation.
- Update module docstring + rewrite the TestFoundryAgentSessionIdAnchor
  tests to assert the new contract (env var ignored as anchor, still
  stamped onto persisted envelope, host binding wins).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../_history_provider.py                      | 61 ++++++++++-------
 .../tests/test_history_provider.py            | 66 ++++++++-----------
 2 files changed, 63 insertions(+), 64 deletions(-)

diff --git a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
index e6c6b09876..06a9edcd7d 100644
--- a/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
+++ b/python/packages/foundry_hosting/agent_framework_foundry_hosting/_history_provider.py
@@ -27,16 +27,24 @@
 Environment variables read:
 
 * ``FOUNDRY_HOSTING_ENVIRONMENT`` — non-empty marks "running inside Foundry"
-  and selects the SDK-backed storage transport.
+  and selects the SDK-backed storage transport. Detection is delegated to
+  :class:`azure.ai.agentserver.core.AgentConfig` so a future SDK rename
+  propagates without touching this module.
 * ``FOUNDRY_PROJECT_ENDPOINT`` — base URL of the Foundry project; required
   when running hosted unless an explicit ``endpoint=`` is supplied.
 * ``FOUNDRY_AGENT_NAME`` / ``FOUNDRY_AGENT_VERSION`` — stamped onto the
   ``agent_reference`` field of every persisted response envelope.
-* ``FOUNDRY_AGENT_SESSION_ID`` — used as a chain anchor when the channel
-  did not bind a per-request ``previous_response_id``.
 * ``MODEL_DEPLOYMENT_NAME`` / ``AZURE_AI_MODEL_DEPLOYMENT_NAME`` — model
   field stamped on the persisted envelope (must match a real deployment).
 
+Note on ``FOUNDRY_AGENT_SESSION_ID``: this env var identifies the
+*container instance*, not the conversation, so it is **not** consulted as
+a fallback ``previous_response_id``. The host-bound
+``previous_response_id`` (set by :class:`ResponsesChannel` from the
+request envelope) is the authoritative anchor. The value is still
+persisted into the ``agent_session_id`` envelope field for operator
+correlation only.
+
 Local fallback: when ``FOUNDRY_HOSTING_ENVIRONMENT`` is unset, the provider
 transparently falls back to :class:`InMemoryResponseProvider` so the same
 agent code runs in dev. Pass ``local_storage_root`` to use a persistent
@@ -58,6 +66,7 @@
 from typing import TYPE_CHECKING, Any, ClassVar
 
 from agent_framework import FileHistoryProvider, HistoryProvider, Message
+from azure.ai.agentserver.core import AgentConfig
 from azure.ai.agentserver.responses import (
     FoundryStorageProvider,
     FoundryStorageSettings,
@@ -84,10 +93,10 @@
 
 logger = logging.getLogger(__name__)
 
-# Environment variable names — re-declared (not imported) so this module
+# Environment variable name — re-declared (not imported) so this module
 # stays decoupled from the private ``azure.ai.agentserver.core._config``
-# constants while still matching them exactly.
-_ENV_FOUNDRY_HOSTING_ENVIRONMENT = "FOUNDRY_HOSTING_ENVIRONMENT"
+# constants while still matching exactly. Hosted-vs-local detection is
+# delegated to :class:`AgentConfig` so a future SDK rename propagates.
 _ENV_FOUNDRY_PROJECT_ENDPOINT = "FOUNDRY_PROJECT_ENDPOINT"
 
 # Per-request isolation context.  The owning Channel is expected to set this
@@ -429,11 +438,13 @@ def __init__(
     def is_hosted_environment() -> bool:
         """Return ``True`` when running inside a Foundry Hosted Agent container.
 
-        Detection uses the ``FOUNDRY_HOSTING_ENVIRONMENT`` environment
-        variable, the same signal :class:`ResponsesAgentServerHost` uses to
-        switch between hosted and local storage backends.
+        Delegates to :meth:`azure.ai.agentserver.core.AgentConfig.from_env`
+        so the detection rule stays in lockstep with the Foundry SDK; if
+        the platform ever renames the underlying signal (today
+        ``FOUNDRY_HOSTING_ENVIRONMENT``) the SDK update is picked up
+        automatically without a code change here.
         """
-        return bool(os.environ.get(_ENV_FOUNDRY_HOSTING_ENVIRONMENT))
+        return AgentConfig.from_env().is_hosted
 
     def _resolve_backend(self) -> FoundryStorageProvider | InMemoryResponseProvider:
         """Return the storage backend, constructing it lazily on first use.
@@ -614,16 +625,15 @@ async def get_messages(
         anchor = bound.previous_response_id if bound is not None else None
         if anchor is None and session_id and session_id.startswith(("caresp_", "resp_")):
             anchor = session_id
-        if anchor is None:
-            # The Foundry Hosted Agent runtime stamps the previous turn's
-            # response id into ``FOUNDRY_AGENT_SESSION_ID`` for the
-            # following turn's container, so we can walk back from it
-            # directly without keeping any cross-request state ourselves.
-            env_session = os.environ.get("FOUNDRY_AGENT_SESSION_ID") or None
-            if env_session and env_session.startswith(("caresp_", "resp_")):
-                anchor = env_session
         if anchor is None:
             # No walkable anchor → fresh conversation, nothing to load.
+            # Note: we intentionally do NOT fall back to
+            # ``FOUNDRY_AGENT_SESSION_ID`` — per the Foundry SDK that env
+            # var identifies the *container instance*, not the
+            # conversation, so it doesn't yield a walkable response-id
+            # chain. The host-bound ``previous_response_id`` (set by
+            # ``ResponsesChannel`` from the request envelope) is the
+            # authoritative anchor.
             return []
 
         backend = self._resolve_backend()
@@ -728,13 +738,14 @@ async def save_messages(
             response_id = IdGenerator.new_response_id()
             previous_response_id = session_id if session_id.startswith(("caresp_", "resp_")) else None
 
-        # Foundry session-bound containers: when ``FOUNDRY_AGENT_SESSION_ID``
-        # is set the runtime stamps it to the previous turn's response id
-        # so each new container can chain back to it directly. We don't
-        # need to maintain any cross-request map ourselves.
-        env_session = os.environ.get("FOUNDRY_AGENT_SESSION_ID") or None
-        if previous_response_id is None and env_session and env_session.startswith(("caresp_", "resp_")):
-            previous_response_id = env_session
+        # Note: we intentionally do NOT consult ``FOUNDRY_AGENT_SESSION_ID``
+        # as a fallback ``previous_response_id`` here. Per the Foundry SDK
+        # that env var identifies the *container instance*, not the
+        # conversation, so chaining off it produces an unwalkable history.
+        # The host-bound ``previous_response_id`` (set by
+        # ``ResponsesChannel`` from the request envelope) is the only
+        # authoritative anchor; if it's missing the new turn is the start
+        # of a fresh chain.
 
         logger.debug(
             "save_messages: response_id=%r previous_response_id=%r isolation=%s",
diff --git a/python/packages/foundry_hosting/tests/test_history_provider.py b/python/packages/foundry_hosting/tests/test_history_provider.py
index a1d372c486..cfdbeccacb 100644
--- a/python/packages/foundry_hosting/tests/test_history_provider.py
+++ b/python/packages/foundry_hosting/tests/test_history_provider.py
@@ -813,20 +813,21 @@ async def test_envelope_falls_back_to_local_dev_model_var(
 
 
 class TestFoundryAgentSessionIdAnchor:
-    """The Foundry runtime stamps the previous turn's response id into
-    ``FOUNDRY_AGENT_SESSION_ID`` for the next turn's container so each
-    new container can chain back without us keeping any cross-request
-    state. A regression that moves the lookup, mistypes the prefix
-    check, or stops gating on ``caresp_*``/``resp_*`` would silently
-    make hosted multi-turn conversations forget every prior turn."""
-
-    async def test_get_messages_uses_env_anchor_when_unbound(
+    """``FOUNDRY_AGENT_SESSION_ID`` identifies the *container instance*,
+    not the conversation (per the Foundry SDK), so it MUST NOT be used
+    as a fallback ``previous_response_id`` for chain walking. The host-
+    bound ``previous_response_id`` (set by ``ResponsesChannel`` from the
+    request envelope) is the only authoritative anchor; any code that
+    re-introduces an env-based fallback would silently merge unrelated
+    conversations across container restarts."""
+
+    async def test_get_messages_ignores_env_session_anchor_when_unbound(
         self,
         monkeypatch: pytest.MonkeyPatch,
     ) -> None:
-        """No host binding, ``session_id`` is opaque (not ``caresp_*``):
-        ``get_messages`` must fall back to ``FOUNDRY_AGENT_SESSION_ID``
-        and walk from there."""
+        """No host binding, opaque ``session_id`` and a populated
+        ``FOUNDRY_AGENT_SESSION_ID``: ``get_messages`` must return ``[]``
+        and never call the backend (no walkable conversation anchor)."""
         for var in ("MODEL_DEPLOYMENT_NAME", "AZURE_AI_MODEL_DEPLOYMENT_NAME"):
             monkeypatch.delenv(var, raising=False)
         monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "caresp_envanchor1")
@@ -837,36 +838,18 @@ async def test_get_messages_uses_env_anchor_when_unbound(
         )
         prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
 
-        # Opaque session_id — no host binding either. Without the env
-        # fallback this would return [] without making any backend call.
-        messages = await prov.get_messages("opaque-session")
-
-        assert [m.text for m in messages] == ["from-env-anchor"]
-        assert backend.get_history_item_ids.await_args.args[0] == "caresp_envanchor1"
-
-    async def test_get_messages_ignores_non_caresp_env_anchor(
-        self,
-        monkeypatch: pytest.MonkeyPatch,
-    ) -> None:
-        """Defence in depth: if the runtime ever stamps a non-``caresp_*``
-        value into the env var (or it leaks from another source), we
-        must NOT pass it to storage — the partition-key extractor
-        would reject it with HTTP 500."""
-        monkeypatch.setenv("FOUNDRY_AGENT_SESSION_ID", "garbage-not-an-id")
-
-        backend = _make_fake_backend()
-        prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
         messages = await prov.get_messages("opaque-session")
 
         assert messages == []
         backend.get_history_item_ids.assert_not_called()
 
-    async def test_save_messages_uses_env_anchor_when_unbound(
+    async def test_save_messages_ignores_env_session_anchor_when_unbound(
         self,
         monkeypatch: pytest.MonkeyPatch,
     ) -> None:
-        """When no host binding supplies a previous_response_id, the
-        env anchor must be used so the new write chains correctly."""
+        """When no host binding supplies a ``previous_response_id`` and
+        ``session_id`` is opaque, the env var must NOT be consulted as a
+        fallback; the new turn writes without a prior chain seed."""
         for var in (
             "FOUNDRY_AGENT_NAME",
             "FOUNDRY_AGENT_VERSION",
@@ -878,21 +861,26 @@ async def test_save_messages_uses_env_anchor_when_unbound(
 
         backend = _make_fake_backend()
         prov = _with_backend(FoundryHostedAgentHistoryProvider(), backend)
-        # Opaque session_id, no host binding → without the env anchor
-        # the prior chain wouldn't be walked.
+        # Opaque session_id, no host binding → save proceeds without
+        # walking any chain (no get_history_item_ids call).
         await prov.save_messages(
             "opaque-session",
             [Message(role="assistant", contents=[Content.from_text("hi")])],
         )
 
-        # Provider walked the prior chain via the env anchor.
-        assert backend.get_history_item_ids.await_args.args[0] == "caresp_envchain1"
+        backend.get_history_item_ids.assert_not_called()
+        # The persisted envelope still stamps the env value into
+        # ``agent_session_id`` for operator correlation (see the
+        # ``_history_provider`` module docstring): only the chain anchor is gated.
+        backend.create_response.assert_awaited_once()
+        wire_payload = backend.create_response.await_args.args[0].as_dict()
+        assert wire_payload["agent_session_id"] == "caresp_envchain1"
 
     async def test_save_messages_env_anchor_skipped_when_host_bound(
         self,
         monkeypatch: pytest.MonkeyPatch,
     ) -> None:
-        """A host-bound previous_response_id wins over the env anchor;
+        """A host-bound ``previous_response_id`` wins over any env value;
         the binding is the authoritative chain seed for the request."""
         from agent_framework_foundry_hosting import bind_request_context
 
@@ -913,7 +901,7 @@ async def test_save_messages_env_anchor_skipped_when_host_bound(
                 [Message(role="assistant", contents=[Content.from_text("hi")])],
             )
 
-        # Host binding wins; the env anchor is ignored.
+        # Host binding wins; the env anchor is ignored for chaining.
         assert backend.get_history_item_ids.await_args.args[0] == "caresp_boundprev"