diff --git a/examples/agent_patterns/agents_as_tools_with_history.py b/examples/agent_patterns/agents_as_tools_with_history.py new file mode 100644 index 0000000000..e1574a4845 --- /dev/null +++ b/examples/agent_patterns/agents_as_tools_with_history.py @@ -0,0 +1,81 @@ +""" +Agent as tool with conversation history. + +Demonstrates ``include_conversation_history=True`` on ``Agent.as_tool()``. +The orchestrator delegates to two sub-agents: + +- **analyst** (with history): sees the full conversation via a + summary, so it can reference earlier facts and tool results. +- **blind** (without history): sees only the tool input string, proving the default + behavior is unchanged. + +Try telling the orchestrator some facts, then asking it to delegate to the analyst +or blind agent to see the difference. +""" + +import asyncio + +from agents import Agent, Runner, TResponseInputItem + +analyst = Agent( + name="analyst", + instructions=( + "You analyze conversations. Reference specific facts, names, and numbers " + "from the conversation history to show you have full context." + ), +) + +blind = Agent( + name="blind", + instructions="Answer questions based on whatever conversation you can see.", +) + +orchestrator = Agent( + name="orchestrator", + instructions=( + "You are a helpful assistant. For normal questions, answer directly.\n" + "You have two tools:\n" + "- ask_analyst: delegate to an analyst who can see the FULL conversation history\n" + "- ask_blind: delegate to an agent WITHOUT conversation history\n" + "Use the appropriate tool when asked." 
+ ), + tools=[ + analyst.as_tool( + tool_name="ask_analyst", + tool_description="Delegate to the analyst (has full conversation history).", + include_conversation_history=True, + ), + blind.as_tool( + tool_name="ask_blind", + tool_description="Delegate to the blind agent (has NO conversation history).", + include_conversation_history=False, + ), + ], +) + + +async def main(): + print("=== Agent as Tool with Conversation History ===") + print("Chat with the orchestrator. It can delegate to the analyst (with history)") + print("or blind agent (without history).") + print("Type 'quit' to exit.\n") + + items: list[TResponseInputItem] = [] + + while True: + try: + user_input = input("You: ") + except (EOFError, KeyboardInterrupt): + break + + if user_input.strip().lower() in ("quit", "exit", "q"): + break + + items.append({"role": "user", "content": user_input}) + result = await Runner.run(orchestrator, items) + print(f"\nOrchestrator: {result.final_output}\n") + items = result.to_input_list() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/src/agents/agent.py b/src/agents/agent.py index 602d84066c..0d65f57c1d 100644 --- a/src/agents/agent.py +++ b/src/agents/agent.py @@ -527,6 +527,7 @@ def as_tool( parameters: type[Any] | None = None, input_builder: StructuredToolInputBuilder | None = None, include_input_schema: bool = False, + include_conversation_history: bool = False, ) -> FunctionTool: """Transform this agent into a tool, callable by other agents. @@ -556,6 +557,10 @@ def as_tool( parameters: Structured input type for the tool arguments (dataclass or Pydantic model). input_builder: Optional function to build the nested agent input from structured data. include_input_schema: Whether to include the full JSON schema in structured input. + include_conversation_history: Whether to prepend the parent agent's conversation history + to the sub-agent's input. 
When True, the sub-agent sees a summary of the full + conversation context from the parent run, followed by the tool input. The summary + uses the same format as handoff history nesting. Defaults to False. """ def _is_supported_parameters(value: Any) -> bool: @@ -623,6 +628,16 @@ async def _run_agent_impl(context: ToolContext, input_json: str) -> Any: if not isinstance(resolved_input, str) and not isinstance(resolved_input, list): raise ModelBehaviorError("Agent tool called with invalid input") + if include_conversation_history and isinstance(context, RunContextWrapper): + exec_ctx = context.tool_execution_context + if exec_ctx: + from .handoffs.history import build_agent_tool_history + from .items import ItemHelpers + + summary_items, forwarded_items = build_agent_tool_history(exec_ctx) + tool_input_items = ItemHelpers.input_to_new_input_list(resolved_input) + resolved_input = summary_items + forwarded_items + tool_input_items + resolved_max_turns = max_turns if max_turns is not None else DEFAULT_MAX_TURNS resolved_run_config = run_config if resolved_run_config is None and isinstance(context, ToolContext): diff --git a/src/agents/handoffs/history.py b/src/agents/handoffs/history.py index efea013523..c7d3f8a433 100644 --- a/src/agents/handoffs/history.py +++ b/src/agents/handoffs/history.py @@ -2,6 +2,7 @@ import json from copy import deepcopy +from dataclasses import dataclass from typing import TYPE_CHECKING, Any, cast from ..items import ( @@ -12,9 +13,11 @@ ) if TYPE_CHECKING: + from ..run_context import ToolExecutionContext from . 
import HandoffHistoryMapper, HandoffInputData __all__ = [ + "build_agent_tool_history", "default_handoff_history_mapper", "get_conversation_history_wrappers", "nest_handoff_history", @@ -68,48 +71,110 @@ def get_conversation_history_wrappers() -> tuple[str, str]: return (_conversation_history_start, _conversation_history_end) -def nest_handoff_history( - handoff_input_data: HandoffInputData, +@dataclass(frozen=True) +class _History: + """Result of ``_build_history``.""" + + summary: list[TResponseInputItem] + filtered_pre_items: list[RunItem] + filtered_new_items: list[RunItem] + + +def _build_history( + input_history: str | tuple[TResponseInputItem, ...], + pre_items: tuple[RunItem, ...], + new_items: tuple[RunItem, ...], *, history_mapper: HandoffHistoryMapper | None = None, -) -> HandoffInputData: - """Summarize the previous transcript for the next agent.""" +) -> _History: + """Shared logic for ``nest_handoff_history`` and ``build_agent_tool_history``. - normalized_history = _normalize_input_history(handoff_input_data.input_history) - flattened_history = _flatten_nested_history_messages(normalized_history) + Normalizes/flattens input_history, filters pre_items (strict) and new_items (permissive), + then builds a ``<CONVERSATION HISTORY>`` text summary of the full transcript. + """ + normalized = _normalize_input_history(input_history) + flattened = _flatten_nested_history_messages(normalized) - # Convert items to plain inputs for the transcript summary. - pre_items_as_inputs: list[TResponseInputItem] = [] - filtered_pre_items: list[RunItem] = [] - for run_item in handoff_input_data.pre_handoff_items: + # Pre-items: strict filter — drops assistant messages, tool calls, reasoning. 
+ pre_inputs: list[TResponseInputItem] = [] + filtered_pre: list[RunItem] = [] + for run_item in pre_items: if isinstance(run_item, ToolApprovalItem): continue - plain_input = _run_item_to_plain_input(run_item) - pre_items_as_inputs.append(plain_input) - if _should_forward_pre_item(plain_input): - filtered_pre_items.append(run_item) - - new_items_as_inputs: list[TResponseInputItem] = [] - filtered_input_items: list[RunItem] = [] - for run_item in handoff_input_data.new_items: + plain = _run_item_to_plain_input(run_item) + pre_inputs.append(plain) + if _should_forward_pre_item(plain): + filtered_pre.append(run_item) + + # New items: permissive filter — keeps items with roles (including assistant). + new_inputs: list[TResponseInputItem] = [] + filtered_new: list[RunItem] = [] + for run_item in new_items: if isinstance(run_item, ToolApprovalItem): continue - plain_input = _run_item_to_plain_input(run_item) - new_items_as_inputs.append(plain_input) - if _should_forward_new_item(plain_input): - filtered_input_items.append(run_item) - - transcript = flattened_history + pre_items_as_inputs + new_items_as_inputs + plain = _run_item_to_plain_input(run_item) + new_inputs.append(plain) + if _should_forward_new_item(plain): + filtered_new.append(run_item) + transcript = flattened + pre_inputs + new_inputs mapper = history_mapper or default_handoff_history_mapper - history_items = mapper(transcript) + summary = mapper(transcript) + + return _History( + summary=summary, + filtered_pre_items=filtered_pre, + filtered_new_items=filtered_new, + ) + +def nest_handoff_history( + handoff_input_data: HandoffInputData, + *, + history_mapper: HandoffHistoryMapper | None = None, +) -> HandoffInputData: + """Summarize the previous transcript for the next agent.""" + result = _build_history( + handoff_input_data.input_history, + handoff_input_data.pre_handoff_items, + handoff_input_data.new_items, + history_mapper=history_mapper, + ) return handoff_input_data.clone( - 
input_history=tuple(deepcopy(item) for item in history_items), - pre_handoff_items=tuple(filtered_pre_items), + input_history=tuple(deepcopy(item) for item in result.summary), + pre_handoff_items=tuple(result.filtered_pre_items), # new_items stays unchanged for session history. - input_items=tuple(filtered_input_items), + input_items=tuple(result.filtered_new_items), + ) + + +def build_agent_tool_history( + tool_execution_context: ToolExecutionContext, +) -> tuple[list[TResponseInputItem], list[TResponseInputItem]]: + """Build a summary and filtered forwarded items for agent-as-tool with history. + + Uses the same summarization logic as handoff history nesting to convert the parent + conversation into a ``<CONVERSATION HISTORY>`` text summary plus filtered raw items. + + Args: + tool_execution_context: The conversation state captured during tool execution. + + Returns: + A tuple of ``(summary_items, forwarded_items)``. ``summary_items`` is a single-element + list containing the summary assistant message. ``forwarded_items`` are the raw items + that should be sent alongside the summary. + """ + + result = _build_history( + tool_execution_context.input_history, + tool_execution_context.pre_step_items, + tool_execution_context.new_step_items, ) + forwarded = [ + _run_item_to_plain_input(item) + for item in result.filtered_pre_items + result.filtered_new_items + ] + return result.summary, forwarded def default_handoff_history_mapper( diff --git a/src/agents/run_context.py b/src/agents/run_context.py index df7047eb38..f4d9e4120d 100644 --- a/src/agents/run_context.py +++ b/src/agents/run_context.py @@ -25,6 +25,24 @@ TContext = TypeVar("TContext", default=Any) +@dataclass(frozen=True) +class ToolExecutionContext: + """Conversation state at the point of tool execution. + + Mirrors ``HandoffInputData`` fields so ``build_agent_tool_history()`` can reuse + the same summarization logic as handoff history nesting. + """ + + input_history: str | tuple[TResponseInputItem, ...] 
+ """Cross-run history — the ``original_input`` passed to ``Runner.run()``.""" + + pre_step_items: tuple[Any, ...] + """Within-run ``RunItem`` objects from prior turns in this ``Runner.run()`` call.""" + + new_step_items: tuple[Any, ...] + """Current turn ``RunItem`` objects generated before tool execution.""" + + @dataclass(eq=False) class _ApprovalRecord: """Tracks approval/rejection state for a tool. @@ -57,6 +75,10 @@ class RunContextWrapper(Generic[TContext]): """ turn_input: list[TResponseInputItem] = field(default_factory=list) + tool_execution_context: ToolExecutionContext | None = None + """Conversation state at tool execution time. Set before tools run so agent-as-tool + implementations can access the parent conversation via ``include_conversation_history``.""" + _approvals: dict[str, _ApprovalRecord] = field(default_factory=dict) tool_input: Any | None = None """Structured input for the current agent tool run, when available.""" @@ -460,6 +482,7 @@ def _fork_with_tool_input(self, tool_input: Any) -> RunContextWrapper[TContext]: fork.usage = self.usage fork._approvals = self._approvals fork.turn_input = self.turn_input + fork.tool_execution_context = self.tool_execution_context fork.tool_input = tool_input return fork @@ -469,6 +492,7 @@ def _fork_without_tool_input(self) -> RunContextWrapper[TContext]: fork.usage = self.usage fork._approvals = self._approvals fork.turn_input = self.turn_input + fork.tool_execution_context = self.tool_execution_context return fork diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py index b37e27fbd4..84049e81d5 100644 --- a/src/agents/run_internal/turn_resolution.py +++ b/src/agents/run_internal/turn_resolution.py @@ -589,6 +589,16 @@ async def execute_tools_and_side_effects( new_items=processed_response.new_items, ) + # Snapshot conversation state for agent-as-tool with include_conversation_history. + # Cleared after _execute_tool_plan since new_step_items becomes stale. 
+ from ..run_context import ToolExecutionContext + + context_wrapper.tool_execution_context = ToolExecutionContext( + input_history=tuple(original_input) if isinstance(original_input, list) else original_input, + pre_step_items=tuple(pre_step_items), + new_step_items=tuple(new_step_items), + ) + ( function_results, tool_input_guardrail_results, @@ -605,6 +615,8 @@ async def execute_tools_and_side_effects( context_wrapper=context_wrapper, run_config=run_config, ) + context_wrapper.tool_execution_context = None + new_step_items.extend( _build_tool_result_items( function_results=function_results, diff --git a/src/agents/tool_context.py b/src/agents/tool_context.py index eaad0cc167..65a47ecf38 100644 --- a/src/agents/tool_context.py +++ b/src/agents/tool_context.py @@ -7,7 +7,7 @@ from ._tool_identity import get_tool_call_namespace, tool_trace_name from .agent_tool_state import get_agent_tool_state_scope, set_agent_tool_state_scope -from .run_context import RunContextWrapper, TContext +from .run_context import RunContextWrapper, TContext, ToolExecutionContext from .usage import Usage if TYPE_CHECKING: @@ -70,6 +70,7 @@ def __init__( agent: AgentBase[Any] | None = None, run_config: RunConfig | None = None, turn_input: list[TResponseInputItem] | None = None, + tool_execution_context: ToolExecutionContext | None = None, _approvals: dict[str, _ApprovalRecord] | None = None, tool_input: Any | None = None, ) -> None: @@ -79,6 +80,7 @@ def __init__( context=context, usage=resolved_usage, turn_input=list(turn_input or []), + tool_execution_context=tool_execution_context, _approvals={} if _approvals is None else _approvals, tool_input=tool_input, ) diff --git a/tests/test_agent_as_tool.py b/tests/test_agent_as_tool.py index c5cc123034..2c754e3b75 100644 --- a/tests/test_agent_as_tool.py +++ b/tests/test_agent_as_tool.py @@ -2747,3 +2747,497 @@ def test_replaced_agent_as_tool_preserves_agent_markers_for_build_agent_map() -> agent_map = _build_agent_map(parent_agent) assert 
agent_map["nested_agent"] is nested_agent + + +# --------------------------------------------------------------------------- +# include_conversation_history tests +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_agent_as_tool_include_conversation_history_prepends_summary( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """When include_conversation_history=True, the sub-agent should receive a + CONVERSATION HISTORY summary followed by forwarded items and the tool input.""" + + agent = Agent(name="analyst") + captured_input: list[Any] = [] + + async def fake_run( + cls, + starting_agent, + input, + *, + context, + max_turns, + hooks, + run_config, + previous_response_id, + conversation_id, + session, + ): + captured_input.append(input) + return type("DummyResult", (), {"final_output": "analysis done", "new_items": []})() + + monkeypatch.setattr(Runner, "run", classmethod(fake_run)) + + tool = agent.as_tool( + tool_name="analyze", + tool_description="Analyze with context", + include_conversation_history=True, + ) + + from agents.run_context import ToolExecutionContext + + tool_context = ToolContext( + context=None, + tool_name="analyze", + tool_call_id="call_1", + tool_arguments='{"input": "summarize"}', + ) + tool_context.tool_execution_context = ToolExecutionContext( + input_history=( + cast(TResponseInputItem, {"role": "user", "content": "What is the baggage policy?"}), + cast(TResponseInputItem, {"role": "assistant", "content": "You can bring one bag."}), + ), + pre_step_items=(), + new_step_items=(), + ) + + output = await tool.on_invoke_tool(tool_context, '{"input": "summarize"}') + + assert output == "analysis done" + assert len(captured_input) == 1 + input_items = captured_input[0] + assert isinstance(input_items, list) + assert len(input_items) >= 2 + summary = input_items[0] + assert isinstance(summary, dict) + assert summary.get("role") == "assistant" + assert "<CONVERSATION HISTORY>" in 
summary.get("content", "") + assert "baggage policy" in summary.get("content", "") + tool_input_item = input_items[-1] + assert tool_input_item.get("role") == "user" + assert tool_input_item.get("content") == "summarize" + + +@pytest.mark.asyncio +async def test_agent_as_tool_no_conversation_history_by_default( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """When include_conversation_history is not set (default False), the sub-agent should + only receive the tool input, not the parent's conversation history.""" + + agent = Agent(name="translator") + captured_input: list[Any] = [] + + async def fake_run( + cls, + starting_agent, + input, + *, + context, + max_turns, + hooks, + run_config, + previous_response_id, + conversation_id, + session, + ): + captured_input.append(input) + return type("DummyResult", (), {"final_output": "translated", "new_items": []})() + + monkeypatch.setattr(Runner, "run", classmethod(fake_run)) + + tool = agent.as_tool( + tool_name="translate", + tool_description="Translate text", + ) + + from agents.run_context import ToolExecutionContext + + tool_context = ToolContext( + context=None, + tool_name="translate", + tool_call_id="call_1", + tool_arguments='{"input": "hello"}', + ) + tool_context.tool_execution_context = ToolExecutionContext( + input_history=(cast(TResponseInputItem, {"role": "user", "content": "previous message"}),), + pre_step_items=(), + new_step_items=(), + ) + + output = await tool.on_invoke_tool(tool_context, '{"input": "hello"}') + + assert output == "translated" + assert len(captured_input) == 1 + assert captured_input[0] == "hello" + + +@pytest.mark.asyncio +async def test_agent_as_tool_include_conversation_history_empty_context( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """When include_conversation_history=True but tool_execution_context is None, + the sub-agent should receive only the tool input unchanged.""" + + agent = Agent(name="helper") + captured_input: list[Any] = [] + + async def fake_run( + cls, + 
starting_agent, + input, + *, + context, + max_turns, + hooks, + run_config, + previous_response_id, + conversation_id, + session, + ): + captured_input.append(input) + return type("DummyResult", (), {"final_output": "done", "new_items": []})() + + monkeypatch.setattr(Runner, "run", classmethod(fake_run)) + + tool = agent.as_tool( + tool_name="help", + tool_description="Help with task", + include_conversation_history=True, + ) + + tool_context = ToolContext( + context=None, + tool_name="help", + tool_call_id="call_1", + tool_arguments='{"input": "do something"}', + ) + + output = await tool.on_invoke_tool(tool_context, '{"input": "do something"}') + + assert output == "done" + assert len(captured_input) == 1 + assert captured_input[0] == "do something" + + +@pytest.mark.asyncio +async def test_agent_as_tool_include_conversation_history_forwards_new_step_assistant( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Current turn's assistant message should be forwarded as a raw item alongside the summary.""" + from openai.types.responses import ResponseOutputMessage, ResponseOutputText + + agent = Agent(name="analyst") + captured_input: list[Any] = [] + + async def fake_run( + cls, + starting_agent, + input, + *, + context, + max_turns, + hooks, + run_config, + previous_response_id, + conversation_id, + session, + ): + captured_input.append(input) + return type("DummyResult", (), {"final_output": "done", "new_items": []})() + + monkeypatch.setattr(Runner, "run", classmethod(fake_run)) + + tool = agent.as_tool( + tool_name="analyze", + tool_description="Analyze", + include_conversation_history=True, + ) + + from agents.run_context import ToolExecutionContext + + assistant_msg = ResponseOutputMessage( + id="msg_1", + content=[ + ResponseOutputText(text="Let me analyze that.", type="output_text", annotations=[]) + ], + role="assistant", + status="completed", + type="message", + ) + new_step_item = MessageOutputItem( + agent=agent, + raw_item=assistant_msg, + ) + + 
tool_context = ToolContext( + context=None, + tool_name="analyze", + tool_call_id="call_1", + tool_arguments='{"input": "analyze this"}', + ) + tool_context.tool_execution_context = ToolExecutionContext( + input_history="hello", + pre_step_items=(), + new_step_items=(new_step_item,), + ) + + await tool.on_invoke_tool(tool_context, '{"input": "analyze this"}') + + assert len(captured_input) == 1 + input_items = captured_input[0] + assert isinstance(input_items, list) + assert input_items[0].get("role") == "assistant" + assert "<CONVERSATION HISTORY>" in input_items[0].get("content", "") + has_forwarded_assistant = any( + isinstance(item, dict) + and item.get("role") == "assistant" + and "CONVERSATION HISTORY" not in str(item.get("content", "")) + for item in input_items[1:-1] + ) + assert has_forwarded_assistant, "Current turn assistant message should be forwarded" + assert input_items[-1].get("content") == "analyze this" + + +# --------------------------------------------------------------------------- +# build_agent_tool_history / _build_history unit tests +# --------------------------------------------------------------------------- + + +def _get_summary_content(summary: list[TResponseInputItem]) -> str: + """Extract the text content from a summary message for assertions.""" + assert len(summary) == 1 + item = summary[0] + assert isinstance(item, dict) + content = item.get("content") + assert isinstance(content, str) + return content + + +def test_build_agent_tool_history_returns_summary_and_forwarded(): + """build_agent_tool_history should return a summary with CONVERSATION HISTORY markers + and forwarded items filtered appropriately.""" + from agents.handoffs.history import build_agent_tool_history + from agents.run_context import ToolExecutionContext + + exec_ctx = ToolExecutionContext( + input_history=( + cast(TResponseInputItem, {"role": "user", "content": "hello"}), + cast(TResponseInputItem, {"role": "assistant", "content": "hi there"}), + ), + pre_step_items=(), + 
new_step_items=(), + ) + + summary, forwarded = build_agent_tool_history(exec_ctx) + + content = _get_summary_content(summary) + assert "<CONVERSATION HISTORY>" in content + assert "hello" in content + assert "hi there" in content + assert len(forwarded) == 0 + + +def test_build_agent_tool_history_filters_pre_step_assistant(): + """Pre-step assistant messages should be in summary only, not forwarded.""" + from openai.types.responses import ResponseOutputMessage, ResponseOutputText + + from agents.handoffs.history import build_agent_tool_history + from agents.run_context import ToolExecutionContext + + assistant_msg = ResponseOutputMessage( + id="msg_1", + content=[ResponseOutputText(text="I looked it up.", type="output_text", annotations=[])], + role="assistant", + status="completed", + type="message", + ) + pre_item = MessageOutputItem(agent=Agent(name="test"), raw_item=assistant_msg) + + exec_ctx = ToolExecutionContext( + input_history="hello", + pre_step_items=(pre_item,), + new_step_items=(), + ) + + summary, forwarded = build_agent_tool_history(exec_ctx) + + content = _get_summary_content(summary) + assert "I looked it up" in content + assert len(forwarded) == 0 + + +def test_build_agent_tool_history_forwards_new_step_assistant(): + """Current turn assistant messages should be forwarded alongside the summary.""" + from openai.types.responses import ResponseOutputMessage, ResponseOutputText + + from agents.handoffs.history import build_agent_tool_history + from agents.run_context import ToolExecutionContext + + assistant_msg = ResponseOutputMessage( + id="msg_1", + content=[ + ResponseOutputText(text="Let me analyze that.", type="output_text", annotations=[]) + ], + role="assistant", + status="completed", + type="message", + ) + new_item = MessageOutputItem(agent=Agent(name="test"), raw_item=assistant_msg) + + exec_ctx = ToolExecutionContext( + input_history="hello", + pre_step_items=(), + new_step_items=(new_item,), + ) + + summary, forwarded = build_agent_tool_history(exec_ctx) + 
+ content = _get_summary_content(summary) + assert "Let me analyze that" in content + assert len(forwarded) == 1 + fwd = forwarded[0] + assert isinstance(fwd, dict) + assert fwd.get("role") == "assistant" + + +def test_build_agent_tool_history_flattens_nested_history(): + """Nested <CONVERSATION HISTORY> blocks in input_history should be flattened.""" + from agents.handoffs.history import build_agent_tool_history + from agents.run_context import ToolExecutionContext + + nested_summary = ( + "For context, here is the conversation so far between the user and the previous agent:\n" + "<CONVERSATION HISTORY>\n" + "1. user: original question\n" + "2. assistant: original answer\n" + "</CONVERSATION HISTORY>" + ) + + exec_ctx = ToolExecutionContext( + input_history=( + cast(TResponseInputItem, {"role": "assistant", "content": nested_summary}), + cast(TResponseInputItem, {"role": "user", "content": "follow up"}), + ), + pre_step_items=(), + new_step_items=(), + ) + + summary, forwarded = build_agent_tool_history(exec_ctx) + + content = _get_summary_content(summary) + assert "original question" in content + assert "original answer" in content + assert "follow up" in content + assert content.count("<CONVERSATION HISTORY>") == 1 + + +def test_nest_handoff_history_uses_shared_build_history(): + """Verify nest_handoff_history still works correctly after refactoring to use _build_history.""" + from agents.handoffs import HandoffInputData + from agents.handoffs.history import nest_handoff_history + + handoff_data = HandoffInputData( + input_history=(cast(TResponseInputItem, {"role": "user", "content": "test question"}),), + pre_handoff_items=(), + new_items=(), + ) + + result = nest_handoff_history(handoff_data) + + assert isinstance(result.input_history, tuple) + assert len(result.input_history) == 1 + item = result.input_history[0] + assert isinstance(item, dict) + content = item.get("content") + assert isinstance(content, str) + assert "<CONVERSATION HISTORY>" in content + assert "test question" in content + + +@pytest.mark.asyncio +async def 
test_agent_as_tool_include_conversation_history_resume_uses_state( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """When resuming from a nested interruption, resume_state takes priority over + the history-prepended resolved_input.""" + + agent = Agent(name="outer") + tool_call = make_function_tool_call( + "outer_tool", + call_id="outer-1", + arguments='{"input": "summarize"}', + ) + tool_context = ToolContext( + context=None, + tool_name="outer_tool", + tool_call_id="outer-1", + tool_arguments=tool_call.arguments, + tool_call=tool_call, + ) + + from agents.run_context import ToolExecutionContext + + tool_context.tool_execution_context = ToolExecutionContext( + input_history=(cast(TResponseInputItem, {"role": "user", "content": "secret is 42"}),), + pre_step_items=(), + new_step_items=(), + ) + + inner_call = make_function_tool_call("inner_tool", call_id="inner-1") + approval_item = ToolApprovalItem(agent=agent, raw_item=inner_call) + + class DummyState: + def __init__(self, nested_context: ToolContext) -> None: + self._context = nested_context + + class DummyPendingResult: + def __init__(self) -> None: + self.interruptions = [approval_item] + self.final_output = None + + def to_state(self) -> DummyState: + return resume_state + + class DummyResumedResult: + def __init__(self) -> None: + self.interruptions: list[ToolApprovalItem] = [] + self.final_output = "resumed" + + nested_context = ToolContext( + context=None, + tool_name=tool_call.name, + tool_call_id=tool_call.call_id, + tool_arguments=tool_call.arguments, + tool_call=tool_call, + ) + resume_state = DummyState(nested_context) + pending_result = DummyPendingResult() + record_agent_tool_run_result(tool_call, cast(Any, pending_result)) + tool_context.reject_tool(approval_item) + + resumed_result = DummyResumedResult() + run_inputs: list[Any] = [] + + async def run_resume(cls, /, starting_agent, input, **kwargs) -> DummyResumedResult: + run_inputs.append(input) + # resume_state should be used, not the 
history-prepended resolved_input. + assert input is resume_state + return resumed_result + + monkeypatch.setattr(Runner, "run", classmethod(run_resume)) + + tool = agent.as_tool( + tool_name="outer_tool", + tool_description="Outer agent tool", + include_conversation_history=True, + ) + + output = await tool.on_invoke_tool(tool_context, tool_call.arguments) + + assert output == "resumed" + assert run_inputs == [resume_state] diff --git a/tests/test_run_step_execution.py b/tests/test_run_step_execution.py index d7f7ca2929..5100e0297a 100644 --- a/tests/test_run_step_execution.py +++ b/tests/test_run_step_execution.py @@ -3545,3 +3545,78 @@ async def test_execute_tools_emits_hosted_mcp_rejection_reason_from_explicit_mes assert responses[0].raw_item["approve"] is False assert responses[0].raw_item["approval_request_id"] == "mcp-approval-reject-reason" assert responses[0].raw_item["reason"] == "Denied by policy" + + +# --------------------------------------------------------------------------- +# ToolExecutionContext propagation tests +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_tool_execution_context_set_before_tool_execution(): + """execute_tools_and_side_effects should set tool_execution_context on context_wrapper + before tools execute.""" + + captured_context: list[Any] = [] + + @function_tool + async def capture_tool(context: RunContextWrapper[None]) -> str: + captured_context.append(context.tool_execution_context) + return "ok" + + agent = Agent(name="test", tools=[capture_tool]) + response = ModelResponse( + output=[get_function_tool_call("capture_tool", json.dumps({}))], + usage=Usage(), + response_id=None, + ) + + ctx = RunContextWrapper(None) + original_input = [ + cast(TResponseInputItem, {"role": "user", "content": "hello"}), + ] + + await get_execute_result(agent, response, original_input=original_input, context_wrapper=ctx) + + assert len(captured_context) == 1 + exec_ctx = 
captured_context[0] + assert exec_ctx is not None + assert exec_ctx.input_history == tuple(original_input) + assert isinstance(exec_ctx.new_step_items, tuple) + + +@pytest.mark.asyncio +async def test_tool_execution_context_contains_pre_step_items(): + """tool_execution_context.pre_step_items should contain items from previous turns.""" + + captured_context: list[Any] = [] + + @function_tool + async def capture_tool(context: RunContextWrapper[None]) -> str: + captured_context.append(context.tool_execution_context) + return "ok" + + agent = Agent(name="test", tools=[capture_tool]) + response = ModelResponse( + output=[get_function_tool_call("capture_tool", json.dumps({}))], + usage=Usage(), + response_id=None, + ) + + pre_item = MessageOutputItem( + agent=agent, + raw_item=ResponseOutputMessage( + id="msg_pre", + content=[], + role="assistant", + status="completed", + type="message", + ), + ) + + ctx = RunContextWrapper(None) + await get_execute_result(agent, response, generated_items=[pre_item], context_wrapper=ctx) + + assert len(captured_context) == 1 + exec_ctx = captured_context[0] + assert len(exec_ctx.pre_step_items) == 1