Skip to content

Commit 199b86b

Browse files
agents-response
Signed-off-by: Adrian Cole <adrian@tetrate.io>
1 parent c7c2a92 commit 199b86b

3 files changed

Lines changed: 47 additions & 11 deletions

File tree

inference-platforms/agent.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# run like this: uv run --exact -q --env-file .env agent.py
22
# /// script
33
# dependencies = [
4-
# "openai-agents",
4+
# "openai-agents @ git+https://github.com/openai/openai-agents-python.git@refs/pull/2034/head",
55
# "httpx",
66
# "mcp",
77
# "elastic-opentelemetry",
@@ -16,23 +16,25 @@
1616
# This must precede any other imports you want to instrument!
1717
auto_instrumentation.initialize()
1818

19+
import argparse
1920
import asyncio
2021
import os
2122
from datetime import datetime, timedelta
2223

2324
from agents import (
2425
Agent,
26+
HostedMCPTool,
2527
OpenAIProvider,
2628
RunConfig,
2729
Runner,
2830
Tool,
2931
)
3032
from agents.mcp import MCPServerStreamableHttp, MCPUtil
33+
from openai.types.responses.tool_param import Mcp
3134

3235

33-
async def run_agent(tools: list[Tool]):
34-
model_name = os.getenv("AGENT_MODEL", "gpt-5-nano")
35-
model = OpenAIProvider(use_responses=False).get_model(model_name)
36+
async def run_agent(tools: list[Tool], model_name: str, use_responses: bool):
37+
model = OpenAIProvider(use_responses=use_responses).get_model(model_name)
3638
agent = Agent(
3739
name="flight-search-agent",
3840
model=model,
@@ -49,18 +51,39 @@ async def run_agent(tools: list[Tool]):
4951

5052

5153
async def main():
54+
parser = argparse.ArgumentParser(description="MCP-enabled flight search agent")
55+
parser.add_argument("--use-responses-api", action="store_true", help="Use Responses API instead of Agents")
56+
args = parser.parse_args()
57+
58+
model_name = os.getenv("AGENT_MODEL", "gpt-5-nano")
59+
mcp_url = os.getenv("MCP_URL", "https://mcp.kiwi.com")
60+
mcp_headers = dict(h.split("=", 1) for h in os.getenv("MCP_HEADERS", "").split(",") if h)
61+
62+
if args.use_responses_api:
63+
# Server-side MCP via Responses API
64+
tools = [
65+
HostedMCPTool(
66+
tool_config=Mcp(
67+
type="mcp",
68+
server_url=mcp_url,
69+
server_label="kiwi-flights",
70+
headers=mcp_headers,
71+
require_approval="never",
72+
)
73+
)
74+
]
75+
await run_agent(tools, model_name, use_responses=True)
76+
return
77+
78+
# Client-side MCP orchestration
5279
async with MCPServerStreamableHttp(
53-
{
54-
"url": os.getenv("MCP_URL", "https://mcp.kiwi.com"),
55-
"headers": dict(h.split("=", 1) for h in os.getenv("MCP_HEADERS", "").split(",") if h),
56-
"timeout": 30.0,
57-
},
80+
{"url": mcp_url, "headers": mcp_headers, "timeout": 30.0},
5881
client_session_timeout_seconds=60.0,
5982
) as server:
6083
tools = await server.list_tools()
6184
util = MCPUtil()
6285
tools = [util.to_function_tool(tool, server, False) for tool in tools]
63-
await run_agent(tools)
86+
await run_agent(tools, model_name, use_responses=False)
6487

6588

6689
if __name__ == "__main__":

inference-platforms/llama-stack/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,19 @@ Or, for the OpenAI Responses API
3535
uv run --exact -q --env-file env.local ../chat.py --use-responses-api
3636
```
3737

38+
### MCP Agent
39+
40+
```bash
41+
uv run --exact -q --env-file env.local ../agent.py --use-responses-api
42+
```
43+
3844
## Notes
3945

46+
* Llama Stack's Responses API connects to MCP servers server-side (unlike aigw,
47+
which proxies MCP). The agent passes MCP configuration via `HostedMCPTool`.
48+
* Until [this PR][openai-agents-pr] merges, the agent requires the fix branch,
49+
which handles providers that don't return token usage details.
50+
4051
* Uses the `starter` distribution with its built-in `remote::openai` provider,
4152
pointing to Ollama via `OPENAI_BASE_URL` environment variable.
4253
* Models require `provider_id/` prefix (e.g., `openai/qwen3:0.6b`) as of
@@ -50,3 +61,4 @@ uv run --exact -q --env-file env.local ../chat.py --use-responses-api
5061
[uv]: https://docs.astral.sh/uv/getting-started/installation/
5162
[prefix-pr]: https://github.com/meta-llama/llama-stack/pull/3822
5263
[docker]: https://github.com/llamastack/llama-stack/issues/406
64+
[openai-agents-pr]: https://github.com/openai/openai-agents-python/pull/2034

inference-platforms/llama-stack/env.local

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# OpenAI-compatible endpoint configuration
22
OPENAI_BASE_URL=http://localhost:8321/v1
3+
OPENAI_API_KEY=unused
34
# Models require `provider_id/` prefix, in this case `openai`
45
CHAT_MODEL=openai/qwen3:0.6b
5-
OPENAI_API_KEY=unused
6+
AGENT_MODEL=openai/qwen3:1.7b
67

78
# OpenTelemetry configuration
89
OTEL_SERVICE_NAME=llama-stack

0 commit comments

Comments
 (0)