diff --git a/docs/sampling.md b/docs/sampling.md new file mode 100644 index 000000000..3ec408102 --- /dev/null +++ b/docs/sampling.md @@ -0,0 +1,200 @@ +# Sampling + +Sampling is a powerful MCP feature that allows servers to request LLM completions from the client. Instead of the server needing its own LLM access, it can "borrow" the client's language model to generate text, analyze content, or perform any LLM task. + +## How It Works + +In a typical MCP interaction, the client calls tools on the server. With sampling, the flow is reversed for part of the interaction: + +```text +Client Server + │ │ + │ call_tool("summarize") │ + │──────────────────────────────>│ + │ │ + │ sampling/createMessage │ + │<──────────────────────────────│ + │ │ + │ (client calls LLM) │ + │ │ + │ CreateMessageResult │ + │──────────────────────────────>│ + │ │ + │ tool result │ + │<──────────────────────────────│ +``` + +1. The client calls a tool on the server. +2. The server's tool handler sends a `sampling/createMessage` request back to the client. +3. The client's sampling callback processes the request (typically by calling an LLM). +4. The client returns the LLM response to the server. +5. The server uses the response to complete the tool execution. 
+ +## Server Side + +On the server side, use `ctx.session.create_message()` inside a tool handler to request a completion: + +--8<-- "examples/snippets/servers/sampling.py" + +The `create_message` method accepts these parameters: + +| Parameter | Type | Description | +|-----------|------|-------------| +| `messages` | `list[SamplingMessage]` | The conversation messages to send | +| `max_tokens` | `int` | Maximum tokens in the response | +| `system_prompt` | `str \| None` | Optional system prompt | +| `temperature` | `float \| None` | Sampling temperature (0.0 = deterministic) | +| `stop_sequences` | `list[str] \| None` | Sequences that stop generation | +| `model_preferences` | `ModelPreferences \| None` | Hints about which model to use | + +## Client Side + +On the client side, provide a `sampling_callback` when creating the session. This callback handles `sampling/createMessage` requests from the server: + +```python +from mcp import ClientSession, StdioServerParameters, types +from mcp.client.context import ClientRequestContext +from mcp.client.stdio import stdio_client + + +async def handle_sampling( + context: ClientRequestContext, + params: types.CreateMessageRequestParams, +) -> types.CreateMessageResult: + # Forward the request to your LLM + # ... call OpenAI, Anthropic, Azure OpenAI, etc. 
+ return types.CreateMessageResult( + role="assistant", + content=types.TextContent(type="text", text="LLM response here"), + model="your-model-name", + stop_reason="endTurn", + ) + + +async def main(): + server_params = StdioServerParameters(command="your-server-command") + + async with stdio_client(server_params) as (read, write): + async with ClientSession( + read, + write, + sampling_callback=handle_sampling, + ) as session: + await session.initialize() + + # Now when you call a tool that uses sampling, + # your callback will be invoked automatically + result = await session.call_tool("summarize", {"text": "..."}) +``` + +### Using the High-Level Client + +The `Client` class also supports sampling callbacks: + +```python +from mcp import Client + +async with Client(server, sampling_callback=handle_sampling) as client: + result = await client.call_tool("summarize", {"text": "..."}) +``` + +## Integrating with LLM Providers + +Here is how to connect the sampling callback to popular LLM providers: + +### OpenAI + +```python +from openai import AsyncOpenAI + +openai_client = AsyncOpenAI() + + +async def handle_sampling( + context: ClientRequestContext, + params: types.CreateMessageRequestParams, +) -> types.CreateMessageResult: + messages = [] + if params.system_prompt: + messages.append({"role": "system", "content": params.system_prompt}) + for msg in params.messages: + if isinstance(msg.content, types.TextContent): + messages.append({"role": msg.role, "content": msg.content.text}) + + response = await openai_client.chat.completions.create( + model="gpt-4o", + messages=messages, + max_tokens=params.max_tokens, + temperature=params.temperature, + ) + return types.CreateMessageResult( + role="assistant", + content=types.TextContent( + type="text", text=response.choices[0].message.content or "" + ), + model=response.model, + stop_reason="endTurn", + ) +``` + +### Anthropic + +```python +from anthropic import AsyncAnthropic + +anthropic_client = AsyncAnthropic() + + 
+async def handle_sampling( + context: ClientRequestContext, + params: types.CreateMessageRequestParams, +) -> types.CreateMessageResult: + messages = [ + {"role": msg.role, "content": msg.content.text} + for msg in params.messages + if isinstance(msg.content, types.TextContent) + ] + + response = await anthropic_client.messages.create( + model="claude-sonnet-4-20250514", + messages=messages, + max_tokens=params.max_tokens or 1024, + system=params.system_prompt or "", + ) + return types.CreateMessageResult( + role="assistant", + content=types.TextContent( + type="text", text=response.content[0].text + ), + model=response.model, + stop_reason="endTurn", + ) +``` + +## Complete Example + +For a complete working example with both server and client, see: + +- **Server**: [`examples/servers/simple-sampling`](https://github.com/modelcontextprotocol/python-sdk/tree/main/examples/servers/simple-sampling) +- **Client**: [`examples/clients/simple-sampling-client`](https://github.com/modelcontextprotocol/python-sdk/tree/main/examples/clients/simple-sampling-client) + +## Model Preferences + +Servers can provide hints about which model to use via `model_preferences`: + +```python +from mcp.types import ModelPreferences, ModelHint + +result = await ctx.session.create_message( + messages=[...], + max_tokens=100, + model_preferences=ModelPreferences( + hints=[ModelHint(name="claude-sonnet-4-20250514")], + cost_priority=0.5, + speed_priority=0.8, + intelligence_priority=0.7, + ), +) +``` + +The client can use these hints to select an appropriate model, but is not required to follow them. diff --git a/examples/clients/simple-sampling-client/README.md b/examples/clients/simple-sampling-client/README.md new file mode 100644 index 000000000..c7ace738f --- /dev/null +++ b/examples/clients/simple-sampling-client/README.md @@ -0,0 +1,63 @@ +# Simple Sampling Client + +A simple MCP client that demonstrates how to handle **sampling** requests from an MCP server. 
+ +## Overview + +When an MCP server needs LLM completions during tool execution, it sends a `sampling/createMessage` request to the client. This client provides a `sampling_callback` that handles these requests. + +In a real application, the callback would forward the request to an LLM provider (OpenAI, Anthropic, Azure OpenAI, etc.). This example uses a simple demo response for illustration. + +## Usage + +First, make sure the sampling server is available (install it from `examples/servers/simple-sampling`). + +Then run the client: + +```bash +uv run mcp-simple-sampling-client +``` + +## How It Works + +1. The client connects to the `mcp-simple-sampling` server via stdio transport. +2. It provides a `sampling_callback` function that handles `sampling/createMessage` requests. +3. When it calls a tool (e.g., `summarize`), the server sends a sampling request back to the client. +4. The client's callback processes the request and returns a response. +5. The server uses that response to complete the tool execution. 
+ +## Integrating a Real LLM + +To use a real LLM instead of the demo response, replace the body of `handle_sampling` with your LLM call: + +```python +from openai import AsyncOpenAI + +openai_client = AsyncOpenAI() + +async def handle_sampling( + context: ClientRequestContext, + params: types.CreateMessageRequestParams, +) -> types.CreateMessageResult: + messages = [] + if params.system_prompt: + messages.append({"role": "system", "content": params.system_prompt}) + for msg in params.messages: + if isinstance(msg.content, types.TextContent): + messages.append({"role": msg.role, "content": msg.content.text}) + + response = await openai_client.chat.completions.create( + model="gpt-4o", + messages=messages, + max_tokens=params.max_tokens, + temperature=params.temperature, + ) + return types.CreateMessageResult( + role="assistant", + content=types.TextContent( + type="text", text=response.choices[0].message.content or "" + ), + model=response.model, + stop_reason="endTurn", + ) +``` diff --git a/examples/clients/simple-sampling-client/mcp_simple_sampling_client/__init__.py b/examples/clients/simple-sampling-client/mcp_simple_sampling_client/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/clients/simple-sampling-client/mcp_simple_sampling_client/__main__.py b/examples/clients/simple-sampling-client/mcp_simple_sampling_client/__main__.py new file mode 100644 index 000000000..a6d49c349 --- /dev/null +++ b/examples/clients/simple-sampling-client/mcp_simple_sampling_client/__main__.py @@ -0,0 +1,3 @@ +from mcp_simple_sampling_client.main import main + +main() diff --git a/examples/clients/simple-sampling-client/mcp_simple_sampling_client/main.py b/examples/clients/simple-sampling-client/mcp_simple_sampling_client/main.py new file mode 100644 index 000000000..bcd00ea7b --- /dev/null +++ b/examples/clients/simple-sampling-client/mcp_simple_sampling_client/main.py @@ -0,0 +1,135 @@ +"""MCP client demonstrating the sampling feature. 
+ +This client connects to an MCP server and provides a sampling callback +so the server can request LLM completions during tool execution. +""" + +from __future__ import annotations + +import asyncio + +import click +from mcp import ClientSession, StdioServerParameters, types +from mcp.client.context import ClientRequestContext +from mcp.client.stdio import stdio_client + + +async def handle_sampling( + context: ClientRequestContext, + params: types.CreateMessageRequestParams, +) -> types.CreateMessageResult: + """Handle sampling requests from the server. + + In a real application, this would forward the messages to an LLM + (e.g., OpenAI, Anthropic, Azure OpenAI) and return the response. + This example returns canned, keyword-based responses for demonstration. + + Args: + context: The request context from the client session. + params: The sampling request parameters including messages, + max_tokens, temperature, etc. + + Returns: + A CreateMessageResult with the LLM response. + """ + # Extract the user's message text + user_text = "" + for msg in params.messages: + if msg.role == "user": + if isinstance(msg.content, types.TextContent): + user_text = msg.content.text + elif isinstance(msg.content, list): + for block in msg.content: + if isinstance(block, types.TextContent): + user_text += block.text + + # In a real application, you would call your LLM here: + # + # from openai import AsyncOpenAI + # client = AsyncOpenAI() + # response = await client.chat.completions.create( + # model="gpt-4o", + # messages=[{"role": m.role, "content": m.content.text} for m in params.messages], + # max_tokens=params.max_tokens, + # temperature=params.temperature, + # ) + # return types.CreateMessageResult( + # role="assistant", + # content=types.TextContent(type="text", text=response.choices[0].message.content), + # model=response.model, + # stop_reason="endTurn", + # ) + + # For this demo, we generate a simple response + if "summary" in user_text.lower() or "summarize" in 
user_text.lower(): + response_text = "[Demo summary] The text discusses a topic and presents key points." + elif "sentiment" in user_text.lower(): + response_text = "positive" + else: + response_text = f"[Demo response] Processed request with {len(user_text)} characters." + + print(f" [Sampling] Received request ({len(params.messages)} message(s))") + print(f" [Sampling] Responding with: {response_text[:80]}...") + + return types.CreateMessageResult( + role="assistant", + content=types.TextContent(type="text", text=response_text), + model="demo-model", + stop_reason="endTurn", + ) + + +async def run() -> None: + """Connect to the sampling server and demonstrate tool calls.""" + server_params = StdioServerParameters( + command="uv", + args=["run", "mcp-simple-sampling"], + ) + + async with stdio_client(server_params) as (read, write): + async with ClientSession( + read, + write, + sampling_callback=handle_sampling, + ) as session: + await session.initialize() + + # List available tools + tools = await session.list_tools() + tool_names = [t.name for t in tools.tools] + print(f"Available tools: {tool_names}") + + # Call the summarize tool + print("\n--- Calling summarize tool ---") + result = await session.call_tool( + "summarize", + { + "text": ( + "The Model Context Protocol (MCP) is an open protocol that " + "standardizes how applications provide context to LLMs. MCP " + "provides a standardized way to connect AI models to different " + "data sources and tools, enabling more powerful AI applications." + ) + }, + ) + if result.content and isinstance(result.content[0], types.TextContent): + print(f"Summary: {result.content[0].text}") + + # Call the analyze_sentiment tool + print("\n--- Calling analyze_sentiment tool ---") + result = await session.call_tool( + "analyze_sentiment", + {"text": "I absolutely love this new feature! 
It works great."}, + ) + if result.content and isinstance(result.content[0], types.TextContent): + print(f"Sentiment: {result.content[0].text}") + + +@click.command() +def main() -> int: + asyncio.run(run()) + return 0 + + +if __name__ == "__main__": + main() diff --git a/examples/clients/simple-sampling-client/pyproject.toml b/examples/clients/simple-sampling-client/pyproject.toml new file mode 100644 index 000000000..33811da93 --- /dev/null +++ b/examples/clients/simple-sampling-client/pyproject.toml @@ -0,0 +1,36 @@ +[project] +name = "mcp-simple-sampling-client" +version = "0.1.0" +description = "A simple MCP client demonstrating the sampling feature" +readme = "README.md" +requires-python = ">=3.10" +authors = [{ name = "Model Context Protocol a Series of LF Projects, LLC." }] +keywords = ["mcp", "llm", "sampling", "client"] +license = { text = "MIT" } +dependencies = ["click>=8.2.0", "mcp"] + +[project.scripts] +mcp-simple-sampling-client = "mcp_simple_sampling_client.main:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["mcp_simple_sampling_client"] + +[tool.pyright] +include = ["mcp_simple_sampling_client"] +venvPath = "." +venv = ".venv" + +[tool.ruff.lint] +select = ["E", "F", "I"] +ignore = [] + +[tool.ruff] +line-length = 120 +target-version = "py310" + +[dependency-groups] +dev = ["pyright>=1.1.378", "pytest>=8.3.3", "ruff>=0.6.9"] diff --git a/examples/servers/simple-sampling/README.md b/examples/servers/simple-sampling/README.md new file mode 100644 index 000000000..86eca2f74 --- /dev/null +++ b/examples/servers/simple-sampling/README.md @@ -0,0 +1,32 @@ +# Simple Sampling Server + +A simple MCP server that demonstrates the **sampling** feature. Sampling allows a server to request LLM completions from the connected client, effectively "borrowing" the client's language model. 
+ +## Overview + +This server exposes two tools that use sampling: + +- **`summarize`** — Sends the given text to the client's LLM and asks for a concise summary. +- **`analyze_sentiment`** — Sends the given text to the client's LLM and asks for sentiment analysis (positive, negative, or neutral). + +When a client calls either tool, the server sends a `sampling/createMessage` request back to the client. The client's sampling callback processes this request (typically by forwarding it to an LLM) and returns the result. + +## Usage + +Start the server: + +```bash +uv run mcp-simple-sampling +``` + +## How Sampling Works + +1. The client calls a tool on the server (e.g., `summarize`). +2. Inside the tool handler, the server calls `ctx.session.create_message(...)` with messages and parameters. +3. The SDK sends a `sampling/createMessage` request to the client. +4. The client's `sampling_callback` processes the request and returns a `CreateMessageResult`. +5. The server receives the result and uses it to complete the tool execution. + +## Paired Client + +See [`examples/clients/simple-sampling-client`](../../clients/simple-sampling-client/) for a client that connects to this server and provides a sampling callback. 
diff --git a/examples/servers/simple-sampling/mcp_simple_sampling/__init__.py b/examples/servers/simple-sampling/mcp_simple_sampling/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/servers/simple-sampling/mcp_simple_sampling/__main__.py b/examples/servers/simple-sampling/mcp_simple_sampling/__main__.py new file mode 100644 index 000000000..19d7c32c8 --- /dev/null +++ b/examples/servers/simple-sampling/mcp_simple_sampling/__main__.py @@ -0,0 +1,3 @@ +from mcp_simple_sampling.server import main + +main() diff --git a/examples/servers/simple-sampling/mcp_simple_sampling/server.py b/examples/servers/simple-sampling/mcp_simple_sampling/server.py new file mode 100644 index 000000000..757641b75 --- /dev/null +++ b/examples/servers/simple-sampling/mcp_simple_sampling/server.py @@ -0,0 +1,84 @@ +"""MCP server demonstrating the sampling feature. + +This server exposes tools that use sampling to request LLM completions +from the connected client. The client must provide a sampling callback +to handle these requests. +""" + +import click +from mcp.server.mcpserver import Context, MCPServer +from mcp.server.session import ServerSession +from mcp.types import SamplingMessage, TextContent + +mcp = MCPServer(name="Sampling Example Server") + + +@mcp.tool() +async def summarize(text: str, ctx: Context[ServerSession, None]) -> str: + """Summarize a piece of text using the client's LLM. + + This tool sends a sampling request to the connected client, + asking its LLM to produce a concise summary of the given text. + + Args: + text: The text to summarize. 
+ """ + result = await ctx.session.create_message( + messages=[ + SamplingMessage( + role="user", + content=TextContent( + type="text", + text=f"Please provide a concise summary of the following text:\n\n{text}", + ), + ) + ], + max_tokens=200, + ) + + if result.content.type == "text": + return result.content.text + return str(result.content) + + +@mcp.tool() +async def analyze_sentiment(text: str, ctx: Context[ServerSession, None]) -> str: + """Analyze the sentiment of a piece of text using the client's LLM. + + Args: + text: The text to analyze. + """ + result = await ctx.session.create_message( + messages=[ + SamplingMessage( + role="user", + content=TextContent( + type="text", + text=( + "Analyze the sentiment of the following text. " + "Respond with exactly one word: positive, negative, or neutral.\n\n" + f"{text}" + ), + ), + ) + ], + max_tokens=10, + temperature=0.0, + ) + + if result.content.type == "text": + return result.content.text + return str(result.content) + + +@click.command() +@click.option( + "--transport", + type=click.Choice(["stdio"]), + default="stdio", + help="Transport type", +) +def main(transport: str) -> int: + if transport == "stdio": + mcp.run(transport="stdio") + return 0 diff --git a/examples/servers/simple-sampling/pyproject.toml b/examples/servers/simple-sampling/pyproject.toml new file mode 100644 index 000000000..7303ea671 --- /dev/null +++ b/examples/servers/simple-sampling/pyproject.toml @@ -0,0 +1,36 @@ +[project] +name = "mcp-simple-sampling" +version = "0.1.0" +description = "A simple MCP server demonstrating the sampling feature" +readme = "README.md" +requires-python = ">=3.10" +authors = [{ name = "Model Context Protocol a Series of LF Projects, LLC." 
}] +keywords = ["mcp", "llm", "sampling"] +license = { text = "MIT" } +dependencies = ["click>=8.2.0", "mcp"] + +[project.scripts] +mcp-simple-sampling = "mcp_simple_sampling.server:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["mcp_simple_sampling"] + +[tool.pyright] +include = ["mcp_simple_sampling"] +venvPath = "." +venv = ".venv" + +[tool.ruff.lint] +select = ["E", "F", "I"] +ignore = [] + +[tool.ruff] +line-length = 120 +target-version = "py310" + +[dependency-groups] +dev = ["pyright>=1.1.378", "pytest>=8.3.3", "ruff>=0.6.9"] diff --git a/mkdocs.yml b/mkdocs.yml index 070c533e3..418e1e22a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -16,6 +16,7 @@ nav: - Migration Guide: migration.md - Documentation: - Concepts: concepts.md + - Sampling: sampling.md - Low-Level Server: low-level-server.md - Authorization: authorization.md - Testing: testing.md diff --git a/uv.lock b/uv.lock index d01d510f1..f0607c9ae 100644 --- a/uv.lock +++ b/uv.lock @@ -16,6 +16,8 @@ members = [ "mcp-simple-pagination", "mcp-simple-prompt", "mcp-simple-resource", + "mcp-simple-sampling", + "mcp-simple-sampling-client", "mcp-simple-streamablehttp", "mcp-simple-streamablehttp-stateless", "mcp-simple-task", @@ -1120,6 +1122,64 @@ dev = [ { name = "ruff", specifier = ">=0.6.9" }, ] +[[package]] +name = "mcp-simple-sampling" +version = "0.1.0" +source = { editable = "examples/servers/simple-sampling" } +dependencies = [ + { name = "click" }, + { name = "mcp" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pyright" }, + { name = "pytest" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "click", specifier = ">=8.2.0" }, + { name = "mcp", editable = "." 
}, +] + +[package.metadata.requires-dev] +dev = [ + { name = "pyright", specifier = ">=1.1.378" }, + { name = "pytest", specifier = ">=8.3.3" }, + { name = "ruff", specifier = ">=0.6.9" }, +] + +[[package]] +name = "mcp-simple-sampling-client" +version = "0.1.0" +source = { editable = "examples/clients/simple-sampling-client" } +dependencies = [ + { name = "click" }, + { name = "mcp" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pyright" }, + { name = "pytest" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "click", specifier = ">=8.2.0" }, + { name = "mcp", editable = "." }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "pyright", specifier = ">=1.1.378" }, + { name = "pytest", specifier = ">=8.3.3" }, + { name = "ruff", specifier = ">=0.6.9" }, +] + [[package]] name = "mcp-simple-streamablehttp" version = "0.1.0"