From bd9bf369bbf1959be6c33d5709e82d8ee461fd2f Mon Sep 17 00:00:00 2001 From: Giles Odigwe Date: Tue, 10 Mar 2026 15:53:22 -0700 Subject: [PATCH 1/2] Python: Sanitize MCP tool schemas before sending to LLM APIs MCP servers (e.g. matlab-mcp-core-server) can produce inputSchema dicts with standard JSON Schema fields that OpenAI-compatible API backends reject. Add sanitize_schema_for_api() utility that: - Resolves $ref pointers inline from $defs/definitions - Strips unsupported root-level keys ($schema, $id, title) - Removes $defs/definitions after resolution - Ensures type: object when properties is present - Deep-copies to avoid mutating cached schemas Apply sanitization in: - OpenAIResponsesClient._prepare_tools_for_openai() (also fixes pre-existing bug where additionalProperties=False mutated the cache) - FunctionTool.to_json_schema_spec() (covers Chat Completions + Ollama) Fixes #4540 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../packages/core/agent_framework/_tools.py | 96 +++++++++- .../openai/_responses_client.py | 3 +- python/packages/core/tests/core/test_tools.py | 170 ++++++++++++++++++ .../openai/test_openai_responses_client.py | 124 +++++++++++++ 4 files changed, 391 insertions(+), 2 deletions(-) diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 105738e717..733704354f 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -647,6 +647,9 @@ def parse_result(result: Any) -> str: def to_json_schema_spec(self) -> dict[str, Any]: """Convert a FunctionTool to the JSON Schema function specification format. + The parameter schema is sanitized to remove JSON Schema features + (e.g. ``$ref``, ``$defs``, ``$schema``) that LLM APIs may not accept. + Returns: A dictionary containing the function specification in JSON Schema format. """ @@ -655,7 +658,7 @@ def to_json_schema_spec(self) -> dict[str, Any]: "function": { "name": self.name, "description": self.description, - "parameters": self.parameters(), + "parameters": sanitize_schema_for_api(self.parameters()), }, } @@ -668,6 +671,97 @@ def to_dict(self, *, exclude: set[str] | None = None, exclude_none: bool = True) return as_dict +# Keys that are valid JSON Schema metadata but not accepted by most LLM APIs +# when used in function tool parameter schemas. +_UNSUPPORTED_SCHEMA_ROOT_KEYS: Final[frozenset[str]] = frozenset({ + "$schema", + "$id", + "title", +}) + + +def _resolve_refs(schema: dict[str, Any], defs: dict[str, Any]) -> dict[str, Any]: + """Recursively resolve ``$ref`` references by inlining definitions. + + Args: + schema: A JSON Schema node (possibly containing ``$ref``). + defs: The top-level ``$defs`` / ``definitions`` mapping to resolve against. + + Returns: + A new dict with ``$ref`` pointers replaced by their resolved definitions. + """ + if "$ref" in schema: + ref_path: str = schema["$ref"] + # Only handle local fragment references: #/$defs/Name or #/definitions/Name + for prefix in ("#/$defs/", "#/definitions/"): + if ref_path.startswith(prefix): + def_name = ref_path[len(prefix) :] + if def_name in defs: + resolved = dict(defs[def_name]) + # Merge any sibling keys (e.g. description) from the referring node + for k, v in schema.items(): + if k != "$ref" and k not in resolved: + resolved[k] = v + return _resolve_refs(resolved, defs) + # Unresolvable $ref — drop it and keep sibling keys as a best-effort fallback + return {k: v for k, v in schema.items() if k != "$ref"} + + result: dict[str, Any] = {} + for key, value in schema.items(): + if isinstance(value, dict): + result[key] = _resolve_refs(cast(dict[str, Any], value), defs) + elif isinstance(value, list): + result[key] = [ + _resolve_refs(cast(dict[str, Any], item), defs) if isinstance(item, dict) else item + for item in cast(list[Any], value) + ] + else: + result[key] = value + return result + + +def sanitize_schema_for_api(schema: dict[str, Any]) -> dict[str, Any]: + """Sanitize a JSON Schema for use as LLM function-tool parameters. + + MCP servers may return ``inputSchema`` dicts that contain standard JSON + Schema features (``$schema``, ``$defs``, ``$ref``, ``title``, etc.) which + many LLM API backends do not accept. This function produces a clean copy + suitable for the ``parameters`` field of a function-tool definition. + + The original *schema* dict is never mutated. + + Args: + schema: The raw JSON Schema dict (e.g. from ``tool.inputSchema``). + + Returns: + A sanitized deep copy with unsupported fields removed, ``$ref`` + pointers resolved inline, and ``type`` defaulting to ``"object"`` + when ``properties`` is present. + """ + if not schema: + return {"type": "object", "properties": {}} + + # Collect $defs / definitions before deep-copying the whole tree + defs: dict[str, Any] = schema.get("$defs", schema.get("definitions", {})) + + # Resolve $ref pointers inline (also deep-copies while traversing) + sanitized = _resolve_refs(schema, defs) + + # Strip unsupported root-level keys + for key in _UNSUPPORTED_SCHEMA_ROOT_KEYS: + sanitized.pop(key, None) + + # Remove $defs / definitions (no longer needed after resolution) + sanitized.pop("$defs", None) + sanitized.pop("definitions", None) + + # Ensure top-level type is "object" when properties are present + if "properties" in sanitized and "type" not in sanitized: + sanitized["type"] = "object" + + return sanitized + + ToolTypes: TypeAlias = FunctionTool | MCPTool | Mapping[str, Any] | object diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py index 726616adbb..894fd8b73a 100644 --- a/python/packages/core/agent_framework/openai/_responses_client.py +++ b/python/packages/core/agent_framework/openai/_responses_client.py @@ -49,6 +49,7 @@ FunctionTool, ToolTypes, normalize_tools, + sanitize_schema_for_api, tool, ) from .._types import ( @@ -468,7 +469,7 @@ def _prepare_tools_for_openai( ) continue if isinstance(tool_item, FunctionTool): - params = tool_item.parameters() + params = sanitize_schema_for_api(tool_item.parameters()) params["additionalProperties"] = False response_tools.append( FunctionToolParam( diff --git a/python/packages/core/tests/core/test_tools.py b/python/packages/core/tests/core/test_tools.py index f7674edc9b..eaec1c2217 100644 --- a/python/packages/core/tests/core/test_tools.py +++ b/python/packages/core/tests/core/test_tools.py @@ -15,6 +15,8 @@ from agent_framework._tools import ( _parse_annotation, _parse_inputs, + _resolve_refs, + sanitize_schema_for_api, ) from agent_framework.observability import OtelAttr @@ -1001,3 +1003,171 @@ def test_parse_annotation_with_annotated_and_literal(): # endregion + +# region sanitize_schema_for_api tests + + +def test_sanitize_schema_empty_returns_default() -> None: + """An empty schema should produce a minimal valid object schema.""" + assert sanitize_schema_for_api({}) == {"type": "object", "properties": {}} + + +def test_sanitize_schema_simple_unchanged() -> None: + """A simple schema with only supported fields should pass through.""" + schema: dict[str, Any] = { + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + } + assert sanitize_schema_for_api(schema) == schema + + +def test_sanitize_schema_does_not_mutate_original() -> None: + """The original schema dict must never be modified.""" + schema: dict[str, Any] = { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": {"x": {"type": "integer"}}, + } + original_keys = set(schema.keys()) + sanitize_schema_for_api(schema) + assert set(schema.keys()) == original_keys + + +def test_sanitize_schema_strips_unsupported_root_keys() -> None: + """$schema, $id, and title should all be stripped from the root.""" + schema: dict[str, Any] = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:example", + "title": "Args", + "type": "object", + "properties": {"x": {"type": "number"}}, + } + result = sanitize_schema_for_api(schema) + assert "$schema" not in result + assert "$id" not in result + assert "title" not in result + assert result["type"] == "object" + assert result["properties"] == {"x": {"type": "number"}} + + +def test_sanitize_schema_adds_type_object_when_missing() -> None: + """type should default to 'object' when properties are present but type is missing.""" + result = sanitize_schema_for_api({"properties": {"name": {"type": "string"}}}) + assert result["type"] == "object" + + +def test_sanitize_schema_no_type_added_without_properties() -> None: + """type should not be injected when there are no properties.""" + result = sanitize_schema_for_api({"description": "A schema without properties"}) + assert "type" not in result + + +def test_sanitize_schema_resolves_simple_ref() -> None: + """A simple $ref pointing to $defs should be inlined.""" + schema: dict[str, Any] = { + "type": "object", + "properties": {"params": {"$ref": "#/$defs/CustomerIdParam"}}, + "$defs": { + "CustomerIdParam": { + "type": "object", + "properties": {"customer_id": {"type": "integer"}}, + "required": ["customer_id"], + } + }, + } + result = sanitize_schema_for_api(schema) + assert "$defs" not in result + assert result["properties"]["params"] == { + "type": "object", + "properties": {"customer_id": {"type": "integer"}}, + "required": ["customer_id"], + } + + +def test_sanitize_schema_resolves_nested_refs() -> None: + """Chained $ref references should be resolved recursively.""" + schema: dict[str, Any] = { + "type": "object", + "properties": {"order": {"$ref": "#/$defs/Order"}}, + "$defs": { + "Order": { + "type": "object", + "properties": {"customer": {"$ref": "#/$defs/Customer"}}, + }, + "Customer": { + "type": "object", + "properties": {"name": {"type": "string"}}, + }, + }, + } + result = sanitize_schema_for_api(schema) + assert "$defs" not in result + assert result["properties"]["order"]["properties"]["customer"] == { + "type": "object", + "properties": {"name": {"type": "string"}}, + } + + +def test_sanitize_schema_resolves_ref_in_array_items() -> None: + """$ref inside array items should be resolved.""" + schema: dict[str, Any] = { + "type": "object", + "properties": {"items": {"type": "array", "items": {"$ref": "#/$defs/Item"}}}, + "$defs": {"Item": {"type": "object", "properties": {"sku": {"type": "string"}}}}, + } + result = sanitize_schema_for_api(schema) + assert result["properties"]["items"]["items"] == { + "type": "object", + "properties": {"sku": {"type": "string"}}, + } + + +def test_sanitize_schema_unresolvable_ref_dropped() -> None: + """An unresolvable $ref should be dropped gracefully.""" + schema: dict[str, Any] = { + "type": "object", + "properties": {"data": {"$ref": "#/$defs/NonExistent"}}, + "$defs": {}, + } + result = sanitize_schema_for_api(schema) + assert "$ref" not in result["properties"]["data"] + + +def test_sanitize_schema_go_jsonschema_output() -> None: + """Schema generated by google/jsonschema-go (as used by matlab-mcp-core-server).""" + schema: dict[str, Any] = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:matlab:evaluate_matlab_code", + "title": "Args", + "type": "object", + "properties": { + "code": {"type": "string", "description": "The MATLAB code to evaluate."}, + }, + "required": ["code"], + "additionalProperties": False, + } + result = sanitize_schema_for_api(schema) + assert "$schema" not in result + assert "$id" not in result + assert "title" not in result + assert result == { + "type": "object", + "properties": {"code": {"type": "string", "description": "The MATLAB code to evaluate."}}, + "required": ["code"], + "additionalProperties": False, + } + + +def test_resolve_refs_deep_copies() -> None: + """_resolve_refs should return a deep copy, not a reference to the input.""" + schema: dict[str, Any] = { + "type": "object", + "properties": {"nested": {"type": "object", "properties": {"deep": {"type": "boolean"}}}}, + } + result = _resolve_refs(schema, {}) + result["properties"]["nested"]["type"] = "array" + assert schema["properties"]["nested"]["type"] == "object" + + +# endregion diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py index e049dbd16e..3f1a999aa4 100644 --- a/python/packages/core/tests/openai/test_openai_responses_client.py +++ b/python/packages/core/tests/openai/test_openai_responses_client.py @@ -3310,4 +3310,128 @@ async def test_prepare_options_excludes_continuation_token() -> None: assert run_options["background"] is True +def test_prepare_tools_for_openai_sanitizes_mcp_schema() -> None: + """Test that MCP tool schemas with unsupported JSON Schema fields are sanitized.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + # Simulate a schema produced by google/jsonschema-go (used by matlab-mcp-core-server) + mcp_schema: dict[str, Any] = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:matlab:evaluate_matlab_code", + "title": "Args", + "type": "object", + "properties": { + "project_path": { + "type": "string", + "description": "Absolute path to the project folder.", + }, + "code": { + "type": "string", + "description": "The MATLAB code to evaluate.", + }, + }, + "required": ["code"], + "additionalProperties": False, + } + + func_tool = FunctionTool( + func=lambda **kwargs: "ok", + name="evaluate_matlab_code", + description="Evaluate MATLAB code", + input_model=mcp_schema, + ) + + resp_tools = client._prepare_tools_for_openai([func_tool]) + assert len(resp_tools) == 1 + + tool_def = resp_tools[0] + assert tool_def["type"] == "function" + assert tool_def["name"] == "evaluate_matlab_code" + + params = tool_def["parameters"] + # These fields should be stripped — they cause 400 errors on LM Studio / Aliyun + assert "$schema" not in params + assert "$id" not in params + assert "title" not in params + # Valid fields should be preserved + assert params["type"] == "object" + assert "code" in params["properties"] + assert "project_path" in params["properties"] + assert params["required"] == ["code"] + assert params["additionalProperties"] is False + + +def test_prepare_tools_for_openai_sanitizes_refs_in_mcp_schema() -> None: + """Test that $ref and $defs in MCP tool schemas are resolved and removed.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mcp_schema: dict[str, Any] = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ToolInput", + "type": "object", + "properties": { + "settings": {"$ref": "#/$defs/Settings"}, + }, + "$defs": { + "Settings": { + "type": "object", + "properties": { + "mode": {"type": "string", "enum": ["fast", "slow"]}, + "retries": {"type": "integer"}, + }, + } + }, + } + + func_tool = FunctionTool( + func=lambda **kwargs: "ok", + name="configure_tool", + description="Configure settings", + input_model=mcp_schema, + ) + + resp_tools = client._prepare_tools_for_openai([func_tool]) + params = resp_tools[0]["parameters"] + + assert "$schema" not in params + assert "title" not in params + assert "$defs" not in params + assert "$ref" not in params["properties"]["settings"] + # $ref should be resolved inline + assert params["properties"]["settings"] == { + "type": "object", + "properties": { + "mode": {"type": "string", "enum": ["fast", "slow"]}, + "retries": {"type": "integer"}, + }, + } + + +def test_prepare_tools_for_openai_does_not_mutate_cached_schema() -> None: + """Test that _prepare_tools_for_openai does not mutate the FunctionTool's cached schema.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + original_schema: dict[str, Any] = { + "type": "object", + "properties": {"x": {"type": "integer"}}, + } + + func_tool = FunctionTool( + func=lambda **kwargs: "ok", + name="my_tool", + description="A tool", + input_model=original_schema, + ) + + # Prime the cache + _ = func_tool.parameters() + + # Prepare tools (this used to mutate the cache) + client._prepare_tools_for_openai([func_tool]) + + # The cached schema should NOT have been modified + cached = func_tool.parameters() + assert "additionalProperties" not in cached + + # endregion From ea3e34cd8d3a3381dc3d96893f87d13d0fe13436 Mon Sep 17 00:00:00 2001 From: Giles Odigwe Date: Tue, 10 Mar 2026 16:02:30 -0700 Subject: [PATCH 2/2] Address PR feedback: combine both defs/definitions, fix mypy cast - Merge both $defs and definitions so refs using either prefix resolve - Remove redundant cast(list[Any], ...) flagged by mypy - Add test for schema with both $defs and definitions Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../packages/core/agent_framework/_tools.py | 15 ++++++++++--- python/packages/core/tests/core/test_tools.py | 22 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 733704354f..2d241b092a 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -713,7 +713,7 @@ def _resolve_refs(schema: dict[str, Any], defs: dict[str, Any]) -> dict[str, Any elif isinstance(value, list): result[key] = [ _resolve_refs(cast(dict[str, Any], item), defs) if isinstance(item, dict) else item - for item in cast(list[Any], value) + for item in value # type: ignore[union-attr] ] else: result[key] = value @@ -741,8 +741,17 @@ def sanitize_schema_for_api(schema: dict[str, Any]) -> dict[str, Any]: if not schema: return {"type": "object", "properties": {}} - # Collect $defs / definitions before deep-copying the whole tree - defs: dict[str, Any] = schema.get("$defs", schema.get("definitions", {})) + # Collect $defs / definitions before traversing the tree. + # Combine both if present so that refs using either prefix can be resolved. + defs: dict[str, Any] = {} + raw_defs = schema.get("$defs") + if isinstance(raw_defs, Mapping): + defs.update(raw_defs) # type: ignore[reportUnknownArgumentType] + raw_definitions = schema.get("definitions") + if isinstance(raw_definitions, Mapping): + for def_name, def_value in raw_definitions.items(): # type: ignore[reportUnknownVariableType] + if def_name not in defs: + defs[def_name] = def_value # Resolve $ref pointers inline (also deep-copies while traversing) sanitized = _resolve_refs(schema, defs) diff --git a/python/packages/core/tests/core/test_tools.py b/python/packages/core/tests/core/test_tools.py index eaec1c2217..042117ef70 100644 --- a/python/packages/core/tests/core/test_tools.py +++ b/python/packages/core/tests/core/test_tools.py @@ -1170,4 +1170,26 @@ def test_resolve_refs_deep_copies() -> None: assert schema["properties"]["nested"]["type"] == "object" +def test_sanitize_schema_both_defs_and_definitions() -> None: + """Schemas with both $defs and definitions should resolve refs from either.""" + schema: dict[str, Any] = { + "type": "object", + "properties": { + "a": {"$ref": "#/$defs/TypeA"}, + "b": {"$ref": "#/definitions/TypeB"}, + }, + "$defs": { + "TypeA": {"type": "string"}, + }, + "definitions": { + "TypeB": {"type": "integer"}, + }, + } + result = sanitize_schema_for_api(schema) + assert "$defs" not in result + assert "definitions" not in result + assert result["properties"]["a"] == {"type": "string"} + assert result["properties"]["b"] == {"type": "integer"} + + # endregion