Skip to content
2 changes: 2 additions & 0 deletions src/openai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
APITimeoutError,
BadRequestError,
APIConnectionError,
ContentFormatError,
AuthenticationError,
InternalServerError,
PermissionDeniedError,
Expand Down Expand Up @@ -65,6 +66,7 @@
"InternalServerError",
"LengthFinishReasonError",
"ContentFilterFinishReasonError",
"ContentFormatError",
"InvalidWebhookSignatureError",
"Timeout",
"RequestOptions",
Expand Down
54 changes: 54 additions & 0 deletions src/openai/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import json
from typing import TYPE_CHECKING, Any, Optional, cast
from typing_extensions import Literal

Expand All @@ -24,6 +25,7 @@
"InternalServerError",
"LengthFinishReasonError",
"ContentFilterFinishReasonError",
"ContentFormatError",
"InvalidWebhookSignatureError",
]

Expand Down Expand Up @@ -157,5 +159,57 @@ def __init__(self) -> None:
)


class ContentFormatError(OpenAIError):
    """Raised when the API returns content that cannot be parsed into the expected response format.

    This typically happens when the model returns malformed or truncated JSON that
    does not match the expected schema for the response type (for example, a Pydantic
    model or dataclass validated via `pydantic.TypeAdapter`).
    """

    raw_content: str
    """The raw content string returned by the API that failed to parse."""

    def __init__(self, *, raw_content: str, error: Exception, response_format: object | None = None) -> None:
        format_name = _response_format_name(response_format)

        # Only mention the expected format when we could derive a name for it.
        format_details = "" if format_name is None else f" Expected response format: {format_name}."

        # Keep the message bounded: show at most the first 500 characters of the payload.
        if len(raw_content) > 500:
            shown_content = raw_content[:500] + "..."
        else:
            shown_content = raw_content

        message = (
            f"Could not parse response content as the response did not match the expected format."
            f"{format_details} Raw content: {shown_content!r}."
            f" Validation error: {_format_parse_error(error)}."
        )
        super().__init__(message)

        # The full (untruncated) payload stays available for callers to inspect.
        self.raw_content = raw_content
        self.expected_response_format = format_name
        self.error = error


def _response_format_name(response_format: object | None) -> str | None:
if response_format is None:
return None
return cast(
str,
getattr(response_format, "__name__", None)
or getattr(response_format, "__qualname__", None)
or repr(response_format),
)


def _format_parse_error(error: Exception) -> str:
if isinstance(error, json.JSONDecodeError):
return f"{error.msg} (line {error.lineno}, column {error.colno})"

errors_fn = getattr(error, "errors", None)
if callable(errors_fn):
try:
return repr(errors_fn(include_input=False))
except TypeError:
return repr(errors_fn())

return str(error)


# NOTE(review): unlike ContentFormatError above, which derives from OpenAIError,
# this subclasses ValueError directly — callers catching OpenAIError will not
# catch it. Confirm this divergence from the module's hierarchy is intentional.
class InvalidWebhookSignatureError(ValueError):
    """Raised when a webhook signature is invalid, meaning the computed signature does not match the expected signature."""
17 changes: 10 additions & 7 deletions src/openai/lib/_parsing/_completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
ChatCompletionFunctionToolParam,
completion_create_params,
)
from ..._exceptions import LengthFinishReasonError, ContentFilterFinishReasonError
from ..._exceptions import ContentFormatError, LengthFinishReasonError, ContentFilterFinishReasonError
from ...types.shared_params import FunctionDefinition
from ...types.chat.completion_create_params import ResponseFormat as ResponseFormatParam
from ...types.chat.chat_completion_message_function_tool_call import Function
Expand Down Expand Up @@ -241,14 +241,17 @@ def is_parseable_tool(input_tool: ChatCompletionToolUnionParam) -> bool:


def _parse_content(response_format: type[ResponseFormatT], content: str) -> ResponseFormatT:
    """Parse raw message ``content`` into an instance of ``response_format``.

    Raises:
        ContentFormatError: when the content is malformed/truncated JSON or
            fails schema validation for ``response_format``.
        TypeError: when ``response_format`` is not a supported type, or when a
            dataclass-like type is used under Pydantic v1.
    """
    try:
        if is_basemodel_type(response_format):
            # BaseModel subclasses parse via the model's own JSON machinery.
            return cast(ResponseFormatT, model_parse_json(response_format, content))

        if is_dataclass_like_type(response_format):
            if PYDANTIC_V1:
                # TypeAdapter-based validation below only exists in pydantic v2.
                # (TypeError is deliberately not caught by the except clause.)
                raise TypeError(f"Non BaseModel types are only supported with Pydantic v2 - {response_format}")

            return pydantic.TypeAdapter(response_format).validate_json(content)
    except (pydantic.ValidationError, json.JSONDecodeError) as exc:
        # Wrap parse/validation failures so callers get the raw payload and the
        # expected format name alongside the underlying error.
        # NOTE(review): this references ``json`` — confirm this module imports it;
        # the visible diff only adds ``import json`` to _exceptions.py.
        raise ContentFormatError(raw_content=content, error=exc, response_format=response_format) from exc

    # Reached only when response_format matched neither supported category.
    raise TypeError(f"Unable to automatically parse response format type {response_format}")

Expand Down
76 changes: 76 additions & 0 deletions tests/lib/chat/test_completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,82 @@ class Location(BaseModel):
)


@pytest.mark.respx(base_url=base_url)
def test_parse_malformed_json_content(client: OpenAI, respx_mock: MockRouter) -> None:
    """`.parse()` raises ContentFormatError when the model returns truncated JSON.

    The mocked completion's content is `{"city": "San` — cut off mid-string — so
    JSON decoding fails before schema validation is ever attempted.
    """

    class Location(BaseModel):
        city: str
        temperature: float
        units: Literal["c", "f"]

    with pytest.raises(openai.ContentFormatError) as exc_info:
        make_snapshot_request(
            lambda c: c.chat.completions.parse(
                model="gpt-4o-2024-08-06",
                messages=[
                    {
                        "role": "user",
                        "content": "What's the weather like in SF?",
                    },
                ],
                response_format=Location,
            ),
            content_snapshot=snapshot(
                '{"id": "chatcmpl-truncated", "object": "chat.completion", "created": 1727346163, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\"city\\": \\"San", "refusal": null}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 79, "completion_tokens": 5, "total_tokens": 84}, "system_fingerprint": "fp_test"}'
            ),
            path="/chat/completions",
            mock_client=client,
            respx_mock=respx_mock,
        )
    # The exception exposes the raw payload and the expected format's name.
    assert '{"city": "San' in exc_info.value.raw_content
    assert exc_info.value.expected_response_format == "Location"
    assert "Raw content:" in str(exc_info.value)


@pytest.mark.respx(base_url=base_url)
def test_parse_invalid_json_schema(client: OpenAI, respx_mock: MockRouter) -> None:
    """`.parse()` raises ContentFormatError when valid JSON fails schema validation.

    The mocked content `{"city": "San Francisco"}` is syntactically valid JSON but
    is missing the required `temperature` and `units` fields, so Pydantic
    validation (not JSON decoding) is what fails here.
    """

    class Location(BaseModel):
        city: str
        temperature: float
        units: Literal["c", "f"]

    with pytest.raises(openai.ContentFormatError) as exc_info:
        make_snapshot_request(
            lambda c: c.chat.completions.parse(
                model="gpt-4o-2024-08-06",
                messages=[
                    {
                        "role": "user",
                        "content": "What's the weather like in SF?",
                    },
                ],
                response_format=Location,
            ),
            content_snapshot=snapshot(
                '{"id": "chatcmpl-badschema", "object": "chat.completion", "created": 1727346163, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\"city\\": \\"San Francisco\\"}", "refusal": null}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 79, "completion_tokens": 10, "total_tokens": 89}, "system_fingerprint": "fp_test"}'
            ),
            path="/chat/completions",
            mock_client=client,
            respx_mock=respx_mock,
        )
    # raw_content preserves the payload exactly as returned by the API.
    assert exc_info.value.raw_content == '{"city": "San Francisco"}'
    assert exc_info.value.expected_response_format == "Location"
    assert "Raw content:" in str(exc_info.value)


def test_content_format_error_truncates_raw_content() -> None:
    """Verify raw_content is truncated in the exception message for very large payloads."""
    payload = "x" * 1000
    exc = openai.ContentFormatError(raw_content=payload, error=ValueError("bad"), response_format=None)
    message = str(exc)

    # The attribute keeps the full, untruncated payload.
    assert len(exc.raw_content) == 1000
    # The message shows only a 500-char prefix followed by an ellipsis.
    assert "xxx..." in message
    assert len(payload) > 500  # sanity
    # The untruncated payload never appears verbatim in the message.
    assert payload not in message


@pytest.mark.respx(base_url=base_url)
def test_parse_pydantic_model_refusal(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
class Location(BaseModel):
Expand Down