From 311d936c235404e002593574ae962b517933c0fa Mon Sep 17 00:00:00 2001
From: xodn348 <xodn348@tamu.edu>
Date: Wed, 13 May 2026 07:30:23 +0000
Subject: [PATCH] fix(parsing): use unparameterised types in parse_response to
 prevent Pydantic schema leaks

When parse_response constructs ParsedResponseOutputText[TextFormatT],
ParsedResponseOutputMessage[TextFormatT], and ParsedResponse[TextFormatT]
with an unresolved free TypeVar, Pydantic v2 calls model_rebuild on
every invocation and never caches the result because the TypeVar cannot
be resolved.  Each call therefore allocates fresh SchemaValidator and
SchemaSerializer objects (heavy Rust structs) that accumulate without
bound in long-running servers.

Use the unparameterised base classes instead.
ParsedResponseOutputMessage and ParsedResponse guard their
Generic-annotated fields behind `if TYPE_CHECKING:`, so the type
argument has no runtime effect on them.  ParsedResponseOutputText is
the exception: the schema type of its `parsed` field does change
(Optional[Any] vs Optional[TextFormatT]), but the already-parsed value
is supplied in the dict passed to construct_type_unchecked, so that
difference does not matter at runtime.  Cast the results to preserve
static type information.

Add two regression tests:
- correctness: parsed attribute contains the expected Pydantic model
- no-leak: SchemaValidator count does not grow after the first call

Fixes #3084
---
 src/openai/lib/_parsing/_responses.py | 46 ++++++++-----
 tests/lib/responses/test_responses.py | 94 +++++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 18 deletions(-)

diff --git a/src/openai/lib/_parsing/_responses.py b/src/openai/lib/_parsing/_responses.py
index 8853a0749f..510ba6a7c3 100644
--- a/src/openai/lib/_parsing/_responses.py
+++ b/src/openai/lib/_parsing/_responses.py
@@ -67,22 +67,29 @@ def parse_response(
                     continue
 
                 content_list.append(
-                    construct_type_unchecked(
-                        type_=ParsedResponseOutputText[TextFormatT],
-                        value={
-                            **item.to_dict(),
-                            "parsed": parse_text(item.text, text_format=text_format),
-                        },
+                    cast(
+                        "ParsedResponseOutputText[TextFormatT]",
+                        construct_type_unchecked(
+                            # Unparameterised: lets Pydantic cache the schema; free TypeVar causes unbounded rebuilds.
+                            type_=ParsedResponseOutputText,
+                            value={
+                                **item.to_dict(),
+                                "parsed": parse_text(item.text, text_format=text_format),
+                            },
+                        ),
                     )
                 )
 
             output_list.append(
-                construct_type_unchecked(
-                    type_=ParsedResponseOutputMessage[TextFormatT],
-                    value={
-                        **output.to_dict(),
-                        "content": content_list,
-                    },
+                cast(
+                    "ParsedResponseOutputMessage[TextFormatT]",
+                    construct_type_unchecked(
+                        type_=ParsedResponseOutputMessage,
+                        value={
+                            **output.to_dict(),
+                            "content": content_list,
+                        },
+                    ),
                 )
             )
         elif output.type == "function_call":
@@ -129,12 +136,15 @@ def parse_response(
         else:
             output_list.append(output)
 
-    return construct_type_unchecked(
-        type_=ParsedResponse[TextFormatT],
-        value={
-            **response.to_dict(),
-            "output": output_list,
-        },
+    return cast(
+        "ParsedResponse[TextFormatT]",
+        construct_type_unchecked(
+            type_=ParsedResponse,
+            value={
+                **response.to_dict(),
+                "output": output_list,
+            },
+        ),
     )
 
 
diff --git a/tests/lib/responses/test_responses.py b/tests/lib/responses/test_responses.py
index 8e5f16df95..194d69b82d 100644
--- a/tests/lib/responses/test_responses.py
+++ b/tests/lib/responses/test_responses.py
@@ -1,13 +1,19 @@
 from __future__ import annotations
 
+import gc
+
 from typing_extensions import TypeVar
 
 import pytest
+import pydantic
 from respx import MockRouter
 from inline_snapshot import snapshot
 
 from openai import OpenAI, AsyncOpenAI
 from openai._utils import assert_signatures_in_sync
+from openai._models import construct_type_unchecked
+from openai.types.responses import Response
+from openai.lib._parsing._responses import parse_response
 
 from ...conftest import base_url
 from ..snapshots import make_snapshot_request
@@ -61,3 +67,91 @@ def test_parse_method_definition_in_sync(sync: bool, client: OpenAI, async_clien
         checking_client.responses.parse,
         exclude_params={"tools"},
     )
+
+
+_MINIMAL_RESPONSE_DICT = {  # shared fixture: raw Response payload with a single assistant message
+    "id": "resp_test",
+    "object": "response",
+    "created_at": 1700000000,
+    "status": "completed",
+    "model": "gpt-4o-mini",
+    "output": [  # exactly one item; the correctness test asserts len == 1
+        {
+            "id": "msg_test",
+            "type": "message",
+            "status": "completed",
+            "role": "assistant",
+            "content": [
+                {
+                    "type": "output_text",
+                    "text": '{"name": "Birthday Party", "date": "2026-06-01"}',  # JSON for _CalendarEvent
+                    "annotations": [],
+                    "logprobs": [],
+                }
+            ],
+        }
+    ],
+    "parallel_tool_calls": True,
+    "reasoning": {"effort": None, "summary": None},
+    "text": {"format": {"type": "text"}, "verbosity": "medium"},
+    "tool_choice": "auto",
+    "tools": [],
+    "truncation": "disabled",
+    "usage": {
+        "input_tokens": 10,
+        "input_tokens_details": {"cached_tokens": 0},
+        "output_tokens": 10,
+        "output_tokens_details": {"reasoning_tokens": 0},
+        "total_tokens": 20,
+    },
+}
+
+
+class _CalendarEvent(pydantic.BaseModel):  # text_format model the tests pass to parse_response
+    name: str
+    date: str  # plain string; the correctness test compares it to "2026-06-01" verbatim
+
+
+def test_parse_response_structured_output_correctness() -> None:
+    """The fixture's only output_text item is parsed into a populated _CalendarEvent."""
+    raw = construct_type_unchecked(type_=Response, value=_MINIMAL_RESPONSE_DICT)
+    result = parse_response(text_format=_CalendarEvent, input_tools=None, response=raw)
+
+    # The fixture carries exactly one output item: the assistant message.
+    assert len(result.output) == 1
+    message = result.output[0]
+    assert message.type == "message"
+    text_item = message.content[0]
+    assert text_item.type == "output_text"
+    event = text_item.parsed
+    assert isinstance(event, _CalendarEvent)
+    assert (event.name, event.date) == ("Birthday Party", "2026-06-01")
+
+
+def test_parse_response_no_pydantic_schema_leak() -> None:
+    """Repeated parse_response calls must not grow the live SchemaValidator count.
+
+    Using ParsedResponse[free_TypeVar] prevents Pydantic from caching the schema,
+    causing a new SchemaValidator/SchemaSerializer per call (issue #3084).
+    """
+    calls = 50
+    response = construct_type_unchecked(type_=Response, value=_MINIMAL_RESPONSE_DICT)
+
+    def _count_validators() -> int:
+        return sum(1 for obj in gc.get_objects() if type(obj).__name__ == "SchemaValidator")
+
+    # One warm-up call triggers the initial (and only legitimate) schema build.
+    parse_response(text_format=_CalendarEvent, input_tools=None, response=response)
+    gc.collect()  # drop collectable garbage so only live validators are counted
+    before = _count_validators()
+
+    for _ in range(calls):
+        parse_response(text_format=_CalendarEvent, input_tools=None, response=response)
+    gc.collect()
+    leaked = _count_validators() - before
+
+    # Only growth is a leak; an unrelated validator being freed must not fail the test.
+    assert leaked <= 0, (
+        f"parse_response leaked {leaked} SchemaValidator object(s) over {calls} calls. "
+        "The Pydantic schema for ParsedResponse and friends must be built once and cached."
+    )