Skip to content

Commit 63c86f1

Browse files
Qard authored and claude committed
Fix CI failures caused by GPT-5/Responses API migration
- Add msw mocking to partial.test.ts so ClosedQA doesn't make real API calls
- Update Python thread injection tests to mock /responses instead of /chat/completions
- Remove span_info from Responses API params in both JS and Python (re-apply dropped fix)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent e6c3470 commit 63c86f1

4 files changed

Lines changed: 70 additions & 45 deletions

File tree

js/oai.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -364,10 +364,6 @@ export async function cachedChatCompletion(
364364
if (fullParams.reasoning_effort) {
365365
responsesParams.reasoning_effort = fullParams.reasoning_effort;
366366
}
367-
if (fullParams.span_info) {
368-
responsesParams.span_info = fullParams.span_info;
369-
}
370-
371367
const response: any = await openai.responses.create(responsesParams);
372368

373369
// Convert Responses API response to Chat Completions format for compatibility

js/partial.test.ts

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,56 @@
1-
import { expect, test } from "vitest";
1+
import { http, HttpResponse } from "msw";
2+
import { setupServer } from "msw/node";
3+
import { OpenAI } from "openai";
4+
import { afterAll, afterEach, beforeAll, expect, test } from "vitest";
25
import { ClosedQA } from "./llm";
6+
import { init } from "./oai";
37
import { Levenshtein } from "./string";
48

9+
const server = setupServer();
10+
11+
beforeAll(() => {
12+
server.listen({
13+
onUnhandledRequest: (req) => {
14+
throw new Error(`Unhandled request ${req.method}, ${req.url}`);
15+
},
16+
});
17+
18+
server.use(
19+
http.post("https://api.openai.com/v1/responses", async () => {
20+
return HttpResponse.json({
21+
id: "resp-test",
22+
object: "response",
23+
created: Math.floor(Date.now() / 1000),
24+
model: "gpt-5-mini",
25+
output: [
26+
{
27+
type: "function_call",
28+
call_id: "call_test",
29+
name: "select_choice",
30+
arguments: JSON.stringify({ choice: "Y" }),
31+
},
32+
],
33+
});
34+
}),
35+
);
36+
37+
init({
38+
client: new OpenAI({
39+
apiKey: "test-api-key",
40+
baseURL: "https://api.openai.com/v1",
41+
}),
42+
});
43+
});
44+
45+
afterEach(() => {
46+
server.resetHandlers();
47+
});
48+
49+
afterAll(() => {
50+
server.close();
51+
init();
52+
});
53+
554
test("Partial Test", async () => {
655
const levenshteinBasic = await Levenshtein({
756
output: "abc",

py/autoevals/oai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ def prepare_responses_params(kwargs: dict[str, Any]) -> dict[str, Any]:
310310
responses_params["tool_choice"] = "required"
311311

312312
# Copy supported parameters
313-
for key in ["temperature", "reasoning_effort", "span_info"]:
313+
for key in ["temperature", "reasoning_effort"]:
314314
if key in kwargs:
315315
responses_params[key] = kwargs[key]
316316

py/autoevals/test_llm.py

Lines changed: 19 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -515,32 +515,22 @@ def capture_request(request):
515515
return Response(
516516
200,
517517
json={
518-
"id": "chatcmpl-test",
519-
"object": "chat.completion",
518+
"id": "resp-test",
519+
"object": "response",
520520
"created": 1234567890,
521-
"model": "gpt-4o",
522-
"choices": [
521+
"model": "gpt-5-mini",
522+
"output": [
523523
{
524-
"index": 0,
525-
"message": {
526-
"role": "assistant",
527-
"content": None,
528-
"tool_calls": [
529-
{
530-
"id": "call_test",
531-
"type": "function",
532-
"function": {"name": "select_choice", "arguments": '{"choice": "1"}'},
533-
}
534-
],
535-
},
536-
"finish_reason": "tool_calls",
524+
"type": "function_call",
525+
"call_id": "call_test",
526+
"name": "select_choice",
527+
"arguments": '{"choice": "1"}',
537528
}
538529
],
539-
"usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
540530
},
541531
)
542532

543-
respx.post("https://api.openai.com/v1/chat/completions").mock(side_effect=capture_request)
533+
respx.post("https://api.openai.com/v1/responses").mock(side_effect=capture_request)
544534
client = OpenAI(api_key="test-api-key", base_url="https://api.openai.com/v1")
545535
init(client)
546536

@@ -551,7 +541,7 @@ def capture_request(request):
551541
)
552542
classifier.eval(output="irrelevant", expected="irrelevant", trace=trace)
553543

554-
content = captured_request_body["messages"][0]["content"]
544+
content = captured_request_body["input"][0]["content"]
555545
assert trace.calls == 1
556546
assert "Thread:" in content
557547
assert "User:" in content
@@ -573,32 +563,22 @@ async def get_thread(self):
573563

574564
trace = TraceStub()
575565

576-
respx.post("https://api.openai.com/v1/chat/completions").mock(
566+
respx.post("https://api.openai.com/v1/responses").mock(
577567
return_value=Response(
578568
200,
579569
json={
580-
"id": "chatcmpl-test",
581-
"object": "chat.completion",
570+
"id": "resp-test",
571+
"object": "response",
582572
"created": 1234567890,
583-
"model": "gpt-4o",
584-
"choices": [
573+
"model": "gpt-5-mini",
574+
"output": [
585575
{
586-
"index": 0,
587-
"message": {
588-
"role": "assistant",
589-
"content": None,
590-
"tool_calls": [
591-
{
592-
"id": "call_test",
593-
"type": "function",
594-
"function": {"name": "select_choice", "arguments": '{"choice": "1"}'},
595-
}
596-
],
597-
},
598-
"finish_reason": "tool_calls",
576+
"type": "function_call",
577+
"call_id": "call_test",
578+
"name": "select_choice",
579+
"arguments": '{"choice": "1"}',
599580
}
600581
],
601-
"usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
602582
},
603583
)
604584
)

0 commit comments

Comments (0)