Skip to content

Commit 63c86f1

Browse files
Qard authored and claude committed
Fix CI failures caused by GPT-5/Responses API migration
- Add msw mocking to partial.test.ts so ClosedQA doesn't make real API calls
- Update Python thread injection tests to mock /responses instead of /chat/completions
- Remove span_info from Responses API params in both JS and Python (re-apply dropped fix)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent e6c3470 commit 63c86f1

4 files changed

Lines changed: 70 additions & 45 deletions

File tree

js/oai.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -364,10 +364,6 @@ export async function cachedChatCompletion(
364364
if (fullParams.reasoning_effort) {
365365
responsesParams.reasoning_effort = fullParams.reasoning_effort;
366366
}
367-
if (fullParams.span_info) {
368-
responsesParams.span_info = fullParams.span_info;
369-
}
370-
371367
const response: any = await openai.responses.create(responsesParams);
372368

373369
// Convert Responses API response to Chat Completions format for compatibility

js/partial.test.ts

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,56 @@
1-
import { expect, test } from "vitest";
1+
import { http, HttpResponse } from "msw";
2+
import { setupServer } from "msw/node";
3+
import { OpenAI } from "openai";
4+
import { afterAll, afterEach, beforeAll, expect, test } from "vitest";
25
import { ClosedQA } from "./llm";
6+
import { init } from "./oai";
37
import { Levenshtein } from "./string";
48

9+
const server = setupServer();
10+
11+
beforeAll(() => {
12+
server.listen({
13+
onUnhandledRequest: (req) => {
14+
throw new Error(`Unhandled request ${req.method}, ${req.url}`);
15+
},
16+
});
17+
18+
server.use(
19+
http.post("https://api.openai.com/v1/responses", async () => {
20+
return HttpResponse.json({
21+
id: "resp-test",
22+
object: "response",
23+
created: Math.floor(Date.now() / 1000),
24+
model: "gpt-5-mini",
25+
output: [
26+
{
27+
type: "function_call",
28+
call_id: "call_test",
29+
name: "select_choice",
30+
arguments: JSON.stringify({ choice: "Y" }),
31+
},
32+
],
33+
});
34+
}),
35+
);
36+
37+
init({
38+
client: new OpenAI({
39+
apiKey: "test-api-key",
40+
baseURL: "https://api.openai.com/v1",
41+
}),
42+
});
43+
});
44+
45+
afterEach(() => {
46+
server.resetHandlers();
47+
});
48+
49+
afterAll(() => {
50+
server.close();
51+
init();
52+
});
53+
554
test("Partial Test", async () => {
655
const levenshteinBasic = await Levenshtein({
756
output: "abc",

py/autoevals/oai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ def prepare_responses_params(kwargs: dict[str, Any]) -> dict[str, Any]:
310310
responses_params["tool_choice"] = "required"
311311

312312
# Copy supported parameters
313-
for key in ["temperature", "reasoning_effort", "span_info"]:
313+
for key in ["temperature", "reasoning_effort"]:
314314
if key in kwargs:
315315
responses_params[key] = kwargs[key]
316316

py/autoevals/test_llm.py

Lines changed: 19 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -515,32 +515,22 @@ def capture_request(request):
515515
return Response(
516516
200,
517517
json={
518-
"id": "chatcmpl-test",
519-
"object": "chat.completion",
518+
"id": "resp-test",
519+
"object": "response",
520520
"created": 1234567890,
521-
"model": "gpt-4o",
522-
"choices": [
521+
"model": "gpt-5-mini",
522+
"output": [
523523
{
524-
"index": 0,
525-
"message": {
526-
"role": "assistant",
527-
"content": None,
528-
"tool_calls": [
529-
{
530-
"id": "call_test",
531-
"type": "function",
532-
"function": {"name": "select_choice", "arguments": '{"choice": "1"}'},
533-
}
534-
],
535-
},
536-
"finish_reason": "tool_calls",
524+
"type": "function_call",
525+
"call_id": "call_test",
526+
"name": "select_choice",
527+
"arguments": '{"choice": "1"}',
537528
}
538529
],
539-
"usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
540530
},
541531
)
542532

543-
respx.post("https://api.openai.com/v1/chat/completions").mock(side_effect=capture_request)
533+
respx.post("https://api.openai.com/v1/responses").mock(side_effect=capture_request)
544534
client = OpenAI(api_key="test-api-key", base_url="https://api.openai.com/v1")
545535
init(client)
546536

@@ -551,7 +541,7 @@ def capture_request(request):
551541
)
552542
classifier.eval(output="irrelevant", expected="irrelevant", trace=trace)
553543

554-
content = captured_request_body["messages"][0]["content"]
544+
content = captured_request_body["input"][0]["content"]
555545
assert trace.calls == 1
556546
assert "Thread:" in content
557547
assert "User:" in content
@@ -573,32 +563,22 @@ async def get_thread(self):
573563

574564
trace = TraceStub()
575565

576-
respx.post("https://api.openai.com/v1/chat/completions").mock(
566+
respx.post("https://api.openai.com/v1/responses").mock(
577567
return_value=Response(
578568
200,
579569
json={
580-
"id": "chatcmpl-test",
581-
"object": "chat.completion",
570+
"id": "resp-test",
571+
"object": "response",
582572
"created": 1234567890,
583-
"model": "gpt-4o",
584-
"choices": [
573+
"model": "gpt-5-mini",
574+
"output": [
585575
{
586-
"index": 0,
587-
"message": {
588-
"role": "assistant",
589-
"content": None,
590-
"tool_calls": [
591-
{
592-
"id": "call_test",
593-
"type": "function",
594-
"function": {"name": "select_choice", "arguments": '{"choice": "1"}'},
595-
}
596-
],
597-
},
598-
"finish_reason": "tool_calls",
576+
"type": "function_call",
577+
"call_id": "call_test",
578+
"name": "select_choice",
579+
"arguments": '{"choice": "1"}',
599580
}
600581
],
601-
"usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
602582
},
603583
)
604584
)

0 commit comments

Comments (0)