Skip to content

Commit 2a023f7

Browse files
committed
Bump to gpt5 models
1 parent 0d5af03 commit 2a023f7

14 files changed

Lines changed: 49 additions & 49 deletions

SCORERS.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Evaluates whether the output is factually consistent with the expected answer.
2525
- `input` (string): The input question or prompt
2626
- `output` (string, required): The generated answer to evaluate
2727
- `expected` (string, required): The ground truth answer
28-
- `model` (string, optional): Model to use (default: configured via `init()` or "gpt-4o")
28+
- `model` (string, optional): Model to use (default: configured via `init()` or "gpt-5-mini")
2929
- `client` (Client, optional): Custom OpenAI client
3030

3131
**Score Range:** 0-1
@@ -209,7 +209,7 @@ Evaluates how relevant the retrieved context is to the input question.
209209
- `input` (string, required): The question
210210
- `output` (string, required): The generated answer
211211
- `context` (string[] | string, required): Retrieved context passages
212-
- `model` (string, optional): Model to use (default: "gpt-4o-mini")
212+
- `model` (string, optional): Model to use (default: "gpt-5-nano")
213213

214214
**Score Range:** 0-1
215215

@@ -600,7 +600,7 @@ Note: Interpretation varies by scorer type. Binary scorers (ExactMatch, ValidJSO
600600

601601
Many scorers share these common parameters:
602602

603-
- `model` (string): LLM model to use for evaluation (default: configured via `init()` or "gpt-4o")
603+
- `model` (string): LLM model to use for evaluation (default: configured via `init()` or "gpt-5-mini")
604604
- `client` (Client): Custom OpenAI-compatible client
605605
- `use_cot` (boolean): Enable chain-of-thought reasoning for LLM scorers (default: true)
606606
- `temperature` (number): LLM temperature setting
@@ -616,13 +616,13 @@ import OpenAI from "openai";
616616

617617
init({
618618
client: new OpenAI({ apiKey: "..." }),
619-
defaultModel: "gpt-4o",
619+
defaultModel: "gpt-5-mini",
620620
});
621621
```
622622

623623
```python
624624
from autoevals import init
625625
from openai import OpenAI
626626

627-
init(OpenAI(api_key="..."), default_model="gpt-4o")
627+
init(OpenAI(api_key="..."), default_model="gpt-5-mini")
628628
```

js/llm.fixtures.ts

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ export const openaiClassifierShouldEvaluateTitlesWithCoT = [
5252
id: "chatcmpl-B7XFw0OCpCbMVwLizRts3Cl72Obg0",
5353
object: "chat.completion",
5454
created: 1741135832,
55-
model: "gpt-4o-2024-08-06",
55+
model: "gpt-5-mini-2025-08-07",
5656
choices: [
5757
{
5858
index: 0,
@@ -98,7 +98,7 @@ export const openaiClassifierShouldEvaluateTitlesWithCoT = [
9898
id: "chatcmpl-B7YPU81s7cb2uzlwJ8w9aS5qhfhtJ",
9999
object: "chat.completion",
100100
created: 1741140268,
101-
model: "gpt-4o-2024-08-06",
101+
model: "gpt-5-mini-2025-08-07",
102102
choices: [
103103
{
104104
index: 0,
@@ -141,7 +141,7 @@ export const openaiClassifierShouldEvaluateTitlesWithCoT = [
141141
id: "chatcmpl-B7YQ9ILZ9DJR2AjY2s4qU15Rc6qII",
142142
object: "chat.completion",
143143
created: 1741140309,
144-
model: "gpt-4o-2024-08-06",
144+
model: "gpt-5-mini-2025-08-07",
145145
choices: [
146146
{
147147
index: 0,
@@ -180,7 +180,7 @@ export const openaiClassifierShouldEvaluateTitlesWithCoT = [
180180
id: "chatcmpl-B7YQa80DGu61zUWpdPtXRaJdRQz6l",
181181
object: "chat.completion",
182182
created: 1741140336,
183-
model: "gpt-4o-2024-08-06",
183+
model: "gpt-5-mini-2025-08-07",
184184
choices: [
185185
{
186186
index: 0,
@@ -222,7 +222,7 @@ export const openaiClassifierShouldEvaluateArithmeticExpressions = [
222222
id: "chatcmpl-B7YSMVJ7qaQTJ9OtR6zPUEdHxrNbT",
223223
object: "chat.completion",
224224
created: 1741140446,
225-
model: "gpt-4o-2024-08-06",
225+
model: "gpt-5-mini-2025-08-07",
226226
choices: [
227227
{
228228
index: 0,
@@ -265,7 +265,7 @@ export const openaiClassifierShouldEvaluateArithmeticExpressions = [
265265
id: "chatcmpl-B7YTPWIPOFpRcVOjEnU6s0kZXgPdB",
266266
object: "chat.completion",
267267
created: 1741140511,
268-
model: "gpt-4o-2024-08-06",
268+
model: "gpt-5-mini-2025-08-07",
269269
choices: [
270270
{
271271
index: 0,
@@ -308,7 +308,7 @@ export const openaiClassifierShouldEvaluateArithmeticExpressions = [
308308
id: "chatcmpl-B7YU2qluNL0SenvL1zBiSzrka236n",
309309
object: "chat.completion",
310310
created: 1741140550,
311-
model: "gpt-4o-2024-08-06",
311+
model: "gpt-5-mini-2025-08-07",
312312
choices: [
313313
{
314314
index: 0,
@@ -351,7 +351,7 @@ export const openaiClassifierShouldEvaluateArithmeticExpressions = [
351351
id: "chatcmpl-B7YUTk3771FhLlXQNZPaobEC0d8R6",
352352
object: "chat.completion",
353353
created: 1741140577,
354-
model: "gpt-4o-2024-08-06",
354+
model: "gpt-5-mini-2025-08-07",
355355
choices: [
356356
{
357357
index: 0,
@@ -390,7 +390,7 @@ export const openaiClassifierShouldEvaluateArithmeticExpressions = [
390390
id: "chatcmpl-B7YUtrpit4RvQCeqfOcZme9L6pMAP",
391391
object: "chat.completion",
392392
created: 1741140603,
393-
model: "gpt-4o-2024-08-06",
393+
model: "gpt-5-mini-2025-08-07",
394394
choices: [
395395
{
396396
index: 0,
@@ -432,7 +432,7 @@ export const openaiClassifierShouldEvaluateArithmeticExpressions = [
432432
id: "chatcmpl-B7YV8HHTm4hZU58Zp9gcjwp3MigEl",
433433
object: "chat.completion",
434434
created: 1741140618,
435-
model: "gpt-4o-2024-08-06",
435+
model: "gpt-5-mini-2025-08-07",
436436
choices: [
437437
{
438438
index: 0,

js/llm.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ Issue Description: {{page_content}}
236236
id: "chatcmpl-test",
237237
object: "chat.completion",
238238
created: 1234567890,
239-
model: "gpt-4o",
239+
model: "gpt-5-mini",
240240
choices: [
241241
{
242242
index: 0,
@@ -294,7 +294,7 @@ Issue Description: {{page_content}}
294294
id: "chatcmpl-test",
295295
object: "chat.completion",
296296
created: 1234567890,
297-
model: "gpt-4o",
297+
model: "gpt-5-mini",
298298
choices: [
299299
{
300300
index: 0,

js/llm.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ export type LLMArgs = {
6969
* The default model to use for LLM-based evaluations.
7070
* @deprecated Use `init({ defaultModel: "..." })` to configure the default model instead.
7171
*/
72-
export const DEFAULT_MODEL = "gpt-4o";
72+
export const DEFAULT_MODEL = "gpt-5-mini";
7373

7474
const PLAIN_RESPONSE_SCHEMA = {
7575
properties: {

js/oai.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,8 @@ describe("OAI", () => {
261261
expect(Object.is(builtClient, otherClient)).toBe(true);
262262
});
263263

264-
test("getDefaultModel returns gpt-4o by default", () => {
265-
expect(getDefaultModel()).toBe("gpt-4o");
264+
test("getDefaultModel returns gpt-5-mini by default", () => {
265+
expect(getDefaultModel()).toBe("gpt-5-mini");
266266
});
267267

268268
test("init sets default model", () => {
@@ -275,7 +275,7 @@ describe("OAI", () => {
275275
expect(getDefaultModel()).toBe("claude-3-5-sonnet-20241022");
276276

277277
init({ defaultModel: undefined });
278-
expect(getDefaultModel()).toBe("gpt-4o");
278+
expect(getDefaultModel()).toBe("gpt-5-mini");
279279
});
280280

281281
test("init can set both client and default model", () => {

js/oai.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ export interface InitOptions {
163163
client?: OpenAI;
164164
/**
165165
* The default model to use for evaluations when not specified per-call.
166-
* Defaults to "gpt-4o" if not set.
166+
* Defaults to "gpt-5-mini" if not set.
167167
*
168168
* When using non-OpenAI providers via the Braintrust proxy, set this to
169169
* the appropriate model string (e.g., "claude-3-5-sonnet-20241022").
@@ -200,10 +200,10 @@ export const init = ({ client, defaultModel }: InitOptions = {}) => {
200200
};
201201

202202
/**
203-
* Get the configured default model, or "gpt-4o" if not set.
203+
* Get the configured default model, or "gpt-5-mini" if not set.
204204
*/
205205
export const getDefaultModel = (): string => {
206-
return globalThis.__defaultModel ?? "gpt-4o";
206+
return globalThis.__defaultModel ?? "gpt-5-mini";
207207
};
208208

209209
export async function cachedChatCompletion(

js/ragas.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ describe("ContextRelevancy score clamping", () => {
119119
id: "chatcmpl-test",
120120
object: "chat.completion",
121121
created: Date.now(),
122-
model: "gpt-4o",
122+
model: "gpt-5-mini",
123123
choices: [
124124
{
125125
index: 0,
@@ -184,7 +184,7 @@ describe("ContextRelevancy score clamping", () => {
184184
id: "chatcmpl-test",
185185
object: "chat.completion",
186186
created: Date.now(),
187-
model: "gpt-4o",
187+
model: "gpt-5-mini",
188188
choices: [
189189
{
190190
index: 0,
@@ -264,7 +264,7 @@ describe("AnswerCorrectness custom embedding model", () => {
264264
id: "test-id",
265265
object: "chat.completion",
266266
created: Date.now(),
267-
model: "gpt-4o",
267+
model: "gpt-5-mini",
268268
choices: [
269269
{
270270
index: 0,

py/autoevals/llm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
This module provides a collection of pre-built LLM scorers for common evaluation tasks.
44
55
All evaluators accept the following common arguments:
6-
- model: Model to use (defaults to gpt-4o)
6+
- model: Model to use (defaults to gpt-5-mini)
77
- temperature: Controls randomness (0-1). If not specified, uses the model's default.
88
- max_tokens: Maximum tokens to generate. If not specified, uses the model's default.
99
- client: OpenAI client (defaults to global client from init())
@@ -79,7 +79,7 @@
7979
)
8080

8181
# Deprecated: Use init(default_model="...") to configure the default model instead.
82-
DEFAULT_MODEL = "gpt-4o"
82+
DEFAULT_MODEL = "gpt-5-mini"
8383

8484
PLAIN_RESPONSE_SCHEMA = {
8585
"properties": {"choice": {"description": "The choice", "title": "Choice", "type": "string"}},

py/autoevals/oai.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def init(client: Client | None = None, is_async: bool = False, default_model: st
254254
is_async: Whether to create a client with async operations. Defaults to False.
255255
Deprecated: Use the `client` argument directly with your desired async/sync configuration.
256256
default_model: The default model to use for evaluations when not specified per-call.
257-
Defaults to "gpt-4o" if not set. When using non-OpenAI providers via the Braintrust
257+
Defaults to "gpt-5-mini" if not set. When using non-OpenAI providers via the Braintrust
258258
proxy, set this to the appropriate model string (e.g., "claude-3-5-sonnet-20241022").
259259
260260
Example:
@@ -284,8 +284,8 @@ def init(client: Client | None = None, is_async: bool = False, default_model: st
284284

285285

286286
def get_default_model() -> str:
287-
"""Get the configured default model, or "gpt-4o" if not set."""
288-
return _default_model_var.get(None) or "gpt-4o"
287+
"""Get the configured default model, or "gpt-5-mini" if not set."""
288+
return _default_model_var.get(None) or "gpt-5-mini"
289289

290290

291291
warned_deprecated_api_key_base_url = False

py/autoevals/ragas.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
1818
**Common arguments**:
1919
20-
- `model`: Model to use for evaluation, defaults to the model configured via init(default_model=...) or "gpt-4o"
20+
- `model`: Model to use for evaluation, defaults to the model configured via init(default_model=...) or "gpt-5-mini"
2121
- `client`: Optional Client for API calls. If not provided, uses global client from init()
2222
2323
**Example - Direct usage**:
@@ -124,8 +124,8 @@ def check_required(name, **kwargs):
124124

125125

126126
# Deprecated: Use init(default_model="...") to configure the default model instead.
127-
# This was previously "gpt-4o-mini" but now defaults to the configured model.
128-
DEFAULT_RAGAS_MODEL = "gpt-4o-mini"
127+
# This was previously "gpt-4o-mini" but now defaults to the configured model.
128+
DEFAULT_RAGAS_MODEL = "gpt-5-nano"
129129

130130

131131
def _get_model(model: str | None) -> str:
@@ -138,7 +138,7 @@ def _get_model(model: str | None) -> str:
138138
return model
139139

140140
# Check if user configured a custom default via init(default_model=...)
141-
# If they did (even if it's "gpt-4o"), respect it for consistency
141+
# If they did (even if it's "gpt-5-mini"), respect it for consistency
142142
configured_default = _default_model_var.get(None)
143143
if configured_default is not None:
144144
return configured_default

0 commit comments

Comments
 (0)