Skip to content

Commit 8afd421

Browse files
committed
1 parent 30ca11a commit 8afd421

4 files changed

Lines changed: 60 additions & 34 deletions

File tree

js/dev/server.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ export function runDevServer(
117117

118118
evalDefs[name] = {
119119
parameters,
120-
scores: evaluator.scores.map((score, idx) => ({
120+
scores: (evaluator.scores ?? []).map((score, idx) => ({
121121
name: scorerName(score, idx),
122122
})),
123123
};
@@ -209,7 +209,7 @@ export function runDevServer(
209209
{
210210
...evaluator,
211211
data: evalData.data,
212-
scores: evaluator.scores.concat(
212+
scores: (evaluator.scores ?? []).concat(
213213
scores?.map((score) =>
214214
makeScorer(
215215
state,

js/src/framework.test.ts

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,8 +1498,7 @@ test("classifier-only evaluator populates classifications field", async () => {
14981498
{
14991499
data: [{ input: "hello", expected: "greeting" }],
15001500
task: (input) => input,
1501-
scores: [],
1502-
classifications: [
1501+
classifiers: [
15031502
() => ({
15041503
name: "category",
15051504
id: "greeting",
@@ -1551,8 +1550,7 @@ test("multiple classifiers returning the same name append items correctly", asyn
15511550
{
15521551
data: [{ input: "hello" }],
15531552
task: (input) => input,
1554-
scores: [],
1555-
classifications: [
1553+
classifiers: [
15561554
() => [
15571555
{ name: "category", id: "greeting", label: "Greeting" },
15581556
{ name: "category", id: "informal", label: "Informal" },
@@ -1586,7 +1584,7 @@ test("mixed evaluator populates both scores and classifications", async () => {
15861584
score: args.output === args.expected ? 1 : 0,
15871585
}),
15881586
],
1589-
classifications: [
1587+
classifiers: [
15901588
() => ({ name: "category", id: "greeting", label: "Greeting" }),
15911589
],
15921590
},
@@ -1606,8 +1604,7 @@ test("malformed classifier output fails clearly", async () => {
16061604
{
16071605
data: [{ input: "hello" }],
16081606
task: (input) => input,
1609-
scores: [],
1610-
classifications: [() => ({}) as never],
1607+
classifiers: [() => ({}) as never],
16111608
},
16121609
{ noSendLogs: true, returnResults: true },
16131610
);

js/src/framework.ts

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -219,13 +219,13 @@ type ErrorScoreHandler = (args: {
219219
unhandledScores: string[];
220220
}) => Record<string, number> | undefined | void;
221221

222-
export interface Evaluator<
222+
type EvaluatorBase<
223223
Input,
224224
Output,
225225
Expected,
226226
Metadata extends BaseMetadata = DefaultMetadataType,
227227
Parameters extends EvalParameters = EvalParameters,
228-
> {
228+
> = {
229229
/**
230230
* A function that returns a list of inputs, expected outputs, and metadata.
231231
*/
@@ -236,17 +236,6 @@ export interface Evaluator<
236236
*/
237237
task: EvalTask<Input, Output, Expected, Metadata, Parameters>;
238238

239-
/**
240-
* A set of functions that take an input, output, and expected value and return a {@link Score}.
241-
*/
242-
scores: EvalScorer<Input, Output, Expected, Metadata>[];
243-
244-
/**
245-
* A set of functions that take an input, output, and expected value and return a
246-
* {@link Classification}. Results are recorded under the `classifications` column.
247-
*/
248-
classifications?: EvalClassifier<Input, Output, Expected, Metadata>[];
249-
250239
/**
251240
* A set of parameters that will be passed to the evaluator.
252241
* Can be:
@@ -364,7 +353,42 @@ export interface Evaluator<
364353
* Flushes spans before calling scoring functions
365354
*/
366355
flushBeforeScoring?: boolean;
367-
}
356+
};
357+
358+
/**
359+
* Defines an evaluator. At least one of `scores` or `classifiers` must be provided.
360+
*/
361+
export type Evaluator<
362+
Input,
363+
Output,
364+
Expected,
365+
Metadata extends BaseMetadata = DefaultMetadataType,
366+
Parameters extends EvalParameters = EvalParameters,
367+
> = EvaluatorBase<Input, Output, Expected, Metadata, Parameters> &
368+
(
369+
| {
370+
/**
371+
* A set of functions that take an input, output, and expected value and return a {@link Score}.
372+
*/
373+
scores: EvalScorer<Input, Output, Expected, Metadata>[];
374+
/**
375+
* A set of functions that take an input, output, and expected value and return a
376+
* {@link Classification}. Results are recorded under the `classifications` column.
377+
*/
378+
classifiers?: EvalClassifier<Input, Output, Expected, Metadata>[];
379+
}
380+
| {
381+
/**
382+
* A set of functions that take an input, output, and expected value and return a {@link Score}.
383+
*/
384+
scores?: EvalScorer<Input, Output, Expected, Metadata>[];
385+
/**
386+
* A set of functions that take an input, output, and expected value and return a
387+
* {@link Classification}. Results are recorded under the `classifications` column.
388+
*/
389+
classifiers: EvalClassifier<Input, Output, Expected, Metadata>[];
390+
}
391+
);
368392

369393
export class EvalResultWithSummary<
370394
Input,
@@ -1023,6 +1047,11 @@ export async function runEvaluator(
10231047
enableCache = true,
10241048
// eslint-disable-next-line @typescript-eslint/no-explicit-any
10251049
): Promise<EvalResultWithSummary<any, any, any, any>> {
1050+
if (!evaluator.scores && !evaluator.classifiers) {
1051+
throw new Error(
1052+
"Evaluator must include at least one of `scores` or `classifiers`",
1053+
);
1054+
}
10261055
return await runEvaluatorInternal(
10271056
experiment,
10281057
evaluator,
@@ -1237,8 +1266,8 @@ async function runEvaluatorInternal(
12371266
let tags: string[] = [...(datum.tags ?? [])];
12381267
const scores: Record<string, number | null> = {};
12391268
const classifications: Record<string, ClassificationItem[]> = {};
1240-
const scorerNames = evaluator.scores.map(scorerName);
1241-
const classifierNames = (evaluator.classifications ?? []).map(
1269+
const scorerNames = (evaluator.scores ?? []).map(scorerName);
1270+
const classifierNames = (evaluator.classifiers ?? []).map(
12421271
classifierName,
12431272
);
12441273
let unhandledScores: string[] | null = scorerNames;
@@ -1317,7 +1346,7 @@ async function runEvaluatorInternal(
13171346

13181347
const [scoreResults, classificationResults] = await Promise.all([
13191348
Promise.all(
1320-
evaluator.scores.map((score, score_idx) =>
1349+
(evaluator.scores ?? []).map((score, score_idx) =>
13211350
runInScorerSpan(
13221351
rootSpan,
13231352
scorerNames[score_idx],
@@ -1371,7 +1400,7 @@ async function runEvaluatorInternal(
13711400
),
13721401
),
13731402
Promise.all(
1374-
(evaluator.classifications ?? []).map((classifier, idx) =>
1403+
(evaluator.classifiers ?? []).map((classifier, idx) =>
13751404
runInScorerSpan(
13761405
rootSpan,
13771406
classifierNames[idx],

js/src/parameters.test.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ test("parameters are passed to task", async () => {
2626
return output;
2727
},
2828
scores: [],
29-
classifications: [],
29+
classifiers: [],
3030
parameters: {
3131
prefix: z.string().default("start:"),
3232
suffix: z.string().default(":end"),
@@ -60,7 +60,7 @@ test("prompt parameter is passed correctly", async () => {
6060
return input;
6161
},
6262
scores: [],
63-
classifications: [],
63+
classifiers: [],
6464
parameters: {
6565
main: {
6666
type: "prompt",
@@ -101,7 +101,7 @@ test("custom parameter values override defaults", async () => {
101101
return output;
102102
},
103103
scores: [],
104-
classifications: [],
104+
classifiers: [],
105105
parameters: {
106106
prefix: z.string().default("start:"),
107107
suffix: z.string().default(":end"),
@@ -134,7 +134,7 @@ test("array parameter is handled correctly", async () => {
134134
return input;
135135
},
136136
scores: [],
137-
classifications: [],
137+
classifiers: [],
138138
parameters: {
139139
items: z.array(z.string()).default(["item1", "item2"]),
140140
},
@@ -165,7 +165,7 @@ test("object parameter is handled correctly", async () => {
165165
return input;
166166
},
167167
scores: [],
168-
classifications: [],
168+
classifiers: [],
169169
parameters: {
170170
config: z
171171
.object({
@@ -201,7 +201,7 @@ test("model parameter defaults to configured value", async () => {
201201
return input;
202202
},
203203
scores: [],
204-
classifications: [],
204+
classifiers: [],
205205
parameters: {
206206
model: {
207207
type: "model",
@@ -230,7 +230,7 @@ test("model parameter is required when default is missing", async () => {
230230
data: [{ input: "test" }],
231231
task: async (input: string) => input,
232232
scores: [],
233-
classifications: [],
233+
classifiers: [],
234234
parameters: {
235235
model: {
236236
type: "model",

0 commit comments

Comments
 (0)