@@ -219,13 +219,13 @@ type ErrorScoreHandler = (args: {
219219 unhandledScores : string [ ] ;
220220} ) => Record < string , number > | undefined | void ;
221221
222- export interface Evaluator <
222+ type EvaluatorBase <
223223 Input ,
224224 Output ,
225225 Expected ,
226226 Metadata extends BaseMetadata = DefaultMetadataType ,
227227 Parameters extends EvalParameters = EvalParameters ,
228- > {
228+ > = {
229229 /**
230230 * A function that returns a list of inputs, expected outputs, and metadata.
231231 */
@@ -236,17 +236,6 @@ export interface Evaluator<
236236 */
237237 task : EvalTask < Input , Output , Expected , Metadata , Parameters > ;
238238
239- /**
240- * A set of functions that take an input, output, and expected value and return a {@link Score}.
241- */
242- scores : EvalScorer < Input , Output , Expected , Metadata > [ ] ;
243-
244- /**
245- * A set of functions that take an input, output, and expected value and return a
246- * {@link Classification}. Results are recorded under the `classifications` column.
247- */
248- classifications ?: EvalClassifier < Input , Output , Expected , Metadata > [ ] ;
249-
250239 /**
251240 * A set of parameters that will be passed to the evaluator.
252241 * Can be:
@@ -364,7 +353,42 @@ export interface Evaluator<
364353 * Flushes spans before calling scoring functions
365354 */
366355 flushBeforeScoring ?: boolean ;
367- }
356+ } ;
357+
358+ /**
359+ * Defines an evaluator: the shared `EvaluatorBase` fields plus `scores` and/or `classifiers`, at least one of which must be provided.
360+ */
361+ export type Evaluator <
362+ Input ,
363+ Output ,
364+ Expected ,
365+ Metadata extends BaseMetadata = DefaultMetadataType ,
366+ Parameters extends EvalParameters = EvalParameters ,
367+ > = EvaluatorBase < Input , Output , Expected , Metadata , Parameters > &
368+ (
369+ | {
370+ /**
371+ * A set of functions that take an input, output, and expected value and return a {@link Score}. Required in this variant.
372+ */
373+ scores : EvalScorer < Input , Output , Expected , Metadata > [ ] ;
374+ /**
375+ * A set of functions that take an input, output, and expected value and return a
376+ * {@link Classification}. Results are recorded under the `classifications` column. Optional in this variant because `scores` is required.
377+ */
378+ classifiers ?: EvalClassifier < Input , Output , Expected , Metadata > [ ] ;
379+ }
380+ | {
381+ /**
382+ * A set of functions that take an input, output, and expected value and return a {@link Score}. Optional in this variant because `classifiers` is required.
383+ */
384+ scores ?: EvalScorer < Input , Output , Expected , Metadata > [ ] ;
385+ /**
386+ * A set of functions that take an input, output, and expected value and return a
387+ * {@link Classification}. Results are recorded under the `classifications` column. Required in this variant.
388+ */
389+ classifiers : EvalClassifier < Input , Output , Expected , Metadata > [ ] ;
390+ }
391+ ) ;
368392
369393export class EvalResultWithSummary <
370394 Input ,
@@ -1023,6 +1047,11 @@ export async function runEvaluator(
10231047 enableCache = true ,
10241048 // eslint-disable-next-line @typescript-eslint/no-explicit-any
10251049) : Promise < EvalResultWithSummary < any , any , any , any > > {
1050+ if ( ! evaluator . scores && ! evaluator . classifiers ) {
1051+ throw new Error (
1052+ "Evaluator must include at least one of `scores` or `classifiers`" ,
1053+ ) ;
1054+ }
10261055 return await runEvaluatorInternal (
10271056 experiment ,
10281057 evaluator ,
@@ -1237,8 +1266,8 @@ async function runEvaluatorInternal(
12371266 let tags : string [ ] = [ ...( datum . tags ?? [ ] ) ] ;
12381267 const scores : Record < string , number | null > = { } ;
12391268 const classifications : Record < string , ClassificationItem [ ] > = { } ;
1240- const scorerNames = evaluator . scores . map ( scorerName ) ;
1241- const classifierNames = ( evaluator . classifications ?? [ ] ) . map (
1269+ const scorerNames = ( evaluator . scores ?? [ ] ) . map ( scorerName ) ;
1270+ const classifierNames = ( evaluator . classifiers ?? [ ] ) . map (
12421271 classifierName ,
12431272 ) ;
12441273 let unhandledScores : string [ ] | null = scorerNames ;
@@ -1317,7 +1346,7 @@ async function runEvaluatorInternal(
13171346
13181347 const [ scoreResults , classificationResults ] = await Promise . all ( [
13191348 Promise . all (
1320- evaluator . scores . map ( ( score , score_idx ) =>
1349+ ( evaluator . scores ?? [ ] ) . map ( ( score , score_idx ) =>
13211350 runInScorerSpan (
13221351 rootSpan ,
13231352 scorerNames [ score_idx ] ,
@@ -1371,7 +1400,7 @@ async function runEvaluatorInternal(
13711400 ) ,
13721401 ) ,
13731402 Promise . all (
1374- ( evaluator . classifications ?? [ ] ) . map ( ( classifier , idx ) =>
1403+ ( evaluator . classifiers ?? [ ] ) . map ( ( classifier , idx ) =>
13751404 runInScorerSpan (
13761405 rootSpan ,
13771406 classifierNames [ idx ] ,
0 commit comments