Skip to content

Commit 8de2fe9

Browse files
author
Andrei Bratu
committed
Eval works with simple callables
1 parent 35f1fd4 commit 8de2fe9

6 files changed

Lines changed: 82 additions & 85 deletions

File tree

package.json

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,6 @@
1313
"test": "jest --detectOpenHandles --forceExit"
1414
},
1515
"dependencies": {
16-
"form-data": "^4.0.0",
17-
"form-data-encoder": "^4.0.2",
18-
"formdata-node": "^6.0.3",
19-
"node-fetch": "2.7.0",
20-
"qs": "6.11.2",
21-
"readable-stream": "^4.5.2",
22-
"ts-json-schema-generator": "^2.3.0",
23-
"url-join": "4.0.1",
2416
"@opentelemetry/api": "^1.9.0",
2517
"@opentelemetry/auto-instrumentations-node": "^0.53.0",
2618
"@opentelemetry/sdk-metrics": "^1.28.0",
@@ -30,32 +22,42 @@
3022
"@traceloop/instrumentation-anthropic": "^0.11.1",
3123
"@traceloop/instrumentation-cohere": "^0.11.1",
3224
"@traceloop/instrumentation-openai": "^0.11.3",
33-
"uuid": "^11.0.3",
25+
"cli-progress": "^3.12.0",
26+
"form-data": "^4.0.0",
27+
"form-data-encoder": "^4.0.2",
28+
"formdata-node": "^6.0.3",
3429
"nanoid": "^5.0.9",
35-
"cli-progress": "^3.12.0"
30+
"node-fetch": "2.7.0",
31+
"p-map": "^7.0.3",
32+
"qs": "6.11.2",
33+
"readable-stream": "^4.5.2",
34+
"stable-hash": "^0.0.4",
35+
"ts-json-schema-generator": "^2.3.0",
36+
"url-join": "4.0.1",
37+
"uuid": "^11.0.3"
3638
},
3739
"devDependencies": {
40+
"@anthropic-ai/sdk": "^0.32.1",
41+
"@trivago/prettier-plugin-sort-imports": "^5.2.0",
42+
"@types/cli-progress": "^3.11.6",
3843
"@types/jest": "29.5.5",
3944
"@types/node": "17.0.33",
4045
"@types/node-fetch": "2.6.9",
4146
"@types/qs": "6.9.8",
4247
"@types/readable-stream": "^4.0.15",
4348
"@types/url-join": "4.0.1",
49+
"cohere-ai": "^7.15.0",
50+
"dotenv": "^16.4.6",
4451
"fetch-mock-jest": "^1.5.1",
4552
"jest": "29.7.0",
4653
"jest-environment-jsdom": "29.7.0",
54+
"jsonschema": "^1.4.1",
55+
"openai": "^4.74.0",
56+
"prettier": "^3.4.2",
4757
"ts-jest": "29.1.1",
4858
"ts-loader": "^9.3.1",
4959
"typescript": "4.6.4",
50-
"webpack": "^5.94.0",
51-
"openai": "^4.74.0",
52-
"@anthropic-ai/sdk": "^0.32.1",
53-
"cohere-ai": "^7.15.0",
54-
"dotenv": "^16.4.6",
55-
"jsonschema": "^1.4.1",
56-
"@trivago/prettier-plugin-sort-imports": "^5.2.0",
57-
"prettier": "^3.4.2",
58-
"@types/cli-progress": "^3.11.6"
60+
"webpack": "^5.94.0"
5961
},
6062
"browser": {
6163
"fs": false,

src/eval_utils/context.ts

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import hash from "stable-hash";
2+
13
import { FlowLogRequest, PromptLogRequest } from "../api";
24
import { DatapointResponse } from "../api";
35
import { Humanloop } from "../index";
@@ -22,8 +24,7 @@ type EvaluationContextValue = {
2224
class EvaluationContext {
2325
private state?: EvaluationContextState;
2426
private static instance: EvaluationContext;
25-
private inputMappings: Map<EvaluationContextKey, EvaluationContextValue[]> =
26-
new Map();
27+
private inputMappings: Map<string, EvaluationContextValue[]> = new Map();
2728

2829
private constructor() {}
2930

@@ -51,7 +52,7 @@ class EvaluationContext {
5152
if (this.state === undefined) {
5253
throw new Error("EvaluationContext state is not set");
5354
}
54-
const key = { inputs: datapoint.inputs, messages: datapoint.messages };
55+
const key = hash({ inputs: datapoint.inputs, messages: datapoint.messages });
5556

5657
if (!this.inputMappings.has(key)) {
5758
this.inputMappings.set(key, []);
@@ -65,21 +66,22 @@ class EvaluationContext {
6566
}
6667

6768
public getDatapoint(key: EvaluationContextKey): EvaluationContextValue {
68-
const mappings = this.inputMappings.get(key);
69+
const mappings = this.inputMappings.get(hash(key));
6970
if (!mappings || mappings.length === 0) {
7071
throw new Error(`No input mappings found for: ${JSON.stringify(key)}`);
7172
}
7273
return mappings.pop()!;
7374
}
7475

7576
public peekDatapoint(key: EvaluationContextKey): boolean {
76-
const mappings = this.inputMappings.get(key);
77+
const mappings = this.inputMappings.get(hash(key));
7778
return mappings !== undefined && mappings.length > 0;
7879
}
7980

8081
public isEvaluatedFile(args: FlowLogRequest | PromptLogRequest) {
8182
return (
82-
this.state && this.state.fileId === args.id && this.state.path === args.path
83+
this.state &&
84+
(this.state.fileId === args.id || this.state.path === args.path)
8385
);
8486
}
8587
}

src/eval_utils/run.ts

Lines changed: 40 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
import cliProgress from "cli-progress";
1111
import { Humanloop, HumanloopClient } from "index";
1212
import { AsyncFunction } from "otel";
13+
import pMap from "p-map";
1314

1415
import {
1516
BooleanEvaluatorStatsResponse,
16-
CreateEvaluatorLogRequest,
1717
CreateEvaluatorLogResponse,
1818
CreateFlowLogResponse,
1919
CreatePromptLogResponse,
@@ -51,13 +51,10 @@ type LogResponse =
5151
| CreatePromptLogResponse
5252
| CreateToolLogResponse
5353
| CreateEvaluatorLogResponse;
54-
type LogRequest =
55-
| FlowLogRequest
56-
| PromptLogRequest
57-
| ToolLogRequest
58-
| CreateEvaluatorLogRequest;
5954

6055
export function overloadLog<T extends Flows | Prompts>(client: T): T {
56+
const originalLog = client.log.bind(client);
57+
6158
// @ts-ignore
6259
const _overloadedLog: T["log"] = async (
6360
request: FlowLogRequest | PromptLogRequest,
@@ -83,20 +80,22 @@ export function overloadLog<T extends Flows | Prompts>(client: T): T {
8380
};
8481
}
8582

86-
response = await client.log(request, options);
83+
// @ts-ignore
84+
response = await originalLog(request, options);
8785

86+
// @ts-ignore
8887
uploadCallback(response.id);
8988
} else {
90-
response = await client.log(request, options);
89+
// @ts-ignore
90+
response = await originalLog(request, options);
9191
}
9292

9393
return response;
9494
};
9595

96-
return {
97-
...client,
98-
log: _overloadedLog,
99-
};
96+
client.log = _overloadedLog.bind(client);
97+
98+
return client;
10099
}
101100

102101
export async function runEval(
@@ -105,6 +104,7 @@ export async function runEval(
105104
dataset: Dataset,
106105
name?: string,
107106
evaluators: Evaluator[] = [],
107+
workers: number = 8,
108108
): Promise<EvaluatorCheck[]> {
109109
// Get or create the file on Humanloop
110110
if (!file.path && !file.id) {
@@ -145,6 +145,7 @@ export async function runEval(
145145
}
146146
const updatedData = { ...rest, ...version } as FlowRequest;
147147
hlFile = await client.flows.upsert(updatedData);
148+
break;
148149
}
149150
case "prompt": {
150151
hlFile = await client.prompts.upsert({
@@ -307,7 +308,6 @@ export async function runEval(
307308
path: hlFile.path,
308309
uploadCallback: async (logId: string, datapoint: DatapointResponse) => {
309310
await runLocalEvaluators(client, logId, datapoint, localEvaluators);
310-
progressBar.increment();
311311
},
312312
});
313313

@@ -327,11 +327,8 @@ export async function runEval(
327327
try {
328328
evaluationContext.addDatapoint(datapoint, runId);
329329
let output: string;
330-
if ("messages" in datapoint) {
331-
output = await function_!({
332-
...datapoint.inputs,
333-
messages: datapoint.messages,
334-
});
330+
if ("messages" in datapoint && datapoint.messages !== undefined) {
331+
output = await function_!(datapoint.inputs, datapoint.messages);
335332
} else {
336333
output = await function_!(datapoint.inputs);
337334
}
@@ -356,10 +353,7 @@ export async function runEval(
356353

357354
// The log function will take care of the sourceDatapointId and runId from the context
358355
// See overloadLog in this module for more details
359-
console.debug(
360-
`function_ ${function_} is a simple callable, datapoint context was not consumed`,
361-
);
362-
logFunc({
356+
await logFunc({
363357
inputs: datapoint.inputs,
364358
output: output,
365359
startTime: start_time,
@@ -368,13 +362,14 @@ export async function runEval(
368362
}
369363
} catch (e) {
370364
const errorMessage = e instanceof Error ? e.message : String(e);
371-
logFunc({
365+
await logFunc({
372366
inputs: datapoint.inputs,
373367
error: errorMessage,
374368
sourceDatapointId: datapoint.id,
375369
startTime: start_time,
376370
endTime: new Date(),
377371
});
372+
// console.log(e);
378373
console.warn(
379374
`\nYour ${type}'s callable failed for Datapoint: ${datapoint.id}.\nError: ${errorMessage}`,
380375
);
@@ -396,11 +391,14 @@ export async function runEval(
396391
);
397392
const totalDatapoints = hlDataset.datapoints!.length;
398393
progressBar.start(totalDatapoints, 0);
399-
const promises = hlDataset.datapoints!.map(async (datapoint) => {
400-
await processDatapoint(datapoint, runId);
401-
progressBar.increment();
402-
});
403-
await Promise.all(promises);
394+
await pMap(
395+
hlDataset.datapoints!,
396+
async (datapoint) => {
397+
await processDatapoint(datapoint, runId);
398+
progressBar.increment();
399+
},
400+
{ concurrency: workers },
401+
);
404402
progressBar.stop();
405403
} else {
406404
// TODO: trigger run when updated API is available
@@ -466,8 +464,9 @@ function getLogFunction(
466464
fileId: string,
467465
versionId: string,
468466
runId: string,
469-
): (args: LogRequest) => Promise<LogResponse> {
467+
) {
470468
/** Returns the appropriate log function pre-filled with common parameters. */
469+
471470
const logRequest = {
472471
// TODO: why does the Log `id` field refer to the file ID in the API?
473472
// Why are both `id` and `version_id` needed in the API?
@@ -478,22 +477,21 @@ function getLogFunction(
478477

479478
switch (type) {
480479
case "flow":
481-
return (args: FlowLogRequest) =>
482-
client.flows.log({
480+
return async (args: FlowLogRequest) =>
481+
await client.flows.log({
483482
...logRequest,
484483
traceStatus: "complete",
485484
...args,
486485
});
487486
case "prompt":
488-
return (args: PromptLogRequest) =>
489-
client.prompts.log({ ...logRequest, ...args });
490-
case "evaluator":
491-
// @ts-ignore
492-
return (args: CreateEvaluatorLogRequest) =>
493-
client.evaluators.log({ ...logRequest, ...args });
487+
return async (args: PromptLogRequest) =>
488+
await client.prompts.log({ ...logRequest, ...args });
489+
// case "evaluator":
490+
// return (args: CreateEvaluatorLogRequest) =>
491+
// client.evaluators.log({ ...logRequest, ...args });
494492
case "tool":
495-
return (args: ToolLogRequest) =>
496-
client.tools.log({ ...logRequest, ...args });
493+
return async (args: ToolLogRequest) =>
494+
await client.tools.log({ ...logRequest, ...args });
497495
default:
498496
throw new Error(`Unsupported File version: ${type}`);
499497
}
@@ -517,15 +515,16 @@ async function runLocalEvaluators(
517515
judgment = evalFunction(log);
518516
}
519517

520-
client.evaluators.log({
518+
await client.evaluators.log({
519+
path: evaluator.path,
521520
versionId: evaluator.versionId,
522521
parentId: logId,
523522
judgment: judgment,
524523
startTime: startTime,
525524
endTime: new Date(),
526525
});
527526
} catch (e) {
528-
client.evaluators.log({
527+
await client.evaluators.log({
529528
versionId: evaluator.versionId,
530529
parentId: logId,
531530
error: e instanceof Error ? e.message : String(e),

src/humanloop.client.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,16 @@ import { NodeTracerProvider, Tracer } from "@opentelemetry/sdk-trace-node";
22
import { AnthropicInstrumentation } from "@traceloop/instrumentation-anthropic";
33
import { CohereInstrumentation } from "@traceloop/instrumentation-cohere";
44
import { OpenAIInstrumentation } from "@traceloop/instrumentation-openai";
5-
import CohereAI from "cohere-ai";
65

7-
import { Dataset, Evaluator, EvaluatorCheck, File } from "../eval_utils/types";
86
import { HumanloopClient as BaseHumanloopClient } from "./Client";
97
import { Evaluations as BaseEvaluations } from "./api/resources/evaluations/client/Client";
108
import { Flows } from "./api/resources/flows/client/Client";
119
import { Prompts } from "./api/resources/prompts/client/Client";
1210
import { FlowKernelRequest } from "./api/types/FlowKernelRequest";
1311
import { ToolKernelRequest } from "./api/types/ToolKernelRequest";
1412
import { overloadLog, runEval } from "./eval_utils/run";
13+
import { Dataset, Evaluator, EvaluatorCheck, File } from "./eval_utils/types";
1514
import { HumanloopSpanExporter } from "./otel/exporter";
16-
import { moduleIsInstalled } from "./otel/helpers";
1715
import { HumanloopSpanProcessor } from "./otel/processor";
1816
import { flowUtilityFactory } from "./utilities/flow";
1917
import { UtilityPromptKernel, promptUtilityFactory } from "./utilities/prompt";
@@ -32,8 +30,9 @@ class ExtendedEvaluations extends BaseEvaluations {
3230
dataset: Dataset,
3331
name?: string,
3432
evaluators: Evaluator[] = [],
33+
workers: number = 8,
3534
): Promise<EvaluatorCheck[]> {
36-
return runEval(this._client, file, dataset, name, evaluators);
35+
return runEval(this._client, file, dataset, name, evaluators, workers);
3736
}
3837
}
3938

src/otel/helpers.ts

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -194,21 +194,6 @@ export function isHumanloopSpan(span: ReadableSpan): boolean {
194194
return span.attributes[HUMANLOOP_FILE_TYPE_KEY] !== undefined;
195195
}
196196

197-
/**
198-
* Determines if the current Node.js environment has a specific module installed.
199-
*
200-
* @param moduleName - Name of the module to check
201-
* @returns True if the module is installed, false otherwise
202-
*/
203-
export function moduleIsInstalled(moduleName: string): boolean {
204-
try {
205-
require.resolve(moduleName);
206-
return true;
207-
} catch {
208-
return false;
209-
}
210-
}
211-
212197
/**
213198
* Generates a unique span ID.
214199
*

0 commit comments

Comments
 (0)