Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 82 additions & 25 deletions js/src/framework.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@
vi,
} from "vitest";
import {
_exportsForTestingOnly as frameworkExportsForTestingOnly,
defaultErrorScoreHandler,
Eval,
EvalScorer,
runEvaluator,
} from "./framework";
import {
_exportsForTestingOnly,
_exportsForTestingOnly as loggerExportsForTestingOnly,
BraintrustState,
initLogger,
TestBackgroundLogger,
Expand All @@ -33,6 +34,60 @@
public increment() {}
}

// Verifies that waitForLogs3XactIngestion logs in, then keeps POSTing to the
// `btql` endpoint until a poll returns at least one row, and that the request
// it sends targets the expected object, root span and transaction id.
test("waitForLogs3XactIngestion polls btql until the xact is queryable", async () => {
  // First poll: no rows yet. Second poll: one row, which should end the wait.
  const post = vi
    .fn()
    .mockResolvedValueOnce({
      json: async () => ({ data: [] }),
    })
    .mockResolvedValueOnce({
      json: async () => ({ data: [{ id: "span-1" }] }),
    });
  // Minimal stand-in exposing only the members the helper touches.
  const state = {
    login: vi.fn().mockResolvedValue(undefined),
    apiConn: () => ({ post }),
  } as unknown as BraintrustState;

  await frameworkExportsForTestingOnly.waitForLogs3XactIngestion({
    state,
    objectType: "experiment",
    objectId: "exp-123",
    rootSpanId: "root-456",
    xactId: "xact-789",
    // Zero backoff keeps the test fast; the timeout is only a safety net.
    initialBackoffMs: 0,
    maxBackoffMs: 0,
    timeoutMs: 100,
  });

  expect(state.login).toHaveBeenCalledWith({});
  expect(post).toHaveBeenCalledTimes(2);
  expect(post).toHaveBeenNthCalledWith(
    1,
    "btql",
    expect.objectContaining({
      query: expect.objectContaining({
        // The helper selects the row id (aliased as "id"), not a literal 1.
        select: [
          {
            alias: "id",
            expr: { op: "ident", name: ["id"] },
          },
        ],
        from: {
          op: "function",
          name: { op: "ident", name: ["experiment"] },
          args: [{ op: "literal", value: "exp-123" }],
        },
        filter: {
          op: "and",
          children: [
            {
              op: "eq",
              left: { op: "ident", name: ["root_span_id"] },
              right: { op: "literal", value: "root-456" },
            },
            {
              op: "eq",
              left: { op: "ident", name: ["_xact_id"] },
              right: { op: "literal", value: "xact-789" },
            },
          ],
        },
        limit: 1,
      }),
    }),
    { headers: { "Accept-Encoding": "gzip" } },
  );
});

test("meta (write) is passed to task", async () => {
const metadata = {
bar: "baz",
Expand Down Expand Up @@ -559,7 +614,7 @@
});

test("Eval with noSendLogs: true runs locally without creating experiment", async () => {
const memoryLogger = _exportsForTestingOnly.useTestBackgroundLogger();
const memoryLogger = loggerExportsForTestingOnly.useTestBackgroundLogger();

const result = await Eval(
"test-no-logs",
Expand Down Expand Up @@ -677,10 +732,10 @@
});

test("tags can be appended and logged to root span", async () => {
await _exportsForTestingOnly.simulateLoginForTests();
const memoryLogger = _exportsForTestingOnly.useTestBackgroundLogger();
await loggerExportsForTestingOnly.simulateLoginForTests();
const memoryLogger = loggerExportsForTestingOnly.useTestBackgroundLogger();
const experiment =
_exportsForTestingOnly.initTestExperiment("js-tags-append");
loggerExportsForTestingOnly.initTestExperiment("js-tags-append");

const initialTags = ["cookies n cream"];
const appendedTags = ["chocolate", "vanilla", "strawberry"];
Expand Down Expand Up @@ -736,9 +791,10 @@
expectedTags: ["chocolate", "vanilla", "strawberry"],
},
])("$title", async ({ providedTags, expectedTags }) => {
await _exportsForTestingOnly.simulateLoginForTests();
const memoryLogger = _exportsForTestingOnly.useTestBackgroundLogger();
const experiment = _exportsForTestingOnly.initTestExperiment("js-tags-list");
await loggerExportsForTestingOnly.simulateLoginForTests();
const memoryLogger = loggerExportsForTestingOnly.useTestBackgroundLogger();
const experiment =
loggerExportsForTestingOnly.initTestExperiment("js-tags-list");

const result = await runEvaluator(
experiment,
Expand Down Expand Up @@ -769,9 +825,10 @@
});

test("tags are persisted with a failing scorer", async () => {
await _exportsForTestingOnly.simulateLoginForTests();
const memoryLogger = _exportsForTestingOnly.useTestBackgroundLogger();
const experiment = _exportsForTestingOnly.initTestExperiment("js-tags-list");
await loggerExportsForTestingOnly.simulateLoginForTests();
const memoryLogger = loggerExportsForTestingOnly.useTestBackgroundLogger();
const experiment =
loggerExportsForTestingOnly.initTestExperiment("js-tags-list");

const expectedTags = ["chocolate", "vanilla", "strawberry"];

Expand Down Expand Up @@ -809,10 +866,10 @@
});

test("tags remain empty when not set", async () => {
await _exportsForTestingOnly.simulateLoginForTests();
const memoryLogger = _exportsForTestingOnly.useTestBackgroundLogger();
await loggerExportsForTestingOnly.simulateLoginForTests();
const memoryLogger = loggerExportsForTestingOnly.useTestBackgroundLogger();
const experiment =
_exportsForTestingOnly.initTestExperiment("js-tags-append");
loggerExportsForTestingOnly.initTestExperiment("js-tags-append");

const result = await runEvaluator(
experiment,
Expand Down Expand Up @@ -842,10 +899,10 @@
});

test("scorer spans have purpose='scorer' attribute", async () => {
await _exportsForTestingOnly.simulateLoginForTests();
const memoryLogger = _exportsForTestingOnly.useTestBackgroundLogger();
await loggerExportsForTestingOnly.simulateLoginForTests();
const memoryLogger = loggerExportsForTestingOnly.useTestBackgroundLogger();
const experiment =
_exportsForTestingOnly.initTestExperiment("js-scorer-purpose");
loggerExportsForTestingOnly.initTestExperiment("js-scorer-purpose");

const result = await runEvaluator(
experiment,
Expand Down Expand Up @@ -892,8 +949,8 @@
expect((span as any).span_attributes?.purpose).not.toBe("scorer");
}

_exportsForTestingOnly.clearTestBackgroundLogger();
_exportsForTestingOnly.simulateLogoutForTests();
loggerExportsForTestingOnly.clearTestBackgroundLogger();
loggerExportsForTestingOnly.simulateLogoutForTests();
});

// ========== framework2 metadata tests ==========
Expand Down Expand Up @@ -1486,7 +1543,7 @@
});

test("Eval with enableCache: false does not use span cache", async () => {
await _exportsForTestingOnly.simulateLoginForTests();
await loggerExportsForTestingOnly.simulateLoginForTests();
const state = new BraintrustState({
apiKey: "test-api-key",
appUrl: "https://example.com",
Expand All @@ -1511,7 +1568,7 @@
});

test("Eval with enableCache: true (default) uses span cache", async () => {
await _exportsForTestingOnly.simulateLoginForTests();
await loggerExportsForTestingOnly.simulateLoginForTests();
const state = new BraintrustState({
apiKey: "test-api-key",
appUrl: "https://example.com",
Expand All @@ -1536,9 +1593,9 @@
});

test("Eval with parent flushes evaluator state, not global state", async () => {
await _exportsForTestingOnly.simulateLoginForTests();
await loggerExportsForTestingOnly.simulateLoginForTests();

_exportsForTestingOnly.useTestBackgroundLogger();
loggerExportsForTestingOnly.useTestBackgroundLogger();

const evaluatorState = new BraintrustState({
apiKey: "test-api-key",
Expand Down Expand Up @@ -1567,6 +1624,6 @@

expect(evaluatorFlushSpy).toHaveBeenCalled();

_exportsForTestingOnly.clearTestBackgroundLogger();
_exportsForTestingOnly.simulateLogoutForTests();
loggerExportsForTestingOnly.clearTestBackgroundLogger();
loggerExportsForTestingOnly.simulateLogoutForTests();
});
134 changes: 118 additions & 16 deletions js/src/framework.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,93 @@ export type BaseExperiment<
name?: string;
};

/** Default delay before the second poll; doubled after every unsuccessful poll. */
const ENSURE_SPANS_FLUSH_INITIAL_BACKOFF_MS = 100;
/** Default upper bound on the delay between two consecutive polls. */
const ENSURE_SPANS_FLUSH_MAX_BACKOFF_MS = 2_000;
/** Default total time budget before the wait is abandoned with an error. */
const ENSURE_SPANS_FLUSH_TIMEOUT_MS = 30_000;

/**
 * Polls the `btql` endpoint until a row matching the given root span id and
 * transaction id is returned for the target object.
 *
 * Each poll selects `id` from `objectType(objectId)` filtered on
 * `root_span_id = rootSpanId AND _xact_id = xactId`, limited to one row, with
 * `brainstore_realtime: false`. Between polls the function sleeps with
 * exponential backoff (doubling, capped at `maxBackoffMs`). The sleep is also
 * clamped to the time remaining so the overall wait does not overshoot
 * `timeoutMs` by a full backoff interval.
 *
 * @param args.state - State used to log in and to obtain the API connection.
 * @param args.objectType - Name of the BTQL source function to query.
 * @param args.objectId - Id passed as the literal argument of that function.
 * @param args.rootSpanId - Value matched against `root_span_id`.
 * @param args.xactId - Value matched against `_xact_id`.
 * @param args.initialBackoffMs - Delay after the first unsuccessful poll.
 * @param args.maxBackoffMs - Largest delay between polls.
 * @param args.timeoutMs - Time budget, measured from after login completes.
 * @returns Resolves once a poll returns a non-empty `data` array.
 * @throws Error when no matching row appears within `timeoutMs`.
 */
async function waitForLogs3XactIngestion(args: {
  state: BraintrustState;
  objectType: "experiment" | "project_logs" | "playground_logs";
  objectId: string;
  rootSpanId: string;
  xactId: string;
  initialBackoffMs?: number;
  maxBackoffMs?: number;
  timeoutMs?: number;
}): Promise<void> {
  const {
    state,
    objectType,
    objectId,
    rootSpanId,
    xactId,
    initialBackoffMs = ENSURE_SPANS_FLUSH_INITIAL_BACKOFF_MS,
    maxBackoffMs = ENSURE_SPANS_FLUSH_MAX_BACKOFF_MS,
    timeoutMs = ENSURE_SPANS_FLUSH_TIMEOUT_MS,
  } = args;

  await state.login({});

  // The request is identical for every poll, so build it once.
  const requestBody = {
    query: {
      select: [
        {
          alias: "id",
          expr: { op: "ident", name: ["id"] },
        },
      ],
      from: {
        op: "function",
        name: {
          op: "ident",
          name: [objectType],
        },
        args: [{ op: "literal", value: objectId }],
      },
      filter: {
        op: "and",
        children: [
          {
            op: "eq",
            left: { op: "ident", name: ["root_span_id"] },
            right: { op: "literal", value: rootSpanId },
          },
          {
            op: "eq",
            left: { op: "ident", name: ["_xact_id"] },
            right: { op: "literal", value: xactId },
          },
        ],
      },
      limit: 1,
    },
    brainstore_realtime: false,
    query_source: "sdk_ensure_spans_flushed_de15bf",
  };

  const startedAt = Date.now();
  let backoffMs = initialBackoffMs;

  for (;;) {
    const response = await state
      .apiConn()
      .post("btql", requestBody, { headers: { "Accept-Encoding": "gzip" } });
    const result = await response.json();
    // Optional chaining guards against a null / non-object JSON payload.
    if (Array.isArray(result?.data) && result.data.length > 0) {
      return;
    }

    const remainingMs = timeoutMs - (Date.now() - startedAt);
    if (remainingMs <= 0) {
      throw new Error(
        `Timed out waiting for logs3 xact ${xactId} to become queryable`,
      );
    }

    // Never sleep past the deadline: one last poll happens at (about) the
    // timeout instead of up to a full backoff interval later.
    const sleepMs = Math.min(backoffMs, remainingMs);
    await new Promise((resolve) => setTimeout(resolve, sleepMs));
    backoffMs = Math.min(backoffMs * 2, maxBackoffMs);
  }
}

/**
* Use this to specify that the dataset should actually be the data from a previous (base) experiment.
* If you do not specify a name, Braintrust will automatically figure out the best base experiment to
Expand Down Expand Up @@ -1041,7 +1128,20 @@ async function runEvaluatorInternal(
};

const callback = async (rootSpan: Span) => {
const state = evaluator.state ?? _internalGetGlobalState();
const state =
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably want to verify that this works for playgrounds as well.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It works in playgrounds locally, though we probably want to push this to staging to check.

experiment?.loggingState ??
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This now changes the `state` variable, so we should make sure the downstream call sites still work properly when `state = experiment?.loggingState`.

evaluator.state ??
_internalGetGlobalState();
const parentStr = state.currentParent.getStore();
const parentComponents = parentStr
? SpanComponentsV3.fromStr(parentStr)
: null;
const traceObjectType = parentComponents
? spanObjectTypeV3ToTypedString(parentComponents.data.object_type)
: "experiment";
const traceObjectId =
parentComponents?.data.object_id ??
(experimentIdPromise ? ((await experimentIdPromise) ?? "") : "");
const ensureSpansFlushed = async () => {
// Flush native Braintrust spans
if (experiment) {
Expand All @@ -1056,25 +1156,23 @@ async function runEvaluatorInternal(
if (state) {
await state.flushOtel();
}
};

const parentStr = state.currentParent.getStore();
const parentComponents = parentStr
? SpanComponentsV3.fromStr(parentStr)
: null;
const xactId = state?.bgLogger().lastFlushedXactId();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We want to double-check that this works in OTel land.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does not :(

if (state && xactId && traceObjectId) {
await waitForLogs3XactIngestion({
state,
objectType: traceObjectType,
objectId: traceObjectId,
rootSpanId: rootSpan.rootSpanId,
xactId,
});
}
};

const trace = state
? new LocalTrace({
objectType: parentComponents
? spanObjectTypeV3ToTypedString(
parentComponents.data.object_type,
)
: "experiment",
objectId:
parentComponents?.data.object_id ??
(experimentIdPromise
? ((await experimentIdPromise) ?? "")
: ""),
objectType: traceObjectType,
objectId: traceObjectId,
rootSpanId: rootSpan.rootSpanId,
ensureSpansFlushed,
state,
Expand Down Expand Up @@ -1717,3 +1815,7 @@ const defaultReporter: ReporterDef<boolean> = {
return evalReports.every((r) => r);
},
};

/**
 * Internal helpers exposed so that unit tests can call them directly.
 * Not intended for use outside of tests.
 */
export const _exportsForTestingOnly = {
waitForLogs3XactIngestion,
};
20 changes: 20 additions & 0 deletions js/src/logger.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,26 @@ import { SpanComponentsV3 } from "../util/span_identifier_v3";

configureNode();

// Checks both response shapes exercised here, a `rows` array of objects
// carrying `_xact_id` and a flat `xact_ids` array: the final id is returned.
test("extractLastXactIdFromLogs3Response returns the last xact id", () => {
  const rowsResponse = {
    rows: [{ _xact_id: "xact-1" }, { _xact_id: "xact-2" }],
  };
  const lastFromRows =
    _exportsForTestingOnly.extractLastXactIdFromLogs3Response(rowsResponse);
  expect(lastFromRows).toBe("xact-2");

  const xactIdsResponse = {
    xact_ids: ["xact-1", "xact-2"],
  };
  const lastFromXactIds =
    _exportsForTestingOnly.extractLastXactIdFromLogs3Response(xactIdsResponse);
  expect(lastFromXactIds).toBe("xact-2");
});

// Table-driven check: ids are compared by numeric value ("10" beats "9"),
// and a null operand yields the other id.
test("maxXactId keeps the numerically largest xact id", () => {
  const cases: Array<[string | null, string, string]> = [
    ["10", "9", "10"],
    ["10", "11", "11"],
    [null, "11", "11"],
  ];

  for (const [left, right, expected] of cases) {
    expect(_exportsForTestingOnly.maxXactId(left, right)).toBe(expected);
  }
});

test("renderMessage with file content parts", () => {
const message = {
role: "user" as const,
Expand Down
Loading
Loading