diff --git a/docs/ai-sdk-telemetry-integration-migration.md b/docs/ai-sdk-telemetry-integration-migration.md new file mode 100644 index 000000000..460517914 --- /dev/null +++ b/docs/ai-sdk-telemetry-integration-migration.md @@ -0,0 +1,293 @@ +# AI SDK TelemetryIntegration Migration Guide + +> **Prototype Notice:** This integration targets AI SDK v7+ (beta) which exposes a first-class `TelemetryIntegration` lifecycle. It currently covers `generateText` and `streamText`. `generateObject` and `streamObject` will be added in a future release. + +## Background + +Braintrust has historically provided tracing for the Vercel AI SDK through the `wrapAISDK(...)` proxy approach. Starting with AI SDK v7, the SDK exposes a new `TelemetryIntegration` interface that lets integrations receive lifecycle events directly — no proxy needed. + +The `BraintrustTelemetryIntegration` is Braintrust's first-party implementation of this interface. + +## Quick Start + +### Install + +```bash +npm install braintrust ai@7.0.0-beta.42 +``` + +### Register Globally (Recommended) + +```typescript +import { registerTelemetryIntegration } from "ai"; +import { BraintrustTelemetryIntegration } from "braintrust"; + +// Register once at startup — all generateText/streamText calls get traced +registerTelemetryIntegration(new BraintrustTelemetryIntegration()); +``` + +### Register Per-Call + +```typescript +import { generateText } from "ai"; +import { BraintrustTelemetryIntegration } from "braintrust"; + +const integration = new BraintrustTelemetryIntegration(); + +const result = await generateText({ + model: openai("gpt-4"), + prompt: "Hello world", + experimental_telemetry: { + integrations: [integration], + }, +}); +``` + +## Passing Braintrust Metadata + +Custom span names, metadata, and span attributes are passed through `experimental_telemetry.metadata.braintrust`: + +```typescript +import { generateText } from "ai"; + +const result = await generateText({ + model: openai("gpt-4"), + prompt: 
"Summarize this document", + experimental_telemetry: { + metadata: { + braintrust: { + name: "document-summarizer", + metadata: { documentId: "doc-123", user: "alice" }, + spanAttributes: { type: "function" }, + }, + }, + }, +}); +``` + +### Metadata Fields + +| Field | Type | Description | +| ---------------- | ------------------------- | ---------------------------------------------------------------------------------- | +| `name` | `string` | Custom name for the root Braintrust span (default: `generateText` or `streamText`) | +| `metadata` | `Record<string, unknown>` | Additional metadata attached to the Braintrust span | +| `spanAttributes` | `Record<string, unknown>` | Custom span attributes (e.g., `{ type: "function" }`) | + +## Migration from `wrapAISDK` + +### Concept Mapping + +| `wrapAISDK` (old) | `TelemetryIntegration` (new) | +| -------------------------- | -------------------------------------------------------------------- | +| `wrapAISDK(ai)` | `registerTelemetryIntegration(new BraintrustTelemetryIntegration())` | +| `span_info.name` | `experimental_telemetry.metadata.braintrust.name` | +| `span_info.metadata` | `experimental_telemetry.metadata.braintrust.metadata` | +| `span_info.spanAttributes` | `experimental_telemetry.metadata.braintrust.spanAttributes` | +| Wrapper owns tracing | Integration owns tracing | + +### Before: `wrapAISDK` with `generateText` + +```typescript +import { wrapAISDK } from "braintrust"; +import * as ai from "ai"; + +const { generateText } = wrapAISDK(ai); + +const result = await generateText({ + model: openai("gpt-4"), + prompt: "Reply with PARIS", + span_info: { + name: "city-lookup", + metadata: { region: "europe" }, + }, +}); +``` + +### After: `TelemetryIntegration` with `generateText` + +```typescript +import { registerTelemetryIntegration, generateText } from "ai"; +import { BraintrustTelemetryIntegration } from "braintrust"; + +registerTelemetryIntegration(new BraintrustTelemetryIntegration()); + +const result = await generateText({ + model: 
openai("gpt-4"), + prompt: "Reply with PARIS", + experimental_telemetry: { + metadata: { + braintrust: { + name: "city-lookup", + metadata: { region: "europe" }, + }, + }, + }, +}); +``` + +### Before: `wrapAISDK` with `streamText` + +```typescript +import { wrapAISDK } from "braintrust"; +import * as ai from "ai"; + +const { streamText } = wrapAISDK(ai); + +const result = streamText({ + model: openai("gpt-4"), + prompt: "Count to 3", + span_info: { + name: "counting-stream", + }, +}); + +for await (const chunk of result.textStream) { + console.log(chunk); +} +``` + +### After: `TelemetryIntegration` with `streamText` + +```typescript +import { registerTelemetryIntegration, streamText } from "ai"; +import { BraintrustTelemetryIntegration } from "braintrust"; + +registerTelemetryIntegration(new BraintrustTelemetryIntegration()); + +const result = streamText({ + model: openai("gpt-4"), + prompt: "Count to 3", + experimental_telemetry: { + metadata: { + braintrust: { + name: "counting-stream", + }, + }, + }, +}); + +for await (const chunk of result.textStream) { + console.log(chunk); +} +``` + +### Custom Span Name + +```typescript +// Old (wrapper) +generateText({ ...params, span_info: { name: "my-span" } }); + +// New (integration) +generateText({ + ...params, + experimental_telemetry: { + metadata: { braintrust: { name: "my-span" } }, + }, +}); +``` + +### Custom Metadata + +```typescript +// Old (wrapper) +generateText({ + ...params, + span_info: { metadata: { env: "production", version: "1.2" } }, +}); + +// New (integration) +generateText({ + ...params, + experimental_telemetry: { + metadata: { + braintrust: { metadata: { env: "production", version: "1.2" } }, + }, + }, +}); +``` + +## Trace Shape + +The integration produces the following trace structure: + +### `generateText` (single step) + +``` +root span (generateText / custom-name) + └── step-0 (LLM call) +``` + +### `generateText` with tool calls + +``` +root span (generateText / custom-name) + ├── step-0 
(LLM call → tool call) + │ └── get_weather (tool execution) + └── step-1 (LLM call → final response) +``` + +### `streamText` + +``` +root span (streamText / custom-name) + └── step-0 (LLM call, includes time_to_first_token metric) +``` + +### What Gets Captured + +| Data | Location | +| ---------------------------------------------------- | --------------------------------------- | +| System/prompt/messages | Root span `input` | +| Model ID | Root span `metadata.model` | +| Provider | Root span `metadata.provider` | +| Step messages | Step span `input` | +| Step output (text, tool calls, finish reason, usage) | Step span `output` | +| Token usage metrics | Step span and root span `metrics` | +| Tool call input | Tool span `input` | +| Tool call output | Tool span `output` | +| Tool execution duration | Tool span `metrics.duration` | +| Time to first token (streaming) | Step span `metrics.time_to_first_token` | +| Errors | Span `error` field | + +## Current Limitations + +This is a prototype focused on validating the approach. 
The following are not yet supported: + +- `generateObject` / `streamObject` — planned for a future release +- AI SDK versions prior to v7 — use `wrapAISDK` for v3–v6 +- Mixed wrapper/integration coexistence — do not use both `wrapAISDK` and `BraintrustTelemetryIntegration` on the same call +- Agent class wrapping — the integration covers `generateText` and `streamText`; agent classes still use the wrapper + +## API Reference + +### `BraintrustTelemetryIntegration` + +```typescript +import { BraintrustTelemetryIntegration } from "braintrust"; + +const integration = new BraintrustTelemetryIntegration(); +``` + +Implements the AI SDK `TelemetryIntegration` interface with the following lifecycle hooks: + +- `onStart` — Creates root Braintrust span +- `onStepStart` — Creates child step span +- `onToolCallStart` — Creates child tool span +- `onToolCallFinish` — Logs tool output/error and ends tool span +- `onChunk` — Tracks `time_to_first_token` for streaming +- `onStepFinish` — Logs step output/metrics and ends step span +- `onFinish` — Logs final output/metrics and ends root span +- `onError` — Logs error on all open spans and ends them +- `executeTool` — Runs tool execution within Braintrust span context (enables nested traces) + +### `BraintrustTelemetryMetadata` + +```typescript +import type { BraintrustTelemetryMetadata } from "braintrust"; + +const meta: BraintrustTelemetryMetadata = { + name: "my-span", + metadata: { key: "value" }, + spanAttributes: { type: "function" }, +}; +``` diff --git a/e2e/scenarios/ai-sdk-telemetry-integration-traces/__snapshots__/scenario.test.ts.snap b/e2e/scenarios/ai-sdk-telemetry-integration-traces/__snapshots__/scenario.test.ts.snap new file mode 100644 index 000000000..74ba8ade4 --- /dev/null +++ b/e2e/scenarios/ai-sdk-telemetry-integration-traces/__snapshots__/scenario.test.ts.snap @@ -0,0 +1,139 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`ai sdk telemetry integration traces > creates expected 
trace tree 1`] = ` +[ + { + "has_error": false, + "has_input": true, + "has_output": true, + "metadata_keys": [ + "braintrust", + "model", + "provider", + "user", + ], + "metric_keys": [], + "name": "custom-generate-name", + "type": "llm", + }, + { + "has_error": false, + "has_input": true, + "has_output": true, + "metadata_keys": [ + "model", + "provider", + "stepNumber", + ], + "metric_keys": [], + "name": "step-0", + "type": "llm", + }, + { + "has_error": false, + "has_input": true, + "has_output": true, + "metadata_keys": [ + "braintrust", + "model", + "provider", + ], + "metric_keys": [], + "name": "custom-stream-name", + "type": "llm", + }, + { + "has_error": false, + "has_input": true, + "has_output": true, + "metadata_keys": [ + "model", + "provider", + "stepNumber", + ], + "metric_keys": [ + "time_to_first_token", + ], + "name": "step-0", + "type": "llm", + }, + { + "has_error": false, + "has_input": true, + "has_output": true, + "metadata_keys": [ + "braintrust", + "model", + "provider", + ], + "metric_keys": [], + "name": "tool-call-generate", + "type": "llm", + }, + { + "has_error": false, + "has_input": true, + "has_output": true, + "metadata_keys": [ + "model", + "provider", + "stepNumber", + ], + "metric_keys": [], + "name": "step-0", + "type": "llm", + }, + { + "has_error": false, + "has_input": true, + "has_output": true, + "metadata_keys": [ + "toolCallId", + ], + "metric_keys": [ + "duration", + ], + "name": "get_weather", + "type": "tool", + }, + { + "has_error": false, + "has_input": true, + "has_output": true, + "metadata_keys": [ + "model", + "provider", + "stepNumber", + ], + "metric_keys": [], + "name": "step-1", + "type": "llm", + }, + { + "has_error": true, + "has_input": true, + "has_output": false, + "metadata_keys": [ + "braintrust", + "model", + "provider", + ], + "metric_keys": [], + "name": "error-generate", + "type": "llm", + }, + { + "has_error": true, + "has_input": true, + "has_output": false, + "metadata_keys": [ + "model", + 
"provider", + "stepNumber", + ], + "metric_keys": [], + "name": "step-0", + "type": "llm", + }, +] +`; diff --git a/e2e/scenarios/ai-sdk-telemetry-integration-traces/package.json b/e2e/scenarios/ai-sdk-telemetry-integration-traces/package.json new file mode 100644 index 000000000..c909b290a --- /dev/null +++ b/e2e/scenarios/ai-sdk-telemetry-integration-traces/package.json @@ -0,0 +1,9 @@ +{ + "name": "@braintrust/e2e-ai-sdk-telemetry-integration-traces", + "private": true, + "dependencies": { + "ai": "npm:ai@7.0.0-beta.42", + "@ai-sdk/openai": "npm:@ai-sdk/openai@4.0.0-beta.16", + "zod": "3.25.76" + } +} diff --git a/e2e/scenarios/ai-sdk-telemetry-integration-traces/pnpm-lock.yaml b/e2e/scenarios/ai-sdk-telemetry-integration-traces/pnpm-lock.yaml new file mode 100644 index 000000000..bae39dc6f --- /dev/null +++ b/e2e/scenarios/ai-sdk-telemetry-integration-traces/pnpm-lock.yaml @@ -0,0 +1,116 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + '@ai-sdk/openai': + specifier: npm:@ai-sdk/openai@4.0.0-beta.16 + version: 4.0.0-beta.16(zod@3.25.76) + ai: + specifier: npm:ai@7.0.0-beta.42 + version: 7.0.0-beta.42(zod@3.25.76) + zod: + specifier: 3.25.76 + version: 3.25.76 + +packages: + + '@ai-sdk/gateway@4.0.0-beta.22': + resolution: {integrity: sha512-liHtq5g1+BA2iEIabP1dyiTbbI/VkYZFKq9gqX40LJBz3ytWq9Y1AfBSQAZtGYTyyd32Qqjq7WQsqD3wo0+jrQ==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/openai@4.0.0-beta.16': + resolution: {integrity: sha512-bd2R2NVzME3hwPxS3ThYvkYyuEcOYyYW1lTQ+tgWDMvwKdcmg65bPZZlZr6VlT3QkDejmAc/tcp+NrbmdK7VXQ==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/provider-utils@5.0.0-beta.7': + resolution: {integrity: sha512-oQqukJG/h5mS0cGx/GHUSyhhhrvVvCRRxgtvnRttEnB3C4S+oc3m8+nO3u81IO5pRggCh4ZUtqzIbqH+Famhjg==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + 
'@ai-sdk/provider@4.0.0-beta.5': + resolution: {integrity: sha512-+07aGNCVXEKgGXtAXjjROPcpHMudWfvsm5UvlZ4LtTfjX9cT61x7h3W0pSSpWY/H7doymdm8PmuX6Z8AvP03WQ==} + engines: {node: '>=18'} + + '@opentelemetry/api@1.9.0': + resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==} + engines: {node: '>=8.0.0'} + + '@standard-schema/spec@1.1.0': + resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==} + + '@vercel/oidc@3.2.0': + resolution: {integrity: sha512-UycprH3T6n3jH0k44NHMa7pnFHGu/N05MjojYr+Mc6I7obkoLIJujSWwin1pCvdy/eOxrI/l3uDLQsmcrOb4ug==} + engines: {node: '>= 20'} + + ai@7.0.0-beta.42: + resolution: {integrity: sha512-tSxsCHrMxaKH8j8fkgFYcRQsES2gmR/im8xnz99TDe8HpdxKsDioyq7Jc9lNzDg/qCVLd7hSI5uJo1X03/fZwg==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + eventsource-parser@3.0.6: + resolution: {integrity: sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==} + engines: {node: '>=18.0.0'} + + json-schema@0.4.0: + resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==} + + zod@3.25.76: + resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} + +snapshots: + + '@ai-sdk/gateway@4.0.0-beta.22(zod@3.25.76)': + dependencies: + '@ai-sdk/provider': 4.0.0-beta.5 + '@ai-sdk/provider-utils': 5.0.0-beta.7(zod@3.25.76) + '@vercel/oidc': 3.2.0 + zod: 3.25.76 + + '@ai-sdk/openai@4.0.0-beta.16(zod@3.25.76)': + dependencies: + '@ai-sdk/provider': 4.0.0-beta.5 + '@ai-sdk/provider-utils': 5.0.0-beta.7(zod@3.25.76) + zod: 3.25.76 + + '@ai-sdk/provider-utils@5.0.0-beta.7(zod@3.25.76)': + dependencies: + '@ai-sdk/provider': 4.0.0-beta.5 + '@standard-schema/spec': 1.1.0 + eventsource-parser: 3.0.6 + zod: 3.25.76 + + '@ai-sdk/provider@4.0.0-beta.5': + dependencies: 
+ json-schema: 0.4.0 + + '@opentelemetry/api@1.9.0': {} + + '@standard-schema/spec@1.1.0': {} + + '@vercel/oidc@3.2.0': {} + + ai@7.0.0-beta.42(zod@3.25.76): + dependencies: + '@ai-sdk/gateway': 4.0.0-beta.22(zod@3.25.76) + '@ai-sdk/provider': 4.0.0-beta.5 + '@ai-sdk/provider-utils': 5.0.0-beta.7(zod@3.25.76) + '@opentelemetry/api': 1.9.0 + zod: 3.25.76 + + eventsource-parser@3.0.6: {} + + json-schema@0.4.0: {} + + zod@3.25.76: {} diff --git a/e2e/scenarios/ai-sdk-telemetry-integration-traces/scenario.impl.ts b/e2e/scenarios/ai-sdk-telemetry-integration-traces/scenario.impl.ts new file mode 100644 index 000000000..76638f6ea --- /dev/null +++ b/e2e/scenarios/ai-sdk-telemetry-integration-traces/scenario.impl.ts @@ -0,0 +1,272 @@ +import { initLogger } from "braintrust"; +import { BraintrustTelemetryIntegration } from "braintrust"; +import { + generateText, + streamText, + tool, + registerTelemetryIntegration, + stepCountIs, +} from "ai"; +import { MockLanguageModelV4, convertArrayToReadableStream } from "ai/test"; +import { z } from "zod"; + +export const ROOT_NAME = "ai-sdk-telemetry-integration-root"; +export const SCENARIO_NAME = "ai-sdk-telemetry-integration"; + +function getTestRunId(): string { + return process.env.BRAINTRUST_E2E_RUN_ID!; +} + +function scopedName(base: string): string { + if (process.env.BRAINTRUST_E2E_PROJECT_NAME) { + return process.env.BRAINTRUST_E2E_PROJECT_NAME; + } + const suffix = getTestRunId() + .toLowerCase() + .replace(/[^a-z0-9-]/g, "-"); + return `${base}-${suffix}`; +} + +/** + * Creates a mock model that returns a simple text response. 
/**
 * Builds a mock language model whose single `doGenerate` result is one text
 * content part holding `text`, with a "stop" finish reason and fixed usage.
 */
function createTextModel(text: string): MockLanguageModelV4 {
  const modelId = "mock-model";
  return new MockLanguageModelV4({
    provider: "mock-provider",
    modelId,
    doGenerate: {
      content: [{ type: "text", text }],
      finishReason: "stop",
      usage: { inputTokens: 10, outputTokens: 5 },
      request: { body: "{}" },
      response: {
        id: "response-1",
        modelId,
        timestamp: new Date(0),
        headers: {},
        body: undefined,
      },
      rawResponse: undefined,
      warnings: [],
      providerMetadata: undefined,
    },
  });
}

/**
 * Builds a mock language model that streams `text` one space-separated word
 * at a time.
 *
 * The emitted parts are: one `text-start`, one `text-delta` per word (every
 * word after the first is prefixed with a space so the deltas concatenate back
 * to `text`), one `text-end`, and a final `finish` part whose `outputTokens`
 * equals the number of words.
 */
function createStreamModel(text: string): MockLanguageModelV4 {
  const modelId = "mock-model";
  const blockId = "text-block-1";
  const tokens = text.split(" ");

  // One delta per word; only the first word is emitted without a leading space.
  const deltaParts = tokens.map((token, position) => ({
    type: "text-delta" as const,
    id: blockId,
    delta: position === 0 ? token : ` ${token}`,
  }));

  return new MockLanguageModelV4({
    provider: "mock-provider",
    modelId,
    doStream: {
      stream: convertArrayToReadableStream([
        { type: "text-start" as const, id: blockId },
        ...deltaParts,
        { type: "text-end" as const, id: blockId },
        {
          type: "finish" as const,
          finishReason: "stop" as const,
          usage: { inputTokens: 10, outputTokens: tokens.length },
        },
      ]),
      request: { body: "{}" },
      response: {
        id: "response-1",
        modelId,
        timestamp: new Date(0),
        headers: {},
        body: undefined,
      },
      rawResponse: undefined,
      warnings: [],
    },
  });
}
+ */ +function createToolCallModel(): MockLanguageModelV4 { + let callCount = 0; + return new MockLanguageModelV4({ + provider: "mock-provider", + modelId: "mock-model", + doGenerate: async () => { + callCount++; + if (callCount === 1) { + // First call: return a tool call (V4 format uses `input` not `args`) + return { + content: [ + { + type: "tool-call" as const, + toolCallId: "tool-call-1", + toolName: "get_weather", + input: JSON.stringify({ location: "Paris, France" }), + }, + ], + finishReason: "tool-calls" as const, + usage: { inputTokens: 15, outputTokens: 20 }, + request: { body: "{}" }, + response: { + id: "response-1", + modelId: "mock-model", + timestamp: new Date(0), + headers: {}, + body: undefined, + }, + rawResponse: undefined, + warnings: [], + providerMetadata: undefined, + }; + } + // Second call: return text after tool result + return { + content: [ + { + type: "text" as const, + text: "The weather in Paris is sunny, 22°C.", + }, + ], + finishReason: "stop" as const, + usage: { inputTokens: 25, outputTokens: 15 }, + request: { body: "{}" }, + response: { + id: "response-2", + modelId: "mock-model", + timestamp: new Date(0), + headers: {}, + body: undefined, + }, + rawResponse: undefined, + warnings: [], + providerMetadata: undefined, + }; + }, + }); +} + +/** + * Creates a mock model that throws an error during generation. + */ +function createErrorModel(): MockLanguageModelV4 { + return new MockLanguageModelV4({ + provider: "mock-provider", + modelId: "mock-model", + doGenerate: async () => { + throw new Error("Mock generation error"); + }, + }); +} + +export async function runTelemetryIntegrationScenario(): Promise { + const testRunId = getTestRunId(); + const integration = new BraintrustTelemetryIntegration(); + + // Register globally + registerTelemetryIntegration(integration); + + const logger = initLogger({ + projectName: scopedName("e2e-ai-sdk-telemetry-integration"), + }); + + await logger.traced( + async () => { + // 1. 
generateText — basic + await generateText({ + model: createTextModel("PARIS"), + prompt: "Reply with the single token PARIS.", + experimental_telemetry: { + metadata: { + braintrust: { + name: "custom-generate-name", + metadata: { user: "test-user" }, + spanAttributes: { type: "llm" }, + }, + }, + }, + }); + + // 2. streamText — basic + const streamResult = streamText({ + model: createStreamModel("one two three"), + prompt: "Count from 1 to 3.", + experimental_telemetry: { + metadata: { + braintrust: { + name: "custom-stream-name", + }, + }, + }, + }); + // Consume the stream + for await (const _chunk of streamResult.textStream) { + // consume + } + + // 3. generateText with tool calls + await generateText({ + model: createToolCallModel(), + prompt: "What is the weather in Paris?", + tools: { + get_weather: tool({ + description: "Get the weather for a location", + parameters: z.object({ + location: z.string().describe("The city and country"), + }), + execute: async (args) => + JSON.stringify({ + condition: "sunny", + location: args.location, + temperatureC: 22, + }), + }), + }, + stopWhen: stepCountIs(4), + experimental_telemetry: { + metadata: { + braintrust: { + name: "tool-call-generate", + }, + }, + }, + }); + + // 4. 
generateText with error + try { + await generateText({ + model: createErrorModel(), + prompt: "This will fail.", + experimental_telemetry: { + metadata: { + braintrust: { + name: "error-generate", + }, + }, + }, + }); + } catch { + // Expected error - we want to verify error is captured on the span + } + }, + { + name: ROOT_NAME, + event: { + metadata: { + scenario: SCENARIO_NAME, + testRunId, + }, + }, + }, + ); + + await logger.flush(); +} diff --git a/e2e/scenarios/ai-sdk-telemetry-integration-traces/scenario.test.ts b/e2e/scenarios/ai-sdk-telemetry-integration-traces/scenario.test.ts new file mode 100644 index 000000000..408fa188e --- /dev/null +++ b/e2e/scenarios/ai-sdk-telemetry-integration-traces/scenario.test.ts @@ -0,0 +1,160 @@ +import { describe, expect, test } from "vitest"; +import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; +import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server"; +import { + prepareScenarioDir, + withScenarioHarness, +} from "../../helpers/scenario-harness"; +import { resolveScenarioDir } from "../../helpers/scenario-harness"; +import { + findAllSpans, + findChildSpans, + findLatestSpan, +} from "../../helpers/trace-selectors"; +import { E2E_TAGS } from "../../helpers/tags"; +import { ROOT_NAME } from "./scenario.impl"; + +const SCENARIO_TIMEOUT_MS = 30_000; + +const scenarioDir = await prepareScenarioDir({ + scenarioDir: resolveScenarioDir(import.meta.url), +}); + +function summarizeSpan(event: CapturedLogEvent): Json { + const row = event.row as Record; + return { + name: event.span.name ?? null, + type: event.span.type ?? null, + has_input: event.input !== undefined && event.input !== null, + has_output: event.output !== undefined && event.output !== null, + has_error: row.error !== undefined && row.error !== null, + metadata_keys: Object.keys( + (row.metadata as Record) ?? {}, + ).sort(), + metric_keys: Object.keys((row.metrics as Record) ?? 
{}) + .filter((k) => k !== "start" && k !== "end") + .sort(), + } satisfies Json; +} + +describe("ai sdk telemetry integration traces", () => { + test( + "creates expected trace tree", + { + tags: [E2E_TAGS.hermetic], + timeout: SCENARIO_TIMEOUT_MS, + }, + async () => { + await withScenarioHarness(async (harness) => { + await harness.runScenarioDir({ + scenarioDir, + timeoutMs: SCENARIO_TIMEOUT_MS, + }); + + const events = harness.events(); + expect(events.length).toBeGreaterThan(0); + + // Find the root span + const rootSpan = findLatestSpan(events, ROOT_NAME); + expect(rootSpan).toBeDefined(); + + // -------------------------------------------------- + // 1. generateText with custom name and metadata + // -------------------------------------------------- + const generateSpans = findAllSpans(events, "custom-generate-name"); + expect(generateSpans.length).toBe(1); + const genSpan = generateSpans[0]; + + // Verify custom metadata was plumbed through + const genRow = genSpan.row as Record; + const genMeta = genRow.metadata as Record; + expect(genMeta?.user).toBe("test-user"); + expect(genMeta?.model).toBe("mock-model"); + expect(genMeta?.provider).toBe("mock-provider"); + + // Verify braintrust integration marker + const btMeta = genMeta?.braintrust as Record; + expect(btMeta?.integration_name).toBe("ai-sdk-telemetry"); + + // Verify output was logged + expect(genSpan.output).toBeDefined(); + + // Verify step child spans exist + const genSteps = findChildSpans(events, "step-0", genSpan.span.id); + expect(genSteps.length).toBe(1); + + // -------------------------------------------------- + // 2. 
streamText with custom name + // -------------------------------------------------- + const streamSpans = findAllSpans(events, "custom-stream-name"); + expect(streamSpans.length).toBe(1); + const streamSpan = streamSpans[0]; + expect(streamSpan.output).toBeDefined(); + + // Verify step child spans exist + const streamSteps = findChildSpans( + events, + "step-0", + streamSpan.span.id, + ); + expect(streamSteps.length).toBe(1); + + // Verify time_to_first_token metric on step span + const streamStepRow = streamSteps[0].row as Record; + const streamStepMetrics = streamStepRow.metrics as Record< + string, + unknown + >; + expect(streamStepMetrics?.time_to_first_token).toBeDefined(); + + // -------------------------------------------------- + // 3. generateText with tool calls + // -------------------------------------------------- + const toolGenSpans = findAllSpans(events, "tool-call-generate"); + expect(toolGenSpans.length).toBe(1); + const toolGenSpan = toolGenSpans[0]; + + // Should have step-0 (tool call) and step-1 (final response) + const toolStep0 = findChildSpans(events, "step-0", toolGenSpan.span.id); + expect(toolStep0.length).toBe(1); + + const toolStep1 = findChildSpans(events, "step-1", toolGenSpan.span.id); + expect(toolStep1.length).toBe(1); + + // Should have a get_weather tool span + const allToolSpans = findAllSpans(events, "get_weather"); + expect(allToolSpans.length).toBe(1); + const weatherTool = allToolSpans[0]; + expect(weatherTool.span.type).toBe("tool"); + expect(weatherTool.output).toBeDefined(); + + // Verify tool span has duration metric + const toolRow = weatherTool.row as Record; + const toolMetrics = toolRow.metrics as Record; + expect(toolMetrics?.duration).toBeDefined(); + + // -------------------------------------------------- + // 4. 
generateText with error + // -------------------------------------------------- + const errorSpans = findAllSpans(events, "error-generate"); + expect(errorSpans.length).toBe(1); + const errorSpan = errorSpans[0]; + const errorRow = errorSpan.row as Record; + expect(errorRow.error).toBeDefined(); + + // -------------------------------------------------- + // Snapshot the full trace tree structure + // -------------------------------------------------- + const allSpans = events + .filter( + (e) => + e.span.name !== ROOT_NAME && + e.span.rootId === rootSpan!.span.rootId, + ) + .map(summarizeSpan); + + expect(normalizeForSnapshot(allSpans as Json)).toMatchSnapshot(); + }); + }, + ); +}); diff --git a/e2e/scenarios/ai-sdk-telemetry-integration-traces/scenario.ts b/e2e/scenarios/ai-sdk-telemetry-integration-traces/scenario.ts new file mode 100644 index 000000000..356f3269c --- /dev/null +++ b/e2e/scenarios/ai-sdk-telemetry-integration-traces/scenario.ts @@ -0,0 +1,4 @@ +import { runMain } from "../../helpers/scenario-runtime"; +import { runTelemetryIntegrationScenario } from "./scenario.impl"; + +runMain(async () => runTelemetryIntegrationScenario()); diff --git a/js/src/exports.ts b/js/src/exports.ts index c01c1e3cb..228eab268 100644 --- a/js/src/exports.ts +++ b/js/src/exports.ts @@ -166,9 +166,11 @@ export { export { wrapAISDK, wrapAgentClass, + BraintrustTelemetryIntegration, BraintrustMiddleware, wrapAISDKModel, } from "./wrappers/ai-sdk"; +export type { BraintrustTelemetryMetadata } from "./wrappers/ai-sdk"; export { wrapAnthropic } from "./wrappers/anthropic"; export { wrapMastraAgent } from "./wrappers/mastra"; export { wrapClaudeAgentSDK } from "./wrappers/claude-agent-sdk/claude-agent-sdk"; diff --git a/js/src/wrappers/ai-sdk/index.ts b/js/src/wrappers/ai-sdk/index.ts index 00c42bfc3..db8495d59 100644 --- a/js/src/wrappers/ai-sdk/index.ts +++ b/js/src/wrappers/ai-sdk/index.ts @@ -1,4 +1,6 @@ export { wrapAISDK, wrapAgentClass } from "./ai-sdk"; +export { 
BraintrustTelemetryIntegration } from "./telemetry-integration"; +export type { BraintrustTelemetryMetadata } from "./telemetry-integration"; // TODO: remove in the next major release export { wrapAISDKModel } from "./deprecated/wrapAISDKModel"; diff --git a/js/src/wrappers/ai-sdk/telemetry-integration.ts b/js/src/wrappers/ai-sdk/telemetry-integration.ts new file mode 100644 index 000000000..2c505757d --- /dev/null +++ b/js/src/wrappers/ai-sdk/telemetry-integration.ts @@ -0,0 +1,509 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ + +import { startSpan, withCurrent, type Span } from "../../logger"; +import { SpanTypeAttribute } from "../../../util"; +import { extractTokenMetrics } from "./ai-sdk"; + +/** + * Braintrust-specific metadata that can be passed through + * `experimental_telemetry.metadata.braintrust` on AI SDK calls. + */ +export interface BraintrustTelemetryMetadata { + /** Custom span name for the root Braintrust span. */ + name?: string; + /** Additional metadata to attach to the Braintrust span. */ + metadata?: Record; + /** Custom span attributes (e.g., `{ type: "function" }`). */ + spanAttributes?: Record; +} + +/** + * Internal state tracked per AI SDK call (keyed by callId). + */ +interface CallState { + /** The operation type (e.g. 'ai.generateText', 'ai.streamText'). */ + operationId: string; + /** The root Braintrust span for this call. */ + rootSpan: Span; + /** Current step span (if in a step). */ + stepSpan?: Span; + /** Tool spans keyed by toolCallId. */ + toolSpans: Map; + /** Whether we've received the first stream chunk. */ + receivedFirstChunk: boolean; + /** Start time for stream timing. */ + startTime: number; + /** Braintrust-specific metadata from telemetry settings. */ + braintrustMeta?: BraintrustTelemetryMetadata; +} + +/** + * Extracts Braintrust-specific metadata from the telemetry metadata object. + * Users pass this via `experimental_telemetry.metadata.braintrust`. 
/**
 * Extracts Braintrust-specific metadata from the telemetry metadata object.
 * Users pass this via `experimental_telemetry.metadata.braintrust`.
 *
 * @param metadata - The telemetry metadata bag, if any.
 * @returns The `braintrust` entry when it is a plain (non-array) object,
 *   otherwise `undefined`.
 */
function extractBraintrustMeta(
  metadata: Record<string, unknown> | undefined,
): BraintrustTelemetryMetadata | undefined {
  const candidate = metadata?.braintrust;
  // Arrays also report typeof "object", so they are rejected explicitly:
  // only a keyed object can carry name / metadata / spanAttributes.
  if (
    !candidate ||
    typeof candidate !== "object" ||
    Array.isArray(candidate)
  ) {
    return undefined;
  }
  return candidate as BraintrustTelemetryMetadata;
}

/**
 * Converts an AI SDK usage object to Braintrust metrics format by delegating
 * to `extractTokenMetrics`.
 *
 * @param usage - The usage object reported by the AI SDK, if any.
 * @returns The extracted metrics, or an empty object when `usage` is missing.
 */
function usageToMetrics(
  usage: Record<string, unknown> | undefined,
): Record<string, number> {
  if (!usage) return {};
  // extractTokenMetrics reads `.usage` off a result-shaped object, so wrap it.
  return extractTokenMetrics({ usage } as any);
}

/**
 * Derives a default span name from the operation ID by keeping the text after
 * the last dot.
 * e.g. 'ai.generateText' -> 'generateText', 'ai.streamText' -> 'streamText'
 *
 * @param operationId - The AI SDK operation identifier.
 * @returns The trailing segment, or `operationId` unchanged when it has no dot.
 */
function defaultSpanName(operationId: string): string {
  const dotIndex = operationId.lastIndexOf(".");
  return dotIndex >= 0 ? operationId.slice(dotIndex + 1) : operationId;
}

/**
 * Prepares messages for logging as span input.
 *
 * The value is passed through unchanged (no copying and no truncation);
 * any falsy value is normalized to `undefined`.
 *
 * @param messages - The messages value taken from an AI SDK event.
 * @returns `messages` as-is, or `undefined` when it is falsy.
 */
function serializeMessages(messages: unknown): unknown {
  if (!messages) return undefined;
  return messages;
}

/**
 * Builds the input object for the root span from an onStart event.
 *
 * Copies `system`, `prompt` and `messages` when they are not `undefined`;
 * all other event fields are ignored.
 *
 * @param event - The onStart event payload.
 * @returns A new object holding only the copied fields.
 */
function buildRootInput(
  event: Record<string, unknown>,
): Record<string, unknown> {
  const input: Record<string, unknown> = {};
  if (event.system !== undefined) input.system = event.system;
  if (event.prompt !== undefined) input.prompt = event.prompt;
  if (event.messages !== undefined) {
    input.messages = serializeMessages(event.messages);
  }
  return input;
}

/**
 * Builds the input object for a step span from an onStepStart event.
 *
 * Only `messages` is copied, and only when it is not `undefined`.
 *
 * @param event - The onStepStart event payload.
 * @returns A new object holding `messages` when present, otherwise empty.
 */
function buildStepInput(
  event: Record<string, unknown>,
): Record<string, unknown> {
  const input: Record<string, unknown> = {};
  if (event.messages !== undefined) {
    input.messages = serializeMessages(event.messages);
  }
  return input;
}
/**
 * Returns `value` when it is an array with at least one element, otherwise
 * `undefined`.
 */
function nonEmptyArray(value: unknown): unknown[] | undefined {
  return Array.isArray(value) && value.length > 0 ? value : undefined;
}

/**
 * Copies the result fields shared by step and finish events into a fresh
 * object: `text`, `toolCalls`, `toolResults` and `finishReason`.
 *
 * `toolCalls` / `toolResults` are copied only when they are non-empty arrays;
 * `text` and `finishReason` are copied whenever they are not `undefined`.
 */
function buildSharedOutput(
  event: Record<string, unknown>,
): Record<string, unknown> {
  const output: Record<string, unknown> = {};
  if (event.text !== undefined) output.text = event.text;

  const toolCalls = nonEmptyArray(event.toolCalls);
  if (toolCalls) output.toolCalls = toolCalls;

  const toolResults = nonEmptyArray(event.toolResults);
  if (toolResults) output.toolResults = toolResults;

  if (event.finishReason !== undefined) {
    output.finishReason = event.finishReason;
  }
  return output;
}

/**
 * Serializes step result output for logging.
 *
 * @param event - The onStepFinish event.
 * @returns The shared result fields plus `usage` when the event defines it.
 */
function buildStepOutput(
  event: Record<string, unknown>,
): Record<string, unknown> {
  const output = buildSharedOutput(event);
  if (event.usage !== undefined) output.usage = event.usage;
  return output;
}

/**
 * Serializes finish result output for logging on the root span.
 *
 * @param event - The onFinish event.
 * @returns The shared result fields plus `totalUsage` when defined; `usage`
 *   is included only as a fallback when `totalUsage` is absent.
 */
function buildFinishOutput(
  event: Record<string, unknown>,
): Record<string, unknown> {
  const output = buildSharedOutput(event);
  if (event.totalUsage !== undefined) {
    output.totalUsage = event.totalUsage;
  } else if (event.usage !== undefined) {
    output.usage = event.usage;
  }
  return output;
}

/**
 * Serializes an error for logging.
 *
 * @param error - Any thrown or reported value.
 * @returns The value itself when it is an `Error`; the JSON text for other
 *   non-null objects; otherwise `String(error)`.
 */
function serializeError(error: unknown): unknown {
  if (error instanceof Error) {
    return error;
  }
  if (typeof error === "object" && error !== null) {
    try {
      const json = JSON.stringify(error);
      // JSON.stringify yields undefined (e.g. toJSON() returning undefined);
      // fall back to String() rather than logging no error at all.
      if (json !== undefined) return json;
    } catch {
      // Not JSON-serializable (circular reference, BigInt, ...): fall through.
    }
  }
  return String(error);
}
/**
 * A Braintrust `TelemetryIntegration` for the AI SDK (v7+).
 *
 * This integration creates Braintrust spans from the AI SDK's lifecycle events
 * for `generateText` and `streamText`. It replaces the `wrapAISDK()` approach
 * with a first-class telemetry integration pattern.
 *
 * Span layout produced by the handlers below: one root span per call (keyed
 * by `callId`), one `step-N` span per step parented to the root span, and one
 * tool span per tool call parented to the current step span (or to the root
 * span when no step is open).
 *
 * ## Usage
 *
 * ### Global registration (recommended)
 *
 * ```typescript
 * import { registerTelemetryIntegration } from "ai";
 * import { BraintrustTelemetryIntegration } from "braintrust";
 *
 * registerTelemetryIntegration(new BraintrustTelemetryIntegration());
 * ```
 *
 * ### Per-call registration
 *
 * ```typescript
 * import { generateText } from "ai";
 * import { BraintrustTelemetryIntegration } from "braintrust";
 *
 * const result = await generateText({
 *   model: openai("gpt-4"),
 *   prompt: "Hello world",
 *   experimental_telemetry: {
 *     integrations: [new BraintrustTelemetryIntegration()],
 *     metadata: {
 *       braintrust: {
 *         name: "my-custom-span-name",
 *         metadata: { user: "test" },
 *       },
 *     },
 *   },
 * });
 * ```
 */
export class BraintrustTelemetryIntegration {
  /** Per-call tracing state, keyed by the AI SDK `callId`. */
  private readonly callStates = new Map<string, CallState>();

  /** Looks up the state for a call; `undefined` for untracked calls. */
  private getState(callId: string): CallState | undefined {
    return this.callStates.get(callId);
  }

  /** Drops the state for a call once its root span has been ended. */
  private cleanup(callId: string): void {
    this.callStates.delete(callId);
  }

  /**
   * Called when an AI SDK operation begins (generateText or streamText).
   *
   * Starts the root span and registers call state under `event.callId`.
   * Events whose `operationId` is neither `ai.generateText` nor
   * `ai.streamText` are ignored, so later events for them are no-ops.
   */
  onStart = (event: any): void => {
    const operationId: string = event.operationId ?? "unknown";

    // Only handle generateText and streamText
    if (operationId !== "ai.generateText" && operationId !== "ai.streamText") {
      return;
    }

    const braintrustMeta = extractBraintrustMeta(
      event.metadata as Record<string, unknown> | undefined,
    );

    const spanName = braintrustMeta?.name ?? defaultSpanName(operationId);

    const { model: modelId, provider } = extractModelInfo(event);

    const rootSpan = startSpan({
      name: spanName,
      spanAttributes: {
        type: SpanTypeAttribute.LLM,
        // User-supplied attributes come last so they can override `type`.
        ...(braintrustMeta?.spanAttributes ?? {}),
      },
      event: {
        input: buildRootInput(event),
        metadata: {
          // User metadata first: the integration's own keys below win on clash.
          ...braintrustMeta?.metadata,
          model: modelId,
          ...(provider ? { provider } : {}),
          braintrust: {
            integration_name: "ai-sdk-telemetry",
            sdk_language: "typescript",
          },
        },
      },
    });

    this.callStates.set(event.callId, {
      operationId,
      rootSpan,
      toolSpans: new Map(),
      receivedFirstChunk: false,
      startTime: Date.now(),
      braintrustMeta,
    });
  };

  /**
   * Called when a step (single LLM invocation) begins.
   *
   * Starts a `step-N` span under the root span. If a previous step span is
   * still open (its finish event never arrived), it is ended first so it is
   * not silently replaced and left open.
   */
  onStepStart = (event: any): void => {
    const state = this.getState(event.callId);
    if (!state) return;

    if (state.stepSpan) {
      state.stepSpan.end();
      state.stepSpan = undefined;
    }

    const stepNumber: number = event.stepNumber ?? 0;
    const { model: modelId, provider } = extractModelInfo(event);

    state.stepSpan = withCurrent(state.rootSpan, () =>
      startSpan({
        name: `step-${stepNumber}`,
        spanAttributes: {
          type: SpanTypeAttribute.LLM,
        },
        event: {
          input: buildStepInput(event),
          metadata: {
            model: modelId,
            ...(provider ? { provider } : {}),
            stepNumber,
          },
        },
      }),
    );
  };

  /**
   * Called when a tool execution begins.
   *
   * Starts a tool span under the current step span (or the root span) and
   * remembers it by `toolCallId` so `onToolCallFinish` / `executeTool` can
   * find it.
   */
  onToolCallStart = (event: any): void => {
    const state = this.getState(event.callId);
    if (!state) return;

    const parentSpan = state.stepSpan ?? state.rootSpan;
    const toolCall = event.toolCall;
    const toolName = toolCall?.toolName ?? "unknown-tool";
    const toolCallId = toolCall?.toolCallId;

    const toolSpan = withCurrent(parentSpan, () =>
      startSpan({
        name: toolName,
        spanAttributes: {
          type: SpanTypeAttribute.TOOL,
        },
        event: {
          input: toolCall?.input,
          metadata: {
            toolCallId,
          },
        },
      }),
    );

    if (toolCallId) {
      state.toolSpans.set(toolCallId, toolSpan);
    } else {
      // Without an id the finish event cannot be matched to this span, so
      // end it now instead of leaving it open forever.
      toolSpan.end();
    }
  };

  /**
   * Called when a tool execution completes.
   *
   * Logs the tool output (on success) or the serialized error (on failure),
   * plus the duration in seconds when `durationMs` is a finite number, then
   * ends and forgets the tool span.
   */
  onToolCallFinish = (event: any): void => {
    const state = this.getState(event.callId);
    if (!state) return;

    const toolCallId = event.toolCall?.toolCallId;
    if (!toolCallId) return;

    const toolSpan = state.toolSpans.get(toolCallId);
    if (!toolSpan) return;

    // A missing/non-numeric durationMs would otherwise be logged as NaN.
    const hasDuration =
      typeof event.durationMs === "number" &&
      Number.isFinite(event.durationMs);
    const metrics = hasDuration
      ? { metrics: { duration: event.durationMs / 1000 } }
      : {};

    if (event.success) {
      toolSpan.log({ output: event.output, ...metrics });
    } else {
      toolSpan.log({ error: serializeError(event.error), ...metrics });
    }

    toolSpan.end();
    state.toolSpans.delete(toolCallId);
  };

  /**
   * Called for each streaming chunk (streamText only).
   *
   * Only `ai.stream.firstChunk` markers carrying a string `callId` are acted
   * on: the first one per call logs `time_to_first_token` (seconds elapsed
   * since `onStart`) on the current step span, or the root span if no step
   * is open. All other chunks are ignored.
   */
  onChunk = (event: any): void => {
    const chunk = event.chunk;
    if (!chunk) return;

    // Handle stream timing markers
    if (
      chunk.type !== "ai.stream.firstChunk" ||
      typeof chunk.callId !== "string"
    ) {
      return;
    }

    const state = this.getState(chunk.callId);
    if (!state || state.receivedFirstChunk) return;

    state.receivedFirstChunk = true;
    const parentSpan = state.stepSpan ?? state.rootSpan;
    parentSpan.log({
      metrics: {
        time_to_first_token: (Date.now() - state.startTime) / 1000,
      },
    });
  };

  /**
   * Called when a step (single LLM invocation) completes.
   *
   * Logs the step output and usage metrics on the open step span and ends it.
   * Does nothing when no step span is open.
   */
  onStepFinish = (event: any): void => {
    const state = this.getState(event.callId);
    if (!state?.stepSpan) return;

    state.stepSpan.log({
      output: buildStepOutput(event),
      metrics: usageToMetrics(event.usage),
    });
    state.stepSpan.end();
    state.stepSpan = undefined;
  };

  /**
   * Called when the entire operation completes.
   *
   * Ends any step/tool spans still open, logs the final output and usage
   * metrics (`totalUsage`, falling back to `usage`) on the root span, ends
   * it, and removes the call state.
   */
  onFinish = (event: any): void => {
    const state = this.getState(event.callId);
    if (!state) return;

    // Close any remaining step span
    if (state.stepSpan) {
      state.stepSpan.end();
      state.stepSpan = undefined;
    }

    // Close any remaining tool spans
    for (const toolSpan of state.toolSpans.values()) {
      toolSpan.end();
    }
    state.toolSpans.clear();

    // Log output and metrics on the root span
    const totalUsage = event.totalUsage ?? event.usage;
    state.rootSpan.log({
      output: buildFinishOutput(event),
      metrics: usageToMetrics(totalUsage),
    });

    state.rootSpan.end();
    this.cleanup(event.callId);
  };

  /**
   * Called when an unrecoverable error occurs.
   *
   * The argument is treated as an object that may carry `callId` and `error`.
   * Without a `callId` that maps to tracked state nothing can be attributed,
   * so the call is a no-op. Otherwise the error is logged on every open span
   * of that call (step, tools, root), each span is ended, and the call state
   * is removed.
   */
  onError = (error: unknown): void => {
    // The error event may be the error itself or an object with callId + error
    const event = error as { callId?: string; error?: unknown };
    const callId = event?.callId;
    if (!callId) return;

    const state = this.getState(callId);
    if (!state) return;

    const serialized = serializeError(event.error ?? error);

    // Close step span with error
    if (state.stepSpan) {
      state.stepSpan.log({ error: serialized });
      state.stepSpan.end();
      state.stepSpan = undefined;
    }

    // Close tool spans
    for (const toolSpan of state.toolSpans.values()) {
      toolSpan.log({ error: serialized });
      toolSpan.end();
    }
    state.toolSpans.clear();

    // Close root span with error
    state.rootSpan.log({ error: serialized });
    state.rootSpan.end();
    this.cleanup(callId);
  };

  /**
   * Runs tool execution within the Braintrust span context, enabling
   * nested traces when a tool's execute function calls generateText/streamText.
   *
   * When no tool span is tracked for `callId` / `toolCallId`, `execute` is
   * invoked directly with no span context applied.
   *
   * @param params.callId - Identifier of the enclosing AI SDK call.
   * @param params.toolCallId - Identifier of the tool call being executed.
   * @param params.execute - The tool's execution thunk.
   * @returns Whatever `execute` resolves to.
   */
  executeTool = async <T>(params: {
    callId: string;
    toolCallId: string;
    execute: () => PromiseLike<T>;
  }): Promise<T> => {
    const toolSpan = this.getState(params.callId)?.toolSpans.get(
      params.toolCallId,
    );
    if (!toolSpan) return params.execute();

    return withCurrent(toolSpan, () => params.execute());
  };
}

/**
 * Extracts model ID and provider from an event object.
 *
 * @param event - Any lifecycle event; reads `event.modelId` and
 *   `event.provider`.
 * @returns Both fields, with `null` normalized to `undefined`.
 */
function extractModelInfo(event: any): {
  model: string | undefined;
  provider: string | undefined;
} {
  return {
    model: event.modelId ?? undefined,
    provider: event.provider ?? undefined,
  };
}