diff --git a/dev-packages/node-integration-tests/suites/tracing/anthropic/test.ts b/dev-packages/node-integration-tests/suites/tracing/anthropic/test.ts index 182f4d4ee8c5..719333488051 100644 --- a/dev-packages/node-integration-tests/suites/tracing/anthropic/test.ts +++ b/dev-packages/node-integration-tests/suites/tracing/anthropic/test.ts @@ -266,7 +266,7 @@ describe('Anthropic integration', () => { [SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'gen_ai.chat', [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.ai.anthropic', }), - description: 'chat claude-3-haiku-20240307 stream-response', + description: 'chat claude-3-haiku-20240307', op: 'gen_ai.chat', origin: 'auto.ai.anthropic', status: 'ok', @@ -296,7 +296,7 @@ describe('Anthropic integration', () => { [GEN_AI_REQUEST_MODEL_ATTRIBUTE]: 'claude-3-haiku-20240307', [GEN_AI_REQUEST_STREAM_ATTRIBUTE]: true, }), - description: 'chat claude-3-haiku-20240307 stream-response', + description: 'chat claude-3-haiku-20240307', op: 'gen_ai.chat', origin: 'auto.ai.anthropic', status: 'ok', @@ -401,7 +401,7 @@ describe('Anthropic integration', () => { spans: expect.arrayContaining([ // messages.create with stream: true expect.objectContaining({ - description: 'chat claude-3-haiku-20240307 stream-response', + description: 'chat claude-3-haiku-20240307', op: 'gen_ai.chat', data: expect.objectContaining({ [GEN_AI_SYSTEM_ATTRIBUTE]: 'anthropic', @@ -419,7 +419,7 @@ describe('Anthropic integration', () => { }), // messages.stream expect.objectContaining({ - description: 'chat claude-3-haiku-20240307 stream-response', + description: 'chat claude-3-haiku-20240307', op: 'gen_ai.chat', data: expect.objectContaining({ [GEN_AI_SYSTEM_ATTRIBUTE]: 'anthropic', @@ -435,7 +435,7 @@ describe('Anthropic integration', () => { }), // messages.stream with redundant stream: true param expect.objectContaining({ - description: 'chat claude-3-haiku-20240307 stream-response', + description: 'chat claude-3-haiku-20240307', op: 'gen_ai.chat', data: 
expect.objectContaining({ [GEN_AI_SYSTEM_ATTRIBUTE]: 'anthropic', @@ -457,7 +457,7 @@ describe('Anthropic integration', () => { transaction: 'main', spans: expect.arrayContaining([ expect.objectContaining({ - description: 'chat claude-3-haiku-20240307 stream-response', + description: 'chat claude-3-haiku-20240307', op: 'gen_ai.chat', data: expect.objectContaining({ [GEN_AI_RESPONSE_STREAMING_ATTRIBUTE]: true, @@ -466,7 +466,7 @@ describe('Anthropic integration', () => { }), }), expect.objectContaining({ - description: 'chat claude-3-haiku-20240307 stream-response', + description: 'chat claude-3-haiku-20240307', op: 'gen_ai.chat', data: expect.objectContaining({ [GEN_AI_RESPONSE_STREAMING_ATTRIBUTE]: true, @@ -474,7 +474,7 @@ describe('Anthropic integration', () => { }), }), expect.objectContaining({ - description: 'chat claude-3-haiku-20240307 stream-response', + description: 'chat claude-3-haiku-20240307', op: 'gen_ai.chat', data: expect.objectContaining({ [GEN_AI_RESPONSE_STREAMING_ATTRIBUTE]: true, @@ -536,7 +536,7 @@ describe('Anthropic integration', () => { transaction: { spans: expect.arrayContaining([ expect.objectContaining({ - description: expect.stringContaining('stream-response'), + description: 'chat claude-3-haiku-20240307', op: 'gen_ai.chat', data: expect.objectContaining({ [GEN_AI_REQUEST_AVAILABLE_TOOLS_ATTRIBUTE]: EXPECTED_TOOLS_JSON, @@ -557,7 +557,7 @@ describe('Anthropic integration', () => { spans: expect.arrayContaining([ // Error with messages.create on stream initialization expect.objectContaining({ - description: 'chat error-stream-init stream-response', + description: 'chat error-stream-init', op: 'gen_ai.chat', status: 'internal_error', // Actual status coming from the instrumentation data: expect.objectContaining({ @@ -567,7 +567,7 @@ describe('Anthropic integration', () => { }), // Error with messages.stream on stream initialization expect.objectContaining({ - description: 'chat error-stream-init stream-response', + description: 'chat 
error-stream-init', op: 'gen_ai.chat', status: 'internal_error', // Actual status coming from the instrumentation data: expect.objectContaining({ @@ -577,7 +577,7 @@ describe('Anthropic integration', () => { // Error midway with messages.create on streaming - note: The stream is started successfully // so we get a successful span with the content that was streamed before the error expect.objectContaining({ - description: 'chat error-stream-midway stream-response', + description: 'chat error-stream-midway', op: 'gen_ai.chat', status: 'ok', data: expect.objectContaining({ @@ -589,7 +589,7 @@ describe('Anthropic integration', () => { }), // Error midway with messages.stream - same behavior, we get a span with the streamed data expect.objectContaining({ - description: 'chat error-stream-midway stream-response', + description: 'chat error-stream-midway', op: 'gen_ai.chat', status: 'ok', data: expect.objectContaining({ @@ -731,7 +731,7 @@ describe('Anthropic integration', () => { source: { type: 'base64', media_type: 'image/png', - data: '[Filtered]', + data: '[Blob substitute]', }, }, ], diff --git a/dev-packages/node-integration-tests/suites/tracing/google-genai/test.ts b/dev-packages/node-integration-tests/suites/tracing/google-genai/test.ts index 89130a7eb425..993984cc6b3d 100644 --- a/dev-packages/node-integration-tests/suites/tracing/google-genai/test.ts +++ b/dev-packages/node-integration-tests/suites/tracing/google-genai/test.ts @@ -272,7 +272,7 @@ describe('Google GenAI integration', () => { [GEN_AI_USAGE_OUTPUT_TOKENS_ATTRIBUTE]: 10, [GEN_AI_USAGE_TOTAL_TOKENS_ATTRIBUTE]: 22, }), - description: 'generate_content gemini-2.0-flash-001 stream-response', + description: 'generate_content gemini-2.0-flash-001', op: 'gen_ai.generate_content', origin: 'auto.ai.google_genai', status: 'ok', @@ -327,7 +327,7 @@ describe('Google GenAI integration', () => { [GEN_AI_USAGE_OUTPUT_TOKENS_ATTRIBUTE]: 12, [GEN_AI_USAGE_TOTAL_TOKENS_ATTRIBUTE]: 22, }), - description: 
'generate_content gemini-1.5-flash stream-response', + description: 'generate_content gemini-1.5-flash', op: 'gen_ai.generate_content', origin: 'auto.ai.google_genai', status: 'ok', @@ -361,7 +361,7 @@ describe('Google GenAI integration', () => { [GEN_AI_RESPONSE_ID_ATTRIBUTE]: 'mock-response-streaming-id', [GEN_AI_RESPONSE_MODEL_ATTRIBUTE]: 'gemini-1.5-pro', }), - description: 'chat gemini-1.5-pro stream-response', + description: 'chat gemini-1.5-pro', op: 'gen_ai.chat', origin: 'auto.ai.google_genai', status: 'ok', @@ -373,7 +373,7 @@ describe('Google GenAI integration', () => { [SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'gen_ai.generate_content', [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.ai.google_genai', }), - description: 'generate_content blocked-model stream-response', + description: 'generate_content blocked-model', op: 'gen_ai.generate_content', origin: 'auto.ai.google_genai', status: 'internal_error', @@ -385,7 +385,7 @@ describe('Google GenAI integration', () => { [SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'gen_ai.generate_content', [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.ai.google_genai', }), - description: 'generate_content error-model stream-response', + description: 'generate_content error-model', op: 'gen_ai.generate_content', origin: 'auto.ai.google_genai', status: 'internal_error', @@ -416,7 +416,7 @@ describe('Google GenAI integration', () => { [GEN_AI_USAGE_OUTPUT_TOKENS_ATTRIBUTE]: 12, [GEN_AI_USAGE_TOTAL_TOKENS_ATTRIBUTE]: 22, }), - description: 'generate_content gemini-1.5-flash stream-response', + description: 'generate_content gemini-1.5-flash', op: 'gen_ai.generate_content', origin: 'auto.ai.google_genai', status: 'ok', @@ -455,7 +455,7 @@ describe('Google GenAI integration', () => { [GEN_AI_USAGE_OUTPUT_TOKENS_ATTRIBUTE]: 12, [GEN_AI_USAGE_TOTAL_TOKENS_ATTRIBUTE]: 22, }), - description: 'chat gemini-1.5-pro stream-response', + description: 'chat gemini-1.5-pro', op: 'gen_ai.chat', origin: 'auto.ai.google_genai', status: 'ok', @@ -472,7 +472,7 @@ 
describe('Google GenAI integration', () => { [GEN_AI_INPUT_MESSAGES_ATTRIBUTE]: expect.any(String), // Should include contents when recordInputs: true [GEN_AI_RESPONSE_STREAMING_ATTRIBUTE]: true, }), - description: 'generate_content blocked-model stream-response', + description: 'generate_content blocked-model', op: 'gen_ai.generate_content', origin: 'auto.ai.google_genai', status: 'internal_error', @@ -488,7 +488,7 @@ describe('Google GenAI integration', () => { [GEN_AI_REQUEST_TEMPERATURE_ATTRIBUTE]: 0.7, [GEN_AI_INPUT_MESSAGES_ATTRIBUTE]: expect.any(String), // Should include contents when recordInputs: true }), - description: 'generate_content error-model stream-response', + description: 'generate_content error-model', op: 'gen_ai.generate_content', origin: 'auto.ai.google_genai', status: 'internal_error', diff --git a/dev-packages/node-integration-tests/suites/tracing/openai/openai-tool-calls/test.ts b/dev-packages/node-integration-tests/suites/tracing/openai/openai-tool-calls/test.ts index b2189f993b2b..5c61ec320c57 100644 --- a/dev-packages/node-integration-tests/suites/tracing/openai/openai-tool-calls/test.ts +++ b/dev-packages/node-integration-tests/suites/tracing/openai/openai-tool-calls/test.ts @@ -133,7 +133,7 @@ describe('OpenAI Tool Calls integration', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 25, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 15, }, - description: 'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', @@ -187,7 +187,7 @@ describe('OpenAI Tool Calls integration', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 12, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 8, }, - description: 'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', @@ -254,7 +254,7 @@ describe('OpenAI Tool Calls integration', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 25, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 15, }, - description: 
'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', @@ -314,7 +314,7 @@ describe('OpenAI Tool Calls integration', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 12, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 8, }, - description: 'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', diff --git a/dev-packages/node-integration-tests/suites/tracing/openai/scenario-vision.mjs b/dev-packages/node-integration-tests/suites/tracing/openai/scenario-vision.mjs new file mode 100644 index 000000000000..00dd173f7b49 --- /dev/null +++ b/dev-packages/node-integration-tests/suites/tracing/openai/scenario-vision.mjs @@ -0,0 +1,101 @@ +import * as Sentry from '@sentry/node'; +import express from 'express'; +import OpenAI from 'openai'; + +function startMockServer() { + const app = express(); + app.use(express.json({ limit: '10mb' })); + + app.post('/openai/chat/completions', (req, res) => { + res.send({ + id: 'chatcmpl-vision-123', + object: 'chat.completion', + created: 1677652288, + model: req.body.model, + choices: [ + { + index: 0, + message: { + role: 'assistant', + content: 'I see a red square in the image.', + }, + finish_reason: 'stop', + }, + ], + usage: { + prompt_tokens: 50, + completion_tokens: 10, + total_tokens: 60, + }, + }); + }); + + return new Promise(resolve => { + const server = app.listen(0, () => { + resolve(server); + }); + }); +} + +// Small 10x10 red PNG image encoded as base64 +const RED_PNG_BASE64 = + 'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8z8BQDwADhQGAWjR9awAAAABJRU5ErkJggg=='; + +async function run() { + const server = await startMockServer(); + + await Sentry.startSpan({ op: 'function', name: 'main' }, async () => { + const client = new OpenAI({ + baseURL: `http://localhost:${server.address().port}/openai`, + apiKey: 'mock-api-key', + }); + + // Vision request with inline base64 image + await 
client.chat.completions.create({ + model: 'gpt-4o', + messages: [ + { + role: 'user', + content: [ + { type: 'text', text: 'What is in this image?' }, + { + type: 'image_url', + image_url: { + url: `data:image/png;base64,${RED_PNG_BASE64}`, + }, + }, + ], + }, + ], + }); + + // Vision request with multiple images (one inline, one URL) + await client.chat.completions.create({ + model: 'gpt-4o', + messages: [ + { + role: 'user', + content: [ + { type: 'text', text: 'Compare these images' }, + { + type: 'image_url', + image_url: { + url: `data:image/png;base64,${RED_PNG_BASE64}`, + }, + }, + { + type: 'image_url', + image_url: { + url: 'https://example.com/image.png', + }, + }, + ], + }, + ], + }); + }); + + server.close(); +} + +run(); diff --git a/dev-packages/node-integration-tests/suites/tracing/openai/test.ts b/dev-packages/node-integration-tests/suites/tracing/openai/test.ts index 5753a494fa31..9e7a1722db11 100644 --- a/dev-packages/node-integration-tests/suites/tracing/openai/test.ts +++ b/dev-packages/node-integration-tests/suites/tracing/openai/test.ts @@ -125,7 +125,7 @@ describe('OpenAI integration', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 18, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 12, }, - description: 'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', @@ -152,7 +152,7 @@ describe('OpenAI integration', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 10, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 6, }, - description: 'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', @@ -167,7 +167,7 @@ describe('OpenAI integration', () => { [SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'gen_ai.chat', [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.ai.openai', }, - description: 'chat error-model stream-response', + description: 'chat error-model', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'internal_error', @@ -283,7 +283,7 @@ 
describe('OpenAI integration', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 18, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 12, }), - description: 'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', @@ -314,7 +314,7 @@ describe('OpenAI integration', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 10, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 6, }), - description: 'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', @@ -331,7 +331,7 @@ describe('OpenAI integration', () => { [SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'gen_ai.chat', [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.ai.openai', }, - description: 'chat error-model stream-response', + description: 'chat error-model', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'internal_error', @@ -983,4 +983,64 @@ describe('OpenAI integration', () => { .completed(); }); }); + + createEsmAndCjsTests(__dirname, 'scenario-vision.mjs', 'instrument-with-pii.mjs', (createRunner, test) => { + test('redacts inline base64 image data in vision requests', async () => { + await createRunner() + .ignore('event') + .expect({ + transaction: { + transaction: 'main', + spans: expect.arrayContaining([ + // Single image vision request + expect.objectContaining({ + data: expect.objectContaining({ + [GEN_AI_OPERATION_NAME_ATTRIBUTE]: 'chat', + [GEN_AI_REQUEST_MODEL_ATTRIBUTE]: 'gpt-4o', + [GEN_AI_INPUT_MESSAGES_ATTRIBUTE]: expect.stringContaining('[Blob substitute]'), + [GEN_AI_INPUT_MESSAGES_ORIGINAL_LENGTH_ATTRIBUTE]: 1, + }), + description: 'chat gpt-4o', + op: 'gen_ai.chat', + status: 'ok', + }), + // Multiple images vision request + expect.objectContaining({ + data: expect.objectContaining({ + [GEN_AI_OPERATION_NAME_ATTRIBUTE]: 'chat', + [GEN_AI_REQUEST_MODEL_ATTRIBUTE]: 'gpt-4o', + [GEN_AI_INPUT_MESSAGES_ATTRIBUTE]: expect.stringContaining('[Blob substitute]'), + 
[GEN_AI_INPUT_MESSAGES_ORIGINAL_LENGTH_ATTRIBUTE]: 1, + }), + description: 'chat gpt-4o', + op: 'gen_ai.chat', + status: 'ok', + }), + ]), + }, + }) + .start() + .completed(); + }); + + test('preserves regular URLs in image_url (does not redact https links)', async () => { + await createRunner() + .ignore('event') + .expect({ + transaction: { + transaction: 'main', + spans: expect.arrayContaining([ + // The second span (multiple images) should still contain the https URL + expect.objectContaining({ + data: expect.objectContaining({ + [GEN_AI_INPUT_MESSAGES_ATTRIBUTE]: expect.stringContaining('https://example.com/image.png'), + }), + }), + ]), + }, + }) + .start() + .completed(); + }); + }); }); diff --git a/dev-packages/node-integration-tests/suites/tracing/openai/v6/test.ts b/dev-packages/node-integration-tests/suites/tracing/openai/v6/test.ts index 0cb07c6eba66..626e53248e66 100644 --- a/dev-packages/node-integration-tests/suites/tracing/openai/v6/test.ts +++ b/dev-packages/node-integration-tests/suites/tracing/openai/v6/test.ts @@ -124,7 +124,7 @@ describe('OpenAI integration (V6)', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 18, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 12, }, - description: 'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', @@ -151,7 +151,7 @@ describe('OpenAI integration (V6)', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 10, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 6, }, - description: 'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', @@ -166,7 +166,7 @@ describe('OpenAI integration (V6)', () => { [SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'gen_ai.chat', [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.ai.openai', }, - description: 'chat error-model stream-response', + description: 'chat error-model', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'internal_error', @@ -278,7 +278,7 @@ describe('OpenAI 
integration (V6)', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 18, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 12, }), - description: 'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', @@ -309,7 +309,7 @@ describe('OpenAI integration (V6)', () => { [OPENAI_USAGE_COMPLETION_TOKENS_ATTRIBUTE]: 10, [OPENAI_USAGE_PROMPT_TOKENS_ATTRIBUTE]: 6, }), - description: 'chat gpt-4 stream-response', + description: 'chat gpt-4', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'ok', @@ -326,7 +326,7 @@ describe('OpenAI integration (V6)', () => { [SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'gen_ai.chat', [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.ai.openai', }, - description: 'chat error-model stream-response', + description: 'chat error-model', op: 'gen_ai.chat', origin: 'auto.ai.openai', status: 'internal_error', diff --git a/packages/core/src/tracing/ai/mediaStripping.ts b/packages/core/src/tracing/ai/mediaStripping.ts new file mode 100644 index 000000000000..f4870cd5a9de --- /dev/null +++ b/packages/core/src/tracing/ai/mediaStripping.ts @@ -0,0 +1,160 @@ +/** + * Inline media content source, with a potentially very large base64 + * blob or data: uri. 
+ */ +export type ContentMedia = Record<string, unknown> & + ( + | { + media_type: string; + data: string; + } + | { + image_url: `data:${string}`; + } + | { + image_url: { url: `data:${string}` }; + } + | { + type: 'blob' | 'base64'; + content: string; + } + | { + b64_json: string; + } + | { + uri: `data:${string}`; + } + | { + type: 'input_audio'; + input_audio: { data: string }; + } + | { + type: 'file'; + file: { file_data?: string }; + } + ); + +/** + * Check if a content part is an OpenAI/Anthropic media source + */ +export function isContentMedia(part: unknown): part is ContentMedia { + if (!part || typeof part !== 'object') return false; + + return ( + isContentMediaSource(part) || + hasInlineData(part) || + hasImageUrl(part) || + hasInputAudio(part) || + hasFileData(part) || + hasMediaTypeData(part) || + hasBlobOrBase64Type(part) || + hasB64Json(part) || + hasImageGenerationResult(part) || + hasDataUri(part) + ); +} + +function hasImageUrl(part: NonNullable<unknown>): boolean { + if (!('image_url' in part)) return false; + if (typeof part.image_url === 'string') return part.image_url.startsWith('data:'); + return hasNestedImageUrl(part); +} + +function hasNestedImageUrl(part: NonNullable<unknown>): part is { image_url: { url: string } } { + return ( + 'image_url' in part && + !!part.image_url && + typeof part.image_url === 'object' && + 'url' in part.image_url && + typeof part.image_url.url === 'string' && + part.image_url.url.startsWith('data:') + ); +} + +function isContentMediaSource(part: NonNullable<unknown>): boolean { + return 'type' in part && typeof part.type === 'string' && 'source' in part && isContentMedia(part.source); +} + +function hasInlineData(part: NonNullable<unknown>): part is { inlineData: { data?: string } } { + return ( + 'inlineData' in part && + !!part.inlineData && + typeof part.inlineData === 'object' && + 'data' in part.inlineData && + typeof part.inlineData.data === 'string' + ); +} + +function hasInputAudio(part: NonNullable<unknown>): part is { type: 'input_audio'; input_audio: { data:
 string } } { + return ( + 'type' in part && + part.type === 'input_audio' && + 'input_audio' in part && + !!part.input_audio && + typeof part.input_audio === 'object' && + 'data' in part.input_audio && + typeof part.input_audio.data === 'string' + ); +} + +function hasFileData(part: NonNullable<unknown>): part is { type: 'file'; file: { file_data: string } } { + return ( + 'type' in part && + part.type === 'file' && + 'file' in part && + !!part.file && + typeof part.file === 'object' && + 'file_data' in part.file && + typeof part.file.file_data === 'string' + ); +} + +function hasMediaTypeData(part: NonNullable<unknown>): part is { media_type: string; data: string } { + return 'media_type' in part && typeof part.media_type === 'string' && 'data' in part; +} + +function hasBlobOrBase64Type(part: NonNullable<unknown>): part is { type: 'blob' | 'base64'; content: string } { + return 'type' in part && (part.type === 'blob' || part.type === 'base64'); +} + +function hasB64Json(part: NonNullable<unknown>): part is { b64_json: string } { + return 'b64_json' in part; +} + +function hasImageGenerationResult(part: NonNullable<unknown>): part is { type: 'image_generation'; result: string } { + return 'type' in part && 'result' in part && part.type === 'image_generation'; +} + +function hasDataUri(part: NonNullable<unknown>): part is { uri: string } { + return 'uri' in part && typeof part.uri === 'string' && part.uri.startsWith('data:'); +} + +const REMOVED_STRING = '[Blob substitute]'; + +const MEDIA_FIELDS = ['image_url', 'data', 'content', 'b64_json', 'result', 'uri'] as const; + +/** + * Replace inline binary data in a single media content part with a placeholder.
+ */ +export function stripInlineMediaFromSingleMessage(part: ContentMedia): ContentMedia { + const strip = { ...part }; + if (isContentMedia(strip.source)) { + strip.source = stripInlineMediaFromSingleMessage(strip.source); + } + if (hasInlineData(part)) { + strip.inlineData = { ...part.inlineData, data: REMOVED_STRING }; + } + if (hasNestedImageUrl(part)) { + strip.image_url = { ...part.image_url, url: REMOVED_STRING }; + } + if (hasInputAudio(part)) { + strip.input_audio = { ...part.input_audio, data: REMOVED_STRING }; + } + if (hasFileData(part)) { + strip.file = { ...part.file, file_data: REMOVED_STRING }; + } + for (const field of MEDIA_FIELDS) { + if (typeof strip[field] === 'string') strip[field] = REMOVED_STRING; + } + return strip; +} diff --git a/packages/core/src/tracing/ai/messageTruncation.ts b/packages/core/src/tracing/ai/messageTruncation.ts index f5c040892dcf..499d25ee6e47 100644 --- a/packages/core/src/tracing/ai/messageTruncation.ts +++ b/packages/core/src/tracing/ai/messageTruncation.ts @@ -1,3 +1,5 @@ +import { isContentMedia, stripInlineMediaFromSingleMessage } from './mediaStripping'; + /** * Default maximum size in bytes for GenAI messages. * Messages exceeding this limit will be truncated. @@ -23,31 +25,6 @@ type ContentArrayMessage = { }[]; }; -/** - * Inline media content source, with a potentially very large base64 - * blob or data: uri. - */ -type ContentMedia = Record & - ( - | { - media_type: string; - data: string; - } - | { - image_url: `data:${string}`; - } - | { - type: 'blob' | 'base64'; - content: string; - } - | { - b64_json: string; - } - | { - uri: `data:${string}`; - } - ); - /** * Message format used by Google GenAI API. * Parts can be strings or objects with a text property. @@ -85,12 +62,12 @@ const jsonBytes = (value: unknown): number => { }; /** - * Truncate a string to fit within maxBytes when encoded as UTF-8. + * Truncate a string to fit within maxBytes (inclusive) when encoded as UTF-8. 
* Uses binary search for efficiency with multi-byte characters. * * @param text - The string to truncate - * @param maxBytes - Maximum byte length (UTF-8 encoded) - * @returns Truncated string that fits within maxBytes + * @param maxBytes - Maximum byte length (inclusive, UTF-8 encoded) + * @returns Truncated string whose UTF-8 byte length is at most maxBytes */ function truncateTextByBytes(text: string, maxBytes: number): string { if (utf8Bytes(text) <= maxBytes) { @@ -164,36 +141,6 @@ function isContentArrayMessage(message: unknown): message is ContentArrayMessage return message !== null && typeof message === 'object' && 'content' in message && Array.isArray(message.content); } -/** - * Check if a content part is an OpenAI/Anthropic media source - */ -function isContentMedia(part: unknown): part is ContentMedia { - if (!part || typeof part !== 'object') return false; - - return ( - isContentMediaSource(part) || - hasInlineData(part) || - ('media_type' in part && typeof part.media_type === 'string' && 'data' in part) || - ('image_url' in part && typeof part.image_url === 'string' && part.image_url.startsWith('data:')) || - ('type' in part && (part.type === 'blob' || part.type === 'base64')) || - 'b64_json' in part || - ('type' in part && 'result' in part && part.type === 'image_generation') || - ('uri' in part && typeof part.uri === 'string' && part.uri.startsWith('data:')) - ); -} -function isContentMediaSource(part: NonNullable): boolean { - return 'type' in part && typeof part.type === 'string' && 'source' in part && isContentMedia(part.source); -} -function hasInlineData(part: NonNullable): part is { inlineData: { data?: string } } { - return ( - 'inlineData' in part && - !!part.inlineData && - typeof part.inlineData === 'object' && - 'data' in part.inlineData && - typeof part.inlineData.data === 'string' - ); -} - /** * Check if a message has the Google GenAI parts format. 
*/ @@ -318,25 +265,6 @@ function truncateSingleMessage(message: unknown, maxBytes: number): unknown[] { return []; } -const REMOVED_STRING = '[Filtered]'; - -const MEDIA_FIELDS = ['image_url', 'data', 'content', 'b64_json', 'result', 'uri'] as const; - -function stripInlineMediaFromSingleMessage(part: ContentMedia): ContentMedia { - const strip = { ...part }; - if (isContentMedia(strip.source)) { - strip.source = stripInlineMediaFromSingleMessage(strip.source); - } - // google genai inline data blob objects - if (hasInlineData(part)) { - strip.inlineData = { ...part.inlineData, data: REMOVED_STRING }; - } - for (const field of MEDIA_FIELDS) { - if (typeof strip[field] === 'string') strip[field] = REMOVED_STRING; - } - return strip; -} - /** * Strip the inline media from message arrays. * @@ -401,6 +329,11 @@ function truncateMessagesByBytes(messages: unknown[], maxBytes: number): unknown return messages; } + // The result is always a single-element array that callers wrap with + // JSON.stringify([message]), so subtract the 2-byte array wrapper ("[" and "]") + // to ensure the final serialized value stays under the limit. 
+ const effectiveMaxBytes = maxBytes - 2; + // Always keep only the last message const lastMessage = messages[messages.length - 1]; @@ -410,12 +343,12 @@ function truncateMessagesByBytes(messages: unknown[], maxBytes: number): unknown // Check if it fits const messageBytes = jsonBytes(strippedMessage); - if (messageBytes <= maxBytes) { + if (messageBytes <= effectiveMaxBytes) { return stripped; } // Truncate the single message if needed - return truncateSingleMessage(strippedMessage, maxBytes); + return truncateSingleMessage(strippedMessage, effectiveMaxBytes); } /** diff --git a/packages/core/src/tracing/anthropic-ai/index.ts b/packages/core/src/tracing/anthropic-ai/index.ts index 49ed1c3b3354..63ff1be0e52f 100644 --- a/packages/core/src/tracing/anthropic-ai/index.ts +++ b/packages/core/src/tracing/anthropic-ai/index.ts @@ -206,7 +206,7 @@ function handleStreamingRequest( ): R | Promise { const model = requestAttributes[GEN_AI_REQUEST_MODEL_ATTRIBUTE] ?? 'unknown'; const spanConfig = { - name: `${operationName} ${model} stream-response`, + name: `${operationName} ${model}`, op: getSpanOperation(methodPath), attributes: requestAttributes as Record, }; diff --git a/packages/core/src/tracing/google-genai/index.ts b/packages/core/src/tracing/google-genai/index.ts index a56985b9b6f6..7781b67d6db0 100644 --- a/packages/core/src/tracing/google-genai/index.ts +++ b/packages/core/src/tracing/google-genai/index.ts @@ -270,7 +270,7 @@ function instrumentMethod( // Use startSpanManual for streaming methods to control span lifecycle return startSpanManual( { - name: `${operationName} ${model} stream-response`, + name: `${operationName} ${model}`, op: getSpanOperation(methodPath), attributes: requestAttributes, }, diff --git a/packages/core/src/tracing/openai/index.ts b/packages/core/src/tracing/openai/index.ts index 9568567227df..cfbdc5cfb4b1 100644 --- a/packages/core/src/tracing/openai/index.ts +++ b/packages/core/src/tracing/openai/index.ts @@ -262,7 +262,7 @@ function 
instrumentMethod( const isStreamRequested = params && typeof params === 'object' && params.stream === true; const spanConfig = { - name: `${operationName} ${model}${isStreamRequested ? ' stream-response' : ''}`, + name: `${operationName} ${model}`, op: getSpanOperation(methodPath), attributes: requestAttributes as Record, }; diff --git a/packages/core/test/lib/tracing/ai-message-truncation.test.ts b/packages/core/test/lib/tracing/ai-message-truncation.test.ts index 8a8cefaffa5b..c7f8e0043622 100644 --- a/packages/core/test/lib/tracing/ai-message-truncation.test.ts +++ b/packages/core/test/lib/tracing/ai-message-truncation.test.ts @@ -13,7 +13,7 @@ describe('message truncation utilities', () => { it('strips inline media from messages', () => { const b64 = Buffer.from('lots of data\n').toString('base64'); - const removed = '[Filtered]'; + const removed = '[Blob substitute]'; const messages = [ { role: 'user', @@ -148,6 +148,265 @@ describe('message truncation utilities', () => { ]); }); + it('strips OpenAI vision format with nested image_url object', () => { + const b64 = + 'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAQBf9AoL/k2KLAAAAABJRU5ErkJggg=='; + const removed = '[Blob substitute]'; + + const messages = [ + { + role: 'user', + content: [ + { type: 'text', text: 'What is in this image?' }, + { + type: 'image_url', + image_url: { + url: `data:image/png;base64,${b64}`, + }, + }, + ], + }, + ]; + + const messagesJson = JSON.stringify(messages, null, 2); + const result = truncateGenAiMessages(messages); + + // original messages must not be mutated + expect(JSON.stringify(messages, null, 2)).toBe(messagesJson); + + expect(result).toStrictEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'What is in this image?' 
}, + { + type: 'image_url', + image_url: { + url: removed, + }, + }, + ], + }, + ]); + + // Validate no raw base64 leaks + const serialized = JSON.stringify(result); + expect(serialized).not.toMatch(/[A-Za-z0-9+/]{100,}={0,2}/); + expect(serialized).toContain('[Blob substitute]'); + }); + + it('does not redact image_url with regular URL (non-data: scheme)', () => { + const messages = [ + { + role: 'user', + content: [ + { type: 'text', text: 'What is in this image?' }, + { + type: 'image_url', + image_url: { + url: 'https://example.com/image.png', + }, + }, + ], + }, + ]; + + const result = truncateGenAiMessages(messages); + + expect(result).toStrictEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'What is in this image?' }, + { + type: 'image_url', + image_url: { + url: 'https://example.com/image.png', + }, + }, + ], + }, + ]); + }); + + it('strips multiple image parts in a single message', () => { + const b64 = + 'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAQBf9AoL/k2KLAAAAABJRU5ErkJggg=='; + const removed = '[Blob substitute]'; + + const messages = [ + { + role: 'user', + content: [ + { type: 'text', text: 'Compare these images' }, + { + type: 'image_url', + image_url: { url: `data:image/png;base64,${b64}` }, + }, + { + type: 'image_url', + image_url: { url: `data:image/jpeg;base64,${b64}` }, + }, + { + type: 'image_url', + image_url: { url: 'https://example.com/safe.png' }, + }, + ], + }, + ]; + + const result = truncateGenAiMessages(messages); + + expect(result).toStrictEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'Compare these images' }, + { + type: 'image_url', + image_url: { url: removed }, + }, + { + type: 'image_url', + image_url: { url: removed }, + }, + { + type: 'image_url', + image_url: { url: 'https://example.com/safe.png' }, + }, + ], + }, + ]); + }); + + it('strips input_audio data from messages', () => { + const b64Audio = Buffer.from('fake audio data for 
testing').toString('base64'); + const removed = '[Blob substitute]'; + + const messages = [ + { + role: 'user', + content: [ + { type: 'text', text: 'What does this audio say?' }, + { + type: 'input_audio', + input_audio: { + data: b64Audio, + format: 'wav', + }, + }, + ], + }, + ]; + + const messagesJson = JSON.stringify(messages, null, 2); + const result = truncateGenAiMessages(messages); + + expect(JSON.stringify(messages, null, 2)).toBe(messagesJson); + + expect(result).toStrictEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'What does this audio say?' }, + { + type: 'input_audio', + input_audio: { + data: removed, + format: 'wav', + }, + }, + ], + }, + ]); + + const serialized = JSON.stringify(result); + expect(serialized).not.toContain(b64Audio); + expect(serialized).toContain(removed); + }); + + it('strips file_data from file content parts', () => { + const b64File = Buffer.from('fake file content for testing').toString('base64'); + const removed = '[Blob substitute]'; + + const messages = [ + { + role: 'user', + content: [ + { type: 'text', text: 'Summarize this document' }, + { + type: 'file', + file: { + file_data: b64File, + filename: 'document.pdf', + }, + }, + ], + }, + ]; + + const messagesJson = JSON.stringify(messages, null, 2); + const result = truncateGenAiMessages(messages); + + expect(JSON.stringify(messages, null, 2)).toBe(messagesJson); + + expect(result).toStrictEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'Summarize this document' }, + { + type: 'file', + file: { + file_data: removed, + filename: 'document.pdf', + }, + }, + ], + }, + ]); + + const serialized = JSON.stringify(result); + expect(serialized).not.toContain(b64File); + expect(serialized).toContain(removed); + }); + + it('does not redact file parts that only have file_id (no inline data)', () => { + const messages = [ + { + role: 'user', + content: [ + { type: 'text', text: 'Summarize this document' }, + { + type: 'file', + file: { + file_id: 
'file-abc123', + filename: 'document.pdf', + }, + }, + ], + }, + ]; + + const result = truncateGenAiMessages(messages); + + expect(result).toStrictEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'Summarize this document' }, + { + type: 'file', + file: { + file_id: 'file-abc123', + filename: 'document.pdf', + }, + }, + ], + }, + ]); + }); + const humongous = 'this is a long string '.repeat(10_000); const giant = 'this is a long string '.repeat(1_000); const big = 'this is a long string '.repeat(100); @@ -169,7 +428,7 @@ describe('message truncation utilities', () => { it('keeps only the last message with truncation when it does not fit the limit', () => { const messages = [{ content: `1 ${humongous}` }, { content: `2 ${humongous}` }, { content: `3 ${humongous}` }]; const result = truncateGenAiMessages(messages); - const truncLen = 20_000 - JSON.stringify({ content: '' }).length; + const truncLen = 20_000 - 2 - JSON.stringify({ content: '' }).length; expect(result).toStrictEqual([{ content: `3 ${humongous}`.substring(0, truncLen) }]); }); @@ -191,7 +450,7 @@ describe('message truncation utilities', () => { it('truncates if the message content string will not fit', () => { const messages = [{ content: `2 ${humongous}` }]; const result = truncateGenAiMessages(messages); - const truncLen = 20_000 - JSON.stringify({ content: '' }).length; + const truncLen = 20_000 - 2 - JSON.stringify({ content: '' }).length; expect(result).toStrictEqual([{ content: `2 ${humongous}`.substring(0, truncLen) }]); }); @@ -229,6 +488,7 @@ describe('message truncation utilities', () => { // case that seems unlikely in normal usage. 
const truncLen = 20_000 - + 2 - JSON.stringify({ parts: ['', { some_other_field: 'no text here', text: '' }], }).length; @@ -249,6 +509,7 @@ describe('message truncation utilities', () => { const result = truncateGenAiMessages(messages); const truncLen = 20_000 - + 2 - JSON.stringify({ parts: [{ text: '' }], }).length; diff --git a/packages/core/test/lib/utils/anthropic-utils.test.ts b/packages/core/test/lib/utils/anthropic-utils.test.ts index 91a311cc574b..797bb9bc8186 100644 --- a/packages/core/test/lib/utils/anthropic-utils.test.ts +++ b/packages/core/test/lib/utils/anthropic-utils.test.ts @@ -84,7 +84,7 @@ describe('anthropic-ai-utils', () => { it('sets length along with truncated value', () => { const content = 'A'.repeat(200_000); setMessagesAttribute(span, [{ role: 'user', content }]); - const result = [{ role: 'user', content: 'A'.repeat(19972) }]; + const result = [{ role: 'user', content: 'A'.repeat(19970) }]; expect(mock.attributes).toStrictEqual({ 'sentry.sdk_meta.gen_ai.input.messages.original_length': 1, 'gen_ai.input.messages': JSON.stringify(result),