diff --git a/packages/core/src/logs/internal.ts b/packages/core/src/logs/internal.ts index 097ffbb6906e..35a037d979b3 100644 --- a/packages/core/src/logs/internal.ts +++ b/packages/core/src/logs/internal.ts @@ -1,3 +1,4 @@ +import type { Attributes } from '../attributes'; import { serializeAttributes } from '../attributes'; import { getGlobalSingleton } from '../carrier'; import type { Client } from '../client'; @@ -161,14 +162,14 @@ export function _INTERNAL_captureLog( const serializedLog: SerializedLog = { timestamp, level, - body: message, + body: _INTERNAL_removeLoneSurrogates(String(message)), trace_id: traceContext?.trace_id, severity_number: severityNumber ?? SEVERITY_TEXT_TO_SEVERITY_NUMBER[level], - attributes: { + attributes: sanitizeLogAttributes({ ...serializeAttributes(scopeAttributes), ...serializeAttributes(logAttributes, true), [sequenceAttr.key]: sequenceAttr.value, - }, + }), }; captureSerializedLog(client, serializedLog); @@ -220,3 +221,43 @@ function _getBufferMap(): WeakMap<Client, Array<SerializedLog>> { // The reference to the Client <> LogBuffer map is stored on the carrier to ensure it's always the same return getGlobalSingleton('clientToLogBufferMap', () => new WeakMap<Client, Array<SerializedLog>>()); } + +/** + * Sanitizes serialized log attributes by replacing lone surrogates in both + * keys and string values with U+FFFD. + */ +function sanitizeLogAttributes(attributes: Attributes): Attributes { + const sanitized: Attributes = {}; + for (const [key, attr] of Object.entries(attributes)) { + const sanitizedKey = _INTERNAL_removeLoneSurrogates(key); + if (attr.type === 'string') { + sanitized[sanitizedKey] = { ...attr, value: _INTERNAL_removeLoneSurrogates(attr.value) }; + } else { + sanitized[sanitizedKey] = attr; + } + } + return sanitized; +} + +/** + * Replaces unpaired UTF-16 surrogates with U+FFFD (replacement character). 
+ * + * Lone surrogates (U+D800–U+DFFF not part of a valid pair) cause `serde_json` + * on the server to reject the entire log/span batch when they appear in + * JSON-escaped form (e.g. `\uD800`). Replacing them at the SDK level ensures + * only the offending characters are lost instead of the whole payload. + * + * Uses the native `String.prototype.toWellFormed()` when available + * (Node 20+, Chrome 111+, Safari 16.4+, Firefox 119+, Hermes). + * On older runtimes without native support, returns the string as-is. + */ +export function _INTERNAL_removeLoneSurrogates(str: string): string { + // isWellFormed/toWellFormed are ES2024 (not in our TS lib target), so we feature-detect via Object(). + const strObj: Record<string, unknown> = Object(str); + const isWellFormed = strObj['isWellFormed']; + const toWellFormed = strObj['toWellFormed']; + if (typeof isWellFormed === 'function' && typeof toWellFormed === 'function') { + return isWellFormed.call(str) ? str : toWellFormed.call(str); + } + return str; +} diff --git a/packages/core/test/lib/logs/internal.test.ts b/packages/core/test/lib/logs/internal.test.ts index 360485f5ca84..5731e827e238 100644 --- a/packages/core/test/lib/logs/internal.test.ts +++ b/packages/core/test/lib/logs/internal.test.ts @@ -1,6 +1,11 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; import { fmt, Scope } from '../../../src'; -import { _INTERNAL_captureLog, _INTERNAL_flushLogsBuffer, _INTERNAL_getLogBuffer } from '../../../src/logs/internal'; +import { + _INTERNAL_captureLog, + _INTERNAL_flushLogsBuffer, + _INTERNAL_getLogBuffer, + _INTERNAL_removeLoneSurrogates, +} from '../../../src/logs/internal'; import type { Log } from '../../../src/types-hoist/log'; import * as loggerModule from '../../../src/utils/debug-logger'; import * as timeModule from '../../../src/utils/time'; @@ -1261,4 +1266,158 @@ describe('_INTERNAL_captureLog', () => { expect(buffer2?.[0]?.attributes?.['sentry.timestamp.sequence']).toEqual({ value: 0, type: 'integer' }); }); 
}); + + // toWellFormed() is only available in Node 20+, Chrome 111+, Safari 16.4+, Firefox 119+, Hermes + const hasToWellFormed = typeof ''.isWellFormed === 'function'; + + describe.runIf(hasToWellFormed)('lone surrogate sanitization', () => { + it('sanitizes lone surrogates in log message body', () => { + const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true }); + const client = new TestClient(options); + const scope = new Scope(); + scope.setClient(client); + + _INTERNAL_captureLog({ level: 'error', message: 'bad surrogate \uD800 here' }, scope); + + const logBuffer = _INTERNAL_getLogBuffer(client); + expect(logBuffer?.[0]?.body).toBe('bad surrogate \uFFFD here'); + }); + + it('sanitizes lone surrogates in parameterized (fmt) log message body', () => { + const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true }); + const client = new TestClient(options); + const scope = new Scope(); + scope.setClient(client); + + const badValue = 'bad\uD800value'; + _INTERNAL_captureLog({ level: 'error', message: fmt`parameterized ${badValue} message` }, scope); + + const logBuffer = _INTERNAL_getLogBuffer(client); + expect(logBuffer?.[0]?.body).toBe('parameterized bad\uFFFDvalue message'); + }); + + it('sanitizes lone surrogates in log attribute values', () => { + const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true }); + const client = new TestClient(options); + const scope = new Scope(); + scope.setClient(client); + + _INTERNAL_captureLog( + { + level: 'error', + message: 'test', + attributes: { bad: '{"a":"\uD800"}' }, + }, + scope, + ); + + const logBuffer = _INTERNAL_getLogBuffer(client); + expect(logBuffer?.[0]?.attributes?.['bad']).toEqual({ + value: '{"a":"\uFFFD"}', + type: 'string', + }); + }); + + it('sanitizes lone surrogates in log attribute keys', () => { + const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true }); + const client = new TestClient(options); + const 
scope = new Scope(); + scope.setClient(client); + + _INTERNAL_captureLog( + { + level: 'error', + message: 'test', + attributes: { ['bad\uD800key']: 'value' }, + }, + scope, + ); + + const logBuffer = _INTERNAL_getLogBuffer(client); + expect(logBuffer?.[0]?.attributes?.['bad\uFFFDkey']).toEqual({ + value: 'value', + type: 'string', + }); + }); + + it('preserves valid emoji in log messages and attributes', () => { + const options = getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true }); + const client = new TestClient(options); + const scope = new Scope(); + scope.setClient(client); + + _INTERNAL_captureLog( + { + level: 'info', + message: 'hello 😀 world', + attributes: { emoji: '🎉 party' }, + }, + scope, + ); + + const logBuffer = _INTERNAL_getLogBuffer(client); + expect(logBuffer?.[0]?.body).toBe('hello 😀 world'); + expect(logBuffer?.[0]?.attributes?.['emoji']).toEqual({ + value: '🎉 party', + type: 'string', + }); + }); + }); +}); + +// toWellFormed() is only available in Node 20+, Chrome 111+, Safari 16.4+, Firefox 119+, Hermes +const hasToWellFormedGlobal = typeof ''.isWellFormed === 'function'; + +describe('_INTERNAL_removeLoneSurrogates', () => { + it('returns the same string when there are no surrogates', () => { + expect(_INTERNAL_removeLoneSurrogates('hello world')).toBe('hello world'); + }); + + it('returns the same string for empty input', () => { + expect(_INTERNAL_removeLoneSurrogates('')).toBe(''); + }); + + it('preserves valid surrogate pairs (emoji)', () => { + expect(_INTERNAL_removeLoneSurrogates('hello 😀 world')).toBe('hello 😀 world'); + }); + + it.runIf(hasToWellFormedGlobal)('replaces a lone high surrogate with U+FFFD', () => { + expect(_INTERNAL_removeLoneSurrogates('before\uD800after')).toBe('before\uFFFDafter'); + }); + + it.runIf(hasToWellFormedGlobal)('replaces a lone low surrogate with U+FFFD', () => { + expect(_INTERNAL_removeLoneSurrogates('before\uDC00after')).toBe('before\uFFFDafter'); + }); + + 
it.runIf(hasToWellFormedGlobal)('replaces lone high surrogate at end of string', () => { + expect(_INTERNAL_removeLoneSurrogates('end\uD800')).toBe('end\uFFFD'); + }); + + it.runIf(hasToWellFormedGlobal)('replaces lone low surrogate at start of string', () => { + expect(_INTERNAL_removeLoneSurrogates('\uDC00start')).toBe('\uFFFDstart'); + }); + + it.runIf(hasToWellFormedGlobal)('replaces multiple lone surrogates', () => { + expect(_INTERNAL_removeLoneSurrogates('\uD800\uD801\uDC00')).toBe('\uFFFD\uD801\uDC00'); + }); + + it.runIf(hasToWellFormedGlobal)('handles two consecutive lone high surrogates', () => { + expect(_INTERNAL_removeLoneSurrogates('\uD800\uD800')).toBe('\uFFFD\uFFFD'); + }); + + it.runIf(hasToWellFormedGlobal)('handles mixed valid pairs and lone surrogates', () => { + expect(_INTERNAL_removeLoneSurrogates('\uD83D\uDE00\uD800')).toBe('😀\uFFFD'); + }); + + it.runIf(hasToWellFormedGlobal)('handles the exact reproduction case from issue #5186', () => { + const badValue = '{"a":"\uD800"}'; + const result = _INTERNAL_removeLoneSurrogates(badValue); + expect(result).toBe('{"a":"\uFFFD"}'); + expect(() => JSON.parse(result)).not.toThrow(); + }); + + it('returns the string as-is when toWellFormed is not available', () => { + // Verify the function doesn't throw regardless of runtime support + expect(_INTERNAL_removeLoneSurrogates('normal string')).toBe('normal string'); + }); });