From 9492b89bc50eefbefca0ad0034cda1c42244cae1 Mon Sep 17 00:00:00 2001
From: Sarath Francis
Date: Mon, 8 Jun 2026 03:11:19 -0400
Subject: [PATCH] fix(isByteLength): handle unpaired surrogates instead of throwing

isByteLength() computes the byte count with encodeURI(), which throws a
URIError on a string containing an unpaired surrogate (e.g. '\uD835').
isEmail() runs the local part through isByteLength(), so it threw too,
while every other validator just returns false on such input.

Replace unpaired surrogates with U+FFFD (3 UTF-8 bytes, the same
substitution a UTF-8 encoder makes) before encoding, leaving valid
surrogate pairs untouched so the byte count is unchanged for them.
---
 src/lib/isByteLength.js | 16 +++++++++++++++-
 test/validators.test.js | 16 ++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/lib/isByteLength.js b/src/lib/isByteLength.js
index 619d7f604..5b7334594 100644
--- a/src/lib/isByteLength.js
+++ b/src/lib/isByteLength.js
@@ -12,6 +12,20 @@ export default function isByteLength(str, options) {
     min = arguments[1];
     max = arguments[2];
   }
-  const len = encodeURI(str).split(/%..|./).length - 1;
+  // encodeURI() throws on unpaired surrogates, so replace any with U+FFFD first.
+  // A UTF-8 encoder substitutes the same replacement character (3 bytes), which
+  // keeps the byte count correct while leaving valid surrogate pairs untouched.
+  const sanitized = str.replace(/[\uD800-\uDFFF]/g, (surrogate, index) => {
+    const isHighSurrogate = surrogate.charCodeAt(0) <= 0xDBFF;
+    if (isHighSurrogate) {
+      const next = str.charCodeAt(index + 1);
+      if (next >= 0xDC00 && next <= 0xDFFF) return surrogate;
+    } else {
+      const prev = str.charCodeAt(index - 1);
+      if (prev >= 0xD800 && prev <= 0xDBFF) return surrogate;
+    }
+    return '\uFFFD';
+  });
+  const len = encodeURI(sanitized).split(/%..|./).length - 1;
   return len >= min && (typeof max === 'undefined' || len <= max);
 }
diff --git a/test/validators.test.js b/test/validators.test.js
index a4c3d7193..2269b9a9d 100644
--- a/test/validators.test.js
+++ b/test/validators.test.js
@@ -5757,6 +5757,22 @@ describe('Validators', () => {
     });
   });
 
+  it('should not throw on unpaired surrogates', () => {
+    // encodeURI() throws a URIError on unpaired surrogates; isByteLength must
+    // count them (as the U+FFFD replacement character, 3 bytes) rather than crash.
+    test({
+      validator: 'isByteLength',
+      args: [{ min: 0, max: 3 }],
+      valid: ['\uD835', '\uDC00', '\uDFFF'],
+      invalid: ['\uD835\uDB00'],
+    });
+    // isEmail() runs the local part through isByteLength, so it inherited the crash.
+    test({
+      validator: 'isEmail',
+      invalid: ['\uD835', 'foo\uD835@bar.com'],
+    });
+  });
+
   it('should validate ULIDs', () => {
     test({
       validator: 'isULID',