diff --git a/src/lib/isByteLength.js b/src/lib/isByteLength.js index 619d7f604..5b7334594 100644 --- a/src/lib/isByteLength.js +++ b/src/lib/isByteLength.js @@ -12,6 +12,20 @@ export default function isByteLength(str, options) { min = arguments[1]; max = arguments[2]; } - const len = encodeURI(str).split(/%..|./).length - 1; + // encodeURI() throws on unpaired surrogates, so replace any with U+FFFD first. + // A UTF-8 encoder substitutes the same replacement character (3 bytes), which + // keeps the byte count correct while leaving valid surrogate pairs untouched. + const sanitized = str.replace(/[\uD800-\uDFFF]/g, (surrogate, index) => { + const isHighSurrogate = surrogate.charCodeAt(0) <= 0xDBFF; + if (isHighSurrogate) { + const next = str.charCodeAt(index + 1); + if (next >= 0xDC00 && next <= 0xDFFF) return surrogate; + } else { + const prev = str.charCodeAt(index - 1); + if (prev >= 0xD800 && prev <= 0xDBFF) return surrogate; + } + return '\uFFFD'; + }); + const len = encodeURI(sanitized).split(/%..|./).length - 1; return len >= min && (typeof max === 'undefined' || len <= max); } diff --git a/test/validators.test.js b/test/validators.test.js index a4c3d7193..2269b9a9d 100644 --- a/test/validators.test.js +++ b/test/validators.test.js @@ -5757,6 +5757,22 @@ describe('Validators', () => { }); }); + it('should not throw on unpaired surrogates', () => { + // encodeURI() throws a URIError on unpaired surrogates; isByteLength must + // count them (as the U+FFFD replacement character, 3 bytes) rather than crash. + test({ + validator: 'isByteLength', + args: [{ min: 0, max: 3 }], + valid: ['\uD835', '\uDC00', '\uDFFF'], + invalid: ['\uD835\uDB00'], + }); + // isEmail() runs the local part through isByteLength, so it inherited the crash. + test({ + validator: 'isEmail', + invalid: ['\uD835', 'foo\uD835@bar.com'], + }); + }); + it('should validate ULIDs', () => { test({ validator: 'isULID',