From 6b10f63b5188f0350762cb331e98897cb89feaec Mon Sep 17 00:00:00 2001 From: Yarchik Date: Thu, 25 Jun 2026 18:21:19 +0100 Subject: [PATCH] fix: drop partial multibyte char when write length is too small Buffer#write(string, offset, length, encoding) with an explicit length smaller than the next character's byte length wrote a partial, corrupt multibyte sequence and over-reported the byte count. utf8Write/ucs2Write passed buf.length - offset (the whole remaining buffer) as the byte cap to utf8ToBytes/utf16leToBytes, so a full multibyte char was always generated and then byte-truncated mid-sequence by blitBuffer. Node guarantees partially encoded characters are never written, so the returned byte count covers only complete characters. Pass the effective length as the cap so an incomplete trailing character is dropped, matching node Buffer. --- index.js | 4 ++-- test/write.js | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index bdea6048..fefa6b01 100644 --- a/index.js +++ b/index.js @@ -864,7 +864,7 @@ function hexWrite (buf, string, offset, length) { } function utf8Write (buf, string, offset, length) { - return blitBuffer(utf8ToBytes(string, buf.length - offset), buf, offset, length) + return blitBuffer(utf8ToBytes(string, length), buf, offset, length) } function asciiWrite (buf, string, offset, length) { @@ -876,7 +876,7 @@ function base64Write (buf, string, offset, length) { } function ucs2Write (buf, string, offset, length) { - return blitBuffer(utf16leToBytes(string, buf.length - offset), buf, offset, length) + return blitBuffer(utf16leToBytes(string, length), buf, offset, length) } Buffer.prototype.write = function write (string, offset, length, encoding) { diff --git a/test/write.js b/test/write.js index d132e850..d383ab63 100644 --- a/test/write.js +++ b/test/write.js @@ -122,3 +122,26 @@ test('large values do not improperly roll over (ref #80)', function (t) { t.equal(nums[2], newNum) t.end() }) + 
+test('write with explicit length does not write a partial multibyte char (ref node parity)', function (t) { + // Node guarantees: "partially encoded characters will not be written", and the + // returned byte count covers only complete characters. A length smaller than the + // byte length of the next character must drop that character entirely. + + // 'é' is 2 utf8 bytes (c3 a9); length 1 cannot hold it -> nothing written. + const a = B.alloc(8, 0) + t.equal(a.write('é', 0, 1, 'utf8'), 0, 'é len 1 returns 0') + t.equal(a.toString('hex'), '0000000000000000', 'é len 1 leaves buffer zeroed') + + // '€' is 3 utf8 bytes (e2 82 ac); length 2 cannot hold it -> nothing written. + const b = B.alloc(8, 0) + t.equal(b.write('€', 0, 2, 'utf8'), 0, '€ len 2 returns 0') + t.equal(b.toString('hex'), '0000000000000000', '€ len 2 leaves buffer zeroed') + + // 'ab' utf16le is 4 bytes; length 3 holds one full 2-byte unit only. + const c = B.alloc(8, 0) + t.equal(c.write('ab', 0, 3, 'utf16le'), 2, 'ab len 3 utf16le returns 2') + t.equal(c.toString('hex'), '6100000000000000', 'ab len 3 utf16le writes only the first complete unit') + + t.end() +})