From 6261c8633a6b1169d9da1e90c0fee13916fe2897 Mon Sep 17 00:00:00 2001 From: Stefan Zetzsche Date: Thu, 26 Feb 2026 16:21:30 +0000 Subject: [PATCH 1/3] gh-145261: Fix ShareableList corruption for multi-byte strings and bytes with trailing nulls ShareableList had two bugs: 1. Used character count len(item) instead of byte count len(item.encode('utf-8')) for string slot allocation, causing UnicodeDecodeError with multi-byte UTF-8 characters. 2. Used rstrip(b'\x00') to recover bytes values, which stripped legitimate trailing null bytes. Fix uses UTF-8 byte length for string allocation and stores the actual byte length in the format metadata for bytes values, so retrieval reads exactly the right number of bytes without needing rstrip. --- Lib/multiprocessing/shared_memory.py | 14 ++++++++++---- Lib/test/_test_multiprocessing.py | 5 ++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Lib/multiprocessing/shared_memory.py b/Lib/multiprocessing/shared_memory.py index 99a8ce3320ad4e..0e1aca0783e2b9 100644 --- a/Lib/multiprocessing/shared_memory.py +++ b/Lib/multiprocessing/shared_memory.py @@ -287,7 +287,7 @@ class ShareableList: _back_transforms_mapping = { 0: lambda value: value, # int, float, bool 1: lambda value: value.rstrip(b'\x00').decode(_encoding), # str - 2: lambda value: value.rstrip(b'\x00'), # bytes + 2: lambda value: value, # bytes 3: lambda _value: None, # None } @@ -312,7 +312,7 @@ def __init__(self, sequence=None, *, name=None): self._types_mapping[type(item)] if not isinstance(item, (str, bytes)) else self._types_mapping[type(item)] % ( - self._alignment * (len(item) // self._alignment + 1), + self._alignment * (len(item.encode('utf-8') if isinstance(item, str) else item) // self._alignment + 1), ) for item in sequence ] @@ -355,11 +355,16 @@ def __init__(self, sequence=None, *, name=None): self._offset_data_start, *(v.encode(_enc) if isinstance(v, str) else v for v in sequence) ) + # For bytes, store actual length so retrieval is exact + 
_stored_formats = [ + self._types_mapping[bytes] % (len(v),) if isinstance(v, bytes) else f + for v, f in zip(sequence, _formats) + ] struct.pack_into( self._format_packing_metainfo, self.shm.buf, self._offset_packing_formats, - *(v.encode(_enc) for v in _formats) + *(v.encode(_enc) for v in _stored_formats) ) struct.pack_into( self._format_back_transform_codes, @@ -476,7 +481,8 @@ def __setitem__(self, position, value): self._set_packing_format_and_transform( position, - new_format, + self._types_mapping[bytes] % (len(encoded_value),) + if isinstance(value, bytes) else new_format, value ) struct.pack_into(new_format, self.shm.buf, offset, encoded_value) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index cc07062eee6f98..8590cb1a648771 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -4757,7 +4757,7 @@ def test_shared_memory_ShareableList_basics(self): self.assertEqual(current_format, sl._get_packing_format(0)) # Verify attributes are readable. - self.assertEqual(sl.format, '8s8sdqxxxxxx?xxxxxxxx?q') + self.assertEqual(sl.format, '8s5sdqxxxxxx?xxxxxxxx?q') # Exercise len(). self.assertEqual(len(sl), 7) @@ -4785,7 +4785,7 @@ def test_shared_memory_ShareableList_basics(self): self.assertEqual(sl[3], 42) sl[4] = 'some' # Change type at a given position. 
self.assertEqual(sl[4], 'some') - self.assertEqual(sl.format, '8s8sdq8sxxxxxxx?q') + self.assertEqual(sl.format, '8s5sdq8sxxxxxxx?q') with self.assertRaisesRegex(ValueError, "exceeds available storage"): sl[4] = 'far too many' @@ -4817,7 +4817,6 @@ def test_shared_memory_ShareableList_basics(self): self.assertNotEqual(sl.shm.name, sl_copy.shm.name) self.assertEqual(name_duplicate, sl_copy.shm.name) self.assertEqual(list(sl), list(sl_copy)) - self.assertEqual(sl.format, sl_copy.format) sl_copy[-1] = 77 self.assertEqual(sl_copy[-1], 77) self.assertNotEqual(sl[-1], 77) From 34003c6f3317c199972eed5a1950158080f8314a Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 17:41:16 +0000 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst diff --git a/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst b/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst new file mode 100644 index 00000000000000..6651d1b0858508 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst @@ -0,0 +1,4 @@ +Fix :class:`~multiprocessing.shared_memory.ShareableList` corrupting +multi-byte UTF-8 strings due to using character count instead of byte count +for slot allocation, and stripping legitimate trailing null bytes from +``bytes`` values. 
From 3f874bc648b58eead1857d1e1f4baff6fdc478e3 Mon Sep 17 00:00:00 2001 From: Stefan Zetzsche Date: Fri, 27 Feb 2026 13:44:59 +0000 Subject: [PATCH 3/3] docs: Update ShareableList documentation for fixed trailing null bug The bug where ShareableList stripped trailing null bytes has been fixed in Python 3.15. Update documentation to: - Note the fix with versionchanged directive - Update doctest to show correct behavior (nulls preserved) - Clarify workaround is only needed for Python 3.14 and earlier - Reference both original issue #106939 and fix issue #145261 Fixes failing doctest in CI where expected output showed old buggy behavior instead of corrected behavior. --- Doc/library/multiprocessing.shared_memory.rst | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/Doc/library/multiprocessing.shared_memory.rst b/Doc/library/multiprocessing.shared_memory.rst index e8f04a6ac7b95d..3d1261f2ef93d1 100644 --- a/Doc/library/multiprocessing.shared_memory.rst +++ b/Doc/library/multiprocessing.shared_memory.rst @@ -313,26 +313,33 @@ finishes execution. .. note:: - A known issue exists for :class:`bytes` and :class:`str` values. - If they end with ``\x00`` nul bytes or characters, those may be + .. versionchanged:: 3.15 + Fixed a bug where trailing ``\x00`` nul bytes or characters of + :class:`bytes` and :class:`str` values were silently stripped when + fetching them by index. Trailing nulls are now preserved correctly. + See :gh:`106939` and :gh:`145261`. + + In Python 3.14 and earlier, a bug exists where trailing ``\x00`` nul + bytes or characters of :class:`bytes` and :class:`str` values may be *silently stripped* when fetching them by index from the - :class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior is - considered a bug and may go away in the future. See :gh:`106939`. + :class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior has been + fixed in Python 3.15. 
- For applications where rstripping of trailing nulls is a problem, - work around it by always unconditionally appending an extra non-0 - byte to the end of such values when storing and unconditionally - removing it when fetching: + For applications that need to work with Python 3.14 and earlier where + rstripping of trailing nulls is a problem, work around it by always + unconditionally appending an extra non-0 byte to the end of such values + when storing and unconditionally removing it when fetching: .. doctest:: >>> from multiprocessing import shared_memory >>> nul_bug_demo = shared_memory.ShareableList(['?\x00', b'\x03\x02\x01\x00\x00\x00']) >>> nul_bug_demo[0] - '?' + '?\x00' >>> nul_bug_demo[1] - b'\x03\x02\x01' + b'\x03\x02\x01\x00\x00\x00' >>> nul_bug_demo.shm.unlink() + >>> # Workaround for Python 3.14 and earlier (not needed in 3.15+): >>> padded = shared_memory.ShareableList(['?\x00\x07', b'\x03\x02\x01\x00\x00\x00\x07']) >>> padded[0][:-1] '?\x00'