diff --git a/Doc/library/multiprocessing.shared_memory.rst b/Doc/library/multiprocessing.shared_memory.rst index e8f04a6ac7b95d..3d1261f2ef93d1 100644 --- a/Doc/library/multiprocessing.shared_memory.rst +++ b/Doc/library/multiprocessing.shared_memory.rst @@ -313,26 +313,33 @@ finishes execution. .. note:: - A known issue exists for :class:`bytes` and :class:`str` values. - If they end with ``\x00`` nul bytes or characters, those may be + .. versionchanged:: 3.15 + Fixed a bug where :class:`bytes` values ending with ``\x00`` nul bytes + were silently stripped when fetching them by index. Trailing nul bytes + in :class:`bytes` values are now preserved; :class:`str` values are + still affected. See :gh:`106939` and :gh:`145261`. + + A bug exists where :class:`str` values (and, in Python 3.14 and earlier, + :class:`bytes` values) ending with ``\x00`` nul characters or bytes may be *silently stripped* when fetching them by index from the - :class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior is - considered a bug and may go away in the future. See :gh:`106939`. + :class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior has been + fixed for :class:`bytes` values in Python 3.15. - For applications where rstripping of trailing nulls is a problem, - work around it by always unconditionally appending an extra non-0 - byte to the end of such values when storing and unconditionally - removing it when fetching: + For :class:`str` values, and for code that must also run on Python 3.14 + and earlier, where rstripping of trailing nulls is a problem, work around + it by always unconditionally appending an extra non-0 byte to the end of + such values when storing and unconditionally removing it when fetching: .. doctest:: >>> from multiprocessing import shared_memory >>> nul_bug_demo = shared_memory.ShareableList(['?\x00', b'\x03\x02\x01\x00\x00\x00']) >>> nul_bug_demo[0] '?' 
>>> nul_bug_demo[1] - b'\x03\x02\x01' + b'\x03\x02\x01\x00\x00\x00' >>> nul_bug_demo.shm.unlink() + >>> # Workaround (str: always; bytes: Python 3.14 and earlier only): >>> padded = shared_memory.ShareableList(['?\x00\x07', b'\x03\x02\x01\x00\x00\x00\x07']) >>> padded[0][:-1] '?\x00' diff --git a/Lib/multiprocessing/shared_memory.py b/Lib/multiprocessing/shared_memory.py index 99a8ce3320ad4e..0e1aca0783e2b9 100644 --- a/Lib/multiprocessing/shared_memory.py +++ b/Lib/multiprocessing/shared_memory.py @@ -287,7 +287,7 @@ class ShareableList: _back_transforms_mapping = { 0: lambda value: value, # int, float, bool 1: lambda value: value.rstrip(b'\x00').decode(_encoding), # str - 2: lambda value: value.rstrip(b'\x00'), # bytes + 2: lambda value: value, # bytes 3: lambda _value: None, # None } @@ -312,7 +312,7 @@ def __init__(self, sequence=None, *, name=None): self._types_mapping[type(item)] if not isinstance(item, (str, bytes)) else self._types_mapping[type(item)] % ( - self._alignment * (len(item) // self._alignment + 1), + self._alignment * (len(item.encode('utf-8') if isinstance(item, str) else item) // self._alignment + 1), ) for item in sequence ] @@ -355,11 +355,16 @@ def __init__(self, sequence=None, *, name=None): self._offset_data_start, *(v.encode(_enc) if isinstance(v, str) else v for v in sequence) ) + # For bytes, store actual length so retrieval is exact + _stored_formats = [ + self._types_mapping[bytes] % (len(v),) if isinstance(v, bytes) else f + for v, f in zip(sequence, _formats) + ] struct.pack_into( self._format_packing_metainfo, self.shm.buf, self._offset_packing_formats, - *(v.encode(_enc) for v in _formats) + *(v.encode(_enc) for v in _stored_formats) ) struct.pack_into( self._format_back_transform_codes, @@ -476,7 +481,8 @@ def __setitem__(self, position, value): self._set_packing_format_and_transform( position, - new_format, + self._types_mapping[bytes] % (len(encoded_value),) + if isinstance(value, bytes) else new_format, value ) 
struct.pack_into(new_format, self.shm.buf, offset, encoded_value) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index cc07062eee6f98..8590cb1a648771 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -4757,7 +4757,7 @@ def test_shared_memory_ShareableList_basics(self): self.assertEqual(current_format, sl._get_packing_format(0)) # Verify attributes are readable. - self.assertEqual(sl.format, '8s8sdqxxxxxx?xxxxxxxx?q') + self.assertEqual(sl.format, '8s5sdqxxxxxx?xxxxxxxx?q') # Exercise len(). self.assertEqual(len(sl), 7) @@ -4785,7 +4785,7 @@ def test_shared_memory_ShareableList_basics(self): self.assertEqual(sl[3], 42) sl[4] = 'some' # Change type at a given position. self.assertEqual(sl[4], 'some') - self.assertEqual(sl.format, '8s8sdq8sxxxxxxx?q') + self.assertEqual(sl.format, '8s5sdq8sxxxxxxx?q') with self.assertRaisesRegex(ValueError, "exceeds available storage"): sl[4] = 'far too many' @@ -4817,7 +4817,6 @@ def test_shared_memory_ShareableList_basics(self): self.assertNotEqual(sl.shm.name, sl_copy.shm.name) self.assertEqual(name_duplicate, sl_copy.shm.name) self.assertEqual(list(sl), list(sl_copy)) - self.assertEqual(sl.format, sl_copy.format) sl_copy[-1] = 77 self.assertEqual(sl_copy[-1], 77) self.assertNotEqual(sl[-1], 77) diff --git a/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst b/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst new file mode 100644 index 00000000000000..6651d1b0858508 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst @@ -0,0 +1,4 @@ +Fix :class:`~multiprocessing.shared_memory.ShareableList` corrupting +multi-byte UTF-8 strings due to using character count instead of byte count +for slot allocation, and stripping legitimate trailing null bytes from +``bytes`` values.