From 591127b0e8aaaf35ac41c2c1fb3372fc91e7e327 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 6 Nov 2025 12:03:34 +0200 Subject: [PATCH 1/7] gh-141061: Fix decoding with non-standard Base64 alphabet The "+" and "/" characters are no longer recognized as part of the Base64 alphabet in base64.urlsafe_b64decode() and base64.b64decode() with the altchars argument that does not contain them. --- Lib/base64.py | 7 +++--- Lib/test/test_base64.py | 23 ++++++++++--------- ...-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst | 4 ++++ 3 files changed, 20 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst diff --git a/Lib/base64.py b/Lib/base64.py index 5d78cc09f40cd3..530babdfae2a75 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -80,8 +80,9 @@ def b64decode(s, altchars=None, validate=False): s = _bytes_from_decode_data(s) if altchars is not None: altchars = _bytes_from_decode_data(altchars) - assert len(altchars) == 2, repr(altchars) - s = s.translate(bytes.maketrans(altchars, b'+/')) + if len(altchars) != 2: + raise ValueError(f'invalid altchars: {altchars!r}') + s = s.translate(bytes.maketrans(b'+/' + altchars, altchars + b'+/')) return binascii.a2b_base64(s, strict_mode=validate) @@ -104,7 +105,7 @@ def standard_b64decode(s): _urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_') -_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/') +_urlsafe_decode_translation = bytes.maketrans(b'+/-_', b'-_+/') def urlsafe_b64encode(s): """Encode bytes using the URL- and filesystem-safe Base64 alphabet. 
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 65977ca8c9f2e0..efbd554b00f476 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -265,6 +265,11 @@ def test_b64decode_altchars(self): eq(base64.b64decode(data, altchars=altchars_str), res) eq(base64.b64decode(data_str, altchars=altchars_str), res) + self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+') + self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+/-') + self.assertRaises(ValueError, base64.b64decode, '', altchars='+') + self.assertRaises(ValueError, base64.b64decode, '', altchars='+/-') + def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') self.assertRaises(binascii.Error, base64.b64decode, 'abc') @@ -296,13 +301,13 @@ def test_b64decode_invalid_chars(self): with self.assertRaises(binascii.Error): base64.b64decode(bstr.decode('ascii'), validate=True) - # Normal alphabet characters not discarded when alternative given - res = b'\xfb\xef\xff' - self.assertEqual(base64.b64decode(b'++//', validate=True), res) - self.assertEqual(base64.b64decode(b'++//', '-_', validate=True), res) - self.assertEqual(base64.b64decode(b'--__', '-_', validate=True), res) - self.assertEqual(base64.urlsafe_b64decode(b'++//'), res) - self.assertEqual(base64.urlsafe_b64decode(b'--__'), res) + # Normal alphabet characters are discarded when alternative given + self.assertEqual(base64.b64decode(b'++//', altchars=b'-_'), b'') + self.assertEqual(base64.urlsafe_b64decode(b'++//'), b'') + with self.assertRaises(binascii.Error): + base64.b64decode(b'++++', altchars=b'-_', validate=True) + with self.assertRaises(binascii.Error): + base64.b64decode(b'////', altchars=b'-_', validate=True) def _altchars_strategy(): """Generate 'altchars' for base64 encoding.""" @@ -394,10 +399,6 @@ def test_b32decode_casefold(self): self.assertRaises(binascii.Error, base64.b32decode, b'me======') self.assertRaises(binascii.Error, base64.b32decode, 
'me======') - # Mapping zero and one - eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe') - eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe') - def test_b32decode_map01(self): # Mapping zero and one eq = self.assertEqual diff --git a/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst b/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst new file mode 100644 index 00000000000000..6fec18992622d5 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst @@ -0,0 +1,4 @@ +The ``+`` and ``/`` characters are no longer recognized as the part of the +Base64 alphabet in :func:`base64.urlsafe_b64decode` and +:func:`base64.b64decode` with the *altchars* argument that does not contain +them. From 5c1e8d4011477be9ca4d4a4c96acd67a28b1843d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 6 Nov 2025 17:36:50 +0200 Subject: [PATCH 2/7] Fix the issue number. --- ....7Gfpgw.rst => 2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/Security/{2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst => 2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst} (100%) diff --git a/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst b/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst similarity index 100% rename from Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst rename to Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst From b0d5877394d4c17996e9552709b9e2790780e580 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 6 Nov 2025 17:47:13 +0200 Subject: [PATCH 3/7] Remove unrelated changes. 
--- Lib/test/test_base64.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index efbd554b00f476..5c797a9cc745fb 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -399,6 +399,10 @@ def test_b32decode_casefold(self): self.assertRaises(binascii.Error, base64.b32decode, b'me======') self.assertRaises(binascii.Error, base64.b32decode, 'me======') + # Mapping zero and one + eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe') + eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe') + def test_b32decode_map01(self): # Mapping zero and one eq = self.assertEqual From 414e4ac592ac247e2220ed38e34a1dae100e43c2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 6 Nov 2025 23:02:01 +0200 Subject: [PATCH 4/7] Only emit a warning if validate=False. --- Lib/base64.py | 19 +++++++++++++++++-- Lib/test/test_base64.py | 14 +++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index 530babdfae2a75..edac542a7dca4c 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -78,12 +78,27 @@ def b64decode(s, altchars=None, validate=False): https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64 """ s = _bytes_from_decode_data(s) + badchar = None if altchars is not None: altchars = _bytes_from_decode_data(altchars) if len(altchars) != 2: raise ValueError(f'invalid altchars: {altchars!r}') - s = s.translate(bytes.maketrans(b'+/' + altchars, altchars + b'+/')) - return binascii.a2b_base64(s, strict_mode=validate) + if validate: + s = s.translate(bytes.maketrans(b'+/' + altchars, altchars + b'+/')) + else: + for b in set(b'+/') - set(altchars): + if b in s: + badchar = b + break + s = s.translate(bytes.maketrans(altchars, b'+/')) + result = binascii.a2b_base64(s, strict_mode=validate) + if badchar is not None: + import warnings + warnings.warn(f'invalid character {chr(badchar)!a} in base64 data ' + f'with altchars={altchars!r} will be discarded in 
' + f'future Python versions', + FutureWarning, stacklevel=2) + return result def standard_b64encode(s): diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index efbd554b00f476..312db0f3a0f776 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -301,13 +301,21 @@ def test_b64decode_invalid_chars(self): with self.assertRaises(binascii.Error): base64.b64decode(bstr.decode('ascii'), validate=True) - # Normal alphabet characters are discarded when alternative given - self.assertEqual(base64.b64decode(b'++//', altchars=b'-_'), b'') - self.assertEqual(base64.urlsafe_b64decode(b'++//'), b'') + # Normal alphabet characters will be discarded when alternative given + with self.assertWarns(FutureWarning): + self.assertEqual(base64.b64decode(b'++++', altchars=b'-_'), + b'\xfb\xef\xbe') + with self.assertWarns(FutureWarning): + self.assertEqual(base64.b64decode(b'////', altchars=b'-_'), + b'\xff\xff\xff') + self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'') + self.assertEqual(base64.urlsafe_b64decode(b'////'), b'') with self.assertRaises(binascii.Error): base64.b64decode(b'++++', altchars=b'-_', validate=True) with self.assertRaises(binascii.Error): base64.b64decode(b'////', altchars=b'-_', validate=True) + with self.assertRaises(binascii.Error): + base64.b64decode(b'+/!', altchars=b'-_') def _altchars_strategy(): """Generate 'altchars' for base64 encoding.""" From db32b3297b83454e71f634a296a15de96337271e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 22 Nov 2025 19:04:17 +0200 Subject: [PATCH 5/7] Make validate=True by default in base64.b64decode(). 
--- Doc/library/base64.rst | 29 +++++++++++---- Doc/whatsnew/3.15.rst | 15 ++++++++ Lib/base64.py | 6 +-- Lib/test/test_base64.py | 37 ++++++++----------- ...-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst | 6 +++ ...-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst | 4 -- 6 files changed, 62 insertions(+), 35 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst delete mode 100644 Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 529a7242443820..26bc6448b300f9 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -65,7 +65,7 @@ POST request. :exc:`TypeError` if *altchars* is not a :term:`bytes-like object`. -.. function:: b64decode(s, altchars=None, validate=False) +.. function:: b64decode(s, altchars=None, validate=True) Decode the Base64 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -77,15 +77,24 @@ POST request. A :exc:`binascii.Error` exception is raised if *s* is incorrectly padded. - If *validate* is ``False`` (the default), characters that are neither - in the normal base-64 alphabet nor the alternative alphabet are - discarded prior to the padding check. If *validate* is ``True``, - these non-alphabet characters in the input result in a + By default, non-alphabet characters in the input result in a :exc:`binascii.Error`. + If *validate* is false, characters that are neither in the normal base-64 + alphabet nor the alternative alphabet are discarded prior to the padding + check, but the ``+`` and ``/`` characters keep their meaning if they are + not in *altchars* (they will be discarded in future Python versions). - For more information about the strict base64 check, see :func:`binascii.a2b_base64` + For more information about the strict base64 check, see + :func:`binascii.a2b_base64`. + + .. versionchanged:: next + *validate* is now ``True`` by default. 
+ The ``+`` and ``/`` characters no longer preserve their meaning if they + are not in the alternative alphabet and *validate* is true. + :exc:`FutureWarning` is now emitted if the ``+`` or ``/`` characters + which are not in the alternative alphabet occur in the input and + *validate* is false. - May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2. .. function:: standard_b64encode(s) @@ -98,6 +107,9 @@ POST request. Decode :term:`bytes-like object` or ASCII string *s* using the standard Base64 alphabet and return the decoded :class:`bytes`. + .. versionchanged:: next + Non-alphabet characters in the input result in a :exc:`binascii.Error`. + .. function:: urlsafe_b64encode(s) @@ -116,6 +128,9 @@ POST request. ``/`` in the standard Base64 alphabet, and return the decoded :class:`bytes`. + .. versionchanged:: next + Non-alphabet characters in the input result in a :exc:`binascii.Error`. + .. function:: b32encode(s) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 8991584a9f22dd..70b24ffde9df32 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -1282,3 +1282,18 @@ that may require changes to your code. *dest* is now ``'foo'`` instead of ``'f'``. Pass an explicit *dest* argument to preserve the old behavior. (Contributed by Serhiy Storchaka in :gh:`138697`.) + +* :func:`base64.b64decode` now rejects all characters not in the base 64 + alphabet by default. + You can pass the ``validate=False`` argument to get the old behavior. + If *validate* is false, :exc:`FutureWarning` is now emitted if the ``+`` or + ``/`` characters which are not in the alternative alphabet occur in the input. + To get rid of the potential warnings, either replace these characters with + the corresponding alternative characters (to keep the old behavior), + or remove them from the input (to get the future behavior). 
+ + In :func:`base64.b64decode` and :func:`base64.b64decode`, non-alphabet + characters in the input now result in a :exc:`binascii.Error`. + You can use :func:`base64.b64decode` with ``validate=False`` and optionally + the corresponding *altchars* argument to get the old behavior. + (Contributed by Serhiy Storchaka in :gh:`125346`.) diff --git a/Lib/base64.py b/Lib/base64.py index d8d04386e69453..7c073aa6b6adc9 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -59,7 +59,7 @@ def b64encode(s, altchars=None): return encoded -def b64decode(s, altchars=None, validate=False): +def b64decode(s, altchars=None, validate=True): """Decode the Base64 encoded bytes-like object or ASCII string s. Optional altchars must be a bytes-like object or ASCII string of length 2 @@ -86,8 +86,8 @@ def b64decode(s, altchars=None, validate=False): if validate: s = s.translate(bytes.maketrans(b'+/' + altchars, altchars + b'+/')) else: - for b in set(b'+/') - set(altchars): - if b in s: + for b in b'+/': + if b not in altchars and b in s: badchar = b break s = s.translate(bytes.maketrans(altchars, b'+/')) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 312db0f3a0f776..9cdcc8f8342c6c 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -216,7 +216,7 @@ def test_b64decode(self): b"YWI=": b"ab", b"YWJj": b"abc", b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" - b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT" + b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT" b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==": b"abcdefghijklmnopqrstuvwxyz" @@ -286,36 +286,31 @@ def test_b64decode_invalid_chars(self): (b'!', b''), (b"YWJj\n", b"abc"), (b'YWJj\nYWI=', b'abcab')) - funcs = ( - base64.b64decode, - base64.standard_b64decode, - base64.urlsafe_b64decode, - ) for bstr, res in tests: - for func in funcs: - with self.subTest(bstr=bstr, func=func): - self.assertEqual(func(bstr), res) - self.assertEqual(func(bstr.decode('ascii')), res) - with self.assertRaises(binascii.Error): - 
base64.b64decode(bstr, validate=True) - with self.assertRaises(binascii.Error): - base64.b64decode(bstr.decode('ascii'), validate=True) + with self.subTest(bstr=bstr): + for data in bstr, bstr.decode('ascii'): + self.assertEqual(base64.b64decode(data, validate=False), res) + self.assertRaises(binascii.Error, base64.b64decode, data) + self.assertRaises(binascii.Error, base64.standard_b64decode, data) + self.assertRaises(binascii.Error, base64.urlsafe_b64decode, data) # Normal alphabet characters will be discarded when alternative given with self.assertWarns(FutureWarning): - self.assertEqual(base64.b64decode(b'++++', altchars=b'-_'), + self.assertEqual(base64.b64decode(b'++++', altchars=b'-_', validate=False), b'\xfb\xef\xbe') with self.assertWarns(FutureWarning): - self.assertEqual(base64.b64decode(b'////', altchars=b'-_'), + self.assertEqual(base64.b64decode(b'////', altchars=b'-_', validate=False), b'\xff\xff\xff') - self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'') - self.assertEqual(base64.urlsafe_b64decode(b'////'), b'') with self.assertRaises(binascii.Error): - base64.b64decode(b'++++', altchars=b'-_', validate=True) + base64.urlsafe_b64decode(b'++++') + with self.assertRaises(binascii.Error): + base64.urlsafe_b64decode(b'////') + with self.assertRaises(binascii.Error): + base64.b64decode(b'++++', altchars=b'-_') with self.assertRaises(binascii.Error): - base64.b64decode(b'////', altchars=b'-_', validate=True) + base64.b64decode(b'////', altchars=b'-_') with self.assertRaises(binascii.Error): - base64.b64decode(b'+/!', altchars=b'-_') + base64.b64decode(b'+/!', altchars=b'-_', validate=False) def _altchars_strategy(): """Generate 'altchars' for base64 encoding.""" diff --git a/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst b/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst new file mode 100644 index 00000000000000..ef6b05f90df77e --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst @@ -0,0 +1,6 @@ +In :func:`base64.b64decode`, characters not in the base 64 alphabet now +result in a :exc:`binascii.Error` by default. +If *validate* is false, :exc:`FutureWarning` is now emitted if the ``+`` or +``/`` characters which are not in the alternative alphabet occur in the input. +In :func:`base64.standard_b64decode` and :func:`base64.urlsafe_b64decode`, +non-alphabet characters in the input now result in a :exc:`binascii.Error`. diff --git a/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst b/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst deleted file mode 100644 index 6fec18992622d5..00000000000000 --- a/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst +++ /dev/null @@ -1,4 +0,0 @@ -The ``+`` and ``/`` characters are no longer recognized as the part of the -Base64 alphabet in :func:`base64.urlsafe_b64decode` and -:func:`base64.b64decode` with the *altchars* argument that does not contain -them. From e9db343787324a8674979f1810db16ef7897c009 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 18 Jan 2026 11:12:05 +0200 Subject: [PATCH 6/7] Revert "Make validate=True by default in base64.b64decode()." This reverts commit db32b3297b83454e71f634a296a15de96337271e. 
--- Doc/library/base64.rst | 29 ++++----------- Doc/whatsnew/3.15.rst | 15 -------- Lib/base64.py | 6 +-- Lib/test/test_base64.py | 37 +++++++++++-------- ...-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst | 6 --- ...-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst | 4 ++ 6 files changed, 35 insertions(+), 62 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst create mode 100644 Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index c2231ac1bee7ff..4876117f6403b2 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -72,7 +72,7 @@ POST request. Added the *wrapcol* parameter. -.. function:: b64decode(s, altchars=None, validate=True) +.. function:: b64decode(s, altchars=None, validate=False) Decode the Base64 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -84,24 +84,15 @@ POST request. A :exc:`binascii.Error` exception is raised if *s* is incorrectly padded. - By default, non-alphabet characters in the input result in a + If *validate* is ``False`` (the default), characters that are neither + in the normal base-64 alphabet nor the alternative alphabet are + discarded prior to the padding check. If *validate* is ``True``, + these non-alphabet characters in the input result in a :exc:`binascii.Error`. - If *validate* is false, characters that are neither in the normal base-64 - alphabet nor the alternative alphabet are discarded prior to the padding - check, but the ``+`` and ``/`` characters keep their meaning if they are - not in *altchars* (they will be discarded in future Python versions). - For more information about the strict base64 check, see - :func:`binascii.a2b_base64`. - - .. versionchanged:: next - *validate* is now ``True`` by default. - The ``+`` and ``/`` characters no longer preserve their meaning if they - are not in the alternative alphabet and *validate* is true. 
- :exc:`FutureWarning` is now emitted if the ``+`` or ``/`` characters - which are not in the alternative alphabet occur in the input and - *validate* is false. + For more information about the strict base64 check, see :func:`binascii.a2b_base64` + May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2. .. function:: standard_b64encode(s) @@ -114,9 +105,6 @@ POST request. Decode :term:`bytes-like object` or ASCII string *s* using the standard Base64 alphabet and return the decoded :class:`bytes`. - .. versionchanged:: next - Non-alphabet characters in the input result in a :exc:`binascii.Error`. - .. function:: urlsafe_b64encode(s) @@ -135,9 +123,6 @@ POST request. ``/`` in the standard Base64 alphabet, and return the decoded :class:`bytes`. - .. versionchanged:: next - Non-alphabet characters in the input result in a :exc:`binascii.Error`. - .. function:: b32encode(s) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 5bbc227e63dc6a..b7a27d5db63875 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -1467,18 +1467,3 @@ that may require changes to your code. *dest* is now ``'foo'`` instead of ``'f'``. Pass an explicit *dest* argument to preserve the old behavior. (Contributed by Serhiy Storchaka in :gh:`138697`.) - -* :func:`base64.b64decode` now rejects all characters not in the base 64 - alphabet by default. - You can pass the ``validate=False`` argument to get the old behavior. - If *validate* is false, :exc:`FutureWarning` is now emitted if the ``+`` or - ``/`` characters which are not in the alternative alphabet occur in the input. - To get rid of the potential warnings, either replace these characters with - the corresponding alternative characters (to keep the old behavior), - or remove them from the input (to get the future behavior). - - In :func:`base64.b64decode` and :func:`base64.b64decode`, non-alphabet - characters in the input now result in a :exc:`binascii.Error`. 
- You can use :func:`base64.b64decode` with ``validate=False`` and optionally - the corresponding *altchars* argument to get the old behavior. - (Contributed by Serhiy Storchaka in :gh:`125346`.) diff --git a/Lib/base64.py b/Lib/base64.py index e390f28ed91db5..281de77aa4ca54 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -62,7 +62,7 @@ def b64encode(s, altchars=None, *, wrapcol=0): return encoded -def b64decode(s, altchars=None, validate=True): +def b64decode(s, altchars=None, validate=False): """Decode the Base64 encoded bytes-like object or ASCII string s. Optional altchars must be a bytes-like object or ASCII string of length 2 @@ -89,8 +89,8 @@ def b64decode(s, altchars=None, validate=True): if validate: s = s.translate(bytes.maketrans(b'+/' + altchars, altchars + b'+/')) else: - for b in b'+/': - if b not in altchars and b in s: + for b in set(b'+/') - set(altchars): + if b in s: badchar = b break s = s.translate(bytes.maketrans(altchars, b'+/')) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 2c29f20639e621..b05f18c134c84e 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -243,7 +243,7 @@ def test_b64decode(self): b"YWI=": b"ab", b"YWJj": b"abc", b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" - b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT" + b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT" b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==": b"abcdefghijklmnopqrstuvwxyz" @@ -313,31 +313,36 @@ def test_b64decode_invalid_chars(self): (b'!', b''), (b"YWJj\n", b"abc"), (b'YWJj\nYWI=', b'abcab')) + funcs = ( + base64.b64decode, + base64.standard_b64decode, + base64.urlsafe_b64decode, + ) for bstr, res in tests: - with self.subTest(bstr=bstr): - for data in bstr, bstr.decode('ascii'): - self.assertEqual(base64.b64decode(data, validate=False), res) - self.assertRaises(binascii.Error, base64.b64decode, data) - self.assertRaises(binascii.Error, base64.standard_b64decode, data) - self.assertRaises(binascii.Error, base64.urlsafe_b64decode, data) + for 
func in funcs: + with self.subTest(bstr=bstr, func=func): + self.assertEqual(func(bstr), res) + self.assertEqual(func(bstr.decode('ascii')), res) + with self.assertRaises(binascii.Error): + base64.b64decode(bstr, validate=True) + with self.assertRaises(binascii.Error): + base64.b64decode(bstr.decode('ascii'), validate=True) # Normal alphabet characters will be discarded when alternative given with self.assertWarns(FutureWarning): - self.assertEqual(base64.b64decode(b'++++', altchars=b'-_', validate=False), + self.assertEqual(base64.b64decode(b'++++', altchars=b'-_'), b'\xfb\xef\xbe') with self.assertWarns(FutureWarning): - self.assertEqual(base64.b64decode(b'////', altchars=b'-_', validate=False), + self.assertEqual(base64.b64decode(b'////', altchars=b'-_'), b'\xff\xff\xff') + self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'') + self.assertEqual(base64.urlsafe_b64decode(b'////'), b'') with self.assertRaises(binascii.Error): - base64.urlsafe_b64decode(b'++++') - with self.assertRaises(binascii.Error): - base64.urlsafe_b64decode(b'////') - with self.assertRaises(binascii.Error): - base64.b64decode(b'++++', altchars=b'-_') + base64.b64decode(b'++++', altchars=b'-_', validate=True) with self.assertRaises(binascii.Error): - base64.b64decode(b'////', altchars=b'-_') + base64.b64decode(b'////', altchars=b'-_', validate=True) with self.assertRaises(binascii.Error): - base64.b64decode(b'+/!', altchars=b'-_', validate=False) + base64.b64decode(b'+/!', altchars=b'-_') def _altchars_strategy(): """Generate 'altchars' for base64 encoding.""" diff --git a/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst b/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst deleted file mode 100644 index ef6b05f90df77e..00000000000000 --- a/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst +++ /dev/null @@ -1,6 +0,0 @@ -In :func:`base64.b64decode`, characters not in the base 64 alphabet now -result in a :exc:`binascii.Error` 
by default. -If *validate* is false, :exc:`FutureWarning` is now emitted if the ``+`` or -``/`` characters which are not in the alternative alphabet occur in the input. -In :func:`base64.standard_b64decode` and :func:`base64.urlsafe_b64decode`, -non-alphabet characters in the input now result in a :exc:`binascii.Error`. diff --git a/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst b/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst new file mode 100644 index 00000000000000..6fec18992622d5 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst @@ -0,0 +1,4 @@ +The ``+`` and ``/`` characters are no longer recognized as the part of the +Base64 alphabet in :func:`base64.urlsafe_b64decode` and +:func:`base64.b64decode` with the *altchars* argument that does not contain +them. From 220fc4ef643476a40d0da3bbd188b34c75e5af44 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 18 Jan 2026 12:01:38 +0200 Subject: [PATCH 7/7] Always only emit a warning. --- Doc/library/base64.rst | 18 ++++++-- Doc/whatsnew/3.15.rst | 9 ++++ Lib/base64.py | 46 ++++++++++++------- Lib/test/test_base64.py | 16 ++++--- ...-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst | 5 ++ ...-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst | 4 -- 6 files changed, 67 insertions(+), 31 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst delete mode 100644 Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 4876117f6403b2..3e7884debd5948 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -84,15 +84,20 @@ POST request. A :exc:`binascii.Error` exception is raised if *s* is incorrectly padded. 
- If *validate* is ``False`` (the default), characters that are neither + If *validate* is false (the default), characters that are neither in the normal base-64 alphabet nor the alternative alphabet are - discarded prior to the padding check. If *validate* is ``True``, - these non-alphabet characters in the input result in a - :exc:`binascii.Error`. + discarded prior to the padding check, but the ``+`` and ``/`` characters + keep their meaning if they are not in *altchars* (they will be discarded + in future Python versions). + If *validate* is true, these non-alphabet characters in the input + result in a :exc:`binascii.Error`. For more information about the strict base64 check, see :func:`binascii.a2b_base64` - May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2. + .. deprecated:: next + Accepting the ``+`` and ``/`` characters with an alternative alphabet + is now deprecated. + .. function:: standard_b64encode(s) @@ -123,6 +128,9 @@ POST request. ``/`` in the standard Base64 alphabet, and return the decoded :class:`bytes`. + .. deprecated:: next + Accepting the ``+`` and ``/`` characters is now deprecated. + .. function:: b32encode(s) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index b7a27d5db63875..0e3e0c49bfc579 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -1143,6 +1143,15 @@ Deprecated New deprecations ---------------- +* :mod:`base64`: + + * Accepting the ``+`` and ``/`` characters with an alternative alphabet in + :func:`~base64.b64decode` and :func:`~base64.urlsafe_b64decode` is now + deprecated. + In future Python versions they will be errors in the strict mode and + discarded in the non-strict mode. + (Contributed by Serhiy Storchaka in :gh:`125346`.) 
+ * CLI: * Deprecate :option:`-b` and :option:`!-bb` command-line options diff --git a/Lib/base64.py b/Lib/base64.py index 281de77aa4ca54..6e0da16b23ce99 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -72,9 +72,9 @@ def b64decode(s, altchars=None, validate=False): The result is returned as a bytes object. A binascii.Error is raised if s is incorrectly padded. - If validate is False (the default), characters that are neither in the + If validate is false (the default), characters that are neither in the normal base-64 alphabet nor the alternative alphabet are discarded prior - to the padding check. If validate is True, these non-alphabet characters + to the padding check. If validate is true, these non-alphabet characters in the input result in a binascii.Error. For more information about the strict base64 check, see: @@ -86,21 +86,24 @@ def b64decode(s, altchars=None, validate=False): altchars = _bytes_from_decode_data(altchars) if len(altchars) != 2: raise ValueError(f'invalid altchars: {altchars!r}') - if validate: - s = s.translate(bytes.maketrans(b'+/' + altchars, altchars + b'+/')) - else: - for b in set(b'+/') - set(altchars): - if b in s: - badchar = b - break - s = s.translate(bytes.maketrans(altchars, b'+/')) + for b in b'+/': + if b not in altchars and b in s: + badchar = b + break + s = s.translate(bytes.maketrans(altchars, b'+/')) result = binascii.a2b_base64(s, strict_mode=validate) if badchar is not None: import warnings - warnings.warn(f'invalid character {chr(badchar)!a} in base64 data ' - f'with altchars={altchars!r} will be discarded in ' - f'future Python versions', - FutureWarning, stacklevel=2) + if validate: + warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data ' + f'with altchars={altchars!r} and validate=True ' + f'will be an error in future Python versions', + DeprecationWarning, stacklevel=2) + else: + warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data ' + f'with altchars={altchars!r} and validate=False ' + 
f'will be discarded in future Python versions', + FutureWarning, stacklevel=2) return result @@ -123,7 +126,7 @@ def standard_b64decode(s): _urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_') -_urlsafe_decode_translation = bytes.maketrans(b'+/-_', b'-_+/') +_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/') def urlsafe_b64encode(s): """Encode bytes using the URL- and filesystem-safe Base64 alphabet. @@ -146,8 +149,19 @@ def urlsafe_b64decode(s): The alphabet uses '-' instead of '+' and '_' instead of '/'. """ s = _bytes_from_decode_data(s) + badchar = None + for b in b'+/': + if b in s: + badchar = b + break s = s.translate(_urlsafe_decode_translation) - return b64decode(s) + result = binascii.a2b_base64(s, strict_mode=False) + if badchar is not None: + import warnings + warnings.warn(f'invalid character {chr(badchar)!a} in URL-safe Base64 data ' + f'will be discarded in future Python versions', + FutureWarning, stacklevel=2) + return result diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index b05f18c134c84e..d02992903f15a7 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -335,12 +335,16 @@ def test_b64decode_invalid_chars(self): with self.assertWarns(FutureWarning): self.assertEqual(base64.b64decode(b'////', altchars=b'-_'), b'\xff\xff\xff') - self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'') - self.assertEqual(base64.urlsafe_b64decode(b'////'), b'') - with self.assertRaises(binascii.Error): - base64.b64decode(b'++++', altchars=b'-_', validate=True) - with self.assertRaises(binascii.Error): - base64.b64decode(b'////', altchars=b'-_', validate=True) + with self.assertWarns(DeprecationWarning): + self.assertEqual(base64.b64decode(b'++++', altchars=b'-_', validate=True), + b'\xfb\xef\xbe') + with self.assertWarns(DeprecationWarning): + self.assertEqual(base64.b64decode(b'////', altchars=b'-_', validate=True), + b'\xff\xff\xff') + with self.assertWarns(FutureWarning): + 
self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'\xfb\xef\xbe') + with self.assertWarns(FutureWarning): + self.assertEqual(base64.urlsafe_b64decode(b'////'), b'\xff\xff\xff') with self.assertRaises(binascii.Error): base64.b64decode(b'+/!', altchars=b'-_') diff --git a/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst b/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst new file mode 100644 index 00000000000000..187a6ebbe79b26 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst @@ -0,0 +1,5 @@ +Accepting ``+`` and ``/`` characters with an alternative alphabet in +:func:`base64.b64decode` and :func:`base64.urlsafe_b64decode` is now +deprecated. +In future Python versions they will be errors in the strict mode and +discarded in the non-strict mode. diff --git a/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst b/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst deleted file mode 100644 index 6fec18992622d5..00000000000000 --- a/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst +++ /dev/null @@ -1,4 +0,0 @@ -The ``+`` and ``/`` characters are no longer recognized as the part of the -Base64 alphabet in :func:`base64.urlsafe_b64decode` and -:func:`base64.b64decode` with the *altchars* argument that does not contain -them.