From 0229b061a7d65d0d0f42531ebf6cd92f5d9e65f6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 26 Feb 2026 20:13:21 +0200 Subject: [PATCH] gh-145264: Do not ignore excess Base64 data after the first padded quad Base64 decoder (see binascii.a2b_base64(), base64.b64decode(), etc) no longer ignores excess data after the first padded quad in non-strict (default) mode. Instead, in conformance with RFC 4648, it ignores the pad character, "=", if it is present before the end of the encoded data. --- Lib/test/test_binascii.py | 16 +++---- ...-02-26-20-13-16.gh-issue-145264.4pggX_.rst | 4 ++ Modules/binascii.c | 43 +++++++------------ 3 files changed, 26 insertions(+), 37 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 9f0d15fe538810..3aa139e15e7653 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -240,23 +240,21 @@ def assertNonBase64Data(data, expected, ignorechars): def test_base64_excess_data(self): # Test excess data exceptions - def assertExcessData(data, non_strict_expected, - ignore_padchar_expected=None): + def assertExcessData(data, non_strict_expected): assert_regex = r'(?i)Excess data' data = self.type2test(data) with self.assertRaisesRegex(binascii.Error, assert_regex): binascii.a2b_base64(data, strict_mode=True) self.assertEqual(binascii.a2b_base64(data, strict_mode=False), non_strict_expected) - if ignore_padchar_expected is not None: - self.assertEqual(binascii.a2b_base64(data, strict_mode=True, - ignorechars=b'='), - ignore_padchar_expected) + self.assertEqual(binascii.a2b_base64(data, strict_mode=True, + ignorechars=b'='), + non_strict_expected) self.assertEqual(binascii.a2b_base64(data), non_strict_expected) - assertExcessData(b'ab==c', b'i') - assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d') - assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d') + assertExcessData(b'ab==c=', b'i\xb7') + assertExcessData(b'ab==cd', b'i\xb7\x1d') + assertExcessData(b'abc=d', b'i\xb7\x1d') def test_base64errors(self): # Test base64 with invalid padding diff --git a/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst b/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst new file mode 100644 index 00000000000000..998c003527926a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst @@ -0,0 +1,4 @@ +Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc) no +longer ignores excess data after the first padded quad in non-strict +(default) mode. Instead, in conformance with :rfc:`4648`, it ignores +the pad character, "=", if it is present before the end of the encoded data. diff --git a/Modules/binascii.c b/Modules/binascii.c index e6cd64338064b3..b3f79e2433dffc 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -640,36 +640,24 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, */ if (this_ch == BASE64_PAD) { pads++; - - if (strict_mode) { - if (quad_pos >= 2 && quad_pos + pads <= 4) { - continue; - } - if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) { - continue; - } - if (quad_pos == 1) { - /* Set an error below. */ - break; - } - state = get_binascii_state(module); - if (state) { - PyErr_SetString(state->Error, - (quad_pos == 0 && ascii_data == data->buf) - ? "Leading padding not allowed" - : "Excess padding not allowed"); - } - goto error_end; + if (quad_pos >= 2 && quad_pos + pads <= 4) { + continue; } - else { - if (quad_pos >= 2 && quad_pos + pads >= 4) { - /* A pad sequence means we should not parse more input. - ** We've already interpreted the data from the quad at this point. - */ - goto done; - } + if (!strict_mode || ignorechar(BASE64_PAD, ignorechars, ignorecache)) { continue; } + if (quad_pos == 1) { + /* Set an error below. */ + break; + } + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, + (quad_pos == 0 && ascii_data == data->buf) + ? "Leading padding not allowed" + : "Excess padding not allowed"); + } + goto error_end; } unsigned char v = table_a2b_base64[this_ch]; @@ -748,7 +736,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, goto error_end; } -done: return PyBytesWriter_FinishWithPointer(writer, bin_data); error_end: