Skip to content

Commit 192d535

Browse files
gh-144001: Support ignorechars in binascii.a2b_base64() and base64.b64decode()
1 parent 63cc125 commit 192d535

File tree

9 files changed

+217
-98
lines changed

9 files changed

+217
-98
lines changed

Doc/library/base64.rst

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ POST request.
7373

7474

7575
.. function:: b64decode(s, altchars=None, validate=False)
76+
b64decode(s, altchars=None, validate=True, *, ignorechars)
7677
7778
Decode the Base64 encoded :term:`bytes-like object` or ASCII string
7879
*s* and return the decoded :class:`bytes`.
@@ -84,16 +85,24 @@ POST request.
8485
A :exc:`binascii.Error` exception is raised
8586
if *s* is incorrectly padded.
8687

87-
If *validate* is ``False`` (the default), characters that are neither
88+
If *ignorechars* is specified, it should be a byte string containing
89+
characters to ignore from the input, and *validate* is ``True`` by default.
90+
Otherwise *validate* is ``False`` by default.
91+
92+
If *validate* is false, characters that are neither
8893
in the normal base-64 alphabet nor the alternative alphabet are
89-
discarded prior to the padding check. If *validate* is ``True``,
90-
these non-alphabet characters in the input result in a
91-
:exc:`binascii.Error`.
94+
discarded prior to the padding check.
95+
If *validate* is true, these non-alphabet characters in the input
96+
result in a :exc:`binascii.Error` unless they are listed in *ignorechars*.
9297

9398
For more information about the strict base64 check, see :func:`binascii.a2b_base64`.
9499

95100
May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2.
96101

102+
.. versionchanged:: next
103+
Added the *ignorechars* parameter.
104+
105+
97106
.. function:: standard_b64encode(s)
98107

99108
Encode :term:`bytes-like object` *s* using the standard Base64 alphabet

Doc/library/binascii.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,16 @@ The :mod:`binascii` module defines the following functions:
4949

5050

5151
.. function:: a2b_base64(string, /, *, strict_mode=False)
52+
a2b_base64(string, /, *, strict_mode=True, ignorechars)
5253
5354
Convert a block of base64 data back to binary and return the binary data. More
5455
than one line may be passed at a time.
5556

57+
If *ignorechars* is specified, it should be a byte string containing
58+
characters to ignore from the input when *strict_mode* is true.
59+
*strict_mode* is ``True`` by default if *ignorechars* is specified,
60+
``False`` otherwise.
61+
5662
If *strict_mode* is true, only valid base64 data will be converted. Invalid base64
5763
data will raise :exc:`binascii.Error`.
5864

@@ -66,6 +72,9 @@ The :mod:`binascii` module defines the following functions:
6672
.. versionchanged:: 3.11
6773
Added the *strict_mode* parameter.
6874

75+
.. versionchanged:: next
76+
Added the *ignorechars* parameter.
77+
6978

7079
.. function:: b2a_base64(data, *, wrapcol=0, newline=True)
7180

Doc/whatsnew/3.15.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,13 +444,18 @@ base64
444444
* Added the *wrapcol* parameter in :func:`~base64.b64encode`.
445445
(Contributed by Serhiy Storchaka in :gh:`143214`.)
446446

447+
* Added the *ignorechars* parameter in :func:`~base64.b64decode`.
448+
(Contributed by Serhiy Storchaka in :gh:`144001`.)
447449

448450
binascii
449451
--------
450452

451453
* Added the *wrapcol* parameter in :func:`~binascii.b2a_base64`.
452454
(Contributed by Serhiy Storchaka in :gh:`143214`.)
453455

456+
* Added the *ignorechars* parameter in :func:`~binascii.a2b_base64`.
457+
(Contributed by Serhiy Storchaka in :gh:`144001`.)
458+
454459

455460
calendar
456461
--------

Lib/base64.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
]
2727

2828

29+
_NOT_SPECIFIED = ['NOT SPECIFIED']
30+
2931
bytes_types = (bytes, bytearray) # Types acceptable as binary data
3032

3133
def _bytes_from_decode_data(s):
@@ -62,7 +64,7 @@ def b64encode(s, altchars=None, *, wrapcol=0):
6264
return encoded
6365

6466

65-
def b64decode(s, altchars=None, validate=False):
67+
def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, *, ignorechars=_NOT_SPECIFIED):
6668
"""Decode the Base64 encoded bytes-like object or ASCII string s.
6769
6870
Optional altchars must be a bytes-like object or ASCII string of length 2
@@ -72,10 +74,14 @@ def b64decode(s, altchars=None, validate=False):
7274
The result is returned as a bytes object. A binascii.Error is raised if
7375
s is incorrectly padded.
7476
75-
If validate is False (the default), characters that are neither in the
76-
normal base-64 alphabet nor the alternative alphabet are discarded prior
77-
to the padding check. If validate is True, these non-alphabet characters
78-
in the input result in a binascii.Error.
77+
If ignorechars is specified, it should be a byte string containing
78+
characters to ignore from the input, and validate is True by default.
79+
Otherwise validate is False by default.
80+
81+
If validate is false, characters that are neither in the normal base-64
82+
alphabet nor the alternative alphabet are discarded prior to the
83+
padding check. If validate is true, these non-alphabet characters in
84+
the input result in a binascii.Error if they are not in ignorechars.
7985
For more information about the strict base64 check, see:
8086
8187
https://docs.python.org/3/library/binascii.html#binascii.a2b_base64
@@ -85,7 +91,12 @@ def b64decode(s, altchars=None, validate=False):
8591
altchars = _bytes_from_decode_data(altchars)
8692
assert len(altchars) == 2, repr(altchars)
8793
s = s.translate(bytes.maketrans(altchars, b'+/'))
88-
return binascii.a2b_base64(s, strict_mode=validate)
94+
if validate is _NOT_SPECIFIED:
95+
validate = ignorechars is not _NOT_SPECIFIED
96+
if ignorechars is _NOT_SPECIFIED:
97+
ignorechars = b''
98+
return binascii.a2b_base64(s, strict_mode=validate,
99+
ignorechars=ignorechars)
89100

90101

91102
def standard_b64encode(s):

Lib/test/test_base64.py

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -298,22 +298,22 @@ def test_b64decode_padding_error(self):
298298

299299
def test_b64decode_invalid_chars(self):
300300
# issue 1466065: Test some invalid characters.
301-
tests = ((b'%3d==', b'\xdd'),
302-
(b'$3d==', b'\xdd'),
303-
(b'[==', b''),
304-
(b'YW]3=', b'am'),
305-
(b'3{d==', b'\xdd'),
306-
(b'3d}==', b'\xdd'),
307-
(b'@@', b''),
308-
(b'!', b''),
309-
(b"YWJj\n", b"abc"),
310-
(b'YWJj\nYWI=', b'abcab'))
301+
tests = ((b'%3d==', b'\xdd', b'%$'),
302+
(b'$3d==', b'\xdd', b'%$'),
303+
(b'[==', b'', None),
304+
(b'YW]3=', b'am', b']'),
305+
(b'3{d==', b'\xdd', b'{}'),
306+
(b'3d}==', b'\xdd', b'{}'),
307+
(b'@@', b'', b'@!'),
308+
(b'!', b'', b'@!'),
309+
(b"YWJj\n", b"abc", b'\n'),
310+
(b'YWJj\nYWI=', b'abcab', b'\n'))
311311
funcs = (
312312
base64.b64decode,
313313
base64.standard_b64decode,
314314
base64.urlsafe_b64decode,
315315
)
316-
for bstr, res in tests:
316+
for bstr, res, ignorechars in tests:
317317
for func in funcs:
318318
with self.subTest(bstr=bstr, func=func):
319319
self.assertEqual(func(bstr), res)
@@ -322,6 +322,21 @@ def test_b64decode_invalid_chars(self):
322322
base64.b64decode(bstr, validate=True)
323323
with self.assertRaises(binascii.Error):
324324
base64.b64decode(bstr.decode('ascii'), validate=True)
325+
with self.assertRaises(binascii.Error):
326+
base64.b64decode(bstr, ignorechars=b'')
327+
if ignorechars is not None:
328+
self.assertEqual(
329+
base64.b64decode(bstr, ignorechars=ignorechars),
330+
res)
331+
332+
with self.assertRaises(TypeError):
333+
base64.b64decode(b'', ignorechars=bytearray())
334+
with self.assertRaises(TypeError):
335+
base64.b64decode(b'', ignorechars='')
336+
with self.assertRaises(TypeError):
337+
base64.b64decode(b'', ignorechars=[])
338+
with self.assertRaises(TypeError):
339+
base64.b64decode(b'', ignorechars=None)
325340

326341
# Normal alphabet characters not discarded when alternative given
327342
res = b'\xfb\xef\xff'

Lib/test/test_binascii.py

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -145,16 +145,16 @@ def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
145145

146146
# Test excess data exceptions
147147
assertExcessData(b'ab==a', b'i')
148-
assertExcessData(b'ab===', b'i')
149-
assertExcessData(b'ab====', b'i')
150-
assertExcessData(b'ab==:', b'i')
148+
assertExcessPadding(b'ab===', b'i')
149+
assertExcessPadding(b'ab====', b'i')
150+
assertNonBase64Data(b'ab==:', b'i')
151151
assertExcessData(b'abc=a', b'i\xb7')
152-
assertExcessData(b'abc=:', b'i\xb7')
153-
assertExcessData(b'ab==\n', b'i')
154-
assertExcessData(b'abc==', b'i\xb7')
155-
assertExcessData(b'abc===', b'i\xb7')
156-
assertExcessData(b'abc====', b'i\xb7')
157-
assertExcessData(b'abc=====', b'i\xb7')
152+
assertNonBase64Data(b'abc=:', b'i\xb7')
153+
assertNonBase64Data(b'ab==\n', b'i')
154+
assertExcessPadding(b'abc==', b'i\xb7')
155+
assertExcessPadding(b'abc===', b'i\xb7')
156+
assertExcessPadding(b'abc====', b'i\xb7')
157+
assertExcessPadding(b'abc=====', b'i\xb7')
158158

159159
# Test non-base64 data exceptions
160160
assertNonBase64Data(b'\nab==', b'i')
@@ -170,12 +170,45 @@ def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
170170
assertLeadingPadding(b'=====', b'')
171171
assertDiscontinuousPadding(b'ab=c=', b'i\xb7')
172172
assertDiscontinuousPadding(b'ab=ab==', b'i\xb6\x9b')
173+
assertNonBase64Data(b'ab=:=', b'i')
173174
assertExcessPadding(b'abcd=', b'i\xb7\x1d')
174175
assertExcessPadding(b'abcd==', b'i\xb7\x1d')
175176
assertExcessPadding(b'abcd===', b'i\xb7\x1d')
176177
assertExcessPadding(b'abcd====', b'i\xb7\x1d')
177178
assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
178179

180+
def test_base64_invalidchars(self):
181+
def assertNonBase64Data(data, expected, ignorechars):
182+
data = self.type2test(data)
183+
assert_regex = r'(?i)Only base64 data'
184+
self.assertEqual(binascii.a2b_base64(data), expected)
185+
with self.assertRaisesRegex(binascii.Error, assert_regex):
186+
binascii.a2b_base64(data, strict_mode=True)
187+
with self.assertRaisesRegex(binascii.Error, assert_regex):
188+
binascii.a2b_base64(data, ignorechars=b'')
189+
self.assertEqual(binascii.a2b_base64(data, ignorechars=ignorechars),
190+
expected)
191+
self.assertEqual(binascii.a2b_base64(data, strict_mode=False, ignorechars=b''),
192+
expected)
193+
194+
assertNonBase64Data(b'\nab==', b'i', ignorechars=b'\n')
195+
assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&')
196+
assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n')
197+
assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00')
198+
assertNonBase64Data(b'ab==:', b'i', ignorechars=b':')
199+
assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':')
200+
assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n')
201+
assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
202+
203+
data = self.type2test(b'a\nb==')
204+
with self.assertRaises(TypeError):
205+
binascii.a2b_base64(data, ignorechars=bytearray())
206+
with self.assertRaises(TypeError):
207+
binascii.a2b_base64(data, ignorechars='')
208+
with self.assertRaises(TypeError):
209+
binascii.a2b_base64(data, ignorechars=[])
210+
with self.assertRaises(TypeError):
211+
binascii.a2b_base64(data, ignorechars=None)
179212

180213
def test_base64errors(self):
181214
# Test base64 with invalid padding
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Added the *ignorechars* parameter in :func:`binascii.a2b_base64` and
2+
:func:`base64.b64decode`.

0 commit comments

Comments
 (0)