From 57b23d15259c32dba80bb093645242f1b3b25365 Mon Sep 17 00:00:00 2001 From: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com> Date: Mon, 20 Apr 2026 14:07:29 +0200 Subject: [PATCH] Fixed #37053 -- Added validate=True to base64.b64decode() calls. --- django/contrib/auth/hashers.py | 2 +- django/core/cache/backends/db.py | 2 +- django/db/models/fields/__init__.py | 14 +++++++++++++- django/http/multipartparser.py | 6 ++++-- docs/releases/6.1.txt | 16 ++++++++++++++++ tests/file_uploads/tests.py | 22 +++++++++++++--------- tests/model_fields/test_binaryfield.py | 9 +++++++++ tests/urlpatterns/converters.py | 2 +- 8 files changed, 58 insertions(+), 15 deletions(-) diff --git a/django/contrib/auth/hashers.py b/django/contrib/auth/hashers.py index 35a295cf1714..814185e51aa1 100644 --- a/django/contrib/auth/hashers.py +++ b/django/contrib/auth/hashers.py @@ -422,7 +422,7 @@ def decode(self, encoded): variety, *_, b64salt, hash = rest.split("$") # Add padding. b64salt += "=" * (-len(b64salt) % 4) - salt = base64.b64decode(b64salt).decode("latin1") + salt = base64.b64decode(b64salt, validate=True).decode("latin1") return { "algorithm": algorithm, "hash": hash, diff --git a/django/core/cache/backends/db.py b/django/core/cache/backends/db.py index 03add8c4a5c1..8245e4f22525 100644 --- a/django/core/cache/backends/db.py +++ b/django/core/cache/backends/db.py @@ -93,7 +93,7 @@ def get_many(self, keys, version=None): expired_keys.append(key) else: value = connection.ops.process_clob(value) - value = pickle.loads(base64.b64decode(value.encode())) + value = pickle.loads(base64.b64decode(value.encode(), validate=True)) result[key_map.get(key)] = value self._base_delete_many(expired_keys) return result diff --git a/django/db/models/fields/__init__.py b/django/db/models/fields/__init__.py index a7ec41bf7536..16f1071396d0 100644 --- a/django/db/models/fields/__init__.py +++ b/django/db/models/fields/__init__.py @@ -1,3 +1,4 @@ +import binascii import copy import datetime 
import decimal @@ -2743,6 +2744,9 @@ def formfield(self, **kwargs): class BinaryField(Field): description = _("Raw binary data") empty_values = [None, b""] + default_error_messages = { + "invalid": _("“%(value)s” is not a valid binary value."), + } def __init__(self, *args, **kwargs): kwargs.setdefault("editable", False) @@ -2800,7 +2804,15 @@ def value_to_string(self, obj): def to_python(self, value): # If it's a string, it should be base64-encoded data if isinstance(value, str): - return memoryview(b64decode(value.encode("ascii"))) + try: + return memoryview(b64decode(value.encode("ascii"), validate=True)) + except (UnicodeEncodeError, binascii.Error): + raise exceptions.ValidationError( + self.error_messages["invalid"], + code="invalid", + params={"value": value}, + ) + return value diff --git a/django/http/multipartparser.py b/django/http/multipartparser.py index 1195b056d955..9048d81c1a36 100644 --- a/django/http/multipartparser.py +++ b/django/http/multipartparser.py @@ -231,7 +231,7 @@ def _parse(self): raw_data = field_stream.read(size=read_size) num_bytes_read += len(raw_data) try: - data = base64.b64decode(raw_data) + data = base64.b64decode(raw_data, validate=True) except binascii.Error: data = raw_data else: @@ -319,7 +319,9 @@ def _parse(self): stripped_chunk = b"".join(stripped_parts) try: - chunk = base64.b64decode(stripped_chunk) + chunk = base64.b64decode( + stripped_chunk, validate=True + ) except Exception as exc: # Since this is only a chunk, any error is # an unfixable error. diff --git a/docs/releases/6.1.txt b/docs/releases/6.1.txt index 2604f047c535..7af3c6af7205 100644 --- a/docs/releases/6.1.txt +++ b/docs/releases/6.1.txt @@ -338,6 +338,10 @@ Models ``OR``, ``XOR``, respectively. These aggregates were previously included only in ``contrib.postgres``. +* :class:`django.db.models.BinaryField` now validates Base64 input strictly. + Invalid Base64 strings now raise ``ValidationError`` instead of being + silently accepted. 
+ Pagination ~~~~~~~~~~ @@ -546,6 +550,18 @@ Miscellaneous ``SimpleUploadedFile`` retain the previous behavior of evaluating based on the ``name`` attribute. +* ``django.http.multipartparser.MultiPartParser`` now uses strict Base64 + validation when decoding encoded request data. Previously, invalid data could + be silently ignored or result in empty values. Invalid data now raises + ``MultiPartParserError``. + +* ``django.core.cache.backends.db.DatabaseCache`` now uses strict Base64 + validation when decoding cached values. Invalid Base64 data will raise an + exception instead of being silently ignored. Cache values generated by Django + are unaffected, as they are always valid Base64. However, existing cache + entries containing non-standard or corrupted Base64 data may no longer be + readable. + .. _deprecated-features-6.1: Features deprecated in 6.1 diff --git a/tests/file_uploads/tests.py b/tests/file_uploads/tests.py index a5c9c36ac5a4..19c50d769ac4 100644 --- a/tests/file_uploads/tests.py +++ b/tests/file_uploads/tests.py @@ -14,6 +14,7 @@ from django.core.files import temp as tempfile from django.core.files.storage import default_storage from django.core.files.uploadedfile import SimpleUploadedFile, UploadedFile +from django.core.handlers.wsgi import WSGIRequest from django.http.multipartparser import ( FILE, MAX_TOTAL_HEADER_SIZE, @@ -172,15 +173,18 @@ def test_base64_invalid_upload(self): ) payload.write(b"\r\n!\r\n") payload.write("--" + client.BOUNDARY + "--\r\n") - r = { - "CONTENT_LENGTH": len(payload), - "CONTENT_TYPE": client.MULTIPART_CONTENT, - "PATH_INFO": "/echo_content/", - "REQUEST_METHOD": "POST", - "wsgi.input": payload, - } - response = self.client.request(**r) - self.assertEqual(response.json()["file"], "") + request = WSGIRequest( + { + "CONTENT_LENGTH": len(payload), + "CONTENT_TYPE": client.MULTIPART_CONTENT, + "PATH_INFO": "/echo_content/", + "REQUEST_METHOD": "POST", + "wsgi.input": payload, + } + ) + msg = "Could not decode base64 
data." + with self.assertRaisesMessage(MultiPartParserError, msg): + request.POST def test_unicode_file_name(self): with sys_tempfile.TemporaryDirectory() as temp_dir: diff --git a/tests/model_fields/test_binaryfield.py b/tests/model_fields/test_binaryfield.py index 9d89fdff0b56..8cd901497470 100644 --- a/tests/model_fields/test_binaryfield.py +++ b/tests/model_fields/test_binaryfield.py @@ -58,3 +58,12 @@ def test_filter_memoryview(self): self.assertSequenceEqual( DataModel.objects.filter(data=memoryview(self.binary_data)), [dm] ) + + def test_invalid_data(self): + invalid_values = [" A", "!"] + for value in invalid_values: + with self.subTest(value=value): + with self.assertRaisesMessage( + ValidationError, f"“{value}” is not a valid binary value." + ): + DataModel(data=value).full_clean() diff --git a/tests/urlpatterns/converters.py b/tests/urlpatterns/converters.py index 9e2af062698f..11daaa6b3049 100644 --- a/tests/urlpatterns/converters.py +++ b/tests/urlpatterns/converters.py @@ -5,7 +5,7 @@ class Base64Converter: regex = r"[a-zA-Z0-9+/]*={0,2}" def to_python(self, value): - return base64.b64decode(value) + return base64.b64decode(value, validate=True) def to_url(self, value): return base64.b64encode(value).decode("ascii")