From f336af1c4b103297de1322bbe00f920f3e58b899 Mon Sep 17 00:00:00 2001
From: Seth Michael Larson
Date: Tue, 20 Jan 2026 14:45:58 -0600
Subject: [PATCH 1/2] [3.12] gh-143925: Reject control characters in data: URL
 mediatypes (cherry picked from commit
 f25509e78e8be6ea73c811ac2b8c928c28841b9f) (cherry picked from commit
 2c9c746077d8119b5bcf5142316992e464594946)

Co-authored-by: Seth Michael Larson
---
 Lib/test/test_urllib.py | 9 +++++++++
 Lib/urllib/request.py | 5 +++++
 .../2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst | 1 +
 3 files changed, 15 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst

diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 9becb72c741511..d496861c6d1bab 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -12,6 +12,8 @@
 from test.support import os_helper
 from test.support import socket_helper
 from test.support import warnings_helper
+from test.support import control_characters_c0
+from test.support.testcase import ExtraAssertions
 import os
 try:
     import ssl
@@ -688,6 +690,13 @@ def test_invalid_base64_data(self):
         # missing padding character
         self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
 
+    def test_invalid_mediatype(self):
+        for c0 in control_characters_c0():
+            self.assertRaises(ValueError,urllib.request.urlopen,
+                f'data:text/html;{c0},data')
+        for c0 in control_characters_c0():
+            self.assertRaises(ValueError,urllib.request.urlopen,
+                f'data:text/html{c0};base64,ZGF0YQ==')
 
 class urlretrieve_FileTests(unittest.TestCase):
     """Test urllib.urlretrieve() on local files"""
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index c7ded0f67fc67e..16449d6ff71939 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1655,6 +1655,11 @@ def data_open(self, req):
         scheme, data = url.split(":",1)
         mediatype, data = data.split(",",1)
 
+        # Disallow control characters within mediatype.
+        if re.search(r"[\x00-\x1F\x7F]", mediatype):
+            raise ValueError(
+                "Control characters not allowed in data: mediatype")
+
         # even base64 encoded data URLs might be quoted so unquote in any case:
         data = unquote_to_bytes(data)
         if mediatype.endswith(";base64"):
diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst
new file mode 100644
index 00000000000000..46109dfbef3ee7
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst
@@ -0,0 +1 @@
+Reject control characters in ``data:`` URL media types.

From 699b8d5a1b6927f78d367d35fb765a7930beef57 Mon Sep 17 00:00:00 2001
From: Seth Michael Larson
Date: Wed, 21 Jan 2026 15:46:59 +0000
Subject: [PATCH 2/2] Remove "ExtraAssertations"

---
 Lib/test/test_urllib.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index d496861c6d1bab..0d46a455664bad 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -13,7 +13,6 @@
 from test.support import socket_helper
 from test.support import warnings_helper
 from test.support import control_characters_c0
-from test.support.testcase import ExtraAssertions
 import os
 try:
     import ssl