Skip to content

Commit 5fcd644

Browse files
committed
gh-143925: Reject control characters in data: URL mediatypes
1 parent d8850aa commit 5fcd644

File tree

3 files changed

+14
-0
lines changed

3 files changed

+14
-0
lines changed

Lib/test/test_urllib.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from test import support
1111
from test.support import os_helper
1212
from test.support import socket_helper
13+
from test.support import control_characters_c0
1314
import os
1415
import socket
1516
try:
@@ -590,6 +591,13 @@ def test_invalid_base64_data(self):
590591
# missing padding character
591592
self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
592593

594+
def test_invalid_mediatype(self):
    """Check that urlopen() raises ValueError for bad data: mediatypes.

    Each character produced by control_characters_c0() is embedded in
    the mediatype portion of a ``data:`` URL (the text before the first
    comma), in two positions, and opening the URL must fail.
    """
    open_url = urllib.request.urlopen

    # Character placed after the ';', where a parameter would go.
    for ctrl in control_characters_c0():
        with self.assertRaises(ValueError):
            open_url(f'data:text/html;{ctrl},data')

    # Character placed right after the type/subtype, before ';base64'.
    for ctrl in control_characters_c0():
        with self.assertRaises(ValueError):
            open_url(f'data:text/html{ctrl};base64,ZGF0YQ==')
593601

594602
class urlretrieve_FileTests(unittest.TestCase):
595603
"""Test urllib.urlretrieve() on local files"""

Lib/urllib/request.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,11 @@ def data_open(self, req):
16361636
scheme, data = url.split(":",1)
16371637
mediatype, data = data.split(",",1)
16381638

1639+
# Disallow control characters within mediatype.
1640+
if re.search(r"[\x00-\x1F\x7F]", mediatype):
1641+
raise ValueError(
1642+
"Control characters not allowed in data: mediatype")
1643+
16391644
# even base64 encoded data URLs might be quoted so unquote in any case:
16401645
data = unquote_to_bytes(data)
16411646
if mediatype.endswith(";base64"):
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Reject control characters in ``data:`` URL media types.

0 commit comments

Comments
 (0)