Skip to content

Commit c42e6d3

Browse files
[3.14] gh-149489: Fix ElementTree serialization to HTML (GH-149490) (GH-150596)
* The content of elements "xmp", "iframe", "noembed", "noframes", and "plaintext" is no longer escaped. * The "plaintext" element no longer has a closing tag. (cherry picked from commit bcd29e4)
1 parent 9b27bdd commit c42e6d3

3 files changed

Lines changed: 32 additions & 4 deletions

File tree

Lib/test/test_xml_etree.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,7 +1247,12 @@ def check(p, expected, namespaces=None):
12471247
{'': 'http://www.w3.org/2001/XMLSchema',
12481248
'ns': 'http://www.w3.org/2001/XMLSchema'})
12491249

1250-
def test_processinginstruction(self):
1250+
def test_comment_serialization(self):
1251+
comm = ET.Comment('<spam> & ham')
1252+
# comments are not escaped
1253+
self.assertEqual(ET.tostring(comm), b'<!--<spam> & ham-->')
1254+
1255+
def test_processinginstruction_serialization(self):
12511256
# Test ProcessingInstruction directly
12521257

12531258
self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
@@ -1256,13 +1261,22 @@ def test_processinginstruction(self):
12561261
b'<?test instruction?>')
12571262

12581263
# Issue #2746
1259-
1264+
# processing instructions are not escaped
12601265
self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
12611266
b'<?test <testing&>?>')
12621267
self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
12631268
b"<?xml version='1.0' encoding='latin-1'?>\n"
12641269
b"<?test <testing&>\xe3?>")
12651270

1271+
@support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
1272+
def test_html_cdata_elems_serialization(self, tag):
1273+
# content of raw text elements is not escaped in html
1274+
tag = tag.title()
1275+
elem = ET.Element(tag)
1276+
elem.text = '<spam>&ham'
1277+
self.assertEqual(ET.tostring(elem, method='html'),
1278+
('<%s><spam>&ham</%s>' % (tag, tag)).encode())
1279+
12661280
def test_html_empty_elems_serialization(self):
12671281
# issue 15970
12681282
# from http://www.w3.org/TR/html401/index/elements.html
@@ -1277,6 +1291,14 @@ def test_html_empty_elems_serialization(self):
12771291
method='html')
12781292
self.assertEqual(serialized, expected)
12791293

1294+
def test_html_plaintext_serialization(self):
1295+
# content of plaintext is not escaped in html
1296+
# no end tag for plaintext
1297+
elem = ET.Element('PlainText')
1298+
elem.text = '<spam>&ham'
1299+
self.assertEqual(ET.tostring(elem, method='html'),
1300+
b'<PlainText><spam>&ham')
1301+
12801302
def test_dump_attribute_order(self):
12811303
# See BPO 34160
12821304
e = ET.Element('cirriculum', status='public', company='example')

Lib/xml/etree/ElementTree.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -917,9 +917,12 @@ def _serialize_xml(write, elem, qnames, namespaces,
917917
if elem.tail:
918918
write(_escape_cdata(elem.tail))
919919

920+
_CDATA_CONTENT_ELEMENTS = {"script", "style", "xmp", "iframe", "noembed",
921+
"noframes", "plaintext"}
922+
920923
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
921924
"img", "input", "isindex", "link", "meta", "param", "source",
922-
"track", "wbr"}
925+
"track", "wbr", "plaintext"}
923926

924927
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
925928
tag = elem.tag
@@ -960,7 +963,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
960963
write(">")
961964
ltag = tag.lower()
962965
if text:
963-
if ltag == "script" or ltag == "style":
966+
if ltag in _CDATA_CONTENT_ELEMENTS:
964967
write(text)
965968
else:
966969
write(_escape_cdata(text))
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix :mod:`~xml.etree.ElementTree` serialization to HTML. The content of
2+
elements "xmp", "iframe", "noembed", "noframes", and "plaintext" is no longer
3+
escaped. The "plaintext" element no longer has a closing tag.

0 commit comments

Comments
 (0)