Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1278,7 +1278,13 @@ def check(p, expected, namespaces=None):
{'': 'http://www.w3.org/2001/XMLSchema',
'ns': 'http://www.w3.org/2001/XMLSchema'})

def test_processinginstruction(self):
def test_comment_serialization(self):
# Comment text containing '<', '>' and '&' must be emitted verbatim
# (no entity escaping) whether tostring() is called with no method
# argument, with method='html', or with method='text'.
comm = ET.Comment('<spam> & ham')
self.assertEqual(ET.tostring(comm), b'<!--<spam> & ham-->')
self.assertEqual(ET.tostring(comm, method='html'), b'<!--<spam> & ham-->')
# The 'text' method drops the comment delimiters and keeps only the text.
self.assertEqual(ET.tostring(comm, method='text'), b'<spam> & ham')

def test_processinginstruction_serialization(self):
# Test ProcessingInstruction directly

self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
Expand All @@ -1293,6 +1299,21 @@ def test_processinginstruction(self):
self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
b"<?xml version='1.0' encoding='latin-1'?>\n"
b"<?test <testing&>\xe3?>")
self.assertEqual(ET.tostring(ET.PI('test', 'ham & eggs < spam'), method='html'),
b'<?test ham & eggs < spam?>')

def test_empty_attribute_serialization(self):
# An attribute whose value is None is rejected (TypeError) when
# tostring() is called without a method argument, but with
# method='html' it is written as a bare attribute name with no '="..."'.
elem = ET.Element('tag', attrib={'attr': None})
self.assertRaises(TypeError, ET.tostring, elem)
self.assertEqual(ET.tostring(elem, method='html'), b'<tag attr></tag>')

@support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
def test_html_cdata_elems_serialization(self, tag):
# For each listed tag, text containing '<', '>' and '&' must be written
# unescaped by the HTML serializer.
# The tag is title-cased first -- presumably to check that the
# serializer matches these element names case-insensitively.
tag = tag.title()
elem = ET.Element(tag)
elem.text = '<spam>&ham'
self.assertEqual(ET.tostring(elem, method='html'),
('<%s><spam>&ham</%s>' % (tag, tag)).encode())

def test_html_empty_elems_serialization(self):
# issue 15970
Expand All @@ -1308,6 +1329,12 @@ def test_html_empty_elems_serialization(self):
method='html')
self.assertEqual(serialized, expected)

def test_html_plaintext_serialization(self):
# A "plaintext" element (mixed case here) is expected to be serialized
# with its text unescaped and with no closing tag.
elem = ET.Element('PlainText')
elem.text = '<spam>&ham'
self.assertEqual(ET.tostring(elem, method='html'),
b'<PlainText><spam>&ham')

def test_dump_attribute_order(self):
# See BPO 34160
e = ET.Element('cirriculum', status='public', company='example')
Expand Down
24 changes: 15 additions & 9 deletions Lib/xml/etree/ElementTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,17 +907,20 @@ def _serialize_xml(write, elem, qnames, namespaces,
if elem.tail:
write(_escape_cdata(elem.tail))

# Lower-case names of HTML elements whose text content _serialize_html()
# writes as-is instead of passing it through _escape_cdata(). The lookup is
# done against the lower-cased tag name.
_CDATA_CONTENT_ELEMENTS = {"script", "style", "xmp", "iframe", "noembed",
"noframes", "plaintext"}

HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
"img", "input", "isindex", "link", "meta", "param", "source",
"track", "wbr"}
"track", "wbr", "plaintext"}

def _serialize_html(write, elem, qnames, namespaces, **kwargs):
tag = elem.tag
text = elem.text
if tag is Comment:
write("<!--%s-->" % _escape_cdata(text))
write("<!--%s-->" % text)
elif tag is ProcessingInstruction:
write("<?%s?>" % _escape_cdata(text))
write("<?%s?>" % text)
else:
tag = qnames[tag]
if tag is None:
Expand All @@ -941,16 +944,19 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
for k, v in items:
if isinstance(k, QName):
k = k.text
if isinstance(v, QName):
v = qnames[v.text]
k = qnames[k]
if v is None:
write(" %s" % k)
else:
v = _escape_attrib_html(v)
# FIXME: handle boolean attributes
write(" %s=\"%s\"" % (qnames[k], v))
if isinstance(v, QName):
v = qnames[v.text]
else:
v = _escape_attrib_html(v)
write(" %s=\"%s\"" % (k, v))
write(">")
ltag = tag.lower()
if text:
if ltag == "script" or ltag == "style":
if ltag in _CDATA_CONTENT_ELEMENTS:
write(text)
else:
write(_escape_cdata(text))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fix :mod:`~xml.etree.ElementTree` serialization to HTML. The content of
comments, processing instructions and elements "xmp", "iframe", "noembed",
"noframes", and "plaintext" is no longer escaped. The "plaintext" element no
longer has a closing tag. Add support for empty attributes (with value
``None``).
Loading