diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 8a7ec0076ff036..e5a60ef80dc3e6 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -173,7 +173,7 @@ def assertEqualElements(self, alice, bob):
# --------------------------------------------------------------------
# element tree tests
-class ElementTreeTest(unittest.TestCase):
+class ElementTreeTest(ElementTestCase, unittest.TestCase):
def serialize_check(self, elem, expected):
self.assertEqual(serialize(elem), expected)
@@ -972,6 +972,79 @@ def test_qname(self):
self.assertNotEqual(q1, 'ns:tag')
self.assertEqual(q1, '{ns}tag')
+ def test_namespace_attribs(self):
+ # Unprefixed attributes are unqualified even if a default
+ # namespace is in effect. (This is a little unclear in some
+ # versions of the XML namespaces spec but is clarified in errata
+ # and other versions.) See bugs.python.org issue 17088.
+ #
+ # The reasoning behind this, alluded to in the spec, is that
+ # attribute meanings already depend on the element they're
+ # attached to; attributes have always lived in per-element
+ # namespaces even before explicit XML namespaces were
+ # introduced. For that reason qualified attribute names are
+ # only really needed when one XML module defines attributes
+ # that can be placed on elements defined in a different module
+ # (such as happens with XLINK or, for that matter, the XML
+ # namespace spec itself).
+ e = ET.XML(
+ ''
+ ''
+ ''
+ ''
+ '')
+ self.assertEqual(e.tag, '{space1}elt')
+ self.assertEqual(e.get('foo'), 'value')
+ self.assertIsNone(e.get('{space1}foo'))
+ self.assertIsNone(e.get('{space2}foo'))
+ self.assertEqual(e[0].tag, '{space1}foo')
+ self.assertEqual(e[0].attrib, { 'foo': 'value2',
+ '{space2}foo': 'value3' })
+ self.assertEqual(e[1].tag, '{space2}foo')
+ self.assertEqual(e[1].attrib, { 'foo': 'value4',
+ '{space1}foo': 'value5',
+ '{space2}foo': 'value6' })
+ self.assertEqual(e[2].tag, 'foo')
+ self.assertEqual(e[2].attrib, { 'foo': 'value7',
+ '{space1}foo': 'value8' })
+
+ serialized1 = (
+ ''
+ ''
+ ''
+ ''
+ '')
+ self.assertEqual(serialize(e), serialized1)
+ self.assertEqualElements(e, ET.XML(serialized1))
+
+ # Writing with a default namespace must fail while e[2] still has an unqualified tag.
+ with self.assertRaisesRegex(ValueError,
+ 'cannot use non-qualified name.* with default_namespace option'):
+ serialize(e, default_namespace="space1")
+
+ # Remove the unqualified element (e[2]) from the tree so that the
+ # default_namespace serializations below can be checked.
+ del e[2]
+
+ # Serialization can require a namespace prefix to be declared for
+ # the default namespace even if no elements use that prefix, in
+ # order to write an attribute name in that namespace.
+ serialized2 = (
+ ''
+ ''
+ ''
+ '')
+ self.assertEqual(serialize(e, default_namespace="space2"), serialized2)
+ self.assertEqualElements(e, ET.XML(serialized2))
+
+ serialized3 = (
+ ''
+ ''
+ ''
+ '')
+ self.assertEqual(serialize(e, default_namespace="space1"), serialized3)
+ self.assertEqualElements(e, ET.XML(serialized3))
+
def test_doctype_public(self):
# Test PUBLIC doctype.
@@ -1619,18 +1692,17 @@ def test_bug_200709_default_namespace(self):
s = ET.SubElement(e, "{default}elem")
s = ET.SubElement(e, "{not-default}elem")
self.assertEqual(serialize(e, default_namespace="default"), # 2
- ''
+ ''
''
- ''
+ ''
'')
e = ET.Element("{default}elem")
s = ET.SubElement(e, "{default}elem")
s = ET.SubElement(e, "elem") # unprefixed name
- with self.assertRaises(ValueError) as cm:
+ with self.assertRaisesRegex(ValueError,
+ 'cannot use non-qualified name.* with default_namespace option'):
serialize(e, default_namespace="default") # 3
- self.assertEqual(str(cm.exception),
- 'cannot use non-qualified names with default_namespace option')
def test_bug_200709_register_namespace(self):
e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index c1cf483cf56bb2..0683020352de97 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -836,19 +836,21 @@ def _get_writer(file_or_filename, encoding):
def _namespaces(elem, default_namespace=None):
# identify namespaces used in this tree
- # maps qnames to *encoded* prefix:local names
- qnames = {None: None}
+ # maps qnames to pairs of *encoded* prefix:local names:
+ # (form used for element tags and QName values,
+ # form used for attribute names)
+ qnames = {None: (None, None)}
# maps uri:s to prefixes
namespaces = {}
- if default_namespace:
- namespaces[default_namespace] = ""
- def add_qname(qname):
+ def serialize_qname(qname, is_attr):
# calculate serialized qname representation
- try:
- if qname[:1] == "{":
- uri, tag = qname[1:].rsplit("}", 1)
+ if qname[:1] == "{":
+ uri, local = qname[1:].rsplit("}", 1)
+ if not is_attr and uri == default_namespace:
+ prefix = ""
+ else:
prefix = namespaces.get(uri)
if prefix is None:
prefix = _namespace_map.get(uri)
@@ -856,43 +858,59 @@ def add_qname(qname):
prefix = "ns%d" % len(namespaces)
if prefix != "xml":
namespaces[uri] = prefix
- if prefix:
- qnames[qname] = "%s:%s" % (prefix, tag)
- else:
- qnames[qname] = tag # default element
+ if prefix:
+ return "%s:%s" % (prefix, local)
else:
- if default_namespace:
- # FIXME: can this be handled in XML 1.0?
- raise ValueError(
- "cannot use non-qualified names with "
- "default_namespace option"
- )
- qnames[qname] = qname
- except TypeError:
- _raise_serialization_error(qname)
+ return local # default element
+ else:
+ if not is_attr and default_namespace:
+ # FIXME: can this be handled in XML 1.0?
+ raise ValueError(
+ "cannot use non-qualified name (<%s>) with "
+ "default_namespace option" % qname
+ )
+ return qname
+
+    def add_qname(qname, is_attr=False):
+        # Record the serialized spelling of qname in the qnames table.
+        #
+        # Each table entry is a (tag form, attribute form) pair.  Only
+        # the form selected by is_attr is computed here; the other one
+        # stays None until it is requested, except that without a
+        # default namespace both forms are the same string and are
+        # filled in together.
+        ser_tag, ser_attr = qnames.get(qname, (None, None))
+        try:
+            if is_attr:
+                if ser_attr is None:
+                    ser_attr = serialize_qname(qname, True)
+                    if not default_namespace:
+                        ser_tag = ser_attr
+            else:
+                if ser_tag is None:
+                    ser_tag = serialize_qname(qname, False)
+                    if not default_namespace:
+                        ser_attr = ser_tag
+        except TypeError:
+            # qname could not be sliced/split as a string (for example
+            # the text of a QName that is not a str); report it through
+            # the same helper used for unserializable tags and keys
+            _raise_serialization_error(qname)
+        qnames[qname] = (ser_tag, ser_attr)
# populate qname and namespaces table
for elem in elem.iter():
tag = elem.tag
if isinstance(tag, QName):
- if tag.text not in qnames:
- add_qname(tag.text)
+ add_qname(tag.text)
elif isinstance(tag, str):
- if tag not in qnames:
- add_qname(tag)
+ add_qname(tag)
elif tag is not None and tag is not Comment and tag is not PI:
_raise_serialization_error(tag)
for key, value in elem.items():
if isinstance(key, QName):
key = key.text
- if key not in qnames:
- add_qname(key)
- if isinstance(value, QName) and value.text not in qnames:
+ elif not isinstance(key, str):
+ _raise_serialization_error(key)
+ add_qname(key, is_attr=True)
+ if isinstance(value, QName):
add_qname(value.text)
text = elem.text
- if isinstance(text, QName) and text.text not in qnames:
+ if isinstance(text, QName):
add_qname(text.text)
- return qnames, namespaces
+
+ prefix_map = {prefix: ns for ns, prefix in namespaces.items()}
+ if default_namespace:
+ prefix_map[""] = default_namespace
+ return qnames, prefix_map
def _serialize_xml(write, elem, qnames, namespaces,
short_empty_elements, **kwargs):
@@ -903,7 +921,7 @@ def _serialize_xml(write, elem, qnames, namespaces,
elif tag is ProcessingInstruction:
write("%s?>" % text)
else:
- tag = qnames[tag]
+ tag = qnames[tag][0]
if tag is None:
if text:
write(_escape_cdata(text))
@@ -915,8 +933,7 @@ def _serialize_xml(write, elem, qnames, namespaces,
items = list(elem.items())
if items or namespaces:
if namespaces:
- for v, k in sorted(namespaces.items(),
- key=lambda x: x[1]): # sort on prefix
+ for k, v in sorted(namespaces.items()):
if k:
k = ":" + k
write(" xmlns%s=\"%s\"" % (
@@ -927,10 +944,10 @@ def _serialize_xml(write, elem, qnames, namespaces,
if isinstance(k, QName):
k = k.text
if isinstance(v, QName):
- v = qnames[v.text]
+ v = qnames[v.text][0]
else:
v = _escape_attrib(v)
- write(" %s=\"%s\"" % (qnames[k], v))
+ write(" %s=\"%s\"" % (qnames[k][1], v))
if text or len(elem) or not short_empty_elements:
write(">")
if text:
@@ -960,7 +977,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
elif tag is ProcessingInstruction:
write("%s?>" % _escape_cdata(text))
else:
- tag = qnames[tag]
+ tag = qnames[tag][0]
if tag is None:
if text:
write(_escape_cdata(text))
@@ -971,8 +988,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
items = list(elem.items())
if items or namespaces:
if namespaces:
- for v, k in sorted(namespaces.items(),
- key=lambda x: x[1]): # sort on prefix
+ for k, v in sorted(namespaces.items()):
if k:
k = ":" + k
write(" xmlns%s=\"%s\"" % (
@@ -983,11 +999,11 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
if isinstance(k, QName):
k = k.text
if isinstance(v, QName):
- v = qnames[v.text]
+ v = qnames[v.text][0]
else:
v = _escape_attrib_html(v)
# FIXME: handle boolean attributes
- write(" %s=\"%s\"" % (qnames[k], v))
+ write(" %s=\"%s\"" % (qnames[k][1], v))
write(">")
ltag = tag.lower()
if text:
diff --git a/Misc/NEWS.d/next/Library/2018-12-09-14-58-08.bpo-17088.AHFvrn.rst b/Misc/NEWS.d/next/Library/2018-12-09-14-58-08.bpo-17088.AHFvrn.rst
new file mode 100644
index 00000000000000..e92d690d4331fd
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-12-09-14-58-08.bpo-17088.AHFvrn.rst
@@ -0,0 +1,5 @@
+ElementTree's serialization of attribute names when a default namespace is
+passed was corrected. Previously, unqualified attribute names would be
+rejected unnecessarily, while attributes in the default namespace would have
+their prefix stripped, which goes against the rules for namespace defaulting
+and uniqueness of attributes as specified in the XML namespaces spec.