diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 8a7ec0076ff036..e5a60ef80dc3e6 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -173,7 +173,7 @@ def assertEqualElements(self, alice, bob): # -------------------------------------------------------------------- # element tree tests -class ElementTreeTest(unittest.TestCase): +class ElementTreeTest(ElementTestCase, unittest.TestCase): def serialize_check(self, elem, expected): self.assertEqual(serialize(elem), expected) @@ -972,6 +972,79 @@ def test_qname(self): self.assertNotEqual(q1, 'ns:tag') self.assertEqual(q1, '{ns}tag') + def test_namespace_attribs(self): + # Unprefixed attributes are unqualified even if a default + # namespace is in effect. (This is a little unclear in some + # versions of the XML TR but is clarified in errata and other + # versions.) See bugs.python.org issue 17088. + # + # The reasoning behind this, alluded to in the spec, is that + # attribute meanings already depend on the element they're + # attached to; attributes have always lived in per-element + # namespaces even before explicit XML namespaces were + # introduced. For that reason qualified attribute names are + # only really needed when one XML module defines attributes + # that can be placed on elements defined in a different module + # (such as happens with XLINK or, for that matter, the XML + # namespace spec itself). + e = ET.XML( + '' + '' + '' + '' + '') + self.assertEqual(e.tag, '{space1}elt') + self.assertEqual(e.get('foo'), 'value') + self.assertIsNone(e.get('{space1}foo')) + self.assertIsNone(e.get('{space2}foo')) + self.assertEqual(e[0].tag, '{space1}foo') + self.assertEqual(e[0].attrib, { 'foo': 'value2', + '{space2}foo': 'value3' }) + self.assertEqual(e[1].tag, '{space2}foo') + self.assertEqual(e[1].attrib, { 'foo': 'value4', + '{space1}foo': 'value5', + '{space2}foo': 'value6' }) + self.assertEqual(e[2].tag, 'foo') + self.assertEqual(e[2].attrib, { 'foo': 'value7', + '{space1}foo': 'value8' }) + + serialized1 = ( + '' + '' + '' + '' + '') + self.assertEqual(serialize(e), serialized1) + self.assertEqualElements(e, ET.XML(serialized1)) + + # Test writing with a default namespace. + with self.assertRaisesRegex(ValueError, + 'cannot use non-qualified name.* with default_namespace option'): + serialize(e, default_namespace="space1") + + # Remove the unqualified element from the tree so we can test + # further. + del e[2] + + # Serialization can require a namespace prefix to be declared for + # space1 even if no elements use that prefix, in order to + # write an attribute name in that namespace. + serialized2 = ( + '' + '' + '' + '') + self.assertEqual(serialize(e, default_namespace="space2"), serialized2) + self.assertEqualElements(e, ET.XML(serialized2)) + + serialized3 = ( + '' + '' + '' + '') + self.assertEqual(serialize(e, default_namespace="space1"), serialized3) + self.assertEqualElements(e, ET.XML(serialized3)) + def test_doctype_public(self): # Test PUBLIC doctype. @@ -1619,18 +1692,17 @@ def test_bug_200709_default_namespace(self): s = ET.SubElement(e, "{default}elem") s = ET.SubElement(e, "{not-default}elem") self.assertEqual(serialize(e, default_namespace="default"), # 2 - '' + '' '' - '' + '' '') e = ET.Element("{default}elem") s = ET.SubElement(e, "{default}elem") s = ET.SubElement(e, "elem") # unprefixed name - with self.assertRaises(ValueError) as cm: + with self.assertRaisesRegex(ValueError, + 'cannot use non-qualified name.* with default_namespace option'): serialize(e, default_namespace="default") # 3 - self.assertEqual(str(cm.exception), - 'cannot use non-qualified names with default_namespace option') def test_bug_200709_register_namespace(self): e = ET.Element("{http://namespace.invalid/does/not/exist/}title") diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index c1cf483cf56bb2..0683020352de97 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -836,19 +836,21 @@ def _get_writer(file_or_filename, encoding): def _namespaces(elem, default_namespace=None): # identify namespaces used in this tree - # maps qnames to *encoded* prefix:local names - qnames = {None: None} + # maps qnames to *encoded* prefix:local names, + # value is a pair of prefix:local strings, + # second value is for attribute names + qnames = {None: (None, None)} # maps uri:s to prefixes namespaces = {} - if default_namespace: - namespaces[default_namespace] = "" - def add_qname(qname): + def serialize_qname(qname, is_attr): # calculate serialized qname representation - try: - if qname[:1] == "{": - uri, tag = qname[1:].rsplit("}", 1) + if qname[:1] == "{": + uri, local = qname[1:].rsplit("}", 1) + if not is_attr and uri == default_namespace: + prefix = "" + else: prefix = namespaces.get(uri) if prefix is None: prefix = _namespace_map.get(uri) @@ -856,43 +858,59 @@ def add_qname(qname): prefix = "ns%d" % len(namespaces) if prefix != "xml": namespaces[uri] = prefix - if prefix: - qnames[qname] = "%s:%s" % (prefix, tag) - else: - qnames[qname] = tag # default element + if prefix: + return "%s:%s" % (prefix, local) else: - if default_namespace: - # FIXME: can this be handled in XML 1.0? - raise ValueError( - "cannot use non-qualified names with " - "default_namespace option" - ) - qnames[qname] = qname - except TypeError: - _raise_serialization_error(qname) + return local # default element + else: + if not is_attr and default_namespace: + # FIXME: can this be handled in XML 1.0? + raise ValueError( + "cannot use non-qualified name (<%s>) with " + "default_namespace option" % qname + ) + return qname + + def add_qname(qname, is_attr=False): + ser_tag, ser_attr = qnames.get(qname, (None, None)) + if is_attr: + if ser_attr is None: + ser_attr = serialize_qname(qname, True) + if not default_namespace: + ser_tag = ser_attr + qnames[qname] = (ser_tag, ser_attr) + else: + if ser_tag is None: + ser_tag = serialize_qname(qname, False) + if not default_namespace: + ser_attr = ser_tag + qnames[qname] = (ser_tag, ser_attr) # populate qname and namespaces table for elem in elem.iter(): tag = elem.tag if isinstance(tag, QName): - if tag.text not in qnames: - add_qname(tag.text) + add_qname(tag.text) elif isinstance(tag, str): - if tag not in qnames: - add_qname(tag) + add_qname(tag) elif tag is not None and tag is not Comment and tag is not PI: _raise_serialization_error(tag) for key, value in elem.items(): if isinstance(key, QName): key = key.text - if key not in qnames: - add_qname(key) - if isinstance(value, QName) and value.text not in qnames: + elif not isinstance(key, str): + _raise_serialization_error(key) + add_qname(key, is_attr=True) + if isinstance(value, QName): add_qname(value.text) text = elem.text - if isinstance(text, QName) and text.text not in qnames: + if isinstance(text, QName): add_qname(text.text) - return qnames, namespaces + + prefix_map = {prefix: ns for ns, prefix in namespaces.items()} + if default_namespace: + prefix_map[""] = default_namespace + return qnames, prefix_map def _serialize_xml(write, elem, qnames, namespaces, short_empty_elements, **kwargs): @@ -903,7 +921,7 @@ def _serialize_xml(write, elem, qnames, namespaces, elif tag is ProcessingInstruction: write("" % text) else: - tag = qnames[tag] + tag = qnames[tag][0] if tag is None: if text: write(_escape_cdata(text)) @@ -915,8 +933,7 @@ def _serialize_xml(write, elem, qnames, namespaces, items = list(elem.items()) if items or namespaces: if namespaces: - for v, k in sorted(namespaces.items(), - key=lambda x: x[1]): # sort on prefix + for k, v in sorted(namespaces.items()): if k: k = ":" + k write(" xmlns%s=\"%s\"" % ( @@ -927,10 +944,10 @@ def _serialize_xml(write, elem, qnames, namespaces, if isinstance(k, QName): k = k.text if isinstance(v, QName): - v = qnames[v.text] + v = qnames[v.text][0] else: v = _escape_attrib(v) - write(" %s=\"%s\"" % (qnames[k], v)) + write(" %s=\"%s\"" % (qnames[k][1], v)) if text or len(elem) or not short_empty_elements: write(">") if text: @@ -960,7 +977,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs): elif tag is ProcessingInstruction: write("" % _escape_cdata(text)) else: - tag = qnames[tag] + tag = qnames[tag][0] if tag is None: if text: write(_escape_cdata(text)) @@ -971,8 +988,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs): items = list(elem.items()) if items or namespaces: if namespaces: - for v, k in sorted(namespaces.items(), - key=lambda x: x[1]): # sort on prefix + for k, v in sorted(namespaces.items()): if k: k = ":" + k write(" xmlns%s=\"%s\"" % ( @@ -983,11 +999,11 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs): if isinstance(k, QName): k = k.text if isinstance(v, QName): - v = qnames[v.text] + v = qnames[v.text][0] else: v = _escape_attrib_html(v) # FIXME: handle boolean attributes - write(" %s=\"%s\"" % (qnames[k], v)) + write(" %s=\"%s\"" % (qnames[k][1], v)) write(">") ltag = tag.lower() if text: diff --git a/Misc/NEWS.d/next/Library/2018-12-09-14-58-08.bpo-17088.AHFvrn.rst b/Misc/NEWS.d/next/Library/2018-12-09-14-58-08.bpo-17088.AHFvrn.rst new file mode 100644 index 00000000000000..e92d690d4331fd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-12-09-14-58-08.bpo-17088.AHFvrn.rst @@ -0,0 +1,5 @@ +ElementTree's serialization of attribute names when a default namespace is +passed was corrected. Previously, unqualified attribute names would be +rejected unnecessarily, while attributes in the default namespace would have +their prefix stripped, which goes against the rules for namespace defaulting +and uniqueness of attributes as specified in the XML namespaces spec.