diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index f9936288fd42cdc..025893932e82a16 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -599,7 +599,7 @@ task isn't already covered by the URL parsing functions above. .. function:: urlencode(query, doseq=False, safe='', encoding=None, \ errors=None, quote_via=quote_plus) - Convert a mapping object or a sequence of two-element tuples, which may + Convert a mapping object or an iterable of two-element iterables, which may contain :class:`str` or :class:`bytes` objects, to a percent-encoded ASCII text string. If the resultant string is to be used as a *data* for POST operation with the :func:`~urllib.request.urlopen` function, then @@ -616,13 +616,14 @@ task isn't already covered by the URL parsing functions above. and not encode '/' characters. For maximum control of what is quoted, use ``quote`` and specify a value for *safe*. - When a sequence of two-element tuples is used as the *query* - argument, the first element of each tuple is a key and the second is a - value. The value element in itself can be a sequence and in that case, if - the optional parameter *doseq* is evaluates to ``True``, individual - ``key=value`` pairs separated by ``'&'`` are generated for each element of - the value sequence for the key. The order of parameters in the encoded - string will match the order of parameter tuples in the sequence. + When an iterable of two-element iterables is used as the *query* argument, + the first element of each pair is a key and the second is a value. The + value element is interpreted as a string when the *doseq* parameter + evaluates to ``False``. When it evaluates to ``True`` and the value is an + iterable, then individual ``key=value`` pairs separated by ``'&'`` are + generated for each element of the value iterable for the key. The order of + parameters in the encoded string will match the order of parameter pairs in + the iterable. 
The *safe*, *encoding*, and *errors* parameters are passed down to *quote_via* (the *encoding* and *errors* parameters are only passed @@ -640,6 +641,9 @@ task isn't already covered by the URL parsing functions above. .. versionadded:: 3.5 *quote_via* parameter. + .. versionchanged:: 3.8 + *doseq=True* iterates on generators and iterators. + .. seealso:: diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index d0365ecab72ccb1..d4969fbaaf4d854 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -723,10 +723,6 @@ def test_portseparator(self): self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), (b'http',b'www.python.org:80',b'',b'',b'',b'')) - def test_usingsys(self): - # Issue 3314: sys module is used in the error - self.assertRaises(TypeError, urllib.parse.urlencode, "foo") - def test_anyscheme(self): # Issue 7904: s3://foo.com/stuff has netloc "foo.com". self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"), @@ -902,6 +898,39 @@ def __str__(self): result = urllib.parse.urlencode({'a': Trivial()}, True) self.assertEqual(result, 'a=trivial') + def test_urlencode_generators(self): + def gen(): + yield from range(2) + + result = urllib.parse.urlencode({'a': gen()}, True) + self.assertEqual(result, 'a=0&a=1') + + def test_urlencode_iterable_of_iterable_pairs(self): + def pair(start): + yield ['key', range(start, start+2)] + + def elements(): + for i in [0, 2]: + yield from pair(i) + + result = urllib.parse.urlencode(elements(), doseq=True) + self.assertEqual(result, 'key=0&key=1&key=2&key=3') + + result = urllib.parse.urlencode(elements(), doseq=False) + # str is called on the range, resulting in values like 'range(0, 2)' + self.assertEqual(result, 'key=range%280%2C+2%29&key=range%282%2C+4%29') + + def test_urlencode_two_chars(self): + """Do not interpret strings of length 2 as key value pair.""" + self.assertRaises(TypeError, urllib.parse.urlencode, ['ab'], doseq=True) + self.assertRaises(TypeError, 
urllib.parse.urlencode, ['ab'], doseq=False) + + def test_urlencode_sequence_of_non_pairs(self): + self.assertRaises(ValueError, urllib.parse.urlencode, [(1,)], doseq=True) + self.assertRaises(ValueError, urllib.parse.urlencode, [(1,)], doseq=False) + self.assertRaises(ValueError, urllib.parse.urlencode, [(1, 2, 3)], doseq=True) + self.assertRaises(ValueError, urllib.parse.urlencode, [(1, 2, 3)], doseq=False) + def test_urlencode_quote_via(self): result = urllib.parse.urlencode({'a': 'some value'}) self.assertEqual(result, "a=some+value") diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index dfba704144e9b4b..b83b790fef08fd8 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -28,7 +28,6 @@ """ import re -import sys import collections import warnings @@ -879,82 +878,61 @@ def quote_from_bytes(bs, safe='/'): def urlencode(query, doseq=False, safe='', encoding=None, errors=None, quote_via=quote_plus): - """Encode a dict or sequence of two-element tuples into a URL query string. + """Encode a dict or iterable of two-element iterables into a URL query + string. - If any values in the query arg are sequences and doseq is true, each - sequence element is converted to a separate parameter. + If any values in the query argument are iterables and doseq is true, each + iterable element is converted to a separate parameter. - If the query arg is a sequence of two-element tuples, the order of the - parameters in the output will match the order of parameters in the - input. + If the query argument is an iterable, the order of the parameters in the + output will match the order of parameters in the input. - The components of a query arg may each be either a string or a bytes type. + The components of a query argument may each be either a string or a bytes + type. The safe, encoding, and errors parameters are passed down to the function specified by quote_via (encoding and errors only if a component is a str). 
""" - if hasattr(query, "items"): query = query.items() - else: - # It's a bother at times that strings and string-like objects are - # sequences. - try: - # non-sequence items should not work with len() - # non-empty strings will fail this - if len(query) and not isinstance(query[0], tuple): - raise TypeError - # Zero-length sequences of all types will get here and succeed, - # but that's a minor nit. Since the original implementation - # allowed empty dicts that type of behavior probably should be - # preserved for consistency - except TypeError: - ty, va, tb = sys.exc_info() - raise TypeError("not a valid non-string sequence " - "or mapping object").with_traceback(tb) l = [] - if not doseq: - for k, v in query: - if isinstance(k, bytes): - k = quote_via(k, safe) - else: - k = quote_via(str(k), safe, encoding, errors) + for pair in query: + # Evaluate then check, in case pair is an iterator. + k, v = pair + if isinstance(pair, (str, bytes)): + # 'ab' would unpack as k='a' and v='b'. + raise TypeError("not a valid non-string sequence or mapping object") + + if isinstance(k, bytes): + k = quote_via(k, safe) + else: + k = quote_via(str(k), safe, encoding, errors) - if isinstance(v, bytes): - v = quote_via(v, safe) - else: - v = quote_via(str(v), safe, encoding, errors) + if isinstance(v, bytes): + v = quote_via(v, safe) l.append(k + '=' + v) - else: - for k, v in query: - if isinstance(k, bytes): - k = quote_via(k, safe) - else: - k = quote_via(str(k), safe, encoding, errors) - - if isinstance(v, bytes): - v = quote_via(v, safe) - l.append(k + '=' + v) - elif isinstance(v, str): - v = quote_via(v, safe, encoding, errors) + elif isinstance(v, str): + v = quote_via(v, safe, encoding, errors) + l.append(k + '=' + v) + elif not doseq: + v = quote_via(str(v), safe, encoding, errors) + l.append(k + '=' + v) + else: # doseq=True + try: + iter(v) + except TypeError: + # not a sequence + v = quote_via(str(v), safe, encoding, errors) l.append(k + '=' + v) else: - try: - # Is 
this a sufficient test for sequence-ness? - x = len(v) - except TypeError: - # not a sequence - v = quote_via(str(v), safe, encoding, errors) - l.append(k + '=' + v) - else: - # loop over the sequence - for elt in v: - if isinstance(elt, bytes): - elt = quote_via(elt, safe) - else: - elt = quote_via(str(elt), safe, encoding, errors) - l.append(k + '=' + elt) + # loop over the sequence + for elt in v: + if isinstance(elt, bytes): + elt = quote_via(elt, safe) + else: + elt = quote_via(str(elt), safe, encoding, errors) + l.append(k + '=' + elt) return '&'.join(l) diff --git a/Misc/NEWS.d/next/Library/2017-11-02-20-47-19.bpo-31706.wA-rWj.rst b/Misc/NEWS.d/next/Library/2017-11-02-20-47-19.bpo-31706.wA-rWj.rst new file mode 100644 index 000000000000000..115e1eed7b1a596 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-11-02-20-47-19.bpo-31706.wA-rWj.rst @@ -0,0 +1,3 @@ +Made ``urllib.parse.urlencode`` iterate on any iterable object when the +``doseq`` parameter is ``True``. Each pair in the iterable can now be an +iterable (while only ``tuple`` was accepted before).