Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions Doc/library/urllib.parse.rst
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ task isn't already covered by the URL parsing functions above.
.. function:: urlencode(query, doseq=False, safe='', encoding=None, \
errors=None, quote_via=quote_plus)

Convert a mapping object or a sequence of two-element tuples, which may
Convert a mapping object or an iterable of two-element iterables, which may
contain :class:`str` or :class:`bytes` objects, to a percent-encoded ASCII
text string. If the resultant string is to be used as *data* for a POST
operation with the :func:`~urllib.request.urlopen` function, then
Expand All @@ -616,13 +616,14 @@ task isn't already covered by the URL parsing functions above.
and not encode '/' characters. For maximum control of what is quoted, use
``quote`` and specify a value for *safe*.

When a sequence of two-element tuples is used as the *query*
argument, the first element of each tuple is a key and the second is a
value. The value element in itself can be a sequence and in that case, if
the optional parameter *doseq* evaluates to ``True``, individual
``key=value`` pairs separated by ``'&'`` are generated for each element of
the value sequence for the key. The order of parameters in the encoded
string will match the order of parameter tuples in the sequence.
When an iterable of two-element iterables is used as the *query* argument,
the first element of each pair is a key and the second is a value. The
value element is interpreted as a single string when the *doseq* parameter
evaluates to ``False``. When it evaluates to ``True`` and the value is an
iterable (other than :class:`str` or :class:`bytes`), individual
``key=value`` pairs separated by ``'&'`` are generated for each element of
the value iterable for the key. The order of parameters in the encoded
string will match the order of the pairs in the *query* iterable.

The *safe*, *encoding*, and *errors* parameters are passed down to
*quote_via* (the *encoding* and *errors* parameters are only passed
Expand All @@ -640,6 +641,9 @@ task isn't already covered by the URL parsing functions above.
.. versionadded:: 3.5
*quote_via* parameter.

.. versionchanged:: 3.8
When *doseq* evaluates to ``True``, values that are generators or other
iterators are now iterated over as well.


.. seealso::

Expand Down
37 changes: 33 additions & 4 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,10 +723,6 @@ def test_portseparator(self):
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
(b'http',b'www.python.org:80',b'',b'',b'',b''))

def test_usingsys(self):
    # Issue 3314: the sys module is used while building the error, so a
    # plain string query must surface as a clean TypeError.
    with self.assertRaises(TypeError):
        urllib.parse.urlencode("foo")

def test_anyscheme(self):
# Issue 7904: s3://foo.com/stuff has netloc "foo.com".
self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
Expand Down Expand Up @@ -902,6 +898,39 @@ def __str__(self):
result = urllib.parse.urlencode({'a': Trivial()}, True)
self.assertEqual(result, 'a=trivial')

def test_urlencode_generators(self):
    """With doseq=True, a generator value yields one pair per item."""
    values = (number for number in range(2))

    encoded = urllib.parse.urlencode({'a': values}, True)
    self.assertEqual(encoded, 'a=0&a=1')

def test_urlencode_iterable_of_iterable_pairs(self):
    """A lazily produced query whose pairs are lists is accepted."""
    def make_query():
        # Build a fresh generator per call: each urlencode() call
        # exhausts the one it is given.
        return (['key', range(first, first + 2)] for first in (0, 2))

    expanded = urllib.parse.urlencode(make_query(), doseq=True)
    self.assertEqual(expanded, 'key=0&key=1&key=2&key=3')

    # Without doseq, str() is applied to each range object, so the
    # values look like 'range(0, 2)' before percent-encoding.
    stringified = urllib.parse.urlencode(make_query(), doseq=False)
    self.assertEqual(stringified,
                     'key=range%280%2C+2%29&key=range%282%2C+4%29')

def test_urlencode_two_chars(self):
    """Do not interpret strings of length 2 as key value pair."""
    # 'ab' could be unpacked as k='a', v='b'; it must be rejected instead,
    # whatever the value of doseq.
    for doseq in (True, False):
        with self.assertRaises(TypeError):
            urllib.parse.urlencode(['ab'], doseq=doseq)

def test_urlencode_sequence_of_non_pairs(self):
    """Items without exactly two elements raise ValueError."""
    for bad_item in ((1,), (1, 2, 3)):
        for doseq in (True, False):
            with self.assertRaises(ValueError):
                urllib.parse.urlencode([bad_item], doseq=doseq)

def test_urlencode_quote_via(self):
result = urllib.parse.urlencode({'a': 'some value'})
self.assertEqual(result, "a=some+value")
Expand Down
102 changes: 40 additions & 62 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
"""

import re
import sys
import collections
import warnings

Expand Down Expand Up @@ -879,82 +878,61 @@ def quote_from_bytes(bs, safe='/'):

def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
              quote_via=quote_plus):
    """Encode a dict or iterable of two-element iterables into a URL query
    string.

    If any values in the query argument are iterables and doseq is true, each
    iterable element is converted to a separate parameter.  str and bytes
    values are always treated as a single value, never iterated.

    If the query argument is an iterable, the order of the parameters in the
    output will match the order of parameters in the input.

    The components of a query argument may each be either a string or a bytes
    type.

    The safe, encoding, and errors parameters are passed down to the function
    specified by quote_via (encoding and errors only if a component is a str).

    Raises TypeError if query is not iterable, or if one of its items is a
    str or bytes object or is not iterable.  Raises ValueError if an item
    does not contain exactly two elements.
    """
    if hasattr(query, "items"):
        query = query.items()

    params = []
    for pair in query:
        # Reject strings *before* unpacking.  Unpacking first would let a
        # string whose length is not 2 (e.g. 'abc', or each character of a
        # whole-string query such as "foo") fail with ValueError instead of
        # the TypeError callers expect.  A 2-character string like 'ab'
        # would otherwise even unpack successfully as k='a', v='b'.
        if isinstance(pair, (str, bytes)):
            raise TypeError("not a valid non-string sequence or mapping object")
        k, v = pair

        if isinstance(k, bytes):
            k = quote_via(k, safe)
        else:
            k = quote_via(str(k), safe, encoding, errors)

        if isinstance(v, bytes):
            values = [quote_via(v, safe)]
        elif isinstance(v, str):
            values = [quote_via(v, safe, encoding, errors)]
        elif not doseq:
            values = [quote_via(str(v), safe, encoding, errors)]
        else:
            try:
                elements = iter(v)
            except TypeError:
                # Not iterable: encode its string form as a single value.
                values = [quote_via(str(v), safe, encoding, errors)]
            else:
                # One key=value parameter per element of the iterable.
                values = [
                    quote_via(elt, safe) if isinstance(elt, bytes)
                    else quote_via(str(elt), safe, encoding, errors)
                    for elt in elements
                ]

        params.extend(k + '=' + value for value in values)
    return '&'.join(params)


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Made ``urllib.parse.urlencode`` iterate over any iterable value when the
``doseq`` parameter is ``True``. Each pair in the query can now be any
two-element iterable (previously only ``tuple`` was accepted).