Skip to content

Commit 53c6ccc

Browse files
bpo-46337: Allow caller modification of url classes
This allows callers of urljoin and urlparse to force specific scheme classes onto a URL regardless of its actual scheme, which may not appear in the default uses_* lists of schemes. The call-time override is exposed through an optional `classes` parameter, so backwards compatibility is preserved. A test case is added for this; it relies on the accompanying change to test_urlparse.checkJoin, which now accepts a `classes` argument and forwards it to urljoin.
1 parent ee890ed commit 53c6ccc

2 files changed

Lines changed: 16 additions & 10 deletions

File tree

Lib/test/test_urlparse.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,12 +213,12 @@ def _encode(t):
213213
split = (scheme,) + split
214214
self.checkRoundtrips(url, parsed, split)
215215

216-
def checkJoin(self, base, relurl, expected):
216+
def checkJoin(self, base, relurl, expected, classes=[]):
217217
str_components = (base, relurl, expected)
218-
self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
218+
self.assertEqual(urllib.parse.urljoin(base, relurl, classes=classes), expected)
219219
bytes_components = baseb, relurlb, expectedb = [
220220
x.encode('ascii') for x in str_components]
221-
self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
221+
self.assertEqual(urllib.parse.urljoin(baseb, relurlb, classes=classes), expectedb)
222222

223223
def test_unparse_parse(self):
224224
str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
@@ -417,6 +417,11 @@ def test_urljoins(self):
417417
self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
418418
self.checkJoin('ws://a/b','g','ws://a/g')
419419
self.checkJoin('wss://a/b','g','wss://a/g')
420+
self.checkJoin(
421+
'nonsensebase://net.loc/url/', '..',
422+
'nonsensebase://net.loc/',
423+
classes=[urllib.parse.SchemeClass.RELATIVE, urllib.parse.SchemeClass.NETLOC],
424+
)
420425

421426
# XXX: The following tests are no longer compatible with RFC3986
422427
# self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')

Lib/urllib/parse.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ def _fix_result_transcoding():
388388
_fix_result_transcoding()
389389
del _fix_result_transcoding
390390

391-
def urlparse(url, scheme='', allow_fragments=True):
391+
def urlparse(url, scheme='', allow_fragments=True, classes=set()):
392392
"""Parse a URL into 6 components:
393393
<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
394394
@@ -411,7 +411,7 @@ def urlparse(url, scheme='', allow_fragments=True):
411411
url, scheme, _coerce_result = _coerce_args(url, scheme)
412412
splitresult = urlsplit(url, scheme, allow_fragments)
413413
scheme, netloc, url, query, fragment = splitresult
414-
scheme_classes = _scheme_classes(scheme)
414+
scheme_classes = _scheme_classes(scheme, overrides=classes)
415415
if SchemeClass.PARAMS in scheme_classes and ';' in url:
416416
url, params = _splitparams(url)
417417
else:
@@ -539,21 +539,22 @@ def urlunsplit(components):
539539
url = url + '#' + fragment
540540
return _coerce_result(url)
541541

542-
def urljoin(base, url, allow_fragments=True):
542+
def urljoin(base, url, allow_fragments=True, classes=set()):
543543
"""Join a base URL and a possibly relative URL to form an absolute
544-
interpretation of the latter."""
544+
interpretation of the latter. Some logic may be enabled by setting
545+
the classes variable."""
545546
if not base:
546547
return url
547548
if not url:
548549
return base
549550

550551
base, url, _coerce_result = _coerce_args(base, url)
551552
bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
552-
urlparse(base, '', allow_fragments)
553+
urlparse(base, '', allow_fragments, classes=classes)
553554
scheme, netloc, path, params, query, fragment = \
554-
urlparse(url, bscheme, allow_fragments)
555+
urlparse(url, bscheme, allow_fragments, classes=classes)
555556

556-
scheme_classes = _scheme_classes(scheme)
557+
scheme_classes = _scheme_classes(scheme, overrides=classes)
557558

558559
if scheme != bscheme or SchemeClass.RELATIVE not in scheme_classes:
559560
return _coerce_result(url)

0 commit comments

Comments
 (0)