Skip to content

Commit ee890ed

Browse files
bpo-46337: Enumerate URL types for urllib's scheme-based behavior
Some features in urllib depend on the URL scheme (e.g., preserving the netloc when joining URLs). Prior to this patch, this was governed by the uses_* lists (uses_relative, uses_netloc, uses_params), which hard-code these attributes for certain schemes. Providing an enum interface and a 'constructor' that allows overrides makes this mechanism a bit more flexible for future modifications.
1 parent 13e4659 commit ee890ed

1 file changed

Lines changed: 35 additions & 5 deletions

File tree

Lib/urllib/parse.py

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
"""
2929

3030
from collections import namedtuple
31+
from enum import Enum
3132
import functools
3233
import re
3334
import sys
@@ -38,13 +39,19 @@
3839
"urlsplit", "urlunsplit", "urlencode", "parse_qs",
3940
"parse_qsl", "quote", "quote_plus", "quote_from_bytes",
4041
"unquote", "unquote_plus", "unquote_to_bytes",
41-
"DefragResult", "ParseResult", "SplitResult",
42+
"DefragResult", "ParseResult", "SplitResult", "SchemeClass",
4243
"DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]
4344

4445
# A classification of schemes.
4546
# The empty string classifies URLs with no scheme specified,
4647
# being the default value returned by “urlsplit” and “urlparse”.
4748

class SchemeClass(Enum):
    """Classes of scheme-dependent URL resolution behaviour.

    Each member names one way a URL may be handled, usually decided by
    its scheme:

    RELATIVE -- the scheme supports relative addressing.
    NETLOC   -- the netloc (domain name) is preserved.
    PARAMS   -- the parameters are preserved.
    """

    # Explicit values equal those assigned by the functional form
    # Enum('SchemeClass', 'RELATIVE NETLOC PARAMS'), which counts from 1.
    RELATIVE = 1
    NETLOC = 2
    PARAMS = 3
54+
4855
uses_relative = ['', 'ftp', 'http', 'gopher', 'nntp', 'imap',
4956
'wais', 'file', 'https', 'shttp', 'mms',
5057
'prospero', 'rtsp', 'rtspu', 'sftp',
@@ -60,6 +67,24 @@
6067
'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
6168
'mms', 'sftp', 'tel']
6269

def _scheme_classes(scheme, overrides=frozenset()):
    """Return the set of SchemeClass members that apply to *scheme*.

    The scheme is looked up in the module-level lists uses_relative,
    uses_netloc and uses_params; every list that contains it contributes
    the matching member (RELATIVE, NETLOC or PARAMS respectively).

    *overrides* is an optional iterable of SchemeClass members that are
    always part of the result, regardless of what the lists say.  It is
    copied, never mutated, and a new set is returned on every call.
    """
    # Copy so neither the caller's collection nor the shared (immutable)
    # default is ever modified by the additions below.
    scheme_classes = set(overrides)

    if scheme in uses_relative:
        scheme_classes.add(SchemeClass.RELATIVE)

    if scheme in uses_netloc:
        scheme_classes.add(SchemeClass.NETLOC)

    if scheme in uses_params:
        scheme_classes.add(SchemeClass.PARAMS)

    return scheme_classes
87+
6388
# These are not actually used anymore, but should stay for backwards
6489
# compatibility. (They are undocumented, but have a public-looking name.)
6590

@@ -386,7 +411,8 @@ def urlparse(url, scheme='', allow_fragments=True):
386411
url, scheme, _coerce_result = _coerce_args(url, scheme)
387412
splitresult = urlsplit(url, scheme, allow_fragments)
388413
scheme, netloc, url, query, fragment = splitresult
389-
if scheme in uses_params and ';' in url:
414+
scheme_classes = _scheme_classes(scheme)
415+
if SchemeClass.PARAMS in scheme_classes and ';' in url:
390416
url, params = _splitparams(url)
391417
else:
392418
params = ''
@@ -500,7 +526,9 @@ def urlunsplit(components):
500526
empty query; the RFC states that these are equivalent)."""
501527
scheme, netloc, url, query, fragment, _coerce_result = (
502528
_coerce_args(*components))
503-
if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
529+
530+
scheme_classes = _scheme_classes(scheme)
531+
if netloc or (scheme and SchemeClass.NETLOC in scheme_classes and url[:2] != '//'):
504532
if url and url[:1] != '/': url = '/' + url
505533
url = '//' + (netloc or '') + url
506534
if scheme:
@@ -525,9 +553,11 @@ def urljoin(base, url, allow_fragments=True):
525553
scheme, netloc, path, params, query, fragment = \
526554
urlparse(url, bscheme, allow_fragments)
527555

528-
if scheme != bscheme or scheme not in uses_relative:
556+
scheme_classes = _scheme_classes(scheme)
557+
558+
if scheme != bscheme or SchemeClass.RELATIVE not in scheme_classes:
529559
return _coerce_result(url)
530-
if scheme in uses_netloc:
560+
if SchemeClass.NETLOC in scheme_classes:
531561
if netloc:
532562
return _coerce_result(urlunparse((scheme, netloc, path,
533563
params, query, fragment)))

0 commit comments

Comments
 (0)