2828"""
2929
3030from collections import namedtuple
31+ from enum import Enum
3132import functools
3233import re
3334import sys
3839 "urlsplit" , "urlunsplit" , "urlencode" , "parse_qs" ,
3940 "parse_qsl" , "quote" , "quote_plus" , "quote_from_bytes" ,
4041 "unquote" , "unquote_plus" , "unquote_to_bytes" ,
41- "DefragResult" , "ParseResult" , "SplitResult" ,
42+ "DefragResult" , "ParseResult" , "SplitResult" , "SchemeClass" ,
4243 "DefragResultBytes" , "ParseResultBytes" , "SplitResultBytes" ]
4344
4445# A classification of schemes.
4546# The empty string classifies URLs with no scheme specified,
4647# being the default value returned by “urlsplit” and “urlparse”.
4748
class SchemeClass(Enum):
    """Resolution classes that a URL scheme may belong to.

    These describe methods for URL resolution, usually by scheme:

    RELATIVE -- the scheme supports relative addressing.
    NETLOC   -- the netloc (domain name) is preserved.
    PARAMS   -- the parameters are preserved.
    """

    # Explicit values match what the functional form
    # Enum('SchemeClass', 'RELATIVE NETLOC PARAMS') assigns (counting from 1),
    # so lookups by value keep working.
    RELATIVE = 1
    NETLOC = 2
    PARAMS = 3
54+
4855uses_relative = ['' , 'ftp' , 'http' , 'gopher' , 'nntp' , 'imap' ,
4956 'wais' , 'file' , 'https' , 'shttp' , 'mms' ,
5057 'prospero' , 'rtsp' , 'rtspu' , 'sftp' ,
6067 'https' , 'shttp' , 'rtsp' , 'rtspu' , 'sip' , 'sips' ,
6168 'mms' , 'sftp' , 'tel' ]
6269
def _scheme_classes(scheme, overrides=frozenset()):
    """Return the set of SchemeClass members that apply to *scheme*.

    The module-level lists uses_relative, uses_netloc and uses_params are
    consulted at call time, so later changes to those lists are honoured.

    *overrides* is an optional iterable of SchemeClass members that are
    always included in the result, whether or not the lists mention
    *scheme*.

    A new set is returned on every call; neither *overrides* nor the
    (immutable) default is ever modified.
    """
    # Copy first so the caller's collection is left untouched.
    scheme_classes = set(overrides)

    # Pair each registry list with the class it grants.
    registries = (
        (uses_relative, SchemeClass.RELATIVE),
        (uses_netloc, SchemeClass.NETLOC),
        (uses_params, SchemeClass.PARAMS),
    )
    for schemes, scheme_class in registries:
        if scheme in schemes:
            scheme_classes.add(scheme_class)

    return scheme_classes
87+
6388# These are not actually used anymore, but should stay for backwards
6489# compatibility. (They are undocumented, but have a public-looking name.)
6590
@@ -386,7 +411,8 @@ def urlparse(url, scheme='', allow_fragments=True):
386411 url , scheme , _coerce_result = _coerce_args (url , scheme )
387412 splitresult = urlsplit (url , scheme , allow_fragments )
388413 scheme , netloc , url , query , fragment = splitresult
389- if scheme in uses_params and ';' in url :
414+ scheme_classes = _scheme_classes (scheme )
415+ if SchemeClass .PARAMS in scheme_classes and ';' in url :
390416 url , params = _splitparams (url )
391417 else :
392418 params = ''
@@ -500,7 +526,9 @@ def urlunsplit(components):
500526 empty query; the RFC states that these are equivalent)."""
501527 scheme , netloc , url , query , fragment , _coerce_result = (
502528 _coerce_args (* components ))
503- if netloc or (scheme and scheme in uses_netloc and url [:2 ] != '//' ):
529+
530+ scheme_classes = _scheme_classes (scheme )
531+ if netloc or (scheme and SchemeClass .NETLOC in scheme_classes and url [:2 ] != '//' ):
504532 if url and url [:1 ] != '/' : url = '/' + url
505533 url = '//' + (netloc or '' ) + url
506534 if scheme :
@@ -525,9 +553,11 @@ def urljoin(base, url, allow_fragments=True):
525553 scheme , netloc , path , params , query , fragment = \
526554 urlparse (url , bscheme , allow_fragments )
527555
528- if scheme != bscheme or scheme not in uses_relative :
556+ scheme_classes = _scheme_classes (scheme )
557+
558+ if scheme != bscheme or SchemeClass .RELATIVE not in scheme_classes :
529559 return _coerce_result (url )
530- if scheme in uses_netloc :
560+ if SchemeClass . NETLOC in scheme_classes :
531561 if netloc :
532562 return _coerce_result (urlunparse ((scheme , netloc , path ,
533563 params , query , fragment )))
0 commit comments