88 :copyright: (c) 2013-present by Abhinav Singh and contributors.
99 :license: BSD, see LICENSE for more details.
1010"""
11- from urllib import parse as urlparse
1211from typing import TypeVar , NamedTuple , Optional , Dict , Type , Tuple , List
1312
14- from .methods import httpMethods
15- from .chunk_parser import ChunkParser , chunkParserStates
16-
17- from ..common .constants import DEFAULT_DISABLE_HEADERS , COLON , SLASH , CRLF , WHITESPACE , HTTP_1_1 , DEFAULT_HTTP_PORT
18- from ..common .utils import build_http_request , build_http_response , find_http_line , text_
13+ from ...common .constants import DEFAULT_DISABLE_HEADERS , COLON , HTTP_1_0 , SLASH , CRLF
14+ from ...common .constants import WHITESPACE , HTTP_1_1 , DEFAULT_HTTP_PORT
15+ from ...common .utils import build_http_request , build_http_response , find_http_line , text_
1916
17+ from .url import Url
18+ from .methods import httpMethods
19+ from .chunk import ChunkParser , chunkParserStates
2020
2121HttpParserStates = NamedTuple (
2222 'HttpParserStates' , [
@@ -58,44 +58,26 @@ class HttpParser:
def __init__(self, parser_type: int) -> None:
    """Create an empty parser of the given type.

    ``parser_type`` is stored as-is on ``self.type``; every other
    attribute starts out empty and is filled in while parsing.
    """
    self.type: int = parser_type
    self.state: int = httpParserStates.INITIALIZED
    # Request/response line attributes, unset until parsed.
    self.host: Optional[bytes] = None
    self.port: Optional[int] = None
    self.path: Optional[bytes] = None
    self.method: Optional[bytes] = None
    self.code: Optional[bytes] = None
    self.reason: Optional[bytes] = None
    self.version: Optional[bytes] = None
    # Total size of raw bytes passed for parsing
    self.total_size: int = 0
    # Buffer to hold unprocessed bytes
    self.buffer: bytes = b''
    # Internal headers datastructure:
    # - Keys are lower case header names.
    # - Values are 2-tuple containing original
    #   header and its value as received.
    self.headers: Dict[bytes, Tuple[bytes, bytes]] = {}
    self.body: Optional[bytes] = None
    # Created lazily once a chunked body is being processed.
    self.chunk: Optional[ChunkParser] = None
    # Internal request line as a url structure
    self._url: Optional[Url] = None
9981
10082 @classmethod
10183 def request (cls : Type [T ], raw : bytes ) -> T :
@@ -110,50 +92,74 @@ def response(cls: Type[T], raw: bytes) -> T:
11092 return parser
11193
def header(self, key: bytes) -> bytes:
    """Return the value of header ``key`` as originally received.

    Lookup is case-insensitive because the internal dictionary is keyed
    by lower-cased header names.

    Raises:
        KeyError: if no such header is present.
    """
    try:
        return self.headers[key.lower()][1]
    except KeyError:
        # Format the message here: passing the template and the argument
        # separately to KeyError would leave the '%s' unformatted.
        raise KeyError('%s not found in headers' % text_(key)) from None
11699
def has_header(self, key: bytes) -> bool:
    """Return True if header ``key`` is present (case-insensitive)."""
    return key.lower() in self.headers
119103
def add_header(self, key: bytes, value: bytes) -> None:
    """Add or overwrite a header in the internal data structure.

    The entry is stored under the lower-cased name, while the tuple keeps
    the name exactly as given together with its value.
    """
    self.headers[key.lower()] = (key, value)
122107
def add_headers(self, headers: List[Tuple[bytes, bytes]]) -> None:
    """Add or overwrite multiple headers, given as (name, value) pairs.

    Pairs are applied in order through ``add_header``, so a later pair
    with the same (case-insensitive) name wins.
    """
    for key, value in headers:
        self.add_header(key, value)
126112
def del_header(self, header: bytes) -> None:
    """Delete a header from the internal data structure.

    Matching is case-insensitive; a header that is not present is
    silently ignored.
    """
    # pop() with a default avoids the separate membership test.
    self.headers.pop(header.lower(), None)
130117
def del_headers(self, headers: List[bytes]) -> None:
    """Delete every listed header from the internal data structure.

    Each name is handed to ``del_header``, which lower-cases it before the
    lookup, so no case normalisation is needed here.
    """
    for key in headers:
        self.del_header(key)
134122
def set_url(self, url: bytes) -> None:
    """Parse ``url`` and refresh the line attributes derived from it.

    The raw bytes are parsed with ``Url.from_bytes`` and stored on
    ``self._url``; ``_set_line_attributes`` then derives the line
    attributes (a.k.a. host, port, path) from it.
    """
    self._url = Url.from_bytes(url)
    self._set_line_attributes()
145127
def has_host(self) -> bool:
    """Return whether the host line attribute was parsed or set.

    NOTE: Host field WILL be None for incoming local WebServer requests.
    """
    return self.host is not None
133+
def is_http_1_1_keep_alive(self) -> bool:
    """Return True for HTTP/1.1 keep-alive connections.

    An HTTP/1.1 message counts as keep-alive when it carries no
    ``Connection`` header at all, or when that header's value is
    ``keep-alive`` (compared case-insensitively).
    """
    if self.version != HTTP_1_1:
        return False
    if not self.has_header(b'Connection'):
        return True
    return self.header(b'Connection').lower() == b'keep-alive'
141+
def is_connection_upgrade(self) -> bool:
    """Return True for connection upgrade (e.g. websocket) requests.

    Only the presence of both ``Connection`` and ``Upgrade`` headers on an
    HTTP/1.1 message is checked; the header values are not inspected.
    """
    return (
        self.version == HTTP_1_1
        and self.has_header(b'Connection')
        and self.has_header(b'Upgrade')
    )
147+
def is_https_tunnel(self) -> bool:
    """Return True for an HTTPS CONNECT tunnel request.

    This is decided purely by the request method being ``CONNECT``.
    """
    return self.method == httpMethods.CONNECT
148151
def is_chunked_encoded(self) -> bool:
    """Return True if ``Transfer-Encoding: chunked`` is used.

    The header value is compared case-insensitively and must be exactly
    ``chunked``.
    """
    entry = self.headers.get(b'transfer-encoding')
    # entry is (original header name, value) when present.
    return entry is not None and entry[1].lower() == b'chunked'
152156
def content_expected(self) -> bool:
    """Return True if content-length is present and greater than 0.

    Raises:
        ValueError: if the content-length value is not an integer
            (propagated from ``int``).
    """
    if b'content-length' not in self.headers:
        return False
    return int(self.header(b'content-length')) > 0
155160
def body_expected(self) -> bool:
    """Return True if a content-length body or a chunked body is expected."""
    return self.content_expected() or self.is_chunked_encoded()
158164
159165 def parse (self , raw : bytes ) -> None :
@@ -173,20 +179,20 @@ def parse(self, raw: bytes) -> None:
173179
174180 def build (self , disable_headers : Optional [List [bytes ]] = None , for_proxy : bool = False ) -> bytes :
175181 """Rebuild the request object."""
176- assert self .method and self .version and self .path and self . type == httpParserTypes .REQUEST_PARSER
182+ assert self .method and self .version and self .type == httpParserTypes .REQUEST_PARSER
177183 if disable_headers is None :
178184 disable_headers = DEFAULT_DISABLE_HEADERS
179185 body : Optional [bytes ] = self ._get_body_or_chunks ()
180- path = self .path
186+ path = self .path or b'/'
181187 if for_proxy :
182- assert self ._url and self . host and self .port and self .path
188+ assert self .host and self .port and self ._url
183189 path = (
184- self ._url .scheme +
190+ b'http' if not self . _url . scheme else self ._url .scheme +
185191 COLON + SLASH + SLASH +
186192 self .host +
187193 COLON +
188194 str (self .port ).encode () +
189- self . path
195+ path
190196 ) if not self .is_https_tunnel () else (self .host + COLON + str (self .port ).encode ())
191197 return build_http_request (
192198 self .method , path , self .version ,
@@ -210,24 +216,26 @@ def build_response(self) -> bytes:
210216 body = self ._get_body_or_chunks (),
211217 )
212218
213- def has_host (self ) -> bool :
214- """Host field SHOULD be None for incoming local WebServer requests."""
215- return self .host is not None
216-
217- def is_http_1_1_keep_alive (self ) -> bool :
218- return self .version == HTTP_1_1 and \
219- (
220- not self .has_header (b'Connection' ) or
221- self .header (b'Connection' ).lower () == b'keep-alive'
222- )
223-
224- def is_connection_upgrade (self ) -> bool :
225- return self .version == HTTP_1_1 and \
226- self .has_header (b'Connection' ) and \
227- self .has_header (b'Upgrade' )
228-
229219 def _process_body (self , raw : bytes ) -> Tuple [bool , bytes ]:
230- if b'content-length' in self .headers :
220+ # Ref: http://www.ietf.org/rfc/rfc2616.txt
221+ # 3.If a Content-Length header field (section 14.13) is present, its
222+ # decimal value in OCTETs represents both the entity-length and the
223+ # transfer-length. The Content-Length header field MUST NOT be sent
224+ # if these two lengths are different (i.e., if a Transfer-Encoding
225+ # header field is present). If a message is received with both a
226+ # Transfer-Encoding header field and a Content-Length header field,
227+ # the latter MUST be ignored.
228+ #
229+ # TL;DR -- Give transfer-encoding header preference over content-length.
230+ if self .is_chunked_encoded ():
231+ if not self .chunk :
232+ self .chunk = ChunkParser ()
233+ raw = self .chunk .parse (raw )
234+ if self .chunk .state == chunkParserStates .COMPLETE :
235+ self .body = self .chunk .body
236+ self .state = httpParserStates .COMPLETE
237+ more = False
238+ elif b'content-length' in self .headers :
231239 self .state = httpParserStates .RCVING_BODY
232240 if self .body is None :
233241 self .body = b''
@@ -238,19 +246,17 @@ def _process_body(self, raw: bytes) -> Tuple[bool, bytes]:
238246 len (self .body ) == int (self .header (b'content-length' )):
239247 self .state = httpParserStates .COMPLETE
240248 more , raw = len (raw ) > 0 , raw [total_size - received_size :]
241- elif self .is_chunked_encoded ():
242- if not self .chunk_parser :
243- self .chunk_parser = ChunkParser ()
244- raw = self .chunk_parser .parse (raw )
245- if self .chunk_parser .state == chunkParserStates .COMPLETE :
246- self .body = self .chunk_parser .body
247- self .state = httpParserStates .COMPLETE
248- more = False
249249 else :
250- raise NotImplementedError (
251- 'Parser shouldn\' t have reached here. ' +
252- 'This can happen when content length header is missing but their is a body in the payload' ,
253- )
250+ # HTTP/1.0 scenario only
251+ assert self .version == HTTP_1_0
252+ self .state = httpParserStates .RCVING_BODY
253+ # Received a packet without content-length header
254+ # and no transfer-encoding specified.
255+ #
256+ # Ref https://github.com/abhinavsingh/proxy.py/issues/398
257+ # See TestHttpParser.test_issue_398 scenario
258+ self .body = raw
259+ more , raw = False , b''
254260 return more , raw
255261
256262 def _process_line_and_headers (self , raw : bytes ) -> Tuple [bool , bytes ]:
@@ -319,16 +325,4 @@ def _set_line_attributes(self) -> None:
319325 'Invalid request. Method: %r, Url: %r' %
320326 (self .method , self ._url ),
321327 )
322- self .path = self ._build_path ()
323-
324- def _build_path (self ) -> bytes :
325- if not self ._url :
326- return b'/None'
327- url = self ._url .path
328- if url == b'' :
329- url = b'/'
330- if not self ._url .query == b'' :
331- url += b'?' + self ._url .query
332- if not self ._url .fragment == b'' :
333- url += b'#' + self ._url .fragment
334- return url
328+ self .path = self ._url .remainder
0 commit comments