-
-
Notifications
You must be signed in to change notification settings - Fork 779
WIP: redo validation by content type #760
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
53b74f6
452db0d
bfa477b
a95e987
062de8b
5a028ee
2494326
fd89862
171cbb3
0023799
bfbb519
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,181 @@ | ||
| import logging | ||
| import re | ||
|
|
||
| from jsonschema import ValidationError | ||
|
|
||
| from .exceptions import ExtraParameterProblem, BadRequestProblem | ||
| from .types import coerce_type | ||
| from .utils import is_null | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| class ContentHandlerFactory(object): | ||
|
|
||
| def __init__(self, validator, schema, strict_validation, | ||
| is_null_value_valid, consumes): | ||
| self.validator = validator | ||
| self.schema = schema | ||
| self.strict_validation = strict_validation | ||
| self.is_null_value_valid = is_null_value_valid | ||
| self.consumes = consumes | ||
| self._content_handlers = self._discover() | ||
|
|
||
| def _discover(self): | ||
| content_handlers = ContentHandler.discover_subclasses() | ||
| return { | ||
| name: cls(self.validator, self.schema, | ||
| self.strict_validation, self.is_null_value_valid) | ||
| for name, cls in content_handlers.items() | ||
| } | ||
|
|
||
| def get_handler(self, content_type): | ||
| match = None | ||
|
|
||
| if content_type is None: | ||
| return match | ||
|
|
||
| media_type = content_type.split(";", 1)[0] | ||
| if media_type not in self.consumes: | ||
| return None | ||
|
|
||
| try: | ||
| return self._content_handlers[media_type] | ||
| except KeyError: | ||
| pass | ||
|
|
||
| matches = [ | ||
| (name, handler) for name, handler in self._content_handlers.items() | ||
| if handler.regex.match(content_type) | ||
| ] | ||
| if len(matches) > 1: | ||
| logger.warning(f"Content could be handled by multiple validators: {matches}") | ||
|
|
||
| if matches: | ||
| name, handler = matches[0] | ||
| return handler | ||
|
|
||
|
|
||
| class ContentHandler(object): | ||
|
|
||
| def __init__(self, validator, schema, strict, is_null_value_valid): | ||
| self.schema = schema | ||
| self.strict_validation = strict | ||
| self.is_null_value_valid = is_null_value_valid | ||
| self.validator = validator | ||
| self.default = schema.get('default') | ||
|
|
||
| def validate_schema(self, data, url): | ||
| # type: (dict, AnyStr) -> Union[ConnexionResponse, None] | ||
| if is_null(data): | ||
| if self.default: | ||
| # TODO do we need to do this? If the spec is valid, this will pass | ||
| data = self.default | ||
| elif self.is_null_value_valid: | ||
| return | ||
|
|
||
| try: | ||
| self.validator.validate(data) | ||
| except ValidationError as exception: | ||
| error_path = '.'.join(str(item) for item in exception.path) | ||
| error_path_msg = " - '{path}'".format(path=error_path) \ | ||
| if error_path else "" | ||
| logger.error( | ||
| "{url} validation error: {error}{error_path_msg}".format( | ||
| url=url, error=exception.message, | ||
| error_path_msg=error_path_msg), | ||
| extra={'validator': 'body'}) | ||
| raise BadRequestProblem(detail="{message}{error_path_msg}".format( | ||
| message=exception.message, | ||
| error_path_msg=error_path_msg)) | ||
|
|
||
| def deserialize(self, request): | ||
| return request.body | ||
|
|
||
| def validate_request(self, request): | ||
| data = self.deserialize(request) | ||
| self.validate_schema(data, request.url) | ||
|
|
||
| @classmethod | ||
| def discover_subclasses(cls): | ||
| subclasses = {c.name: c for c in cls.__subclasses__()} | ||
| for s in cls.__subclasses__(): | ||
| subclasses.update(s.discover_subclasses()) | ||
| return subclasses | ||
|
|
||
|
|
||
| class StreamingContentHandler(ContentHandler): | ||
| name = "application/octet-stream" | ||
| regex = re.compile(r'^application\/octet-stream.*') | ||
|
|
||
| def validate_request(self, request): | ||
| # Don't validate, leave stream for user to read | ||
| pass | ||
|
|
||
|
|
||
| class TextPlainContentHandler(ContentHandler): | ||
| name = "text/plain" | ||
| regex = re.compile(r'^text\/plain.*') | ||
|
|
||
| def validate_request(self, request): | ||
| # Don't validate, leave stream for user to read | ||
| pass | ||
|
|
||
|
|
||
| class JSONContentHandler(ContentHandler): | ||
| name = "application/json" | ||
| regex = re.compile(r'^application\/json.*|^.*\+json$') | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are several places that said validation can occur for |
||
|
|
||
| def deserialize(self, request): | ||
| data = request.json | ||
| empty_body = not(request.body or request.form or request.files) | ||
| if data is None and not empty_body and not self.is_null_value_valid: | ||
| # Content-Type is json but actual body was not parsed | ||
| raise BadRequestProblem(detail="Request body is not valid JSON") | ||
| return data | ||
|
|
||
|
|
||
| def validate_parameter_list(request_params, spec_params): | ||
| request_params = set(request_params) | ||
| spec_params = set(spec_params) | ||
|
|
||
| return request_params.difference(spec_params) | ||
|
|
||
|
|
||
| class FormDataContentHandler(ContentHandler): | ||
| name = "application/x-www-form-urlencoded" | ||
| regex = re.compile( | ||
| r'^application\/x-www-form-urlencoded.*' | ||
| ) | ||
|
|
||
| def _validate_formdata_parameter_list(self, request): | ||
| request_params = request.form.keys() | ||
| spec_params = self.schema.get('properties', {}).keys() | ||
| return validate_parameter_list(request_params, spec_params) | ||
|
|
||
| def deserialize(self, request): | ||
| data = dict(request.form.items()) or \ | ||
| (request.body if len(request.body) > 0 else {}) | ||
| data.update(dict.fromkeys(request.files, '')) # validator expects string.. | ||
| logger.debug('%s validating schema...', request.url) | ||
|
|
||
| if self.strict_validation: | ||
| formdata_errors = self._validate_formdata_parameter_list(request) | ||
| if formdata_errors: | ||
| raise ExtraParameterProblem(formdata_errors, []) | ||
|
|
||
| if data: | ||
| props = self.schema.get("properties", {}) | ||
| for k, param_defn in props.items(): | ||
| if k in data: | ||
| data[k] = coerce_type(param_defn, data[k], 'requestBody', k) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This used to be in a try/except to catch TypeValidationError and return a problem for it. Is that handled elsewhere? |
||
| # XXX it's surprising to hide this in validation | ||
| request.form = data | ||
|
Comment on lines
+172
to
+173
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is this doing? I don't see this in the old code. I see that request.form is overwritten in Is this actually something specific to aiohttp? Flask extracts the form data but aiohttp does not?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To make this less surprising, are you thinking of creating a new decorator, maybe a One possible gotcha is that Deserialization and Validation are tightly coupled because you can discover validation errors when coercing types during deserializtion. |
||
| return data | ||
|
|
||
|
|
||
| class MultiPartFormDataContentHandler(FormDataContentHandler): | ||
| name = "multipart/form-data" | ||
| regex = re.compile( | ||
| r'^multipart\/form-data.*' | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, do you think we could also add a
def serialize(self, response):onContentHandlermaybe as part of making pluggable serialization in #1095? Or would we need separate classes for request and response?