diff --git a/CHANGES b/CHANGES index 176dde1c..2005da7c 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,108 @@ Bleach changes ============== +Version 3.1.2 (March 11th, 2020) +-------------------------------- + +**Security fixes** + +* ``bleach.clean`` behavior parsing embedded MathML and SVG content + with RCDATA tags did not match browser behavior and could result in + a mutation XSS. + + Calls to ``bleach.clean`` with ``strip=False`` and ``math`` or + ``svg`` tags and one or more of the RCDATA tags ``script``, + ``noscript``, ``style``, ``noframes``, ``iframe``, ``noembed``, or + ``xmp`` in the allowed tags whitelist were vulnerable to a mutation + XSS. + + This security issue was confirmed in Bleach version v3.1.1. Earlier + versions are likely affected too. + + Anyone using Bleach <=v3.1.1 is encouraged to upgrade. + + https://bugzilla.mozilla.org/show_bug.cgi?id=1621692 + +**Backwards incompatible changes** + +None + +**Features** + +None + +**Bug fixes** + +None + +Version 3.1.1 (February 13th, 2020) +----------------------------------- + +**Security fixes** + +* ``bleach.clean`` behavior parsing ``noscript`` tags did not match + browser behavior. + + Calls to ``bleach.clean`` allowing ``noscript`` and one or more of + the raw text tags (``title``, ``textarea``, ``script``, ``style``, + ``noembed``, ``noframes``, ``iframe``, and ``xmp``) were vulnerable + to a mutation XSS. + + This security issue was confirmed in Bleach versions v2.1.4, v3.0.2, + and v3.1.0. Earlier versions are probably affected too. + + Anyone using Bleach <=v3.1.0 is highly encouraged to upgrade. + + https://bugzilla.mozilla.org/show_bug.cgi?id=1615315 + +**Backwards incompatible changes** + +None + +**Features** + +None + +**Bug fixes** + +None + +Bleach changes +============== + +Version 3.1.0 (January 9th, 2019) +--------------------------------- + +**Security fixes** + +None + +**Backwards incompatible changes** + +None + +**Features** + +* Add ``recognized_tags`` argument to the linkify ``Linker`` class. This + fixes issues when linkifying on its own and having some tags get escaped. + It defaults to a list of HTML5 tags. Thank you, Chad Birch! (#409) + +**Bug fixes** + +* Add ``six>=1.9`` to requirements. Thank you, Dave Shawley (#416) + +* Fix cases where attribute names could have invalid characters in them. + (#419) + +* Fix problems with ``LinkifyFilter`` not being able to match links + across ``&``. (#422) + +* Fix ``InputStreamWithMemory`` when the ``BleachHTMLParser`` is + parsing ``meta`` tags. (#431) + +* Fix doctests. (#357) + + Version 3.0.2 (October 11th, 2018) ---------------------------------- @@ -43,7 +145,7 @@ None * Fix ``list`` object has no attribute ``lower`` in ``clean``. (#398) * Fix ``abbr`` getting escaped in ``linkify``. (#400) - + Version 3.0.0 (October 3rd, 2018) --------------------------------- diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 5783ab17..2b0137d0 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -18,21 +18,25 @@ Contributors: - Adam Lofts - Adrian "ThiefMaster" - Alek -- Alexandre Macabies -- Alexandr N. Zamaraev - Alex Defsen - Alex Ehlke +- Alexandre Macabies +- Alexandr N. Zamaraev - Alireza Savand - Andreas Malecki - Andy Freeland - Antoine Leclair +- Anton Backer - Anton Kovalyov +- Chad Birch - Chris Beaven - Dan Gayle +- dave-shawley - Erik Rose - Gaurav Dadhania - Geoffrey Sneddon - Greg Guthe +- hugovk - Istvan Albert - Jaime Irurzun - James Socol @@ -49,6 +53,7 @@ Contributors: - Mark Lee - Mark Paschal - mdxs +- Nikita Sobolev - nikolas - Oh Jinkyun - Paul Craciunoiu @@ -56,8 +61,11 @@ Contributors: - Ryan Niemeyer - Sébastien Fievet - sedrubal +- Stephane Blondon +- Stu Cox - Tim Dumol - Timothy Fitz +- Vadim Kotov - Vitaly Volkov - Will Kahn-Greene - Zoltán diff --git a/bleach/__init__.py b/bleach/__init__.py index 8ed01763..bb5a5b6d 100644 --- a/bleach/__init__.py +++ b/bleach/__init__.py @@ -18,9 +18,9 @@ # yyyymmdd -__releasedate__ = '20181011' +__releasedate__ = '20200311' # x.y.z or x.y.z.dev0 -- semver -__version__ = '3.0.2' +__version__ = '3.1.2' VERSION = parse_version(__version__) diff --git a/bleach/html5lib_shim.py b/bleach/html5lib_shim.py index 4e6e054f..dcd98a1a 100644 --- a/bleach/html5lib_shim.py +++ b/bleach/html5lib_shim.py @@ -43,21 +43,125 @@ constants.tokenTypes['EmptyTag'] ]) CHARACTERS_TYPE = constants.tokenTypes['Characters'] +PARSEERROR_TYPE = constants.tokenTypes['ParseError'] -#: List of HTML tags +#: List of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17 +#: https://html.spec.whatwg.org/multipage/indices.html#elements-3 HTML_TAGS = [ - tag for namespace, tag in - ( - list(constants.scopingElements) + - list(constants.formattingElements) + - list(constants.specialElements) + - list(constants.htmlIntegrationPointElements) + - list(constants.mathmlTextIntegrationPointElements) - ) + 'a', + 'abbr', + 'address', + 'area', + 'article', + 'aside', + 'audio', + 'b', + 'base', + 'bdi', + 'bdo', + 'blockquote', + 'body', + 'br', + 'button', + 'canvas', + 'caption', + 'cite', + 'code', + 'col', + 'colgroup', + 'data', + 'datalist', + 'dd', + 'del', + 'details', + 'dfn', + 'dialog', + 'div', + 'dl', + 'dt', + 'em', + 'embed', + 'fieldset', + 'figcaption', + 'figure', + 'footer', + 'form', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'head', + 'header', + 'hgroup', + 'hr', + 'html', + 'i', + 'iframe', + 'img', + 'input', + 'ins', + 'kbd', + 'keygen', + 'label', + 'legend', + 'li', + 'link', + 'map', + 'mark', + 'menu', + 'meta', + 'meter', + 'nav', + 'noscript', + 'object', + 'ol', + 'optgroup', + 'option', + 'output', + 'p', + 'param', + 'picture', + 'pre', + 'progress', + 'q', + 'rp', + 'rt', + 'ruby', + 's', + 'samp', + 'script', + 'section', + 'select', + 'slot', + 'small', + 'source', + 'span', + 'strong', + 'style', + 'sub', + 'summary', + 'sup', + 'table', + 'tbody', + 'td', + 'template', + 'textarea', + 'tfoot', + 'th', + 'thead', + 'time', + 'title', + 'tr', + 'track', + 'u', + 'ul', + 'var', + 'video', + 'wbr', ] -# Add tags that aren't in html5lib.constants -HTML_TAGS.extend(['abbr']) class InputStreamWithMemory(object): @@ -77,6 +181,14 @@ def __init__(self, inner_stream): def errors(self): return self._inner_stream.errors + @property + def charEncoding(self): + return self._inner_stream.charEncoding + + @property + def changeEncoding(self): + return self._inner_stream.changeEncoding + def char(self): c = self._inner_stream.char() # char() can return None if EOF, so ignore that @@ -129,7 +241,21 @@ def __iter__(self): for token in super(BleachHTMLTokenizer, self).__iter__(): if last_error_token is not None: - if ((last_error_token['data'] == 'expected-closing-tag-but-got-char' and + if ((last_error_token['data'] == 'invalid-character-in-attribute-name' and + token['type'] in TAG_TOKEN_TYPES and + token.get('data'))): + # Remove attribute names that have ', " or < in them + # because those characters are invalid for attribute names. + token['data'] = [ + item for item in token['data'] + if ('"' not in item[0] and + "'" not in item[0] and + '<' not in item[0]) + ] + last_error_token = None + yield token + + elif ((last_error_token['data'] == 'expected-closing-tag-but-got-char' and token['data'].lower().strip() not in self.parser.tags)): # We've got either a malformed tag or a pseudo-tag or # something that html5lib wants to turn into a malformed @@ -145,24 +271,33 @@ def __iter__(self): token['data'] = self.stream.get_tag() token['type'] = CHARACTERS_TYPE - # Yield the adjusted token + last_error_token = None yield token + elif token['type'] == PARSEERROR_TYPE: + # If the token is a parse error, then let the last_error_token + # go, and make token the new last_error_token + yield last_error_token + last_error_token = token + else: yield last_error_token yield token + last_error_token = None - last_error_token = None continue # If the token is a ParseError, we hold on to it so we can get the # next token and potentially fix it. - if token['type'] == constants.tokenTypes['ParseError']: + if token['type'] == PARSEERROR_TYPE: last_error_token = token continue yield token + if last_error_token: + yield last_error_token + def consumeEntity(self, allowedChar=None, fromAttribute=False): # If this tokenizer is set to consume entities, then we can let the # superclass do its thing. @@ -241,7 +376,12 @@ def __init__(self, tags, strip, consume_entities, **kwargs): self.consume_entities = consume_entities super(BleachHTMLParser, self).__init__(**kwargs) - def _parse(self, stream, innerHTML=False, container='div', scripting=False, **kwargs): + def _parse(self, stream, innerHTML=False, container='div', scripting=True, **kwargs): + # set scripting=True to parse