diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0b72d60 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,62 @@ +name: ci +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true +on: + workflow_dispatch: + push: + branches: + - master + - v*.*.x + tags: + - v*.*.* + pull_request: + types: [opened, synchronize] + branches: + - '*' + +jobs: + cruby: + strategy: + fail-fast: false + matrix: + ruby: ["2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "3.0", "3.1", "ruby-head"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{matrix.ruby}} + bundler-cache: true + - run: bundle exec rake + + cruby-nokogiri-system-libraries: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.1" + - name: Install nokogiri with system libraries + run: | + sudo apt install pkg-config libxml2-dev libxslt-dev + bundle config set force_ruby_platform true + bundle config build.nokogiri --enable-system-libraries + bundle install + bundle exec nokogiri -v + - run: bundle exec rake + + jruby: + continue-on-error: true # nokogiri on jruby has different behavior + strategy: + fail-fast: false + matrix: + ruby: ["jruby-9.2", "jruby-9.3", "jruby-head"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{matrix.ruby}} + bundler-cache: true + - run: bundle exec rake diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d514f0f..0000000 --- a/.travis.yml +++ /dev/null @@ -1,29 +0,0 @@ -language: ruby -sudo: false -cache: bundler -before_install: gem update bundler --no-document -rvm: - - 1.9.3 - - 2.0.0 - - 2.1.10 - - 2.2.6 - - 2.3.3 - - 2.4.0 - - ruby-head - - jruby -matrix: - fast_finish: true - allow_failures: - - rvm: jruby -notifications: - email: false - irc: - on_success: change - on_failure: always - channels: - - "irc.freenode.org#rails-contrib" - campfire: - on_success: change - on_failure: always - rooms: - - secure: "VZlPFPKZPJtzHnGpoV/BV2IGbJjplemHgqDciE2jRtwepjZMekqLgaUaH9CWUggQdaVCMQI0XLl0qrU54SMBNjE2tdx3WiAzN83oWVCBg4XYogY1U0N0/92egld6UcVAK7HKYzSGYj6hqxmkCn8c8reih7eUFCFUsn/5D/FPlI4=" diff --git a/CHANGELOG.md b/CHANGELOG.md index 547d8bb..e18051c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,118 @@ +## 1.4.4 / 2022-12-13 + +* Address inefficient regular expression complexity with certain configurations of Rails::Html::Sanitizer. + + Fixes CVE-2022-23517. See + [GHSA-5x79-w82f-gw8w](https://github.com/rails/rails-html-sanitizer/security/advisories/GHSA-5x79-w82f-gw8w) + for more information. + + *Mike Dalessio* + +* Address improper sanitization of data URIs. + + Fixes CVE-2022-23518 and #135. See + [GHSA-mcvf-2q2m-x72m](https://github.com/rails/rails-html-sanitizer/security/advisories/GHSA-mcvf-2q2m-x72m) + for more information. + + *Mike Dalessio* + +* Address possible XSS vulnerability with certain configurations of Rails::Html::Sanitizer. + + Fixes CVE-2022-23520. See + [GHSA-rrfc-7g8p-99q8](https://github.com/rails/rails-html-sanitizer/security/advisories/GHSA-rrfc-7g8p-99q8) + for more information. + + *Mike Dalessio* + +* Address possible XSS vulnerability with certain configurations of Rails::Html::Sanitizer. + + Fixes CVE-2022-23519. See + [GHSA-9h9g-93gc-623h](https://github.com/rails/rails-html-sanitizer/security/advisories/GHSA-9h9g-93gc-623h) + for more information. + + *Mike Dalessio* + + +## 1.4.3 / 2022-06-09 + +* Address a possible XSS vulnerability with certain configurations of Rails::Html::Sanitizer. + + Prevent the combination of `select` and `style` as allowed tags in SafeListSanitizer. + + Fixes CVE-2022-32209 + + *Mike Dalessio* + + +## 1.4.2 / 2021-08-23 + +* Slightly improve performance. + + Assuming elements are more common than comments, make one less method call per node. + + *Mike Dalessio* + +## 1.4.1 / 2021-08-18 + +* Fix regression in v1.4.0 that did not pass comment nodes to the scrubber. + + Some scrubbers will want to override the default behavior and allow comments, but v1.4.0 only + passed through elements to the scrubber's `keep_node?` method. + + This change once again allows the scrubber to make the decision on comment nodes, but still skips + other non-elements like processing instructions (see #115). + + *Mike Dalessio* + +## 1.4.0 / 2021-08-18 + +* Processing Instructions are no longer allowed by Rails::Html::PermitScrubber + + Previously, a PI with a name (or "target") matching an allowed tag name was not scrubbed. There + are no known security issues associated with these PIs, but similar to comments it's preferred to + omit these nodes when possible from sanitized output. + + Fixes #115. + + *Mike Dalessio* + +## 1.3.0 + +* Address deprecations in Loofah 2.3.0. + + *Josh Goodall* + +## 1.2.0 + +* Remove needless `white_list_sanitizer` deprecation. + + By deprecating this, we were forcing Rails 5.2 to be updated or spew + deprecations that users could do nothing about. + + That's pointless and I'm sorry for adding that! + + Now there's no deprecation warning and Rails 5.2 works out of the box, while + Rails 6 can use the updated naming. + + *Kasper Timm Hansen* + +## 1.1.0 + +* Add `safe_list_sanitizer` and deprecate `white_list_sanitizer` to be removed + in 1.2.0. https://github.com/rails/rails-html-sanitizer/pull/87 + + *Juanito Fatas* + +* Remove `href` from LinkScrubber's `tags` as it's not an element. + https://github.com/rails/rails-html-sanitizer/pull/92 + + *Juanito Fatas* + +* Explain that we don't need to bump Loofah here if there's CVEs. + https://github.com/rails/rails-html-sanitizer/commit/d4d823c617fdd0064956047f7fbf23fff305a69b + + *Kasper Timm Hansen* + ## 1.0.1 * Added support for Rails 4.2.0.beta2 and above diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 40f4dd1..8885691 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,7 +1,7 @@ Contributing to Rails Html Sanitizers ===================== -[![Build Status](https://travis-ci.org/rails/rails-html-sanitizer.svg?branch=master)](https://travis-ci.org/rails/rails-html-sanitizer) +[![Build Status](https://github.com/rails/rails-html-sanitizer/actions/workflows/ci.yml/badge.svg)](https://github.com/rails/rails-html-sanitizer/actions/workflows/ci.yml) Rails Html Sanitizers is work of [many contributors](https://github.com/rails/rails-html-sanitizer/graphs/contributors). You're encouraged to submit [pull requests](https://github.com/rails/rails-html-sanitizer/pulls), [propose features and discuss issues](https://github.com/rails/rails-html-sanitizer/issues). @@ -88,7 +88,7 @@ git push origin my-feature-branch -f #### Check on Your Pull Request -Go back to your pull request after a few minutes and see whether it passed muster with Travis-CI. Everything should look green, otherwise fix issues and amend your commit as described above. +Go back to your pull request after a few minutes and see whether it passed muster with CI. Everything should look green, otherwise fix issues and amend your commit as described above. #### Be Patient diff --git a/README.md b/README.md index 5a6ffc2..7b160b5 100644 --- a/README.md +++ b/README.md @@ -41,22 +41,22 @@ link_sanitizer.sanitize('Only the link text will be kept.< # => Only the link text will be kept. ``` -#### WhiteListSanitizer +#### SafeListSanitizer ```ruby -white_list_sanitizer = Rails::Html::WhiteListSanitizer.new +safe_list_sanitizer = Rails::Html::SafeListSanitizer.new -# sanitize via an extensive white list of allowed elements -white_list_sanitizer.sanitize(@article.body) +# sanitize via an extensive safe list of allowed elements +safe_list_sanitizer.sanitize(@article.body) -# white list only the supplied tags and attributes -white_list_sanitizer.sanitize(@article.body, tags: %w(table tr td), attributes: %w(id class style)) +# safe list only the supplied tags and attributes +safe_list_sanitizer.sanitize(@article.body, tags: %w(table tr td), attributes: %w(id class style)) -# white list via a custom scrubber -white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new) +# safe list via a custom scrubber +safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new) -# white list sanitizer can also sanitize css -white_list_sanitizer.sanitize_css('background-color: #000;') +# safe list sanitizer can also sanitize css +safe_list_sanitizer.sanitize_css('background-color: #000;') ``` ### Scrubbers @@ -81,8 +81,10 @@ html_fragment.to_s # => "" #### `Rails::Html::TargetScrubber` Where `PermitScrubber` picks out tags and attributes to permit in sanitization, -`Rails::Html::TargetScrubber` targets them for removal. +`Rails::Html::TargetScrubber` targets them for removal. See https://github.com/flavorjones/loofah/blob/main/lib/loofah/html5/safelist.rb for the tag list. +**Note:** by default, it will scrub anything that is not part of the permitted tags from +loofah `HTML5::Scrub.allowed_element?`. ```ruby scrubber = Rails::Html::TargetScrubber.new @@ -127,7 +129,7 @@ Loofah is what underlies the sanitizers and scrubbers of rails-html-sanitizer. - [Loofah and Loofah Scrubbers](https://github.com/flavorjones/loofah) The `node` argument passed to some methods in a custom scrubber is an instance of `Nokogiri::XML::Node`. -- [`Nokogiri::XML::Node`](http://nokogiri.org/Nokogiri/XML/Node.html) +- [`Nokogiri::XML::Node`](https://nokogiri.org/rdoc/Nokogiri/XML/Node.html) - [Nokogiri](http://nokogiri.org) ## Contributing to Rails Html Sanitizers diff --git a/lib/rails-html-sanitizer.rb b/lib/rails-html-sanitizer.rb index 494a56f..59ed70d 100644 --- a/lib/rails-html-sanitizer.rb +++ b/lib/rails-html-sanitizer.rb @@ -15,8 +15,12 @@ def link_sanitizer Html::LinkSanitizer end + def safe_list_sanitizer + Html::SafeListSanitizer + end + def white_list_sanitizer - Html::WhiteListSanitizer + safe_list_sanitizer end end end @@ -34,7 +38,7 @@ module ClassMethods # end # def sanitized_allowed_tags=(tags) - sanitizer_vendor.white_list_sanitizer.allowed_tags = tags + sanitizer_vendor.safe_list_sanitizer.allowed_tags = tags end # Replaces the allowed HTML attributes for the +sanitize+ helper. @@ -44,7 +48,7 @@ def sanitized_allowed_tags=(tags) # end # def sanitized_allowed_attributes=(attributes) - sanitizer_vendor.white_list_sanitizer.allowed_attributes = attributes + sanitizer_vendor.safe_list_sanitizer.allowed_attributes = attributes end [:protocol_separator, diff --git a/lib/rails/html/sanitizer.rb b/lib/rails/html/sanitizer.rb index 7fc6994..5633ca1 100644 --- a/lib/rails/html/sanitizer.rb +++ b/lib/rails/html/sanitizer.rb @@ -40,15 +40,16 @@ def sanitize(html, options = {}) end # === Rails::Html::LinkSanitizer - # Removes a tags and href attributes leaving only the link text + # Removes +a+ tags and +href+ attributes leaving only the link text. # - # link_sanitizer = Rails::Html::LinkSanitizer.new - # link_sanitizer.sanitize('Only the link text will be kept.') - # # => Only the link text will be kept. + # link_sanitizer = Rails::Html::LinkSanitizer.new + # link_sanitizer.sanitize('Only the link text will be kept.') + # + # => 'Only the link text will be kept.' class LinkSanitizer < Sanitizer def initialize @link_scrubber = TargetScrubber.new - @link_scrubber.tags = %w(a href) + @link_scrubber.tags = %w(a) @link_scrubber.attributes = %w(href) end @@ -57,8 +58,8 @@ def sanitize(html, options = {}) end end - # === Rails::Html::WhiteListSanitizer - # Sanitizes html and css from an extensive white list (see link further down). + # === Rails::Html::SafeListSanitizer + # Sanitizes html and css from an extensive safe list (see link further down). # # === Whitespace # We can't make any guarantees about whitespace being kept or stripped. @@ -72,34 +73,34 @@ def sanitize(html, options = {}) # so automatically. # # === Options - # Sanitizes both html and css via the white lists found here: - # https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/whitelist.rb + # Sanitizes both html and css via the safe lists found here: + # https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/safelist.rb # - # WhiteListSanitizer also accepts options to configure - # the white list used when sanitizing html. + # SafeListSanitizer also accepts options to configure + # the safe list used when sanitizing html. # There's a class level option: - # Rails::Html::WhiteListSanitizer.allowed_tags = %w(table tr td) - # Rails::Html::WhiteListSanitizer.allowed_attributes = %w(id class style) + # Rails::Html::SafeListSanitizer.allowed_tags = %w(table tr td) + # Rails::Html::SafeListSanitizer.allowed_attributes = %w(id class style) # # Tags and attributes can also be passed to +sanitize+. # Passed options take precedence over the class level options. # # === Examples - # white_list_sanitizer = Rails::Html::WhiteListSanitizer.new + # safe_list_sanitizer = Rails::Html::SafeListSanitizer.new # # Sanitize css doesn't take options - # white_list_sanitizer.sanitize_css('background-color: #000;') + # safe_list_sanitizer.sanitize_css('background-color: #000;') # - # Default: sanitize via a extensive white list of allowed elements - # white_list_sanitizer.sanitize(@article.body) + # Default: sanitize via a extensive safe list of allowed elements + # safe_list_sanitizer.sanitize(@article.body) # - # White list via the supplied tags and attributes - # white_list_sanitizer.sanitize(@article.body, tags: %w(table tr td), + # Safe list via the supplied tags and attributes + # safe_list_sanitizer.sanitize(@article.body, tags: %w(table tr td), # attributes: %w(id class style)) # - # White list via a custom scrubber - # white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new) - class WhiteListSanitizer < Sanitizer + # Safe list via a custom scrubber + # safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new) + class SafeListSanitizer < Sanitizer class << self attr_accessor :allowed_tags attr_accessor :allowed_attributes @@ -148,5 +149,7 @@ def allowed_attributes(options) options[:attributes] || self.class.allowed_attributes end end + + WhiteListSanitizer = SafeListSanitizer end end diff --git a/lib/rails/html/sanitizer/version.rb b/lib/rails/html/sanitizer/version.rb index 9a815d6..3ceb4c8 100644 --- a/lib/rails/html/sanitizer/version.rb +++ b/lib/rails/html/sanitizer/version.rb @@ -1,7 +1,7 @@ module Rails module Html class Sanitizer - VERSION = "1.0.4" + VERSION = "1.4.4" end end end diff --git a/lib/rails/html/scrubbers.rb b/lib/rails/html/scrubbers.rb index 42436f4..674d1c4 100644 --- a/lib/rails/html/scrubbers.rb +++ b/lib/rails/html/scrubbers.rb @@ -2,9 +2,9 @@ module Rails module Html # === Rails::Html::PermitScrubber # - # Rails::Html::PermitScrubber allows you to permit only your own tags and/or attributes. + # +Rails::Html::PermitScrubber+ allows you to permit only your own tags and/or attributes. # - # Rails::Html::PermitScrubber can be subclassed to determine: + # +Rails::Html::PermitScrubber+ can be subclassed to determine: # - When a node should be skipped via +skip_node?+. # - When a node is allowed via +allowed_node?+. # - When an attribute should be scrubbed via +scrub_attribute?+. @@ -27,23 +27,23 @@ module Html # If set, attributes excluded will be removed. # If not, attributes are removed based on Loofahs +HTML5::Scrub.scrub_attributes+. # - # class CommentScrubber < Html::PermitScrubber - # def initialize - # super - # self.tags = %w(form script comment blockquote) - # end + # class CommentScrubber < Html::PermitScrubber + # def initialize + # super + # self.tags = %w(form script comment blockquote) + # end # - # def skip_node?(node) - # node.text? - # end + # def skip_node?(node) + # node.text? + # end # - # def scrub_attribute?(name) - # name == "style" - # end - # end + # def scrub_attribute?(name) + # name == "style" + # end + # end # - # See the documentation for Nokogiri::XML::Node to understand what's possible - # with nodes: http://nokogiri.org/Nokogiri/XML/Node.html + # See the documentation for +Nokogiri::XML::Node+ to understand what's possible + # with nodes: https://nokogiri.org/rdoc/Nokogiri/XML/Node.html class PermitScrubber < Loofah::Scrubber attr_reader :tags, :attributes @@ -61,14 +61,14 @@ def attributes=(attributes) end def scrub(node) - if node.cdata? - text = node.document.create_text_node node.text - node.replace text + if Loofah::HTML5::Scrub.cdata_needs_escaping?(node) + replacement = Loofah::HTML5::Scrub.cdata_escape(node) + node.replace(replacement) return CONTINUE end return CONTINUE if skip_node?(node) - unless keep_node?(node) + unless (node.element? || node.comment?) && keep_node?(node) return STOP if scrub_node(node) == STOP end @@ -138,17 +138,15 @@ def scrub_attribute(node, attr_node) attr_node.node_name end - if Loofah::HTML5::WhiteList::ATTR_VAL_IS_URI.include?(attr_name) - # this block lifted nearly verbatim from HTML5 sanitization - val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(Loofah::HTML5::Scrub::CONTROL_CHARACTERS,'').downcase - if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! Loofah::HTML5::WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(Loofah::HTML5::WhiteList::PROTOCOL_SEPARATOR)[0]) - attr_node.remove - end + if Loofah::HTML5::SafeList::ATTR_VAL_IS_URI.include?(attr_name) + return if Loofah::HTML5::Scrub.scrub_uri_attribute(attr_node) end - if Loofah::HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name) - attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value + + if Loofah::HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name) + Loofah::HTML5::Scrub.scrub_attribute_that_allows_local_ref(attr_node) end - if Loofah::HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m + + if Loofah::HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m attr_node.remove end @@ -160,8 +158,8 @@ def scrub_attribute(node, attr_node) # === Rails::Html::TargetScrubber # - # Where Rails::Html::PermitScrubber picks out tags and attributes to permit in - # sanitization, Rails::Html::TargetScrubber targets them for removal. + # Where +Rails::Html::PermitScrubber+ picks out tags and attributes to permit in + # sanitization, +Rails::Html::TargetScrubber+ targets them for removal. # # +tags=+ # If set, elements included will be stripped. @@ -180,7 +178,7 @@ def scrub_attribute?(name) # === Rails::Html::TextOnlyScrubber # - # Rails::Html::TextOnlyScrubber allows you to permit text nodes. + # +Rails::Html::TextOnlyScrubber+ allows you to permit text nodes. # # Unallowed elements will be stripped, i.e. element is removed but its subtree kept. class TextOnlyScrubber < Loofah::Scrubber diff --git a/rails-html-sanitizer.gemspec b/rails-html-sanitizer.gemspec index e774244..653084c 100644 --- a/rails-html-sanitizer.gemspec +++ b/rails-html-sanitizer.gemspec @@ -13,13 +13,22 @@ Gem::Specification.new do |spec| spec.homepage = "https://github.com/rails/rails-html-sanitizer" spec.license = "MIT" + spec.metadata = { + "bug_tracker_uri" => "https://github.com/rails/rails-html-sanitizer/issues", + "changelog_uri" => "https://github.com/rails/rails-html-sanitizer/blob/v#{spec.version}/CHANGELOG.md", + "documentation_uri" => "https://www.rubydoc.info/gems/rails-html-sanitizer/#{spec.version}", + "source_code_uri" => "https://github.com/rails/rails-html-sanitizer/tree/v#{spec.version}", + } + spec.files = Dir["lib/**/*", "README.md", "MIT-LICENSE", "CHANGELOG.md"] spec.test_files = Dir["test/**/*"] spec.require_paths = ["lib"] - spec.add_dependency "loofah", "~> 2.2", ">= 2.2.2" + # NOTE: There's no need to update this dependency for Loofah CVEs + # in minor releases when users can simply run `bundle update loofah`. + spec.add_dependency "loofah", "~> 2.19", ">= 2.19.1" - spec.add_development_dependency "bundler", "~> 1.3" + spec.add_development_dependency "bundler", ">= 1.3" spec.add_development_dependency "rake" spec.add_development_dependency "minitest" spec.add_development_dependency "rails-dom-testing" diff --git a/test/sanitizer_test.rb b/test/sanitizer_test.rb index 7bcab6f..cd0b046 100644 --- a/test/sanitizer_test.rb +++ b/test/sanitizer_test.rb @@ -2,6 +2,8 @@ require "rails-html-sanitizer" require "rails/dom/testing/assertions/dom_assertions" +puts Nokogiri::VERSION_INFO + class SanitizersTest < Minitest::Test include Rails::Dom::Testing::Assertions::DomAssertions @@ -12,13 +14,11 @@ def test_sanitizer_sanitize_raises_not_implemented_error end def test_sanitize_nested_script - sanitizer = Rails::Html::WhiteListSanitizer.new - assert_equal '<script>alert("XSS");</script>', sanitizer.sanitize('alert("XSS");/', tags: %w(em)) + assert_equal '<script>alert("XSS");</script>', safe_list_sanitize('alert("XSS");/', tags: %w(em)) end def test_sanitize_nested_script_in_style - sanitizer = Rails::Html::WhiteListSanitizer.new - assert_equal '<script>alert("XSS");</script>', sanitizer.sanitize('alert("XSS");/', tags: %w(em)) + assert_equal '<script>alert("XSS");</script>', safe_list_sanitize('alert("XSS");/', tags: %w(em)) end class XpathRemovalTestSanitizer < Rails::Html::Sanitizer @@ -54,7 +54,8 @@ def test_remove_xpaths_called_with_enumerable_xpaths def test_strip_tags_with_quote input = '<" hi' - assert_equal ' hi', full_sanitize(input) + expected = libxml_2_9_14_recovery_lt? ? %{<" hi} : %{ hi} + assert_equal(expected, full_sanitize(input)) end def test_strip_invalid_html @@ -75,15 +76,21 @@ def test_strip_tags_multiline end def test_remove_unclosed_tags - assert_equal "This is ", full_sanitize("This is <-- not\n a comment here.") + input = "This is <-- not\n a comment here." + expected = libxml_2_9_14_recovery_lt? ? %{This is <-- not\n a comment here.} : %{This is } + assert_equal(expected, full_sanitize(input)) end def test_strip_cdata - assert_equal "This has a ]]> here.", full_sanitize("This has a ]]> here.") + input = "This has a ]]> here." + expected = libxml_2_9_14_recovery_lt_bang? ? %{This has a <![CDATA[]]> here.} : %{This has a ]]> here.} + assert_equal(expected, full_sanitize(input)) end def test_strip_unclosed_cdata - assert_equal "This has an unclosed ]] here...", full_sanitize("This has an unclosed ]] here...") + input = "This has an unclosed ]] here..." + expected = libxml_2_9_14_recovery_lt_bang? ? %{This has an unclosed <![CDATA[]] here...} : %{This has an unclosed ]] here...} + assert_equal(expected, full_sanitize(input)) end def test_strip_blank_string @@ -93,7 +100,7 @@ def test_strip_blank_string end def test_strip_tags_with_plaintext - assert_equal "Dont touch me", full_sanitize("Dont touch me") + assert_equal "Don't touch me", full_sanitize("Don't touch me") end def test_strip_tags_with_tags @@ -135,7 +142,7 @@ def test_strip_links_with_unclosed_tags end def test_strip_links_with_plaintext - assert_equal "Dont touch me", link_sanitize("Dont touch me") + assert_equal "Don't touch me", link_sanitize("Don't touch me") end def test_strip_links_with_line_feed_and_uppercase_tag @@ -154,10 +161,6 @@ def test_strip_links_with_linkception assert_equal "Magic", link_sanitize("Magic") end - def test_strip_links_with_a_tag_in_href - assert_equal "FrrFox", link_sanitize("FrrFox") - end - def test_sanitize_form assert_sanitized "
", '' end @@ -185,7 +188,7 @@ def test_sanitize_image_src assert_sanitized raw, %{src="javascript:bang" foo, bar} end - tags = Loofah::HTML5::WhiteList::ALLOWED_ELEMENTS - %w(script form) + tags = Loofah::HTML5::SafeList::ALLOWED_ELEMENTS - %w(script form) tags.each do |tag_name| define_method "test_should_allow_#{tag_name}_tag" do scope_allowed_tags(tags) do @@ -255,38 +258,39 @@ def test_custom_attributes_overrides_allowed_attributes def test_should_allow_custom_tags text = "foo" - assert_equal text, white_list_sanitize(text, tags: %w(u)) + assert_equal text, safe_list_sanitize(text, tags: %w(u)) end def test_should_allow_only_custom_tags text = "foo with bar" - assert_equal "foo with bar", white_list_sanitize(text, tags: %w(u)) + assert_equal "foo with bar", safe_list_sanitize(text, tags: %w(u)) end def test_should_allow_custom_tags_with_attributes text = %(
foo
) - assert_equal text, white_list_sanitize(text) + assert_equal text, safe_list_sanitize(text) end def test_should_allow_custom_tags_with_custom_attributes text = %(
Lorem ipsum
) - assert_equal text, white_list_sanitize(text, attributes: ['foo']) + assert_equal text, safe_list_sanitize(text, attributes: ['foo']) end def test_scrub_style_if_style_attribute_option_is_passed input = '

' - assert_equal '

', white_list_sanitize(input, attributes: %w(style)) + actual = safe_list_sanitize(input, attributes: %w(style)) + assert_includes(['

', '

'], actual) end def test_should_raise_argument_error_if_tags_is_not_enumerable assert_raises ArgumentError do - white_list_sanitize('some html', tags: 'foo') + safe_list_sanitize('some html', tags: 'foo') end end def test_should_raise_argument_error_if_attributes_is_not_enumerable assert_raises ArgumentError do - white_list_sanitize('some html', attributes: 'foo') + safe_list_sanitize('some html', attributes: 'foo') end end @@ -295,7 +299,7 @@ def test_should_not_accept_non_loofah_inheriting_scrubber def scrubber.scrub(node); node.name = 'h1'; end assert_raises Loofah::ScrubberNotFound do - white_list_sanitize('some html', scrubber: scrubber) + safe_list_sanitize('some html', scrubber: scrubber) end end @@ -304,19 +308,19 @@ def test_should_accept_loofah_inheriting_scrubber def scrubber.scrub(node); node.name = 'h1'; end html = "" - assert_equal "

hello!

", white_list_sanitize(html, scrubber: scrubber) + assert_equal "

hello!

", safe_list_sanitize(html, scrubber: scrubber) end def test_should_accept_loofah_scrubber_that_wraps_a_block scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' } html = "" - assert_equal "

hello!

", white_list_sanitize(html, scrubber: scrubber) + assert_equal "

hello!

", safe_list_sanitize(html, scrubber: scrubber) end def test_custom_scrubber_takes_precedence_over_other_options scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' } html = "" - assert_equal "

hello!

", white_list_sanitize(html, scrubber: scrubber, tags: ['foo']) + assert_equal "

hello!

", safe_list_sanitize(html, scrubber: scrubber, tags: ['foo']) end [%w(img src), %w(a href)].each do |(tag, attr)| @@ -417,8 +421,25 @@ def test_should_sanitize_img_dynsrc_lowsrc end def test_should_sanitize_div_background_image_unicode_encoded - raw = %(background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029) - assert_equal '', sanitize_css(raw) + [ + convert_to_css_hex("url(javascript:alert(1))", false), + convert_to_css_hex("url(javascript:alert(1))", true), + convert_to_css_hex("url(https://example.com)", false), + convert_to_css_hex("url(https://example.com)", true), + ].each do |propval| + raw = "background-image:" + propval + assert_empty(sanitize_css(raw)) + end + end + + def test_should_allow_div_background_image_unicode_encoded_safe_functions + [ + convert_to_css_hex("rgb(255,0,0)", false), + convert_to_css_hex("rgb(255,0,0)", true), + ].each do |propval| + raw = "background-image:" + propval + assert_includes(sanitize_css(raw), "background-image") + end end def test_should_sanitize_div_style_expression @@ -436,11 +457,15 @@ def test_should_sanitize_img_vbscript end def test_should_sanitize_cdata_section - assert_sanitized "section]]>", "section]]>" + input = "section]]>" + expected = libxml_2_9_14_recovery_lt_bang? ? %{<![CDATA[section]]>} : %{section]]>} + assert_sanitized(input, expected) end def test_should_sanitize_unterminated_cdata_section - assert_sanitized "neverending...", "neverending..." + input = "neverending..." + expected = libxml_2_9_14_recovery_lt_bang? ? %{<![CDATA[neverending...} : %{neverending...} + assert_sanitized(input, expected) end def test_should_not_mangle_urls_with_ampersand @@ -468,7 +493,7 @@ def test_x03a_legitimate end def test_sanitize_ascii_8bit_string - white_list_sanitize('hello'.encode('ASCII-8BIT')).tap do |sanitized| + safe_list_sanitize('hello'.encode('ASCII-8BIT')).tap do |sanitized| assert_equal 'hello', sanitized assert_equal Encoding::UTF_8, sanitized.encoding end @@ -481,39 +506,199 @@ def test_sanitize_data_attributes def test_allow_data_attribute_if_requested text = %(foo) - assert_equal %(foo), white_list_sanitize(text, attributes: ['data-foo']) + assert_equal %(foo), safe_list_sanitize(text, attributes: ['data-foo']) end - def test_uri_escaping_of_href_attr_in_a_tag_in_white_list_sanitizer + def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer + skip if RUBY_VERSION < "2.3" + html = %{test} - text = white_list_sanitize(html) + text = safe_list_sanitize(html) - assert_equal %{test}, text + acceptable_results = [ + # nokogiri w/vendored+patched libxml2 + %{test}, + # nokogiri w/ system libxml2 + %{test}, + ] + assert_includes(acceptable_results, text) end - def test_uri_escaping_of_src_attr_in_a_tag_in_white_list_sanitizer + def test_uri_escaping_of_src_attr_in_a_tag_in_safe_list_sanitizer + skip if RUBY_VERSION < "2.3" + html = %{test} - text = white_list_sanitize(html) + text = safe_list_sanitize(html) - assert_equal %{test}, text + acceptable_results = [ + # nokogiri w/vendored+patched libxml2 + %{test}, + # nokogiri w/system libxml2 + %{test}, + ] + assert_includes(acceptable_results, text) end - def test_uri_escaping_of_name_attr_in_a_tag_in_white_list_sanitizer + def test_uri_escaping_of_name_attr_in_a_tag_in_safe_list_sanitizer + skip if RUBY_VERSION < "2.3" + html = %{test} - text = white_list_sanitize(html) + text = safe_list_sanitize(html) - assert_equal %{test}, text + acceptable_results = [ + # nokogiri w/vendored+patched libxml2 + %{test}, + # nokogiri w/system libxml2 + %{test}, + ] + assert_includes(acceptable_results, text) end - def test_uri_escaping_of_name_action_in_a_tag_in_white_list_sanitizer + def test_uri_escaping_of_name_action_in_a_tag_in_safe_list_sanitizer + skip if RUBY_VERSION < "2.3" + html = %{test} - text = white_list_sanitize(html, attributes: ['action']) + text = safe_list_sanitize(html, attributes: ['action']) + + acceptable_results = [ + # nokogiri w/vendored+patched libxml2 + %{test}, + # nokogiri w/system libxml2 + %{test}, + ] + assert_includes(acceptable_results, text) + end + + def test_exclude_node_type_processing_instructions + assert_equal("
text
text", safe_list_sanitize("
text
text")) + end + + def test_exclude_node_type_comment + assert_equal("
text
text", safe_list_sanitize("
text
text")) + end + + %w[text/plain text/css image/png image/gif image/jpeg].each do |mediatype| + define_method "test_mediatype_#{mediatype}_allowed" do + input = %Q() + expected = input + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + + input = %Q() + expected = input + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + end + end + + def test_mediatype_text_html_disallowed + input = %q() + expected = %q() + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + + input = %q() + expected = %q() + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + end + + def test_mediatype_image_svg_xml_disallowed + input = %q() + expected = %q() + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + + input = %q() + expected = %q() + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + end + + def test_mediatype_other_disallowed + input = %q(foo) + expected = %q(foo) + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + + input = %q(foo) + expected = %q(foo) + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + end + + def test_scrubbing_svg_attr_values_that_allow_ref + input = %Q(
hey
) + expected = %Q(
hey
) + actual = scope_allowed_attributes %w(fill) do + safe_list_sanitize(input) + end + + assert_equal(expected, actual) + end + + def test_style_with_css_payload + input, tags = "", ["style"] + expected = "" + actual = safe_list_sanitize(input, tags: tags) + + assert_equal(expected, actual) + end + + def test_combination_of_select_and_style_with_css_payload + input, tags = "", ["select", "style"] + expected = "" + actual = safe_list_sanitize(input, tags: tags) - assert_equal %{test}, text + assert_equal(expected, actual) + end + + def test_combination_of_select_and_style_with_script_payload + input, tags = "", ["select", "style"] + expected = "" + actual = safe_list_sanitize(input, tags: tags) + + assert_equal(expected, actual) + end + + def test_combination_of_svg_and_style_with_script_payload + input, tags = "", ["svg", "style"] + expected = "" + actual = safe_list_sanitize(input, tags: tags) + + assert_equal(expected, actual) + end + + def test_combination_of_math_and_style_with_img_payload + input, tags = "", ["math", "style"] + expected = "" + actual = safe_list_sanitize(input, tags: tags) + + assert_equal(expected, actual) + + input, tags = "", ["math", "style", "img"] + expected = "" + actual = safe_list_sanitize(input, tags: tags) + + assert_equal(expected, actual) + end + + def test_combination_of_svg_and_style_with_img_payload + input, tags = "", ["svg", "style"] + expected = "" + actual = safe_list_sanitize(input, tags: tags) + + assert_equal(expected, actual) + + input, tags = "", ["svg", "style", "img"] + expected = "" + actual = safe_list_sanitize(input, tags: tags) + + assert_equal(expected, actual) end protected @@ -530,35 +715,57 @@ def link_sanitize(input, options = {}) Rails::Html::LinkSanitizer.new.sanitize(input, options) end - def white_list_sanitize(input, options = {}) - Rails::Html::WhiteListSanitizer.new.sanitize(input, options) + def safe_list_sanitize(input, options = {}) + Rails::Html::SafeListSanitizer.new.sanitize(input, options) end def assert_sanitized(input, expected = nil) if input - assert_dom_equal expected || input, white_list_sanitize(input) + assert_dom_equal expected || input, safe_list_sanitize(input) else - assert_nil white_list_sanitize(input) + assert_nil safe_list_sanitize(input) end end def sanitize_css(input) - Rails::Html::WhiteListSanitizer.new.sanitize_css(input) + Rails::Html::SafeListSanitizer.new.sanitize_css(input) end def scope_allowed_tags(tags) - old_tags = Rails::Html::WhiteListSanitizer.allowed_tags - Rails::Html::WhiteListSanitizer.allowed_tags = tags - yield Rails::Html::WhiteListSanitizer.new + old_tags = Rails::Html::SafeListSanitizer.allowed_tags + Rails::Html::SafeListSanitizer.allowed_tags = tags + yield Rails::Html::SafeListSanitizer.new ensure - Rails::Html::WhiteListSanitizer.allowed_tags = old_tags + Rails::Html::SafeListSanitizer.allowed_tags = old_tags end def scope_allowed_attributes(attributes) - old_attributes = Rails::Html::WhiteListSanitizer.allowed_attributes - Rails::Html::WhiteListSanitizer.allowed_attributes = attributes - yield Rails::Html::WhiteListSanitizer.new + old_attributes = Rails::Html::SafeListSanitizer.allowed_attributes + Rails::Html::SafeListSanitizer.allowed_attributes = attributes + yield Rails::Html::SafeListSanitizer.new ensure - Rails::Html::WhiteListSanitizer.allowed_attributes = old_attributes + Rails::Html::SafeListSanitizer.allowed_attributes = old_attributes + end + + # note that this is used for testing CSS hex encoding: \\[0-9a-f]{1,6} + def convert_to_css_hex(string, escape_parens=false) + string.chars.map do |c| + if !escape_parens && (c == "(" || c == ")") + c + else + format('\00%02X', c.ord) + end + end.join + end + + def libxml_2_9_14_recovery_lt? + # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5 + Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?(">= 2.9.14") + end + + def libxml_2_9_14_recovery_lt_bang? + # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5 + # then reverted in 2.10.0, see https://gitlab.gnome.org/GNOME/libxml2/-/issues/380 + Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?("= 2.9.14") end end diff --git a/test/scrubbers_test.rb b/test/scrubbers_test.rb index 4b84263..a825404 100644 --- a/test/scrubbers_test.rb +++ b/test/scrubbers_test.rb @@ -41,6 +41,16 @@ def test_default_scrub_behavior assert_scrubbed 'hello', 'hello' end + def test_default_scrub_removes_comments + assert_scrubbed('
one
three', + '
one
three') + end + + def test_default_scrub_removes_processing_instructions + assert_scrubbed('
one
three', + '
one
three') + end + def test_default_attributes_removal_behavior assert_scrubbed '

hello

', '

hello

' end @@ -56,6 +66,12 @@ def test_leaves_only_supplied_tags assert_scrubbed html, 'leave me now' end + def test_leaves_comments_when_supplied_as_tag + @scrubber.tags = %w(div comment) + assert_scrubbed('
one
three', + '
one
three') + end + def test_leaves_only_supplied_tags_nested html = 'leave me now' @scrubber.tags = %w(tag)