diff --git a/packages/dom/src/dom/strip-html.js b/packages/dom/src/dom/strip-html.js index efd5e2d8784cc2..811979bf3c6dc4 100644 --- a/packages/dom/src/dom/strip-html.js +++ b/packages/dom/src/dom/strip-html.js @@ -1,3 +1,8 @@ +/** + * Internal dependencies + */ +import safeHTML from './safe-html'; + /** * Removes any HTML tags from the provided string. * @@ -6,9 +11,11 @@ * @return {string} The text content with any html removed. */ export default function stripHTML( html ) { - const document = new window.DOMParser().parseFromString( - html, - 'text/html' - ); - return document.body.textContent || ''; + // Sanitize first: script tags and on* attributes must be removed, otherwise their + // *contents* would be left behind as plain text once the HTML tags are stripped. + html = safeHTML( html ); + + const doc = document.implementation.createHTMLDocument( '' ); + doc.body.innerHTML = html; + return doc.body.textContent || ''; } diff --git a/packages/dom/src/dom/test/strip-html.js b/packages/dom/src/dom/test/strip-html.js new file mode 100644 index 00000000000000..cfff0d799cbddf --- /dev/null +++ b/packages/dom/src/dom/test/strip-html.js @@ -0,0 +1,64 @@ +/** + * Internal dependencies + */ +import stripHTML from '../strip-html'; + +describe( 'stripHTML', () => { + it( 'should strip valid HTML, scripts and on attributes', () => { + const input = `Here is some text that contains HTML markup.`; + const output = 'Here is some text that contains HTML markup.'; + expect( stripHTML( input ) ).toBe( output ); + } ); + + it( 'should strip invalid HTML, scripts and on attributes', () => { + const input = `Here is some text

that contains HTML markup

.`; + const output = 'Here is some text that contains HTML markup.'; + expect( stripHTML( input ) ).toBe( output ); + } ); + + describe( 'whitespace preservation', () => { + it( 'should preserve leading spaces', () => { + const input = + ' Here is some text with leading spaces.'; + const output = ' Here is some text with leading spaces.'; + expect( stripHTML( input ) ).toBe( output ); + } ); + + it( 'should preserve leading spaces with HTML', () => { + const input = + ' Here is some text with leading spaces.'; + const output = ' Here is some text with leading spaces.'; + expect( stripHTML( input ) ).toBe( output ); + } ); + + it( 'should preserve trailing spaces with HTML', () => { + const input = + 'Here is some text with trailing spaces. '; + const output = 'Here is some text with trailing spaces. '; + expect( stripHTML( input ) ).toBe( output ); + } ); + + it( 'should preserve consecutive spaces within string', () => { + const input = + 'Here is some text with a lot of spaces inside.'; + const output = + 'Here is some text with a lot of spaces inside.'; + expect( stripHTML( input ) ).toBe( output ); + } ); + + it( 'should preserve new lines in multi-line HTML string', () => { + const input = `
+ Here is some + text + with new lines +
`; + + const output = ` + Here is some + text + with new lines + `; + expect( stripHTML( input ) ).toBe( output ); + } ); + } ); +} ); diff --git a/packages/dom/src/test/dom.js b/packages/dom/src/test/dom.js index 95c7dfae0523e0..4e3f645591fb40 100644 --- a/packages/dom/src/test/dom.js +++ b/packages/dom/src/test/dom.js @@ -5,7 +5,6 @@ import { isHorizontalEdge, placeCaretAtHorizontalEdge, isTextField, - __unstableStripHTML as stripHTML, isNumberInput, removeInvalidHTML, isEmpty, @@ -194,20 +193,6 @@ describe( 'DOM', () => { ); } ); } ); - - describe( 'stripHTML', () => { - it( 'removes any HTML from a text string', () => { - expect( stripHTML( 'This is emphasized' ) ).toBe( - 'This is emphasized' - ); - } ); - - it( 'removes script tags, but does not execute them', () => { - const html = 'This will not '; - expect( stripHTML( html ) ).toBe( 'This will not throw "Error"' ); - expect( () => stripHTML( html ) ).not.toThrow(); - } ); - } ); } ); describe( 'removeInvalidHTML', () => {