88use stringEncode \Encode ;
99
1010class Dom {
11-
11+
1212 /**
1313 * The charset we would like the output to be in.
1414 *
@@ -43,7 +43,7 @@ class Dom {
4343 * @var int
4444 */
4545 protected $ rawSize ;
46-
46+
4747 /**
4848 * The size of the document after it is cleaned.
4949 *
@@ -59,7 +59,7 @@ class Dom {
5959 protected $ globalOptions = [];
6060
6161 /**
62- * A persistent option object to be used for all options in the
62+ * A persistent option object to be used for all options in the
6363 * parsing of the file.
6464 *
6565 * @var Options
@@ -232,7 +232,7 @@ public function addSelfClosingTag($tag)
232232 }
233233 return $ this ;
234234 }
235-
235+
236236 /**
237237 * Removes the tag (or tags in an array) from the list of tags that will
238238 * always be self closing.
@@ -297,7 +297,7 @@ public function getElementById($id)
297297 }
298298
299299 /**
300- * Simple wrapper function that returns all elements by
300+ * Simple wrapper function that returns all elements by
301301 * tag name.
302302 *
303303 * @param string $name
@@ -343,6 +343,12 @@ protected function isLoaded()
343343 */
344344 protected function clean ($ str )
345345 {
346+ if ($ this ->options ->get ('cleanupInput ' ) != true )
347+ {
348+ // skip entire cleanup step
349+ return $ str ;
350+ }
351+
346352 // clean out the \n\r
347353 $ str = str_replace (["\r\n" , "\r" , "\n" ], ' ' , $ str );
348354
@@ -351,24 +357,30 @@ protected function clean($str)
351357
352358 // strip out comments
353359 $ str = mb_eregi_replace ("<!--(.*?)--> " , '' , $ str );
354-
360+
355361 // strip out cdata
356362 $ str = mb_eregi_replace ("<!\[CDATA\[(.*?)\]\]> " , '' , $ str );
357-
363+
358364 // strip out <script> tags
359- $ str = mb_eregi_replace ("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*> " , '' , $ str );
360- $ str = mb_eregi_replace ("<\s*script\s*>(.*?)<\s*/\s*script\s*> " , '' , $ str );
361-
365+ if ($ this ->options ->get ('removeScripts ' ) == true )
366+ {
367+ $ str = mb_eregi_replace ("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*> " , '' , $ str );
368+ $ str = mb_eregi_replace ("<\s*script\s*>(.*?)<\s*/\s*script\s*> " , '' , $ str );
369+ }
370+
362371 // strip out <style> tags
363- $ str = mb_eregi_replace ("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*> " , '' , $ str );
364- $ str = mb_eregi_replace ("<\s*style\s*>(.*?)<\s*/\s*style\s*> " , '' , $ str );
365-
372+ if ($ this ->options ->get ('removeStyles ' ) == true )
373+ {
374+ $ str = mb_eregi_replace ("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*> " , '' , $ str );
375+ $ str = mb_eregi_replace ("<\s*style\s*>(.*?)<\s*/\s*style\s*> " , '' , $ str );
376+ }
377+
366378 // strip out preformatted tags
367379 $ str = mb_eregi_replace ("<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*> " , '' , $ str );
368-
380+
369381 // strip out server side scripts
370382 $ str = mb_eregi_replace ("(<\?)(.*?)(\?>) " , '' , $ str );
371-
383+
372384 // strip smarty scripts
373385 $ str = mb_eregi_replace ("(\{\w)(.*?)(\}) " , '' , $ str );
374386
@@ -469,7 +481,7 @@ protected function parseTag()
469481 // move to end of tag
470482 $ this ->content ->copyUntil ('> ' );
471483 $ this ->content ->fastForward (1 );
472-
484+
473485 // check if this closing tag counts
474486 $ tag = strtolower ($ tag );
475487 if (in_array ($ tag , $ this ->selfClosing ))
@@ -570,7 +582,7 @@ protected function parseTag()
570582 }
571583 elseif (in_array ($ tag , $ this ->selfClosing ))
572584 {
573-
585+
574586 // Should be a self closing tag, check if we are strict
575587 if ( $ this ->options ->strict )
576588 {
@@ -581,7 +593,7 @@ protected function parseTag()
581593 // We force self closing on this tag.
582594 $ node ->getTag ()->selfClosing ();
583595 }
584-
596+
585597 $ this ->content ->fastForward (1 );
586598
587599 $ return ['status ' ] = true ;
@@ -630,7 +642,7 @@ protected function detectCharset()
630642 $ this ->root ->propagateEncoding ($ encode );
631643 return true ;
632644 }
633-
645+
634646 // no charset found
635647 $ this ->root ->propagateEncoding ($ encode );
636648 return false ;
0 commit comments