Skip to content

Commit fc41886

Browse files
committed
Merge branch 'LukasRos-master'
2 parents 3cd5a70 + 45ff7fe commit fc41886

File tree

4 files changed

+95
-5
lines changed

4 files changed

+95
-5
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
vendor
2+
composer.lock

src/PHPHtmlParser/Dom.php

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,12 @@ protected function isLoaded()
359359
*/
360360
protected function clean($str)
361361
{
362+
if ($this->options->get('cleanupInput') != true)
363+
{
364+
// skip entire cleanup step
365+
return $str;
366+
}
367+
362368
// clean out the \n\r
363369
$str = str_replace(["\r\n", "\r", "\n"], ' ', $str);
364370

@@ -372,14 +378,20 @@ protected function clean($str)
372378
$str = mb_eregi_replace("<!\[CDATA\[(.*?)\]\]>", '', $str);
373379

374380
// strip out <script> tags
375-
$str = mb_eregi_replace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str);
376-
$str = mb_eregi_replace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str);
381+
if ($this->options->get('removeScripts') == true)
382+
{
383+
$str = mb_eregi_replace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str);
384+
$str = mb_eregi_replace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str);
385+
}
377386

378387
// strip out <style> tags
379-
$str = mb_eregi_replace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str);
380-
$str = mb_eregi_replace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str);
388+
if ($this->options->get('removeStyles') == true)
389+
{
390+
$str = mb_eregi_replace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str);
391+
$str = mb_eregi_replace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str);
392+
}
381393

382-
// strip out pre-formatted tags
394+
// strip out preformatted tags
383395
$str = mb_eregi_replace("<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>", '', $str);
384396

385397
// strip out server side scripts

src/PHPHtmlParser/Options.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ class Options {
2020
'whitespaceTextNode' => true,
2121
'strict' => false,
2222
'enforceEncoding' => null,
23+
'cleanupInput' => true,
24+
'removeScripts' => true,
25+
'removeStyles' => true
2326
];
2427

2528
/**

tests/Options/CleanupTest.php

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
<?php
2+
3+
use PHPHtmlParser\Dom;
4+
5+
/**
 * Exercises the input-cleanup options of the Dom parser
 * ('cleanupInput', 'removeStyles', 'removeScripts') against a shared
 * HTML fixture by counting the <style> and <script> elements found
 * after loading.
 */
class CleanupTest extends PHPUnit_Framework_TestCase
{
    /**
     * Creates a Dom, applies the given options and loads the fixture file.
     *
     * The fixture path is relative; presumably it is resolved against the
     * working directory the test runner is started from (TODO confirm).
     *
     * @param array $options option overrides passed to Dom::setOptions()
     * @return Dom the parser after the fixture has been loaded
     */
    private function loadFixtureWith(array $options)
    {
        $parser = new Dom();
        $parser->setOptions($options);
        $parser->loadFromFile('tests/files/horrible.html');

        return $parser;
    }

    /**
     * With 'cleanupInput' switched on, neither style nor script elements
     * are found in the loaded document.
     */
    public function testCleanupInputTrue()
    {
        $parser = $this->loadFixtureWith(['cleanupInput' => true]);

        $this->assertEquals(0, count($parser->find('style')));
        $this->assertEquals(0, count($parser->find('script')));
    }

    /**
     * With 'cleanupInput' switched off, exactly one style and one script
     * element are found in the loaded document.
     */
    public function testCleanupInputFalse()
    {
        $parser = $this->loadFixtureWith(['cleanupInput' => false]);

        $this->assertEquals(1, count($parser->find('style')));
        $this->assertEquals(1, count($parser->find('script')));
    }

    /**
     * With 'removeStyles' switched on, no style element is found.
     */
    public function testRemoveStylesTrue()
    {
        $parser = $this->loadFixtureWith(['removeStyles' => true]);

        $this->assertEquals(0, count($parser->find('style')));
    }

    /**
     * With 'removeStyles' switched off, one style element is found and
     * its 'type' attribute is readable.
     */
    public function testRemoveStylesFalse()
    {
        $parser = $this->loadFixtureWith(['removeStyles' => false]);

        $this->assertEquals(1, count($parser->find('style')));
        $this->assertEquals(
            'text/css',
            $parser->find('style')->getAttribute('type')
        );
    }

    /**
     * With 'removeScripts' switched on, no script element is found.
     */
    public function testRemoveScriptsTrue()
    {
        $parser = $this->loadFixtureWith(['removeScripts' => true]);

        $this->assertEquals(0, count($parser->find('script')));
    }

    /**
     * With 'removeScripts' switched off, one script element is found and
     * its 'type' attribute is readable.
     */
    public function testRemoveScriptsFalse()
    {
        $parser = $this->loadFixtureWith(['removeScripts' => false]);

        $this->assertEquals(1, count($parser->find('script')));
        $this->assertEquals(
            'text/JavaScript',
            $parser->find('script')->getAttribute('type')
        );
    }
}

0 commit comments

Comments
 (0)