From b605f64d39e3a3c1987c0269976a8da9e3adb264 Mon Sep 17 00:00:00 2001 From: "Lior.php" <39471599+liorphp@users.noreply.github.com> Date: Fri, 22 Mar 2024 16:05:31 +0200 Subject: [PATCH 1/8] Update Collection.php --- src/PHPHtmlParser/Dom/Node/Collection.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PHPHtmlParser/Dom/Node/Collection.php b/src/PHPHtmlParser/Dom/Node/Collection.php index ff447259..15cb5b4b 100644 --- a/src/PHPHtmlParser/Dom/Node/Collection.php +++ b/src/PHPHtmlParser/Dom/Node/Collection.php @@ -130,7 +130,7 @@ public function offsetUnset($offset): void * * @return mixed */ - public function offsetGet($offset) + public function offsetGet($offset): mixed { return $this->collection[$offset] ?? null; } From 6aac3f71b955c8620e50787c40f10f6497f23ba0 Mon Sep 17 00:00:00 2001 From: "Lior.php" <39471599+liorphp@users.noreply.github.com> Date: Fri, 22 Mar 2024 16:06:34 +0200 Subject: [PATCH 2/8] Update composer.json --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 166886f7..5fccf8fc 100755 --- a/composer.json +++ b/composer.json @@ -1,5 +1,5 @@ { - "name": "paquettg/php-html-parser", + "name": "liorphp/php-html-parser", "type": "library", "description": "An HTML DOM parser. It allows you to manipulate HTML. Find tags on an HTML page with selectors just like jQuery.", "keywords": ["html", "dom", "parser"], From b453d867ab6b82ced33d499369dc665ab9684499 Mon Sep 17 00:00:00 2001 From: "Lior.php" <39471599+liorphp@users.noreply.github.com> Date: Wed, 15 May 2024 23:48:53 +0300 Subject: [PATCH 3/8] Update TextNode.php --- src/PHPHtmlParser/Dom/Node/TextNode.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/PHPHtmlParser/Dom/Node/TextNode.php b/src/PHPHtmlParser/Dom/Node/TextNode.php index 1c8b646c..ec3314b0 100644 --- a/src/PHPHtmlParser/Dom/Node/TextNode.php +++ b/src/PHPHtmlParser/Dom/Node/TextNode.php @@ -57,6 +57,7 @@ public function __construct(string $text, $removeDoubleSpace = true) } // restore line breaks + $text = \parse_str($text); $text = \str_replace(' ', "\n", $text); $this->text = $text; From 5aceabc9f981fcdd5ebc36a351d0b1b17ffccaaa Mon Sep 17 00:00:00 2001 From: "Lior.php" <39471599+liorphp@users.noreply.github.com> Date: Thu, 16 May 2024 01:10:35 +0300 Subject: [PATCH 4/8] Update TextNode.php --- src/PHPHtmlParser/Dom/Node/TextNode.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PHPHtmlParser/Dom/Node/TextNode.php b/src/PHPHtmlParser/Dom/Node/TextNode.php index ec3314b0..7291caf4 100644 --- a/src/PHPHtmlParser/Dom/Node/TextNode.php +++ b/src/PHPHtmlParser/Dom/Node/TextNode.php @@ -57,7 +57,7 @@ public function __construct(string $text, $removeDoubleSpace = true) } // restore line breaks - $text = \parse_str($text); + $text = (string)$text; $text = \str_replace(' ', "\n", $text); $this->text = $text; From d976350bda57334b928f4f9f2dc0b01e40eeb0a3 Mon Sep 17 00:00:00 2001 From: "Lior.php" <39471599+liorphp@users.noreply.github.com> Date: Fri, 22 Nov 2024 02:23:05 +0200 Subject: [PATCH 5/8] Update Parser.php --- src/PHPHtmlParser/Dom/Parser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PHPHtmlParser/Dom/Parser.php b/src/PHPHtmlParser/Dom/Parser.php index 7ed310cb..48f7dcda 100644 --- a/src/PHPHtmlParser/Dom/Parser.php +++ b/src/PHPHtmlParser/Dom/Parser.php @@ -37,7 +37,7 @@ public function parse(Options $options, Content $content, int $size): AbstractNo $root->setHtmlSpecialCharsDecode($options->isHtmlSpecialCharsDecode()); $activeNode = $root; while ($activeNode !== null) { - if ($activeNode && $activeNode->tag->name() === 'script' + if ($activeNode && $activeNode->tag && $activeNode->tag->name() === 'script' && $options->isCleanupInput() !== true ) { $str = $content->copyUntil(' Date: Fri, 22 Nov 2024 11:41:10 +0200 Subject: [PATCH 6/8] Update Parser.php --- src/PHPHtmlParser/Dom/Parser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PHPHtmlParser/Dom/Parser.php b/src/PHPHtmlParser/Dom/Parser.php index 48f7dcda..9ca1a435 100644 --- a/src/PHPHtmlParser/Dom/Parser.php +++ b/src/PHPHtmlParser/Dom/Parser.php @@ -37,7 +37,7 @@ public function parse(Options $options, Content $content, int $size): AbstractNo $root->setHtmlSpecialCharsDecode($options->isHtmlSpecialCharsDecode()); $activeNode = $root; while ($activeNode !== null) { - if ($activeNode && $activeNode->tag && $activeNode->tag->name() === 'script' + if ($activeNode && method_exists($activeNode->tag, 'name') && $activeNode->tag->name() === 'script' && $options->isCleanupInput() !== true ) { $str = $content->copyUntil(' Date: Mon, 24 Feb 2025 23:28:18 +0200 Subject: [PATCH 7/8] Update Cleaner.php --- src/PHPHtmlParser/Dom/Cleaner.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/PHPHtmlParser/Dom/Cleaner.php b/src/PHPHtmlParser/Dom/Cleaner.php index 52e5728b..c3238f92 100644 --- a/src/PHPHtmlParser/Dom/Cleaner.php +++ b/src/PHPHtmlParser/Dom/Cleaner.php @@ -10,6 +10,9 @@ class Cleaner implements CleanerInterface { + mb_regex_encoding('UTF-8'); + mb_regex_set_options('m'); + /** * Cleans the html of any none-html information. * @@ -74,7 +77,7 @@ public function clean(string $str, Options $options, string $defaultCharset): st // strip out