From 91289e2321517b6ef5e515e1ece9ef28a000a2ed Mon Sep 17 00:00:00 2001 From: Axel Date: Wed, 5 Jun 2024 19:28:15 +0200 Subject: [PATCH 1/9] - fix warning --- composer.json | 13 +++++-------- src/PHPHtmlParser/Dom/Node/Collection.php | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/composer.json b/composer.json index 166886f7..2c38efe0 100755 --- a/composer.json +++ b/composer.json @@ -1,15 +1,12 @@ { - "name": "paquettg/php-html-parser", - "type": "library", - "description": "An HTML DOM parser. It allows you to manipulate HTML. Find tags on an HTML page with selectors just like jQuery.", - "keywords": ["html", "dom", "parser"], - "homepage": "https://github.com/paquettg/php-html-parser", + "name": "translate5/php-html-parser", + "description": "A Fork of the paquettg/php-html-parser PHP HTML-parser to be able to support newer PHP versions", + "keywords": ["php", "html", "dom", "parser", "tranlate5"], "license": "MIT", "authors": [ { - "name": "Gilles Paquette", - "email": "paquettg@gmail.com", - "homepage": "http://gillespaquette.ca" + "name": "Axel Becher", + "email": "axel@mittagqi.com" } ], "require": { diff --git a/src/PHPHtmlParser/Dom/Node/Collection.php b/src/PHPHtmlParser/Dom/Node/Collection.php index ff447259..15cb5b4b 100644 --- a/src/PHPHtmlParser/Dom/Node/Collection.php +++ b/src/PHPHtmlParser/Dom/Node/Collection.php @@ -130,7 +130,7 @@ public function offsetUnset($offset): void * * @return mixed */ - public function offsetGet($offset) + public function offsetGet($offset): mixed { return $this->collection[$offset] ?? null; } From a9fdcd5f44281ff9870f909ed718fc2b6318a59a Mon Sep 17 00:00:00 2001 From: Axel Date: Wed, 5 Jun 2024 20:16:06 +0200 Subject: [PATCH 2/9] - update dependencies --- composer.json | 17 +++++++++++------ src/PHPHtmlParser/Dom/Node/Collection.php | 8 ++++---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/composer.json b/composer.json index 2c38efe0..ac0485ea 100755 --- a/composer.json +++ b/composer.json @@ -10,7 +10,7 @@ } ], "require": { - "php": ">=7.2", + "php": ">=8.1.0", "ext-mbstring": "*", "ext-zlib": "*", "ext-curl": "*", @@ -21,15 +21,20 @@ "myclabs/php-enum": "^1.7" }, "require-dev": { - "phpunit/phpunit": "^7.5.1", - "mockery/mockery": "^1.2", - "infection/infection": "^0.13.4", - "phan/phan": "^2.4", + "phpunit/phpunit": "^8.0.0", + "mockery/mockery": "^1.6.7", + "infection/infection": "^0.27.0", + "phan/phan": "^5.4.0", "friendsofphp/php-cs-fixer": "^2.16" }, "autoload": { - "psr-4": { + "psr-4": { "PHPHtmlParser\\": "src/PHPHtmlParser" } + }, + "config": { + "allow-plugins": { + "infection/extension-installer": false + } } } diff --git a/src/PHPHtmlParser/Dom/Node/Collection.php b/src/PHPHtmlParser/Dom/Node/Collection.php index 15cb5b4b..32cc7c16 100644 --- a/src/PHPHtmlParser/Dom/Node/Collection.php +++ b/src/PHPHtmlParser/Dom/Node/Collection.php @@ -94,7 +94,7 @@ public function getIterator(): ArrayIterator * @param mixed $offset * @param mixed $value */ - public function offsetSet($offset, $value): void + public function offsetSet(mixed $offset, mixed $value): void { if (\is_null($offset)) { $this->collection[] = $value; @@ -108,7 +108,7 @@ public function offsetSet($offset, $value): void * * @param mixed $offset */ - public function offsetExists($offset): bool + public function offsetExists(mixed $offset): bool { return isset($this->collection[$offset]); } @@ -118,7 +118,7 @@ public function offsetExists($offset): bool * * @param mixed $offset */ - public function offsetUnset($offset): void + public function offsetUnset(mixed $offset): void { unset($this->collection[$offset]); } @@ -130,7 +130,7 @@ public function offsetUnset($offset): void * * @return mixed */ - public function offsetGet($offset): mixed + public function offsetGet(mixed $offset): mixed { return $this->collection[$offset] ?? null; } From c806369a20214b7d28a3503e16b1962ca05dc50c Mon Sep 17 00:00:00 2001 From: Axel Date: Wed, 5 Jun 2024 23:56:59 +0200 Subject: [PATCH 3/9] - --- composer.json | 1 + 1 file changed, 1 insertion(+) diff --git a/composer.json b/composer.json index ac0485ea..e40bb8a6 100755 --- a/composer.json +++ b/composer.json @@ -1,5 +1,6 @@ { "name": "translate5/php-html-parser", + "type": "library", "description": "A Fork of the paquettg/php-html-parser PHP HTML-parser to be able to support newer PHP versions", "keywords": ["php", "html", "dom", "parser", "tranlate5"], "license": "MIT", From 5f1e4c2710df9c2c2b5330b5aaf2b438f6cd72fc Mon Sep 17 00:00:00 2001 From: Axel Date: Thu, 6 Jun 2024 00:02:08 +0200 Subject: [PATCH 4/9] - --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index e40bb8a6..9bf4c37c 100755 --- a/composer.json +++ b/composer.json @@ -22,7 +22,7 @@ "myclabs/php-enum": "^1.7" }, "require-dev": { - "phpunit/phpunit": "^8.0.0", + "phpunit/phpunit": "^8.4", "mockery/mockery": "^1.6.7", "infection/infection": "^0.27.0", "phan/phan": "^5.4.0", From 5d681c4f20ec8a8502ad7e0f9be0f63561bd5429 Mon Sep 17 00:00:00 2001 From: Axel Date: Thu, 6 Jun 2024 00:18:21 +0200 Subject: [PATCH 5/9] - --- composer.json | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 9bf4c37c..a6efe3ca 100755 --- a/composer.json +++ b/composer.json @@ -10,12 +10,19 @@ "email": "axel@mittagqi.com" } ], + "repositories" : [ + { + "_README" : "A Fork of the paquettg/php-html-parser PHP HTML-parser to be able to support newer PHP versions", + "type" : "git", + "url" : "https://github.com/translate5/string-encoder/" + } + ], "require": { "php": ">=8.1.0", "ext-mbstring": "*", "ext-zlib": "*", "ext-curl": "*", - "paquettg/string-encode": "~1.0.0", + "translate5/string-encoder": "dev-master", "php-http/httplug": "^2.1", "guzzlehttp/guzzle": "^7.0", "guzzlehttp/psr7": "^1.6", From 28407b45be925fdaf664191235e501613d73a1c9 Mon Sep 17 00:00:00 2001 From: Axel Date: Thu, 6 Jun 2024 00:22:36 +0200 Subject: [PATCH 6/9] - --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index a6efe3ca..3997cc5f 100755 --- a/composer.json +++ b/composer.json @@ -12,7 +12,7 @@ ], "repositories" : [ { - "_README" : "A Fork of the paquettg/php-html-parser PHP HTML-parser to be able to support newer PHP versions", + "_README" : "A Fork of the paquettg/string-encoder PHP HTML-parser to be able to support newer PHP versions", "type" : "git", "url" : "https://github.com/translate5/string-encoder/" } From 0ef4cbbfaafb6e458fb3ea1e2f98cee18e4d80f8 Mon Sep 17 00:00:00 2001 From: Axel Date: Thu, 6 Jun 2024 00:32:27 +0200 Subject: [PATCH 7/9] - --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 3997cc5f..f6d2823f 100755 --- a/composer.json +++ b/composer.json @@ -12,7 +12,7 @@ ], "repositories" : [ { - "_README" : "A Fork of the paquettg/string-encoder PHP HTML-parser to be able to support newer PHP versions", + "_README" : "A Fork of the paquettg/string-encoder string encoder to be able to support newer PHP versions", "type" : "git", "url" : "https://github.com/translate5/string-encoder/" } From d388b3ba8f275a48d8aec452aa2b28e286ec31e4 Mon Sep 17 00:00:00 2001 From: Axel Date: Tue, 22 Oct 2024 19:29:06 +0200 Subject: [PATCH 8/9] Updated dependencies --- composer.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index f6d2823f..a9e1585d 100755 --- a/composer.json +++ b/composer.json @@ -24,8 +24,8 @@ "ext-curl": "*", "translate5/string-encoder": "dev-master", "php-http/httplug": "^2.1", - "guzzlehttp/guzzle": "^7.0", - "guzzlehttp/psr7": "^1.6", + "guzzlehttp/guzzle": "^7.8", + "guzzlehttp/psr7": "^2.7", "myclabs/php-enum": "^1.7" }, "require-dev": { From cd2097d3edd89d869231e8fb58678335855e2b7b Mon Sep 17 00:00:00 2001 From: Axel Date: Tue, 22 Oct 2024 20:14:18 +0200 Subject: [PATCH 9/9] * integrated abandoned stringEncode/encode class --- composer.json | 2 +- src/PHPHtmlParser/DTO/Tag/AttributeDTO.php | 4 +- src/PHPHtmlParser/Dom/Node/AbstractNode.php | 2 +- src/PHPHtmlParser/Dom/Node/InnerNode.php | 2 +- src/PHPHtmlParser/Dom/Parser.php | 2 +- src/PHPHtmlParser/Dom/Tag.php | 6 +- src/PHPHtmlParser/Encode.php | 123 ++++++++++++++++++++ tests/Node/TextTest.php | 2 +- 8 files changed, 133 insertions(+), 10 deletions(-) create mode 100644 src/PHPHtmlParser/Encode.php diff --git a/composer.json b/composer.json index a9e1585d..1dce983a 100755 --- a/composer.json +++ b/composer.json @@ -22,7 +22,7 @@ "ext-mbstring": "*", "ext-zlib": "*", "ext-curl": "*", - "translate5/string-encoder": "dev-master", + "ext-iconv": "*", "php-http/httplug": "^2.1", "guzzlehttp/guzzle": "^7.8", "guzzlehttp/psr7": "^2.7", diff --git a/src/PHPHtmlParser/DTO/Tag/AttributeDTO.php b/src/PHPHtmlParser/DTO/Tag/AttributeDTO.php index 3e7e1824..e830049b 100755 --- a/src/PHPHtmlParser/DTO/Tag/AttributeDTO.php +++ b/src/PHPHtmlParser/DTO/Tag/AttributeDTO.php @@ -4,8 +4,8 @@ namespace PHPHtmlParser\DTO\Tag; -use stringEncode\Encode; -use stringEncode\Exception; +use Exception; +use PHPHtmlParser\Encode; final class AttributeDTO { diff --git a/src/PHPHtmlParser/Dom/Node/AbstractNode.php b/src/PHPHtmlParser/Dom/Node/AbstractNode.php index 897445b0..0c099293 100644 --- a/src/PHPHtmlParser/Dom/Node/AbstractNode.php +++ b/src/PHPHtmlParser/Dom/Node/AbstractNode.php @@ -12,7 +12,7 @@ use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException; use PHPHtmlParser\Finder; use PHPHtmlParser\Selector\Selector; -use stringEncode\Encode; +use PHPHtmlParser\Encode; /** * Dom node object. diff --git a/src/PHPHtmlParser/Dom/Node/InnerNode.php b/src/PHPHtmlParser/Dom/Node/InnerNode.php index 448057a7..c16291db 100644 --- a/src/PHPHtmlParser/Dom/Node/InnerNode.php +++ b/src/PHPHtmlParser/Dom/Node/InnerNode.php @@ -8,7 +8,7 @@ use PHPHtmlParser\Exceptions\ChildNotFoundException; use PHPHtmlParser\Exceptions\CircularException; use PHPHtmlParser\Exceptions\LogicalException; -use stringEncode\Encode; +use PHPHtmlParser\Encode; /** * Inner node of the html tree, might have children. diff --git a/src/PHPHtmlParser/Dom/Parser.php b/src/PHPHtmlParser/Dom/Parser.php index 7ed310cb..1552422b 100644 --- a/src/PHPHtmlParser/Dom/Parser.php +++ b/src/PHPHtmlParser/Dom/Parser.php @@ -17,7 +17,7 @@ use PHPHtmlParser\Exceptions\LogicalException; use PHPHtmlParser\Exceptions\StrictException; use PHPHtmlParser\Options; -use stringEncode\Encode; +use PHPHtmlParser\Encode; class Parser implements ParserInterface { diff --git a/src/PHPHtmlParser/Dom/Tag.php b/src/PHPHtmlParser/Dom/Tag.php index 2aeb6aa8..b9d5db4d 100644 --- a/src/PHPHtmlParser/Dom/Tag.php +++ b/src/PHPHtmlParser/Dom/Tag.php @@ -6,7 +6,7 @@ use PHPHtmlParser\DTO\Tag\AttributeDTO; use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException; -use stringEncode\Encode; +use PHPHtmlParser\Encode; /** * Class Tag. @@ -263,7 +263,7 @@ public function setAttributes(array $attr) /** * Returns all attributes of this tag. * - * @throws \stringEncode\Exception + * @throws \Exception * * @return AttributeDTO[] */ @@ -286,7 +286,7 @@ public function getAttributes(): array * Returns an attribute by the key. * * @throws AttributeNotFoundException - * @throws \stringEncode\Exception + * @throws \Exception */ public function getAttribute(string $key): AttributeDTO { diff --git a/src/PHPHtmlParser/Encode.php b/src/PHPHtmlParser/Encode.php new file mode 100644 index 00000000..9c6578cd --- /dev/null +++ b/src/PHPHtmlParser/Encode.php @@ -0,0 +1,123 @@ +from = 'CP1252'; + + // default to encoding + $this->to = 'UTF-8'; + } + + /** + * Sets the charset that we will be converting to. + * + * @param string $charset + * @chainable + */ + public function to($charset) + { + $this->to = strtoupper($charset); + return $this; + } + + /** + * Sets the charset that we will be converting from. + * + * @param string $charset + * @chainable + */ + public function from($charset) + { + $this->from = strtoupper($charset); + } + + /** + * Returns the to and from charset that we will be using. + * + * @return array + */ + public function charset() + { + return [ + 'from' => $this->from, + 'to' => $this->to, + ]; + } + + /** + * Attempts to detect the encoding of the given string from the encodingList. + * + * @param string $str + * @param array $encodingList + * @return bool + */ + public function detect($str, $encodingList = ['UTF-8', 'CP1252']) + { + $charset = mb_detect_encoding($str, $encodingList); + if ($charset === false) + { + // could not detect charset + return false; + } + + $this->from = $charset; + return true; + } + + /** + * Attempts to convert the string to the proper charset. + * + * @return string + */ + public function convert($str) + { + if ($this->from != $this->to) + { + $str = iconv($this->from, $this->to, $str); + } + + if ($str === false) + { + // the convertion was a failure + throw new Exception('The convertion from "'.$this->from.'" to "'.$this->to.'" was a failure.'); + } + + // deal with BOM issue for utf-8 text + if ($this->to == 'UTF-8') + { + if (substr($str, 0, 3) == "\xef\xbb\xbf") + { + $str = substr($str, 3); + } + if (substr($str, -3, 3) == "\xef\xbb\xbf") + { + $str = substr($str, 0, -3); + } + } + + return $str; + } +} diff --git a/tests/Node/TextTest.php b/tests/Node/TextTest.php index f94c4962..3bb6bade 100755 --- a/tests/Node/TextTest.php +++ b/tests/Node/TextTest.php @@ -6,7 +6,7 @@ use PHPHtmlParser\Dom\Node\TextNode; use PHPHtmlParser\Options; use PHPUnit\Framework\TestCase; -use stringEncode\Encode; +use PHPHtmlParser\Encode; class NodeTextTest extends TestCase {