@@ -39,57 +39,57 @@ class Dom
3939 *
4040 * @var string
4141 */
42- protected $ defaultCharset = 'UTF-8 ' ;
42+ private $ defaultCharset = 'UTF-8 ' ;
4343
4444 /**
4545 * The raw version of the document string.
4646 *
4747 * @var string
4848 */
49- protected $ raw ;
49+ private $ raw ;
5050
5151 /**
5252 * The document string.
5353 *
5454 * @var Content
5555 */
56- protected $ content ;
56+ private $ content ;
5757
5858 /**
5959 * The original file size of the document.
6060 *
6161 * @var int
6262 */
63- protected $ rawSize ;
63+ private $ rawSize ;
6464
6565 /**
6666 * The size of the document after it is cleaned.
6767 *
6868 * @var int
6969 */
70- protected $ size ;
70+ private $ size ;
7171
7272 /**
7373 * A global options array to be used by all load calls.
7474 *
7575 * @var array
7676 */
77- protected $ globalOptions = [];
77+ private $ globalOptions = [];
7878
7979 /**
8080 * A persistent option object to be used for all options in the
8181 * parsing of the file.
8282 *
8383 * @var Options
8484 */
85- protected $ options ;
85+ private $ options ;
8686
8787 /**
8888 * A list of tags which will always be self closing.
8989 *
9090 * @var array
9191 */
92- protected $ selfClosing = [
92+ private $ selfClosing = [
9393 'area ' ,
9494 'base ' ,
9595 'basefont ' ,
@@ -114,7 +114,7 @@ class Dom
114114 *
115115 * @var array
116116 */
117- protected $ noSlash = [];
117+ private $ noSlash = [];
118118
119119 /**
120120 * Returns the inner html of the root node.
@@ -173,7 +173,7 @@ public function load(string $str, array $options = []): Dom
173173 */
174174 public function loadFromFile (string $ file , array $ options = []): Dom
175175 {
176- $ content = \file_get_contents ($ file );
176+ $ content = @ \file_get_contents ($ file );
177177 if ($ content === false ) {
178178 throw new LogicalException ('file_get_contents failed and returned false when trying to read " ' . $ file . '". ' );
179179 }
@@ -496,7 +496,7 @@ public function getElementsByClass(string $class)
496496 *
497497 * @throws NotLoadedException
498498 */
499- protected function isLoaded (): void
499+ private function isLoaded (): void
500500 {
501501 if (\is_null ($ this ->content )) {
502502 throw new NotLoadedException ('Content is not loaded! ' );
@@ -506,7 +506,7 @@ protected function isLoaded(): void
506506 /**
507507 * Cleans the html of any non-html information.
508508 */
509- protected function clean (string $ str ): string
509+ private function clean (string $ str ): string
510510 {
511511 if ($ this ->options ->get ('cleanupInput ' ) != true ) {
512512 // skip entire cleanup step
@@ -610,7 +610,7 @@ protected function clean(string $str): string
610610 * @throws StrictException
611611 * @throws LogicalException
612612 */
613- protected function parse (): void
613+ private function parse (): void
614614 {
615615 // add the root node
616616 $ this ->root = new HtmlNode ('root ' );
@@ -679,7 +679,7 @@ protected function parse(): void
679679 *
680680 * @throws StrictException
681681 */
682- protected function parseTag (): array
682+ private function parseTag (): array
683683 {
684684 $ return = [
685685 'status ' => false ,
@@ -823,7 +823,7 @@ protected function parseTag(): array
823823 *
824824 * @throws ChildNotFoundException
825825 */
826- protected function detectCharset (): bool
826+ private function detectCharset (): bool
827827 {
828828 // set the default
829829 $ encode = new Encode ();
@@ -841,11 +841,15 @@ protected function detectCharset(): bool
841841
842842 /** @var AbstractNode $meta */
843843 $ meta = $ this ->root ->find ('meta[http-equiv=Content-Type] ' , 0 );
844- if (\is_null ($ meta )) {
845- // could not find meta tag
846- $ this ->root ->propagateEncoding ($ encode );
844+ if ($ meta == null ) {
845+ if (!$ this ->detectHTML5Charset ($ encode )) {
846+ // could not find meta tag
847+ $ this ->root ->propagateEncoding ($ encode );
847848
848- return false ;
849+ return false ;
850+ }
851+
852+ return true ;
849853 }
850854 $ content = $ meta ->getAttribute ('content ' );
851855 if (\is_null ($ content )) {
@@ -855,7 +859,7 @@ protected function detectCharset(): bool
855859 return false ;
856860 }
857861 $ matches = [];
858- if (\preg_match ('/charset=(. +)/ ' , $ content , $ matches )) {
862+ if (\preg_match ('/charset=([^;] +)/ ' , $ content , $ matches )) {
859863 $ encode ->from (\trim ($ matches [1 ]));
860864 $ this ->root ->propagateEncoding ($ encode );
861865
@@ -867,4 +871,18 @@ protected function detectCharset(): bool
867871
868872 return false ;
869873 }
874+
/**
 * Attempts to detect the charset from an HTML5 style
 * <meta charset="..."> tag.
 *
 * When a usable charset value is found it is set as the source
 * encoding on $encode and propagated from the root node.
 *
 * @param Encode $encode the encoder to configure with the detected charset
 *
 * @return bool true if a charset was found and applied, false otherwise
 */
private function detectHTML5Charset(Encode $encode): bool
{
    /** @var AbstractNode|null $meta */
    $meta = $this->root->find('meta[charset]', 0);
    if (\is_null($meta)) {
        // no <meta charset> tag in the document
        return false;
    }

    // detectCharset() guards getAttribute() against null, so do the same
    // here instead of handing a possible null to trim().
    $charset = $meta->getAttribute('charset');
    if (\is_null($charset)) {
        return false;
    }

    $charset = \trim($charset);
    if ($charset === '') {
        // an empty value names no encoding; report failure so the caller
        // can fall back to its default handling
        return false;
    }

    $encode->from($charset);
    $this->root->propagateEncoding($encode);

    return true;
}
870888}
0 commit comments