55use Guzzle \Http \Client ;
66use PHPHtmlParser \Dom \HtmlNode ;
77use PHPHtmlParser \Dom \TextNode ;
8+ use stringEncode \Encode ;
89
910class Dom {
1011
@@ -13,14 +14,7 @@ class Dom {
1314 *
1415 * @var string
1516 */
16- public static $ charset = 'UTF-8 ' ;
17-
18- /**
19- * The charset that we expect the html to be in.
20- *
21- * @var string
22- */
23- public static $ expectedCharset = 'UTF-8 ' ;
17+ protected $ defaultCharset = 'UTF-8 ' ;
2418
2519 /**
2620 * Contains the root node of this dom tree.
@@ -57,12 +51,6 @@ class Dom {
5751 */
5852 protected $ size ;
5953
60- public function __construct ()
61- {
62- self ::$ expectedCharset = 'UTF-8 ' ;
63- self ::$ charset = 'UTF-8 ' ;
64- }
65-
6654 /**
6755 * Returns the inner html of the root node.
6856 *
@@ -460,25 +448,35 @@ protected function parseTag()
460448 */
protected function detectCharset()
{
    // Attempts to read the document charset from a
    // <meta http-equiv="Content-Type" content="...; charset=XYZ"> tag and
    // hands the resulting Encode object to the root node.
    //
    // Returns true when a charset was found in the meta tag, false when
    // the tag, its content attribute, or the charset parameter is missing.
    // In every case the Encode object is passed to
    // $this->root->propagateEncoding() before returning.

    // Start with source and target both set to the default charset; only
    // the source side is overridden if the document declares a charset.
    $encode = new Encode();
    $encode->from($this->defaultCharset);
    $encode->to($this->defaultCharset);

    $detected = false;

    $meta    = $this->root->find('meta[http-equiv=Content-Type]', 0);
    $content = is_null($meta) ? null : $meta->content;

    if ( ! empty($content))
    {
        // Match the charset parameter case-insensitively and stop at the
        // first whitespace, semicolon or quote so that surrounding quotes
        // and any trailing parameters are not taken as part of the name.
        $matches = [];
        if (preg_match('/charset\s*=\s*["\']?([^\s;"\']+)/i', $content, $matches))
        {
            $encode->from(trim($matches[1]));
            $detected = true;
        }
    }

    // Single exit point: the encoding is always propagated, whether or
    // not a charset was detected.
    $this->root->propagateEncoding($encode);

    return $detected;
}
484482}
0 commit comments