Skip to content

Commit 3016d45

Browse files
committed
Changed encoding to be a local attribute instead of a static attribute
1 parent ffb7a9f commit 3016d45

File tree

5 files changed

+68
-26
lines changed

5 files changed

+68
-26
lines changed

src/PHPHtmlParser/Dom.php

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
use Guzzle\Http\Client;
66
use PHPHtmlParser\Dom\HtmlNode;
77
use PHPHtmlParser\Dom\TextNode;
8+
use stringEncode\Encode;
89

910
class Dom {
1011

@@ -13,14 +14,7 @@ class Dom {
1314
*
1415
* @var string
1516
*/
16-
public static $charset = 'UTF-8';
17-
18-
/**
19-
* The charset that we expect the html to be in.
20-
*
21-
* @var string
22-
*/
23-
public static $expectedCharset = 'UTF-8';
17+
protected $defaultCharset = 'UTF-8';
2418

2519
/**
2620
* Contains the root node of this dom tree.
@@ -57,12 +51,6 @@ class Dom {
5751
*/
5852
protected $size;
5953

60-
public function __construct()
61-
{
62-
self::$expectedCharset = 'UTF-8';
63-
self::$charset = 'UTF-8';
64-
}
65-
6654
/**
6755
* Returns the inner html of the root node.
6856
*
@@ -460,25 +448,35 @@ protected function parseTag()
460448
*/
461449
protected function detectCharset()
462450
{
451+
// set the default
452+
$encode = new Encode;
453+
$encode->from($this->defaultCharset);
454+
$encode->to($this->defaultCharset);
455+
463456
$meta = $this->root->find('meta[http-equiv=Content-Type]', 0);
464457
if (is_null($meta))
465458
{
466459
// could not find meta tag
460+
$this->root->propagateEncoding($encode);
467461
return false;
468462
}
469463
$content = $meta->content;
470464
if (empty($content))
471465
{
472466
// could not find content
467+
$this->root->propagateEncoding($encode);
473468
return false;
474469
}
475470
$matches = [];
476471
if (preg_match('/charset=(.+)/', $content, $matches))
477472
{
478-
static::$expectedCharset = trim($matches[1]);
473+
$encode->from(trim($matches[1]));
474+
$this->root->propagateEncoding($encode);
475+
return true;
479476
}
480477

481478
// no charset found
479+
$this->root->propagateEncoding($encode);
482480
return false;
483481
}
484482
}

src/PHPHtmlParser/Dom/Node.php

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
namespace PHPHtmlParser\Dom;
33

44
use PHPHtmlParser\Selector;
5+
use stringEncode\Encode;
56

67
/**
78
* Dom node object.
@@ -39,10 +40,17 @@ class Node {
3940
/**
4041
* The unique id of the class. Given by PHP.
4142
*
42-
* @string
43+
* @var string
4344
*/
4445
protected $id;
4546

47+
/**
48+
* The encoding class used to encode strings.
49+
*
50+
* @var Encode|null
51+
*/
52+
protected $encode;
53+
4654
public function __construct()
4755
{
4856
$this->id = spl_object_hash($this);
@@ -145,6 +153,23 @@ public function setParent(Node $parent)
145153
return $this;
146154
}
147155

156+
/**
157+
* Sets the encoding class on this node and its tag, and propagates it
158+
* to all its children.
159+
*
160+
* @param Encode $encode
161+
*/
162+
public function propagateEncoding(Encode $encode)
163+
{
164+
$this->encode = $encode;
165+
$this->tag->setEncoding($encode);
166+
// check children
167+
foreach ($this->children as $id => $child)
168+
{
169+
$child['node']->propagateEncoding($encode);
170+
}
171+
}
172+
148173
/**
149174
* Checks if this node has children.
150175
*

src/PHPHtmlParser/Dom/Tag.php

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,13 @@ class Tag {
3232
*/
3333
protected $noise = '';
3434

35+
/**
36+
* The encoding class used to convert this tag's attribute values.
37+
*
38+
* @var Encode|null
39+
*/
40+
protected $encode = null;
41+
3542
public function __construct($name)
3643
{
3744
$this->name = $name;
@@ -78,6 +85,11 @@ public function isSelfClosing()
7885
return $this->selfclosing;
7986
}
8087

88+
public function setEncoding(Encode $encode)
89+
{
90+
$this->encode = $encode;
91+
}
92+
8193
/**
8294
* Sets the noise for this tag (if any)
8395
*
@@ -146,13 +158,10 @@ public function getAttribute($key)
146158
return null;
147159
}
148160
$value = $this->attr[$key]['value'];
149-
if (is_string($value))
161+
if (is_string($value) AND ! is_null($this->encode))
150162
{
151163
// convert charset
152-
$encode = new Encode;
153-
$encode->from(Dom::$expectedCharset);
154-
$encode->to(Dom::$charset);
155-
$this->attr[$key]['value'] = $encode->convert($value);
164+
$this->attr[$key]['value'] = $this->encode->convert($value);
156165
}
157166

158167
return $this->attr[$key];

src/PHPHtmlParser/Dom/TextNode.php

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
namespace PHPHtmlParser\Dom;
33

44
use PHPHtmlParser\Dom;
5-
use stringEncode\Encode;
65

76
class TextNode extends Node {
87

@@ -43,10 +42,14 @@ public function __construct($text)
4342
public function text()
4443
{
4544
// convert charset
46-
$encode = new Encode;
47-
$encode->from(Dom::$expectedCharset);
48-
$encode->to(Dom::$charset);
49-
$text = $encode->convert($this->text);
45+
if ( ! is_null($this->encode))
46+
{
47+
$text = $this->encode->convert($this->text);
48+
}
49+
else
50+
{
51+
$text = $this->text;
52+
}
5053

5154
return $text;
5255
}

tests/DomTest.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,13 @@ public function testLoadNoClosingTag()
4343
$this->assertEquals('<div class="all"><p>Hey bro, <a href="google.com" data-quote="\"">click here</a></p></div><br />', $root->outerHtml);
4444
}
4545

46+
public function testLoadWithFile()
47+
{
48+
$dom = new Dom;
49+
$dom->loadFromFile('tests/small.html');
50+
$this->assertEquals('VonBurgermeister', $dom->find('.post-user font', 0)->text);
51+
}
52+
4653
public function testLoadFromFile()
4754
{
4855
$dom = new Dom;

0 commit comments

Comments
 (0)