Skip to content

Commit ffb7a9f

Browse files
committed
Solved issue paquettg#2
When you attempt to load an HTML page from a URL using loadFromUrl, the encoding is incorrect.
1 parent 5e016fb commit ffb7a9f

File tree

3 files changed

+31
-18
lines changed

3 files changed

+31
-18
lines changed

src/PHPHtmlParser/Dom.php

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ class Dom {
5757
*/
5858
protected $size;
5959

60+
public function __construct()
61+
{
62+
self::$expectedCharset = 'UTF-8';
63+
self::$charset = 'UTF-8';
64+
}
65+
6066
/**
6167
* Returns the inner html of the root node.
6268
*

src/PHPHtmlParser/Dom/Tag.php

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -98,15 +98,6 @@ public function noise($noise)
9898
*/
9999
public function setAttribute($key, $value)
100100
{
101-
if (is_string($value['value']))
102-
{
103-
// convert charset
104-
$encode = new Encode;
105-
$encode->from(Dom::$expectedCharset);
106-
$encode->to(Dom::$charset);
107-
$value['value'] = $encode->convert($value['value']);
108-
}
109-
110101
$this->attr[$key] = $value;
111102
return $this;
112103
}
@@ -134,7 +125,12 @@ public function setAttributes(array $attr)
134125
*/
135126
public function getAttributes()
136127
{
137-
return $this->attr;
128+
$return = [];
129+
foreach ($this->attr as $attr => $info)
130+
{
131+
$return[$attr] = $this->getAttribute($attr);
132+
}
133+
return $return;
138134
}
139135

140136
/**
@@ -149,6 +145,16 @@ public function getAttribute($key)
149145
{
150146
return null;
151147
}
148+
$value = $this->attr[$key]['value'];
149+
if (is_string($value))
150+
{
151+
// convert charset
152+
$encode = new Encode;
153+
$encode->from(Dom::$expectedCharset);
154+
$encode->to(Dom::$charset);
155+
$this->attr[$key]['value'] = $encode->convert($value);
156+
}
157+
152158
return $this->attr[$key];
153159
}
154160

@@ -164,7 +170,8 @@ public function makeOpeningTag()
164170
// add the attributes
165171
foreach ($this->attr as $key => $info)
166172
{
167-
$val = $info['value'];
173+
$info = $this->getAttribute($key);
174+
$val = $info['value'];
168175
if (is_null($val))
169176
{
170177
$return .= ' '.$key;

src/PHPHtmlParser/Dom/TextNode.php

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,6 @@ public function __construct($text)
3030
// remove double spaces
3131
$text = preg_replace('/\s+/', ' ', $text);
3232

33-
// convert charset
34-
$encode = new Encode;
35-
$encode->from(Dom::$expectedCharset);
36-
$encode->to(Dom::$charset);
37-
$text = $encode->convert($text);
38-
3933
$this->text = $text;
4034
$this->tag = new Tag('text');
4135
parent::__construct();
@@ -48,7 +42,13 @@ public function __construct($text)
4842
*/
4943
public function text()
5044
{
51-
return $this->text;
45+
// convert charset
46+
$encode = new Encode;
47+
$encode->from(Dom::$expectedCharset);
48+
$encode->to(Dom::$charset);
49+
$text = $encode->convert($this->text);
50+
51+
return $text;
5252
}
5353

5454
}

0 commit comments

Comments
 (0)