Skip to content

Commit 5e016fb

Browse files
committed
Added easier loading of files and URLs. There is still a problem with encoding when loading from a URL.
1 parent 44b6857 commit 5e016fb

File tree

1 file changed

+41
-24
lines changed

1 file changed

+41
-24
lines changed

src/PHPHtmlParser/Dom.php

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -79,30 +79,25 @@ public function __get($name)
7979
}
8080

8181
/**
82-
* Attempts to load the dom from a string.
82+
* Attempts to load the dom from any resource, string, file, or URL.
8383
*
8484
* @param string $str
8585
* @chainable
8686
*/
8787
public function load($str)
8888
{
89-
$this->rawSize = strlen($str);
90-
$this->raw = $str;
91-
92-
// clean out none-html text
93-
if ( ! $this instanceof Dom)
89+
// check if it's a file
90+
if (is_file($str))
9491
{
95-
throw new \Exception(get_class($this));
92+
return $this->loadFromFile($str);
93+
}
94+
// check if it's a url
95+
if (preg_match("/^https?:\/\//i",$str))
96+
{
97+
return $this->loadFromUrl($str);
9698
}
97-
$html = $this->clean($str);
98-
99-
$this->size = strlen($str);
100-
$this->content = new Content($html);
101-
102-
$this->parse();
103-
$this->detectCharset();
10499

105-
return $this;
100+
return $this->loadStr($str);
106101
}
107102

108103
/**
@@ -113,11 +108,7 @@ public function load($str)
113108
*/
114109
public function loadFromFile($file)
115110
{
116-
$fp = fopen($file, 'r');
117-
$document = fread($fp, filesize($file));
118-
fclose($fp);
119-
120-
return $this->load($document);
111+
return $this->loadStr(file_get_contents($file));
121112
}
122113

123114
/**
@@ -128,11 +119,9 @@ public function loadFromFile($file)
128119
*/
129120
public function loadFromUrl($url)
130121
{
131-
$client = new Client($url);
132-
$response = $client->get()->send();
133-
$content = (string) $response;
122+
$content = file_get_contents($url);
134123

135-
return $this->load($content);
124+
return $this->loadStr($content);
136125
}
137126

138127
/**
@@ -206,6 +195,34 @@ public function getElementsByClass($class)
206195
return $this->find('.'.$class);
207196
}
208197

198+
/**
199+
* Parses the html of the given string. Used for load(), loadFromFile(),
200+
* and loadFromUrl().
201+
*
202+
* @param string $str
203+
* @chainable
204+
*/
205+
protected function loadStr($str)
206+
{
207+
$this->rawSize = strlen($str);
208+
$this->raw = $str;
209+
210+
// clean out none-html text
211+
if ( ! $this instanceof Dom)
212+
{
213+
throw new \Exception(get_class($this));
214+
}
215+
$html = $this->clean($str);
216+
217+
$this->size = strlen($str);
218+
$this->content = new Content($html);
219+
220+
$this->parse();
221+
$this->detectCharset();
222+
223+
return $this;
224+
}
225+
209226
/**
210227
* Checks if the load methods have been called.
211228
*

0 commit comments

Comments
 (0)