11<?php
2-
32namespace PHPHtmlParser ;
43
54use PHPHtmlParser \Dom \HtmlNode ;
@@ -50,6 +49,13 @@ class Dom {
5049 */
5150 protected $ size ;
5251
52+ /**
53+ * A global options array to be used by all load calls.
54+ *
55+ * @var array
56+ */
57+ protected $ globalOptions = [];
58+
5359 /**
5460 * A list of tags which will always be self closing
5561 *
@@ -92,44 +98,47 @@ public function __get($name)
9298 * Attempts to load the dom from any resource, string, file, or URL.
9399 *
94100 * @param string $str
101+ * @param array $options
95102 * @chainable
96103 */
97- public function load ($ str )
104+ public function load ($ str, $ options = [] )
98105 {
99106 // check if it's a file
100107 if (is_file ($ str ))
101108 {
102- return $ this ->loadFromFile ($ str );
109+ return $ this ->loadFromFile ($ str, $ options );
103110 }
104111 // check if it's a url
105112 if (preg_match ("/^https?:\/\//i " ,$ str ))
106113 {
107- return $ this ->loadFromUrl ($ str );
114+ return $ this ->loadFromUrl ($ str, $ options );
108115 }
109116
110- return $ this ->loadStr ($ str );
117+ return $ this ->loadStr ($ str, $ options );
111118 }
112119
113120 /**
114121 * Loads the dom from a document file/url
115122 *
116123 * @param string $file
124+ * @param array $options
117125 * @chainable
118126 */
119- public function loadFromFile ($ file )
127+ public function loadFromFile ($ file, $ options = [] )
120128 {
121- return $ this ->loadStr (file_get_contents ($ file ));
129+ return $ this ->loadStr (file_get_contents ($ file ), $ options );
122130 }
123131
124132 /**
125133 * Use a curl interface implementation to attempt to load
126134 * the content from a url.
127135 *
128136 * @param string $url
137+ * @param array $options
129138 * @param CurlInterface $curl
130139 * @chainable
131140 */
132- public function loadFromUrl ($ url , CurlInterface $ curl = null )
141+ public function loadFromUrl ($ url , $ options = [], CurlInterface $ curl = null )
133142 {
134143 if (is_null ($ curl ))
135144 {
@@ -138,7 +147,19 @@ public function loadFromUrl($url, CurlInterface $curl = null)
138147 }
139148 $ content = $ curl ->get ($ url );
140149
141- return $ this ->loadStr ($ content );
150+ return $ this ->loadStr ($ content , $ options );
151+ }
152+
153+ /**
154+ * Sets a global options array to be used by all load calls.
155+ *
156+ * @param array $options
157+ * @chainable
158+ */
159+ public function setOptions (array $ options )
160+ {
161+ $ this ->globalOptions = $ options ;
162+ return $ this ;
142163 }
143164
144165 /**
@@ -264,11 +285,16 @@ public function getElementsByClass($class)
264285 * Parses the HTML of the given string. Used for load(), loadFromFile(),
265286 * and loadFromUrl().
266287 *
267- * @param string
288+ * @param string $str
289+ * @param array $option
268290 * @chainable
269291 */
270- protected function loadStr ($ str )
292+ protected function loadStr ($ str, $ option )
271293 {
294+ $ this ->options = new Options ;
295+ $ this ->options ->setOptions ($ this ->globalOptions )
296+ ->setOptions ($ option );
297+
272298 $ this ->rawSize = strlen ($ str );
273299 $ this ->raw = $ str ;
274300
@@ -397,9 +423,10 @@ protected function parse()
397423 $ activeNode = $ node ;
398424 }
399425 }
400- else
426+ else if ($ this ->options ->whitespaceTextNode or
427+ trim ($ str ) != '' )
401428 {
402- // we found text
429+ // we found text we care about
403430 $ textNode = new TextNode ($ str );
404431 $ activeNode ->addChild ($ textNode );
405432 }
0 commit comments