Skip to content

Commit 91c41e7

Browse files
committed
Added child selector
fixes paquettg#24
1 parent 022356b commit 91c41e7

File tree

3 files changed

+122
-17
lines changed

3 files changed

+122
-17
lines changed

src/PHPHtmlParser/Dom.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ protected function clean($str)
372372
return $str;
373373
}
374374

375-
/**<?php
375+
/**
376376
* Attempts to parse the html in content.
377377
*/
378378
protected function parse()

src/PHPHtmlParser/Selector.php

Lines changed: 75 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class Selector {
1111
*
1212
* @var string
1313
*/
14-
protected $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
14+
protected $pattern = "/([\w-:\*>]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
1515

1616
protected $selectors = [];
1717

@@ -51,9 +51,17 @@ public function find($node)
5151
if (count($selector) == 0)
5252
continue;
5353

54+
$options = [];
5455
foreach ($selector as $rule)
5556
{
56-
$nodes = $this->seek($nodes, $rule);
57+
if ($rule['alterNext'])
58+
{
59+
$options[] = $this->alterNext($rule);
60+
continue;
61+
}
62+
$nodes = $this->seek($nodes, $rule, $options);
63+
// clear the options
64+
$options = [];
5765
}
5866

5967
// this is the final set of nodes
@@ -81,11 +89,18 @@ protected function parseSelectorString($selector)
8189
foreach ($matches as $match)
8290
{
8391
// default values
84-
$tag = strtolower(trim($match[1]));
85-
$operator = '=';
86-
$key = null;
87-
$value = null;
88-
$noKey = false;
92+
$tag = strtolower(trim($match[1]));
93+
$operator = '=';
94+
$key = null;
95+
$value = null;
96+
$noKey = false;
97+
$alterNext = false;
98+
99+
// check for elements that alter the behavior of the next element
100+
if ($tag == '>')
101+
{
102+
$alterNext = true;
103+
}
89104

90105
// check for id selector
91106
if ( ! empty($match[2]))
@@ -123,11 +138,12 @@ protected function parseSelectorString($selector)
123138
}
124139

125140
$result[] = [
126-
'tag' => $tag,
127-
'key' => $key,
128-
'value' => $value,
129-
'operator' => $operator,
130-
'noKey' => $noKey,
141+
'tag' => $tag,
142+
'key' => $key,
143+
'value' => $value,
144+
'operator' => $operator,
145+
'noKey' => $noKey,
146+
'alterNext' => $alterNext,
131147
];
132148
if (trim($match[7]) == ',')
133149
{
@@ -149,9 +165,10 @@ protected function parseSelectorString($selector)
149165
*
150166
* @param array $nodes
151167
* @param array $rule
168+
* @param array $options
152169
* @recursive
153170
*/
154-
protected function seek(array $nodes, array $rule)
171+
protected function seek(array $nodes, array $rule, array $options)
155172
{
156173
// XPath index
157174
if ( ! empty($rule['tag']) AND ! empty($rule['key']) AND
@@ -173,6 +190,8 @@ protected function seek(array $nodes, array $rule)
173190
return [];
174191
}
175192

193+
$options = $this->flattenOptions($options);
194+
176195
$return = [];
177196
foreach ($nodes as $node)
178197
{
@@ -256,7 +275,7 @@ protected function seek(array $nodes, array $rule)
256275
{
257276
$check = $this->match($rule['operator'], $rule['value'], $class);
258277
}
259-
if ($check)
278+
if ($check)
260279
break;
261280
}
262281
}
@@ -294,10 +313,12 @@ protected function seek(array $nodes, array $rule)
294313
}
295314
}
296315

297-
if (count($children) > 0)
316+
if ((! isset($options['checkGrandChildren']) ||
317+
$options['checkGrandChildren'])
318+
&& count($children) > 0)
298319
{
299320
// we have children that failed but are not leaves.
300-
$matches = $this->seek($children, $rule);
321+
$matches = $this->seek($children, $rule, $options);
301322
foreach ($matches as $match)
302323
{
303324
$return[] = $match;
@@ -339,4 +360,42 @@ protected function match($operator, $pattern, $value)
339360
}
340361
return false;
341362
}
363+
364+
/**
365+
* Attempts to figure out what the alteration will be for
366+
* the next element.
367+
*
368+
* @param array $rule
369+
* @return array
370+
*/
371+
protected function alterNext($rule)
372+
{
373+
$options = [];
374+
if ($rule['tag'] == '>')
375+
{
376+
$options['checkGrandChildren'] = false;
377+
}
378+
379+
return $options;
380+
}
381+
382+
/**
383+
* Flattens the option array.
384+
*
385+
* @param array $optionsArray
386+
* @return array
387+
*/
388+
protected function flattenOptions(array $optionsArray)
389+
{
390+
$options = [];
391+
foreach ($optionsArray as $optionArray)
392+
{
393+
foreach ($optionArray as $key => $option)
394+
{
395+
$options[$key] = $option;
396+
}
397+
}
398+
399+
return $options;
400+
}
342401
}

tests/SelectorTest.php

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,4 +157,50 @@ public function testFindXpathKeySelector()
157157
$selector = new Selector('div[1]');
158158
$this->assertEquals($parent->id(), $selector->find($parent)[0]->id());
159159
}
160+
161+
public function testFindChildMultipleLevelsDeep()
162+
{
163+
$root = new HtmlNode(new Tag('root'));
164+
$parent = new HtmlNode(new Tag('div'));
165+
$child1 = new HtmlNode(new Tag('ul'));
166+
$child2 = new HtmlNode(new Tag('li'));
167+
$root->addChild($parent);
168+
$parent->addChild($child1);
169+
$child1->addChild($child2);
170+
171+
$selector = new Selector('div li');
172+
$this->assertEquals(1, count($selector->find($root)));
173+
}
174+
175+
public function testFindAllChildren()
176+
{
177+
$root = new HtmlNode(new Tag('root'));
178+
$parent = new HtmlNode(new Tag('div'));
179+
$child1 = new HtmlNode(new Tag('ul'));
180+
$child2 = new HtmlNode(new Tag('span'));
181+
$child3 = new HtmlNode(new Tag('ul'));
182+
$root->addChild($parent);
183+
$parent->addChild($child1);
184+
$child2->addChild($child3);
185+
$parent->addChild($child2);
186+
187+
$selector = new Selector('div ul');
188+
$this->assertEquals(2, count($selector->find($root)));
189+
}
190+
191+
public function testFindChildUsingChildSelector()
192+
{
193+
$root = new HtmlNode(new Tag('root'));
194+
$parent = new HtmlNode(new Tag('div'));
195+
$child1 = new HtmlNode(new Tag('ul'));
196+
$child2 = new HtmlNode(new Tag('span'));
197+
$child3 = new HtmlNode(new Tag('ul'));
198+
$root->addChild($parent);
199+
$parent->addChild($child1);
200+
$child2->addChild($child3);
201+
$parent->addChild($child2);
202+
203+
$selector = new Selector('div > ul');
204+
$this->assertEquals(1, count($selector->find($root)));
205+
}
160206
}

0 commit comments

Comments
 (0)