Skip to content

Commit bad5512

Browse files
committed
Fixes paquettg#116 - Added support for multiple selectors.
1 parent 12f382f commit bad5512

File tree

5 files changed

+140
-54
lines changed

5 files changed

+140
-54
lines changed

.travis.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ php:
44
- 7.1
55
- 7.2
66
- 7.3
7-
- 7.4
87

98
install:
109
- composer self-update

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Changed
11+
12+
- Fixed bug with multiple selectors query.
13+
14+
## 2.1.0
15+
1016
### Added
1117
- Added support for php 7.4
1218

src/PHPHtmlParser/Selector/Parser.php

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
<?php declare(strict_types=1);
1+
<?php
2+
3+
declare(strict_types=1);
4+
25
namespace PHPHtmlParser\Selector;
36

47
/**
@@ -67,6 +70,23 @@ public function parseSelectorString(string $selector): array
6770
}
6871
if ( ! empty($match[6])) {
6972
$value = $match[6];
73+
if (strpos($value, '][') !== false) {
74+
// we have multiple type selectors
75+
$keys = [];
76+
$keys[] = $key;
77+
$key = $keys;
78+
$parts = explode('][', $value);
79+
$value = [];
80+
foreach ($parts as $part) {
81+
if (strpos($part, '=') !== false) {
82+
list($first, $second) = explode('=', $part);
83+
$key[] = $first;
84+
$value[] = $second;
85+
} else {
86+
$value[] = $part;
87+
}
88+
}
89+
}
7090
}
7191

7292
// check for elements that do not have a specified attribute

src/PHPHtmlParser/Selector/Selector.php

Lines changed: 101 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
<?php declare(strict_types=1);
1+
<?php
2+
3+
declare(strict_types=1);
4+
25
namespace PHPHtmlParser\Selector;
36

47
use PHPHtmlParser\Dom\AbstractNode;
@@ -37,7 +40,6 @@ public function __construct(string $selector, ParserInterface $parser)
3740

3841
/**
3942
* Returns the selectors that were found in __construct
40-
*
4143
* @return array
4244
*/
4345
public function getSelectors()
@@ -94,26 +96,24 @@ public function find(AbstractNode $node): Collection
9496
/**
9597
* Attempts to find all children that match the rule
9698
* given.
97-
*
9899
* @param array $nodes
99100
* @param array $rule
100101
* @param array $options
101-
*
102102
* @return array
103103
* @throws ChildNotFoundException
104104
*/
105105
protected function seek(array $nodes, array $rule, array $options): array
106106
{
107107
// XPath index
108-
if (array_key_exists('tag', $rule) &&
109-
array_key_exists('key', $rule) &&
110-
is_numeric($rule['key'])
108+
if (array_key_exists('tag', $rule) && array_key_exists('key', $rule)
109+
&& is_numeric($rule['key'])
111110
) {
112111
$count = 0;
113112
/** @var AbstractNode $node */
114113
foreach ($nodes as $node) {
115-
if ($rule['tag'] == '*' ||
116-
$rule['tag'] == $node->getTag()->name()
114+
if ($rule['tag'] == '*'
115+
|| $rule['tag'] == $node->getTag()
116+
->name()
117117
) {
118118
++$count;
119119
if ($count == $rule['key']) {
@@ -132,15 +132,14 @@ protected function seek(array $nodes, array $rule, array $options): array
132132
/** @var InnerNode $node */
133133
foreach ($nodes as $node) {
134134
// check if we are a leaf
135-
if ($node instanceof LeafNode ||
136-
! $node->hasChildren()
135+
if ($node instanceof LeafNode || !$node->hasChildren()
137136
) {
138137
continue;
139138
}
140139

141140
$children = [];
142-
$child = $node->firstChild();
143-
while ( ! is_null($child)) {
141+
$child = $node->firstChild();
142+
while (!is_null($child)) {
144143
// wild card, grab all
145144
if ($rule['tag'] == '*' && is_null($rule['key'])) {
146145
$return[] = $child;
@@ -149,11 +148,11 @@ protected function seek(array $nodes, array $rule, array $options): array
149148
}
150149

151150
$pass = $this->checkTag($rule, $child);
152-
if ($pass && ! is_null($rule['key'])) {
151+
if ($pass && !is_null($rule['key'])) {
153152
$pass = $this->checkKey($rule, $child);
154153
}
155-
if ($pass && ! is_null($rule['key']) &&
156-
! is_null($rule['value']) && $rule['value'] != '*'
154+
if ($pass && !is_null($rule['key']) && !is_null($rule['value'])
155+
&& $rule['value'] != '*'
157156
) {
158157
$pass = $this->checkComparison($rule, $child);
159158
}
@@ -163,14 +162,15 @@ protected function seek(array $nodes, array $rule, array $options): array
163162
$return[] = $child;
164163
} else {
165164
// this child failed to be matched
166-
if ($child instanceof InnerNode &&
167-
$child->hasChildren()
165+
if ($child instanceof InnerNode && $child->hasChildren()
168166
) {
169167
if ($this->depthFirst) {
170-
if ( ! isset($options['checkGrandChildren']) ||
171-
$options['checkGrandChildren']) {
168+
if (!isset($options['checkGrandChildren'])
169+
|| $options['checkGrandChildren']
170+
) {
172171
// we have a child that failed but is not a leaf.
173-
$matches = $this->seek([$child], $rule, $options);
172+
$matches = $this->seek([$child], $rule,
173+
$options);
174174
foreach ($matches as $match) {
175175
$return[] = $match;
176176
}
@@ -185,9 +185,9 @@ protected function seek(array $nodes, array $rule, array $options): array
185185
$child = $this->getNextChild($node, $child);
186186
}
187187

188-
if (( ! isset($options['checkGrandChildren']) ||
189-
$options['checkGrandChildren'])
190-
&& count($children) > 0
188+
if ((!isset($options['checkGrandChildren'])
189+
|| $options['checkGrandChildren'])
190+
&& count($children) > 0
191191
) {
192192
// we have children that failed but are not leaves.
193193
$matches = $this->seek($children, $rule, $options);
@@ -202,31 +202,35 @@ protected function seek(array $nodes, array $rule, array $options): array
202202

203203
/**
204204
* Attempts to match the given arguments with the given operator.
205-
*
206205
* @param string $operator
207206
* @param string $pattern
208207
* @param string $value
209208
* @return bool
210209
*/
211-
protected function match(string $operator, string $pattern, string $value): bool
212-
{
213-
$value = strtolower($value);
210+
protected function match(
211+
string $operator,
212+
string $pattern,
213+
string $value
214+
): bool {
215+
$value = strtolower($value);
214216
$pattern = strtolower($pattern);
215217
switch ($operator) {
216218
case '=':
217219
return $value === $pattern;
218220
case '!=':
219221
return $value !== $pattern;
220222
case '^=':
221-
return preg_match('/^'.preg_quote($pattern, '/').'/', $value) == 1;
223+
return preg_match('/^' . preg_quote($pattern, '/') . '/',
224+
$value) == 1;
222225
case '$=':
223-
return preg_match('/'.preg_quote($pattern, '/').'$/', $value) == 1;
226+
return preg_match('/' . preg_quote($pattern, '/') . '$/',
227+
$value) == 1;
224228
case '*=':
225229
if ($pattern[0] == '/') {
226230
return preg_match($pattern, $value) == 1;
227231
}
228232

229-
return preg_match("/".$pattern."/i", $value) == 1;
233+
return preg_match("/" . $pattern . "/i", $value) == 1;
230234
}
231235

232236
return false;
@@ -235,7 +239,6 @@ protected function match(string $operator, string $pattern, string $value): bool
235239
/**
236240
* Attempts to figure out what the alteration will be for
237241
* the next element.
238-
*
239242
* @param array $rule
240243
* @return array
241244
*/
@@ -251,7 +254,6 @@ protected function alterNext(array $rule): array
251254

252255
/**
253256
* Flattens the option array.
254-
*
255257
* @param array $optionsArray
256258
* @return array
257259
*/
@@ -269,13 +271,14 @@ protected function flattenOptions(array $optionsArray)
269271

270272
/**
271273
* Returns the next child or null if no more children.
272-
*
273274
* @param AbstractNode $node
274275
* @param AbstractNode $currentChild
275276
* @return AbstractNode|null
276277
*/
277-
protected function getNextChild(AbstractNode $node, AbstractNode $currentChild)
278-
{
278+
protected function getNextChild(
279+
AbstractNode $node,
280+
AbstractNode $currentChild
281+
) {
279282
try {
280283
$child = null;
281284
if ($node instanceof InnerNode) {
@@ -292,15 +295,14 @@ protected function getNextChild(AbstractNode $node, AbstractNode $currentChild)
292295

293296
/**
294297
* Checks tag condition from rules against node.
295-
*
296-
* @param array $rule
298+
* @param array $rule
297299
* @param AbstractNode $node
298300
* @return bool
299301
*/
300302
protected function checkTag(array $rule, AbstractNode $node): bool
301303
{
302-
if ( ! empty($rule['tag']) && $rule['tag'] != $node->getTag()->name() &&
303-
$rule['tag'] != '*'
304+
if (!empty($rule['tag']) && $rule['tag'] != $node->getTag()->name()
305+
&& $rule['tag'] != '*'
304306
) {
305307
return false;
306308
}
@@ -310,20 +312,39 @@ protected function checkTag(array $rule, AbstractNode $node): bool
310312

311313
/**
312314
* Checks key condition from rules against node.
313-
*
314-
* @param array $rule
315+
* @param array $rule
315316
* @param AbstractNode $node
316317
* @return bool
317318
*/
318319
protected function checkKey(array $rule, AbstractNode $node): bool
319320
{
320-
if ($rule['noKey']) {
321-
if ( ! is_null($node->getAttribute($rule['key']))) {
322-
return false;
321+
if (!is_array($rule['key'])) {
322+
if ($rule['noKey']) {
323+
if (!is_null($node->getAttribute($rule['key']))) {
324+
return false;
325+
}
326+
} else {
327+
if ($rule['key'] != 'plaintext'
328+
&& !$node->hasAttribute($rule['key'])
329+
) {
330+
return false;
331+
}
323332
}
324333
} else {
325-
if ($rule['key'] != 'plaintext' && !$node->hasAttribute($rule['key'])) {
326-
return false;
334+
if ($rule['noKey']) {
335+
foreach ($rule['key'] as $key) {
336+
if (!is_null($node->getAttribute($key))) {
337+
return false;
338+
}
339+
}
340+
} else {
341+
foreach ($rule['key'] as $key) {
342+
if ($key != 'plaintext'
343+
&& !$node->hasAttribute($key)
344+
) {
345+
return false;
346+
}
347+
}
327348
}
328349
}
329350

@@ -332,8 +353,7 @@ protected function checkKey(array $rule, AbstractNode $node): bool
332353

333354
/**
334355
* Checks comparison condition from rules against node.
335-
*
336-
* @param array $rule
356+
* @param array $rule
337357
* @param AbstractNode $node
338358
* @return bool
339359
*/
@@ -342,18 +362,46 @@ public function checkComparison(array $rule, AbstractNode $node): bool
342362
if ($rule['key'] == 'plaintext') {
343363
// plaintext search
344364
$nodeValue = $node->text();
365+
$result = $this->checkNodeValue($nodeValue, $rule, $node);
345366
} else {
346367
// normal search
347-
$nodeValue = $node->getAttribute($rule['key']);
368+
if (!is_array($rule['key'])) {
369+
$nodeValue = $node->getAttribute($rule['key']);
370+
$result = $this->checkNodeValue($nodeValue, $rule, $node);
371+
} else {
372+
$result = true;
373+
foreach ($rule['key'] as $index => $key) {
374+
$nodeValue = $node->getAttribute($key);
375+
$result = $result &&
376+
$this->checkNodeValue($nodeValue, $rule, $node, $index);
377+
}
378+
}
348379
}
349380

381+
return $result;
382+
}
383+
384+
/**
385+
* @param string|null $nodeValue
386+
* @param array $rule
387+
* @param AbstractNode $node
388+
* @param int|null $index
389+
* @return bool
390+
*/
391+
private function checkNodeValue(
392+
?string $nodeValue,
393+
array $rule,
394+
AbstractNode $node,
395+
?int $index = null
396+
) : bool {
350397
$check = false;
351398
if (!is_array($rule['value'])) {
352399
$check = $this->match($rule['operator'], $rule['value'], $nodeValue);
353400
}
354401

355402
// handle multiple classes
356-
if ( ! $check && $rule['key'] == 'class') {
403+
$key = $rule['key'];
404+
if (!$check && $key == 'class') {
357405
$nodeClasses = explode(' ', $node->getAttribute('class') ?? '');
358406
foreach ($rule['value'] as $value) {
359407
foreach ($nodeClasses as $class) {
@@ -368,6 +416,8 @@ public function checkComparison(array $rule, AbstractNode $node): bool
368416
break;
369417
}
370418
}
419+
} elseif (!$check && is_array($key)) {
420+
$check = $this->match($rule['operator'], $rule['value'][$index], $nodeValue);
371421
}
372422

373423
return $check;

0 commit comments

Comments
 (0)