Skip to content

Commit a4f557d

Browse files
committed
Fixes paquettg#173 Added option depthFirstSearch for correct behavior
1 parent 34afb62 commit a4f557d

File tree

7 files changed

+65
-6
lines changed

7 files changed

+65
-6
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
### Added
1111
- New `removeSmartyScripts` configuration setting. Defaults to true.
1212
- Added `declare(strict_types=1)` to all source files.
13+
- Added new option `depthFirstSearch`.
14+
- Deprecated option `depthFirstSearch` and marked for removal in `3.0.0`.
1315

1416
### Changed
1517
- Started using a changelog.

README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,10 +172,15 @@ Preserves Line Breaks if set to `true`. If set to `false` line breaks are cleane
172172

173173
Set this to `false` if you want to preserve whitespace inside of text nodes. It is set to `true` by default.
174174

175-
176175
**removeSmartyScripts**
177176

178-
Set this to `false` if you want to preserve smarty sccript found in the html content. It is set to `true` by default.
177+
Set this to `false` if you want to preserve smarty script found in the html content. It is set to `true` by default.
178+
179+
**depthFirstSearch**
180+
181+
By default this is set to `false` for legacy support. Setting this to `true` changes the behavior of `find` so that elements are returned in depth-first order. This properly preserves the order of elements as they were in the HTML.
182+
183+
This option is deprecated and will be removed in version `3.0.0`, with the new behavior being as if it was set to `true`.
179184

180185
Static Facade
181186
-------------

src/PHPHtmlParser/Dom.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ public function find(string $selector, int $nth = null)
237237
{
238238
$this->isLoaded();
239239

240-
return $this->root->find($selector, $nth);
240+
return $this->root->find($selector, $nth, $this->options->get('depthFirstSearch'));
241241
}
242242

243243
/**

src/PHPHtmlParser/Dom/AbstractNode.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,11 +429,13 @@ public function ancestorByTag(string $tag): AbstractNode
429429
*
430430
* @param string $selector
431431
* @param int $nth
432+
* @param bool $depthFirst
432433
* @return mixed
433434
*/
434-
public function find(string $selector, int $nth = null)
435+
public function find(string $selector, int $nth = null, bool $depthFirst = false)
435436
{
436437
$selector = new Selector($selector, new SelectorParser());
438+
$selector->setDepthFirstFind($depthFirst);
437439
$nodes = $selector->find($this);
438440

439441
if ( ! is_null($nth)) {

src/PHPHtmlParser/Options.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
* @property bool preserveLineBreaks
1515
* @property bool removeDoubleSpace
1616
* @property bool removeSmartyScripts
17+
* @property bool depthFirstSearch
1718
*/
1819
class Options
1920
{
@@ -33,6 +34,7 @@ class Options
3334
'preserveLineBreaks' => false,
3435
'removeDoubleSpace' => true,
3536
'removeSmartyScripts' => true,
37+
'depthFirstSearch' => false,
3638
];
3739

3840
/**

src/PHPHtmlParser/Selector/Selector.php

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ class Selector
2020
*/
2121
protected $selectors = [];
2222

23+
/**
24+
* @var bool
25+
*/
26+
private $depthFirst = false;
27+
2328
/**
2429
* Constructs with the selector string
2530
*
@@ -40,6 +45,15 @@ public function getSelectors()
4045
return $this->selectors;
4146
}
4247

48+
/**
49+
* @param bool $status
50+
* @return void
51+
*/
52+
public function setDepthFirstFind(bool $status): void
53+
{
54+
$this->depthFirst = $status;
55+
}
56+
4357
/**
4458
* Attempts to find the selectors starting from the given
4559
* node object.
@@ -151,8 +165,19 @@ protected function seek(array $nodes, array $rule, array $options): array
151165
if ($child instanceof InnerNode &&
152166
$child->hasChildren()
153167
) {
154-
// we still want to check its children
155-
$children[] = $child;
168+
if ($this->depthFirst) {
169+
if ( ! isset($options['checkGrandChildren']) ||
170+
$options['checkGrandChildren']) {
171+
// we have a child that failed but are not leaves.
172+
$matches = $this->seek([$child], $rule, $options);
173+
foreach ($matches as $match) {
174+
$return[] = $match;
175+
}
176+
}
177+
} else {
178+
// we still want to check its children
179+
$children[] = $child;
180+
}
156181
}
157182
}
158183

tests/DomTest.php

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,4 +429,27 @@ public function testInfiniteLoopNotHappening()
429429
$metaNodes = $dom->root->find('meta');
430430
$this->assertEquals(4, count($metaNodes));
431431
}
432+
433+
public function testFindOrder()
434+
{
435+
$str = '<p><img src="http://example.com/first.jpg"></p><img src="http://example.com/second.jpg">';
436+
$dom = new Dom();
437+
$dom->load($str);
438+
$images = $dom->find('img');
439+
440+
$this->assertEquals('<img src="http://example.com/second.jpg" />', (string)$images[0]);
441+
}
442+
443+
public function testFindDepthFirstSearch()
444+
{
445+
$str = '<p><img src="http://example.com/first.jpg"></p><img src="http://example.com/second.jpg">';
446+
$dom = new Dom();
447+
$dom->setOptions([
448+
'depthFirstSearch' => true,
449+
]);
450+
$dom->load($str);
451+
$images = $dom->find('img');
452+
453+
$this->assertEquals('<img src="http://example.com/first.jpg" />', (string)$images[0]);
454+
}
432455
}

0 commit comments

Comments
 (0)