Skip to content

Commit 6900951

Browse files
committed
Added support for non-escaped quotes in attribute value
fixes paquettg#37
1 parent 5dc813c commit 6900951

File tree

3 files changed

+58
-18
lines changed

3 files changed

+58
-18
lines changed

src/PHPHtmlParser/Content.php

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,23 +34,13 @@ class Content {
3434
protected $slash = " />\r\n\t";
3535
protected $attr = ' >';
3636

37-
/**
38-
* Sets up the content block with its content.
39-
*
40-
* @param $content
41-
*/
4237
public function __construct($content)
4338
{
4439
$this->content = $content;
4540
$this->size = strlen($content);
4641
$this->pos = 0;
4742
}
4843

49-
/**
50-
* Returns the current position of the parser.
51-
*
52-
* @return int
53-
*/
5444
public function getPosition()
5545
{
5646
return $this->pos;
@@ -163,19 +153,43 @@ public function copyUntil($string, $char = false, $escape = false)
163153
$this->pos = $this->size;
164154
return $return;
165155
}
166-
156+
167157
if ($position == $this->pos)
168158
{
169159
// we are at the right place
170160
return '';
171161
}
172-
162+
173163
$return = substr($this->content, $this->pos, $position - $this->pos);
174164
// set the new position
175165
$this->pos = $position;
176166
return $return;
177167
}
178168

169+
/**
170+
* Copies the content until the string is found and returns it
171+
* unless any character listed in 'unless' is found in the substring.
172+
*
173+
* @param string $string
174+
* @param string $unless
175+
* @return string
176+
*/
177+
public function copyUntilUnless($string, $unless)
178+
{
179+
$lastPos = $this->pos;
180+
$this->fastForward(1);
181+
$foundString = $this->copyUntil($string, true, true);
182+
183+
$position = strcspn($foundString, $unless);
184+
if ($position == strlen($foundString))
185+
{
186+
return $string.$foundString;
187+
}
188+
// rewind changes and return nothing
189+
$this->pos = $lastPos;
190+
return '';
191+
}
192+
179193
/**
180194
* Copies the content until it reaches the token string.
181195
*
@@ -202,7 +216,7 @@ public function skip($string, $copy = false)
202216
{
203217
$len = strspn($this->content, $string, $this->pos);
204218

205-
// make it chain-able if they don't want a copy
219+
// make it chainable if they don't want a copy
206220
$return = $this;
207221
if ($copy)
208222
{

src/PHPHtmlParser/Dom.php

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -498,12 +498,12 @@ protected function parseTag()
498498
$space = $this->content->skipByToken('blank', true);
499499
if (empty($space))
500500
{
501-
break;
501+
$this->content->fastForward(1);
502+
continue;
502503
}
503504

504505
$name = $this->content->copyByToken('equal', true);
505-
if ($name == '/' OR
506-
empty($name))
506+
if ($name == '/')
507507
{
508508
break;
509509
}
@@ -525,14 +525,26 @@ protected function parseTag()
525525
case '"':
526526
$attr['doubleQuote'] = true;
527527
$this->content->fastForward(1);
528-
$attr['value'] = $this->content->copyUntil('"', false, true);
528+
$string = $this->content->copyUntil('"', true, true);
529+
do
530+
{
531+
$moreString = $this->content->copyUntilUnless('"', '=>');
532+
$string .= $moreString;
533+
} while( ! empty($moreString));
534+
$attr['value'] = $string;
529535
$this->content->fastForward(1);
530536
$node->getTag()->$name = $attr;
531537
break;
532538
case "'":
533539
$attr['doubleQuote'] = false;
534540
$this->content->fastForward(1);
535-
$attr['value'] = $this->content->copyUntil("'", false, true);
541+
$string = $this->content->copyUntil("'", true, true);
542+
do
543+
{
544+
$moreString = $this->content->copyUntilUnless("'", '=>');
545+
$string .= $moreString;
546+
} while( ! empty($moreString));
547+
$attr['value'] = $string;
536548
$this->content->fastForward(1);
537549
$node->getTag()->$name = $attr;
538550
break;

tests/DomTest.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,4 +271,18 @@ public function testScriptCleanerScriptTag()
271271
<p>....</p>');
272272
$this->assertEquals('....', $dom->getElementsByTag('p')[1]->innerHtml);
273273
}
274+
275+
public function testMultipleDoubleQuotes()
276+
{
277+
$dom = new Dom;
278+
$dom->load('<a title="This is a "test" of double quotes" href="http://www.example.com">Hello</a>');
279+
$this->assertEquals('This is a "test" of double quotes', $dom->getElementsByTag('a')[0]->title);
280+
}
281+
282+
public function testMultipleSingleQuotes()
283+
{
284+
$dom = new Dom;
285+
$dom->load("<a title='Ain't this the best' href=\"http://www.example.com\">Hello</a>");
286+
$this->assertEquals("Ain't this the best", $dom->getElementsByTag('a')[0]->title);
287+
}
274288
}

0 commit comments

Comments
 (0)