Skip to content

Commit 2c6677d

Browse files
authored
Merge pull request #162 from Zegnat/rel-parsing
Improve rel parsing
2 parents 743d979 + 64e7966 commit 2c6677d

File tree

2 files changed

+74
-14
lines changed

2 files changed

+74
-14
lines changed

Mf2/Parser.php

Lines changed: 31 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1250,16 +1250,15 @@ public function parseRelsAndAlternates() {
12501250

12511251
// Iterate through all a, area and link elements with rel attributes
12521252
foreach ($this->xpath->query('//a[@rel and @href] | //link[@rel and @href] | //area[@rel and @href]') as $hyperlink) {
1253-
if ($hyperlink->getAttribute('rel') == '') {
1253+
// Parse the set of rels for the current link
1254+
$linkRels = array_unique(array_filter(preg_split('/[\t\n\f\r ]/', $hyperlink->getAttribute('rel'))));
1255+
if (count($linkRels) === 0) {
12541256
continue;
12551257
}
12561258

12571259
// Resolve the href
12581260
$href = $this->resolveUrl($hyperlink->getAttribute('href'));
12591261

1260-
// Split up the rel into space-separated values
1261-
$linkRels = array_filter(explode(' ', $hyperlink->getAttribute('rel')));
1262-
12631262
$rel_attributes = array();
12641263

12651264
if ($hyperlink->hasAttribute('media')) {
@@ -1278,8 +1277,8 @@ public function parseRelsAndAlternates() {
12781277
$rel_attributes['type'] = $hyperlink->getAttribute('type');
12791278
}
12801279

1281-
if ($hyperlink->nodeValue) {
1282-
$rel_attributes['text'] = $hyperlink->nodeValue;
1280+
if (strlen($hyperlink->textContent) > 0) {
1281+
$rel_attributes['text'] = $hyperlink->textContent;
12831282
}
12841283

12851284
if ($this->enableAlternates) {
@@ -1296,16 +1295,34 @@ public function parseRelsAndAlternates() {
12961295
}
12971296

12981297
foreach ($linkRels as $rel) {
1299-
$rels[$rel][] = $href;
1298+
if (!array_key_exists($rel, $rels)) {
1299+
$rels[$rel] = array($href);
1300+
} elseif (!in_array($href, $rels[$rel])) {
1301+
$rels[$rel][] = $href;
1302+
}
13001303
}
13011304

1302-
if (!in_array($href, $rel_urls)) {
1303-
$rel_urls[$href] = array_merge(
1304-
$rel_attributes,
1305-
array('rels' => $linkRels)
1306-
);
1305+
if (!array_key_exists($href, $rel_urls)) {
1306+
$rel_urls[$href] = array('rels' => array());
13071307
}
13081308

1309+
// Add the attributes collected only if they were not already set
1310+
$rel_urls[$href] = array_merge(
1311+
$rel_attributes,
1312+
$rel_urls[$href]
1313+
);
1314+
1315+
// Merge current rels with those already set
1316+
$rel_urls[$href]['rels'] = array_merge(
1317+
$rel_urls[$href]['rels'],
1318+
$linkRels
1319+
);
1320+
}
1321+
1322+
// Alphabetically sort the rels arrays after removing duplicates
1323+
foreach ($rel_urls as $href => $object) {
1324+
$rel_urls[$href]['rels'] = array_unique($rel_urls[$href]['rels']);
1325+
sort($rel_urls[$href]['rels']);
13091326
}
13101327

13111328
if (empty($rels) and $this->jsonMode) {
@@ -1314,8 +1331,8 @@ public function parseRelsAndAlternates() {
13141331

13151332
if (empty($rel_urls) and $this->jsonMode) {
13161333
$rel_urls = new stdClass();
1317-
}
1318-
1334+
}
1335+
13191336
return array($rels, $rel_urls, $alternates);
13201337
}
13211338

tests/Mf2/RelTest.php

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -176,4 +176,47 @@ public function testRelURLs() {
176176
$this->assertArrayHasKey('rels', $output['rel-urls']['http://example.com/articles.atom']);
177177
}
178178

179+
/**
180+
* @see https://github.com/microformats/microformats2-parsing/issues/29
181+
* @see https://github.com/microformats/microformats2-parsing/issues/30
182+
*/
183+
public function testRelURLsRelsUniqueAndSorted() {
184+
$input = '<a href="#" rel="me bookmark"></a>
185+
<a href="#" rel="bookmark archived"></a>';
186+
$parser = new Parser($input);
187+
$output = $parser->parse();
188+
$this->assertEquals($output['rel-urls']['#']['rels'], array('archived', 'bookmark', 'me'));
189+
}
190+
191+
public function testRelURLsInfoMergesCorrectly() {
192+
$input = '<a href="#" rel="a">This nodeValue</a>
193+
<a href="#" rel="a" hreflang="en">Not this nodeValue</a>';
194+
$parser = new Parser($input);
195+
$output = $parser->parse();
196+
$this->assertEquals($output['rel-urls']['#']['hreflang'], 'en');
197+
$this->assertArrayNotHasKey('media', $output['rel-urls']['#']);
198+
$this->assertArrayNotHasKey('title', $output['rel-urls']['#']);
199+
$this->assertArrayNotHasKey('type', $output['rel-urls']['#']);
200+
$this->assertEquals($output['rel-urls']['#']['text'], 'This nodeValue');
201+
}
202+
203+
public function testRelURLsNoDuplicates() {
204+
$input = '<a href="#a" rel="a"></a>
205+
<a href="#b" rel="a"></a>
206+
<a href="#a" rel="a"></a>';
207+
$parser = new Parser($input);
208+
$output = $parser->parse();
209+
$this->assertEquals($output['rels']['a'], array('#a', '#b'));
210+
}
211+
212+
public function testRelURLsFalsyTextVSEmpty() {
213+
$input = '<a href="#a" rel="a">0</a>
214+
<a href="#b" rel="b"></a>';
215+
$parser = new Parser($input);
216+
$output = $parser->parse();
217+
$this->assertArrayHasKey('text', $output['rel-urls']['#a']);
218+
$this->assertEquals($output['rel-urls']['#a']['text'], '0');
219+
$this->assertArrayNotHasKey('text', $output['rel-urls']['#b']);
220+
}
221+
179222
}

0 commit comments

Comments
 (0)