Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: ensure empty strings fall back to the next metadata source in detectors
Previously, detectors would return empty strings from primary sources (like
oembed) without falling back to alternative sources (like metas), losing
valuable metadata. Now empty and whitespace-only strings are treated as
missing data, triggering the fallback chain.

Problem:
When oembed or other primary sources returned empty strings instead of null,
detectors would return those empty values immediately, preventing fallback
to metas, linked data, or document sources that might contain valid data.

Solution:
Add an empty-string check using trim() so that the fallback chain executes
properly: if (is_string($result) && trim($result) !== '')
Only the check uses trim(); the value returned is the original, untrimmed string.

Impact:
- AuthorName: Empty oembed author_name now falls back to metas
- Title: Empty oembed/metas titles now fall back to document <title>
- Description: Empty oembed/metas descriptions now fall back to linked data
- ProviderName: Empty oembed/metas names now fall back to hostname
- Language: Empty html lang attributes now fall back to meta tags

This improves metadata extraction quality by utilizing all available sources
instead of stopping at the first non-null but empty response.
  • Loading branch information
uzulla committed Oct 6, 2025
commit 77bf186e19b22aa26c369fafc744632da999dc40
2 changes: 1 addition & 1 deletion src/Detectors/AuthorName.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public function detect(): ?string
$metas = $this->extractor->getMetas();

$result = $oembed->str('author_name');
if ($result !== null) {
if (is_string($result) && trim($result) !== '') {
return $result;
}

Expand Down
4 changes: 2 additions & 2 deletions src/Detectors/Description.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public function detect(): ?string
$ld = $this->extractor->getLinkedData();

$result = $oembed->str('description');
if ($result !== null) {
if (is_string($result) && trim($result) !== '') {
return $result;
}

Expand All @@ -27,7 +27,7 @@ public function detect(): ?string
'excerpt',
'article.summary'
);
if ($result !== null) {
if (is_string($result) && trim($result) !== '') {
return $result;
}

Expand Down
8 changes: 4 additions & 4 deletions src/Detectors/Language.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ public function detect(): ?string
$ld = $this->extractor->getLinkedData();

$result = $document->select('/html')->str('lang');
if ($result !== null) {
if (is_string($result) && trim($result) !== '') {
return $result;
}

$result = $document->select('/html')->str('xml:lang');
if ($result !== null) {
if (is_string($result) && trim($result) !== '') {
return $result;
}

$result = $metas->str('language', 'lang', 'og:locale', 'dc:language');
if ($result !== null) {
if (is_string($result) && trim($result) !== '') {
return $result;
}

$result = $document->select('.//meta', ['http-equiv' => 'content-language'])->str('content');
if ($result !== null) {
if (is_string($result) && trim($result) !== '') {
return $result;
}

Expand Down
4 changes: 2 additions & 2 deletions src/Detectors/ProviderName.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public function detect(): string
$metas = $this->extractor->getMetas();

$result = $oembed->str('provider_name');
if ($result !== null) {
if (is_string($result) && trim($result) !== '') {
return $result;
}

Expand All @@ -24,7 +24,7 @@ public function detect(): string
'publisher',
'article:publisher'
);
if ($result !== null) {
if (is_string($result) && trim($result) !== '') {
return $result;
}

Expand Down
4 changes: 2 additions & 2 deletions src/Detectors/Title.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public function detect(): ?string
$metas = $this->extractor->getMetas();

$result = $oembed->str('title');
if ($result !== null) {
if (is_string($result) && trim($result) !== '') {
return $result;
}

Expand All @@ -26,7 +26,7 @@ public function detect(): ?string
'article.headline',
'parsely-title'
);
if ($result !== null) {
if (is_string($result) && trim($result) !== '') {
return $result;
}

Expand Down