Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Fixed S2 crash on max over empty sequence
  • Loading branch information
jamesbraza committed Feb 21, 2026
commit 3bbe1c3efb0c0ef093e0daffa6eacee8eb2a9add
6 changes: 5 additions & 1 deletion src/paperqa/clients/semantic_scholar.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,9 +265,13 @@ async def s2_title_search(
(strings_similarity(entry["title"], title), entry)
for entry in data.get("data", data)
)
except ValueError as exc:
# ValueError: S2 may return {"data": []} causing max() on an empty iterable to
# throw a ValueError
raise DOINotFoundError(f"No results found for title {title}.") from exc
except (KeyError, IndexError) as exc:
raise DOINotFoundError(
f"Unexpected Semantic Scholar search/match endpoint shape for {title}"
f"Unexpected Semantic Scholar search/match endpoint shape for title {title}"
f" given data {data}."
) from exc

Expand Down
32 changes: 32 additions & 0 deletions tests/cassettes/test_s2_title_search_empty_data.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions tests/test_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@
SemanticScholarProvider,
)
from paperqa.clients.client_models import MetadataPostProcessor, MetadataProvider
from paperqa.clients.exceptions import DOINotFoundError
from paperqa.clients.journal_quality import (
DEFAULT_JOURNAL_QUALITY_CSV_PATH,
JournalQualityPostProcessor,
)
from paperqa.clients.openalex import OpenAlexProvider, reformat_name
from paperqa.clients.retractions import RetractionDataPostProcessor
from paperqa.clients.semantic_scholar import s2_title_search
from paperqa.types import SOURCE_QUALITY_MESSAGES, DocDetails

# Use to avoid flaky tests every time citation count changes
Expand Down Expand Up @@ -379,6 +381,35 @@ async def test_client_os_error() -> None:
assert mock_get.call_count >= 1, "Expected the exception to have been thrown"


@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("return_value", "match"),
    [
        pytest.param({"data": []}, "No results", id="empty-data"),
        pytest.param({"data": [{}]}, "Unexpected", id="missing-title-key"),
    ],
)
async def test_s2_title_search_edge_cases(
    return_value: dict[str, Any], match: str
) -> None:
    """Check that degenerate S2 payloads surface as DOINotFoundError.

    The Semantic Scholar fetch helper is patched to hand back `return_value`
    directly, covering two shapes: an empty "data" list and a "data" entry
    that has no "title" key. In both cases `s2_title_search` is expected to
    raise `DOINotFoundError` whose message matches `match`.
    """
    async with httpx_aiohttp.HttpxAiohttpClient() as session:
        # Stub the fetch helper and assert on the raised error in one `with`,
        # entering the patch first so it is active while the search runs.
        with patch(
            "paperqa.clients.semantic_scholar._s2_get_with_retrying",
            return_value=return_value,
        ), pytest.raises(DOINotFoundError, match=match):
            await s2_title_search("some title", client=session)


@pytest.mark.vcr
@pytest.mark.asyncio
async def test_s2_title_search_empty_data() -> None:
    """Verify a title search that yields no results raises DOINotFoundError.

    Marked with `pytest.mark.vcr`; the query string below is passed to
    `s2_title_search` unchanged and the error message must match "No results".
    """
    # Build the expectation up front; it only takes effect once entered below.
    expect_not_found = pytest.raises(DOINotFoundError, match="No results")
    async with httpx_aiohttp.HttpxAiohttpClient() as session:
        with expect_not_found:
            await s2_title_search("empty results edge case query", client=session)


@pytest.mark.vcr
@pytest.mark.asyncio
async def test_bad_dois() -> None:
Expand Down