Skip to content

Commit bec454a

Browse files
committed
Fixed S2 crash on max over empty sequence
1 parent 1a7d805 commit bec454a

File tree

3 files changed

+63
-1
lines changed

3 files changed

+63
-1
lines changed

src/paperqa/clients/semantic_scholar.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,9 @@ async def s2_title_search(
265265
(strings_similarity(entry["title"], title), entry)
266266
for entry in data.get("data", data)
267267
)
268-
except (KeyError, IndexError) as exc:
268+
except (KeyError, IndexError, ValueError) as exc:
269+
# ValueError: S2 may return {"data": []}, causing max() on an empty iterable to
270+
# raise a ValueError
269271
raise DOINotFoundError(
270272
f"Unexpected Semantic Scholar search/match endpoint shape for {title}"
271273
f" given data {data}."

tests/cassettes/test_s2_title_search_empty_data.yaml

Lines changed: 32 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/test_clients.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@
2222
SemanticScholarProvider,
2323
)
2424
from paperqa.clients.client_models import MetadataPostProcessor, MetadataProvider
25+
from paperqa.clients.exceptions import DOINotFoundError
2526
from paperqa.clients.journal_quality import (
2627
DEFAULT_JOURNAL_QUALITY_CSV_PATH,
2728
JournalQualityPostProcessor,
2829
)
2930
from paperqa.clients.openalex import OpenAlexProvider, reformat_name
3031
from paperqa.clients.retractions import RetractionDataPostProcessor
32+
from paperqa.clients.semantic_scholar import s2_title_search
3133
from paperqa.types import SOURCE_QUALITY_MESSAGES, DocDetails
3234

3335
# Use to avoid flaky tests every time citation count changes
@@ -379,6 +381,32 @@ async def test_client_os_error() -> None:
379381
assert mock_get.call_count >= 1, "Expected the exception to have been thrown"
380382

381383

384+
@pytest.mark.asyncio
385+
@pytest.mark.parametrize(
386+
"mock_return_data",
387+
[
388+
pytest.param({"data": []}, id="empty-data-list"),
389+
],
390+
)
391+
async def test_s2_title_search_edge_cases(mock_return_data: dict) -> None:
392+
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
393+
with patch(
394+
"paperqa.clients.semantic_scholar._s2_get_with_retrying",
395+
return_value=mock_return_data,
396+
):
397+
with pytest.raises(DOINotFoundError):
398+
await s2_title_search("some title", client=http_client)
399+
400+
401+
@pytest.mark.vcr
402+
@pytest.mark.asyncio
403+
async def test_s2_title_search_empty_data() -> None:
404+
"""Confirm an S2 match response with empty data raises DOINotFoundError."""
405+
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
406+
with pytest.raises(DOINotFoundError):
407+
await s2_title_search("empty results edge case query", client=http_client)
408+
409+
382410
@pytest.mark.vcr
383411
@pytest.mark.asyncio
384412
async def test_bad_dois() -> None:

0 commit comments

Comments
 (0)