Skip to content
Prev Previous commit
Next Next commit
Moved many httpx.AsyncClient to httpx_aiohttp.HttpxAiohttpClient
  • Loading branch information
jamesbraza committed Sep 23, 2025
commit a2997a0a11c1e0510a3e5c971a0f6b0a95a216e8
3 changes: 2 additions & 1 deletion src/paperqa/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Any, TypeAlias, cast

import httpx
import httpx_aiohttp
from lmi.utils import gather_with_concurrency
from pydantic import BaseModel, ConfigDict, Field

Expand Down Expand Up @@ -155,7 +156,7 @@ def __init__(
async def query(self, **kwargs) -> DocDetails | None:

client = (
httpx.AsyncClient(timeout=10.0)
httpx_aiohttp.HttpxAiohttpClient(timeout=10.0)
if self._http_client is None
else self._http_client
)
Comment thread
jamesbraza marked this conversation as resolved.
Expand Down
3 changes: 2 additions & 1 deletion src/paperqa/clients/crossref.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from urllib.parse import quote

import httpx
import httpx_aiohttp
from anyio import open_file
from lmi.utils import CROSSREF_KEY_HEADER
from tenacity import (
Expand Down Expand Up @@ -364,7 +365,7 @@ async def download_retracted_dataset(
"""
url = f"https://api.labs.crossref.org/data/retractionwatch?{get_crossref_mailto()}"

async with httpx.AsyncClient(timeout=300) as client:
async with httpx_aiohttp.HttpxAiohttpClient(timeout=300) as client:
response = await client.get(url)
response.raise_for_status()

Expand Down
3 changes: 2 additions & 1 deletion src/paperqa/clients/journal_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import anyio
import httpx
import httpx_aiohttp
from pydantic import ValidationError
from rich.progress import (
BarColumn,
Expand Down Expand Up @@ -128,7 +129,7 @@ async def download(client_: httpx.AsyncClient) -> None:
progress.update(task_id, advance=len(chunk))

if client is None:
async with httpx.AsyncClient() as client: # noqa: PLR1704
async with httpx_aiohttp.HttpxAiohttpClient() as client: # noqa: PLR1704
await download(client)
else:
await download(client)
Expand Down
3 changes: 2 additions & 1 deletion src/paperqa/contrib/openreview_paper_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import anyio
import httpx
import httpx_aiohttp
from aviary.core import Message
from lmi import LiteLLMModel
from pydantic import BaseModel, Field
Expand Down Expand Up @@ -132,7 +133,7 @@ async def download_papers(self, submissions: list[Any]) -> None:
async def _download_pdf(self, submission: Any) -> bool:
"""Download a single PDF."""
pdf_link = f"https://openreview.net/{submission.content['pdf']['value']}"
async with httpx.AsyncClient() as client:
async with httpx_aiohttp.HttpxAiohttpClient() as client:
response = await client.get(pdf_link)
if response.status_code == httpx.codes.OK.value:
async with await anyio.open_file(
Expand Down
3 changes: 2 additions & 1 deletion src/paperqa/sources/clinical_trials.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Any

import httpx
import httpx_aiohttp
from lmi.utils import gather_with_concurrency
from tenacity import (
before_sleep_log,
Expand Down Expand Up @@ -260,7 +261,7 @@ async def add_clinical_trials_to_docs(
ssl_context.maximum_version = ssl.TLSVersion.TLSv1_2
# Cookies are not needed
_client = (
httpx.AsyncClient(timeout=10.0, verify=ssl_context)
httpx_aiohttp.HttpxAiohttpClient(timeout=10.0, verify=ssl_context)
if client is None
else client
)
Expand Down
214 changes: 106 additions & 108 deletions tests/cassettes/test_doi_search[paper_attributes1].yaml

Large diffs are not rendered by default.

43 changes: 22 additions & 21 deletions tests/test_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from unittest.mock import patch

import httpx
import httpx_aiohttp
import pytest

import paperqa
Expand Down Expand Up @@ -111,7 +112,7 @@
)
@pytest.mark.asyncio
async def test_title_search(paper_attributes: dict[str, str]) -> None:
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client_list = [
client for client in ALL_CLIENTS if client != RetractionDataPostProcessor
]
Expand Down Expand Up @@ -263,7 +264,7 @@ async def test_title_search(paper_attributes: dict[str, str]) -> None:
)
@pytest.mark.asyncio
async def test_doi_search(paper_attributes: dict[str, str | list[str]]) -> None:
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client_list = [
client for client in ALL_CLIENTS if client != RetractionDataPostProcessor
]
Expand Down Expand Up @@ -306,7 +307,7 @@ async def test_bulk_doi_search() -> None:
"10.1023/a:1007154515475",
"10.1007/s40278-023-41815-2",
]
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(http_client)
details = await client.bulk_query([{"doi": doi} for doi in dois])
assert len(details) == 6, "Should return 6 results"
Expand All @@ -331,7 +332,7 @@ async def test_bulk_title_search() -> None:
),
"Convalescent-anti-sars-cov-2-plasma/immune-globulin",
]
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(http_client)
details = await client.bulk_query([{"title": title} for title in titles])
assert len(details) == 6, "Should return 6 results"
Expand All @@ -341,7 +342,7 @@ async def test_bulk_title_search() -> None:
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_bad_titles() -> None:
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(http_client)
details = await client.query(title="askldjrq3rjaw938h")
assert not details, "Should return None for bad title"
Expand All @@ -357,7 +358,7 @@ async def test_bad_titles() -> None:
@pytest.mark.asyncio
async def test_client_os_error() -> None:
"""Confirm an OSError variant does not crash us."""
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(
http_client, metadata_clients=[SemanticScholarProvider]
)
Expand All @@ -376,7 +377,7 @@ async def test_client_os_error() -> None:
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_bad_dois() -> None:
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(http_client)
details = await client.query(title="abs12032jsdafn")
assert not details, "Should return None for bad doi"
Expand All @@ -385,7 +386,7 @@ async def test_bad_dois() -> None:
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_minimal_fields_filtering() -> None:
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(http_client)
details = await client.query(
title="Augmenting large language models with chemistry tools",
Expand Down Expand Up @@ -419,7 +420,7 @@ async def test_minimal_fields_filtering() -> None:
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_s2_only_fields_filtering() -> None:
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
# now get with authors just from one source
s2_client = DocMetadataClient(
http_client, metadata_clients=[SemanticScholarProvider]
Expand All @@ -445,7 +446,7 @@ async def test_s2_only_fields_filtering() -> None:
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_crossref_journalquality_fields_filtering() -> None:
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
crossref_client = DocMetadataClient(
http_client,
metadata_clients=cast(
Expand All @@ -471,7 +472,7 @@ async def test_crossref_journalquality_fields_filtering() -> None:
" doi:10.1038/s42256-024-00832-8."
), "Citation should be populated"

async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
crossref_client = DocMetadataClient(
http_client,
metadata_clients=cast(
Expand All @@ -496,7 +497,7 @@ async def test_crossref_journalquality_fields_filtering() -> None:
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_author_matching() -> None:
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
crossref_client = DocMetadataClient(
http_client, metadata_clients=[CrossrefProvider]
)
Expand Down Expand Up @@ -539,7 +540,7 @@ async def test_author_matching() -> None:
@pytest.mark.asyncio
async def test_odd_client_requests() -> None:
# try querying using an authors match, but not requesting authors back
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(http_client)
details = await client.query(
title="Augmenting large language models with chemistry tools",
Expand All @@ -550,7 +551,7 @@ async def test_odd_client_requests() -> None:
assert details.authors, "Should return correct author results"

# try querying using a title, asking for no DOI back
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(http_client)
details = await client.query(
title="Augmenting large language models with chemistry tools",
Expand All @@ -560,7 +561,7 @@ async def test_odd_client_requests() -> None:
assert details.doi, "Should return a doi even though we don't ask for it"

# try querying using a title, asking for no title back
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(http_client)
details = await client.query(
title="Augmenting large language models with chemistry tools",
Expand All @@ -569,7 +570,7 @@ async def test_odd_client_requests() -> None:
assert details, "Assertions require successful query"
assert details.title, "Should return a title even though we don't ask for it"

async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(http_client)
details = await client.query(
doi="10.1007/s40278-023-41815-2",
Expand All @@ -587,7 +588,7 @@ async def test_odd_client_requests() -> None:
paperqa.clients.semantic_scholar, "SEMANTIC_SCHOLAR_API_REQUEST_TIMEOUT", 0.001
)
async def test_ensure_robust_to_timeouts() -> None:
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(http_client)
details = await client.query(
doi="10.1007/s40278-023-41815-2",
Expand All @@ -610,7 +611,7 @@ async def test_ensure_sequential_run(caplog) -> None:
# we're using a DOI that is NOT in crossref, but running the crossref client first,
# so we can ensure that both clients are run sequentially

async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(
http_client=http_client,
metadata_clients=cast(
Expand Down Expand Up @@ -654,7 +655,7 @@ async def test_ensure_sequential_run(caplog) -> None:
async def test_ensure_sequential_run_early_stop(caplog) -> None:
caplog.set_level(logging.DEBUG, logger=paperqa.clients.__name__)
# now we should stop after hitting s2
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
client = DocMetadataClient(
http_client=http_client,
metadata_clients=cast(
Expand Down Expand Up @@ -690,7 +691,7 @@ async def test_ensure_sequential_run_early_stop(caplog) -> None:
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_crossref_retraction_status(stub_data_dir: Path) -> None:
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
retract_processor = RetractionDataPostProcessor(
f"{stub_data_dir}/stub_retractions.csv"
)
Expand Down Expand Up @@ -776,7 +777,7 @@ async def test_tricky_journal_quality_results(doi: str, score: int) -> None:
or they have a swap like an & for and.

"""
async with httpx.AsyncClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
crossref_client = DocMetadataClient(
http_client,
metadata_clients=cast(
Expand Down
3 changes: 2 additions & 1 deletion tests/test_clinical_trials.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from unittest.mock import AsyncMock, Mock, patch

import httpx
import httpx_aiohttp
import pytest

from paperqa import Docs, Settings
Expand Down Expand Up @@ -37,7 +38,7 @@ def mock_bucket_client():

@pytest.fixture(name="mock_client")
def fixture_mock_client() -> httpx.AsyncClient:
return AsyncMock(spec=httpx.AsyncClient)
return AsyncMock(spec=httpx_aiohttp.HttpxAiohttpClient)


@pytest.mark.asyncio
Expand Down