Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
Not serializing lambdas in ParsingSettings.parse_pdf serializer, with a test
  • Loading branch information
jamesbraza committed Jan 3, 2026
commit b9ffc81dec57aed3bf05909826b50b4aa38c1e75
1 change: 1 addition & 0 deletions src/paperqa/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ def _custom_serializer(
info.mode == "json"
and hasattr(self.parse_pdf, "__module__")
and hasattr(self.parse_pdf, "__name__")
and self.parse_pdf.__name__ != "<lambda>"
):
# If going to JSON, and we can get a FQN, do so for JSON compliance
data["parse_pdf"] = f"{self.parse_pdf.__module__}.{self.parse_pdf.__name__}"
Expand Down
11 changes: 10 additions & 1 deletion tests/test_paperqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -3291,6 +3291,9 @@ async def test_timeout_resilience() -> None:
assert not llm_results


# Stub parser that is deliberately a lambda: its __name__ is "<lambda>", which
# the lambda-parser case in test_parse_pdf_string_resolution relies on, so it
# must not be rewritten as a `def` (hence the E731 suppression).
TEST_STUB_LAMBDA = lambda: 1 # noqa: E731


def test_parse_pdf_string_resolution() -> None:
# Test with a valid string FQN
pymupdf_str = Settings(
Expand All @@ -3314,7 +3317,7 @@ def test_parse_pdf_string_resolution() -> None:
)
assert "parse_pdf" not in pypdf_str.model_dump()["parsing"]

# Test directly passing a parser
# Test directly passing a normal parser
pymupdf_fn = Settings(parsing=ParsingSettings(parse_pdf=pymupdf_parse_pdf_to_pages))
assert pymupdf_fn.parsing.parse_pdf == pymupdf_parse_pdf_to_pages
assert (
Expand All @@ -3323,6 +3326,12 @@ def test_parse_pdf_string_resolution() -> None:
)
assert "parse_pdf" not in pymupdf_fn.model_dump()["parsing"]

# Test directly passing a lambda parser
lambda_fn = Settings(parsing=ParsingSettings(parse_pdf=TEST_STUB_LAMBDA))
assert lambda_fn.parsing.parse_pdf == TEST_STUB_LAMBDA
assert "parse_pdf" not in lambda_fn.model_dump(mode="json")["parsing"]
assert "parse_pdf" not in lambda_fn.model_dump()["parsing"]

# Test a nonexistent FQN
with pytest.raises(ValueError, match="Failed to locate"):
Settings(parsing=ParsingSettings(parse_pdf="nonexistent.module.function"))
Expand Down