Skip to content
Merged
Prev Previous commit
Next Next commit
Retrying context creation once, and then abandoning if it fails twice
  • Loading branch information
jamesbraza committed Sep 10, 2025
commit 54b3559bd413d99db61b00d9cf9ce5e54df7cb07
108 changes: 87 additions & 21 deletions src/paperqa/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,7 @@ def fraction_replacer(match: re.Match) -> str:
"relevance_score": int(score_match.group(1)),
}

raise ValueError(
f"Failed to parse JSON from text {text!r}. Your model may not be capable of"
" supporting JSON output or our parsing technique could use some work. Try"
" a different model or specify `Settings(prompts={'use_json': False})`"
) from e
raise ValueError(f"Failed to load JSON from text {text!r}.") from e

# Handling incorrect key names for "relevance_score"
for key in list(data.keys()):
Expand All @@ -130,7 +126,15 @@ def fraction_replacer(match: re.Match) -> str:
return data


async def map_fxn_summary(
class LLMBadContextJSONError(ValueError):
    """Raised when the LLM's context output cannot be parsed as usable JSON.

    Callers treat this as retryable: the stringified failure message can be
    fed back to the LLM to guide a subsequent attempt.
    """

    def __init__(self, message: str, llm_results: list[LLMResult]) -> None:
        super().__init__(message)
        # Keep the partial LLM results attached so token/cost accounting
        # survives across retry attempts.
        self.llm_results = llm_results


async def _map_fxn_summary( # noqa: PLR0912
text: Text,
question: str,
summary_llm_model: LLMModel | None,
Expand All @@ -140,6 +144,7 @@ async def map_fxn_summary(
callbacks: Sequence[Callable[[str], None]] | None = None,
skip_citation_strip: bool = False,
evidence_text_only_fallback: bool = False,
_prior_attempt: LLMBadContextJSONError | None = None,
) -> tuple[Context, list[LLMResult]]:
"""Parses the given text and returns a context object with the parser and prompt runner.

Expand All @@ -160,14 +165,25 @@ async def map_fxn_summary(
skip_citation_strip: Optional skipping of citation stripping, if you want to keep in the context.
evidence_text_only_fallback: Opt-in flag to allow retrying context creation
without media in the completion.
_prior_attempt: Optional failure from a prior attempt, for LLM result tracking.

Returns:
A two-tuple of the made Context, and any LLM results made along the way.
"""
llm_results = []
if _prior_attempt is not None:
llm_results = _prior_attempt.llm_results
append_msgs = [
Message(
content=(
"In a prior attempt, we failed with this failure message:"
f" {_prior_attempt!s}."
)
)
]
else:
llm_results, append_msgs = [], []
extras: dict[str, Any] = {}
citation = text.name + ": " + text.doc.formatted_citation
successfully_has_score = False
used_text_only_fallback = False

# Strip newlines in case chunking led to blank lines,
Expand Down Expand Up @@ -201,6 +217,7 @@ async def map_fxn_summary(
else None
),
),
*append_msgs,
],
callbacks=callbacks,
name="evidence:" + text.name,
Expand All @@ -218,42 +235,69 @@ async def map_fxn_summary(
messages=[
Message(role="system", content=system_prompt),
Message(content=message_prompt),
*append_msgs,
],
callbacks=callbacks,
name="evidence:" + text.name,
)
used_text_only_fallback = True
llm_results.append(llm_result)
context = llm_result.text or ""
result_data = parser(context) if parser else {}
try:
result_data = parser(context) if parser else {}
except ValueError as exc:
raise LLMBadContextJSONError(
f"Failed to parse JSON from context {context!r} due to: {exc}",
llm_results=llm_results,
) from exc
if result_data:
try:
context = result_data.pop("summary")
score = (
result_data.pop("relevance_score")
if "relevance_score" in result_data
else extract_score(context)
)
successfully_has_score = True
try:
score = (
result_data.pop("relevance_score")
if "relevance_score" in result_data
else extract_score(context)
)
except ValueError as exc:
raise LLMBadContextJSONError(
f"Successfully parsed JSON and extracted 'summary' key,"
f" but then failed to extract score from context {context!r} due to: {exc}",
llm_results=llm_results,
) from exc
# just in case question was present
result_data.pop("question", None)
extras = result_data
except KeyError:
successfully_has_score = False
except KeyError: # No summary key, so extract from LLM result
try:
score = extract_score(context)
except ValueError as exc:
raise LLMBadContextJSONError(
f"Successfully parsed JSON but it had no 'summary' key."
f" Then, the failover to extract score from raw context {context!r}"
f" failed due to: {exc}",
llm_results=llm_results,
) from exc
else:
try:
score = extract_score(context)
except ValueError as exc:
raise LLMBadContextJSONError(
f"Extracting score from raw context {context!r}"
f" failed due to: {exc}",
llm_results=llm_results,
) from exc
else:
llm_results.append(LLMResult(model="", date=""))
context = cleaned_text
# If we don't assign scores, just default to 5.
# why 5? Because we filter out 0s in another place
# and 5/10 is the other default I could come up with
score = 5
successfully_has_score = True
# remove citations that collide with our grounded citations (for the answer LLM)
if not skip_citation_strip:
context = strip_citations(context)

if not successfully_has_score:
score = extract_score(context)
if used_text_only_fallback:
extras["used_images"] = False

Expand All @@ -269,8 +313,30 @@ async def map_fxn_summary(
doc=text.doc.model_dump(exclude={"embedding"}),
**text.model_dump(exclude={"embedding", "doc"}),
),
score=score, # pylint: disable=possibly-used-before-assignment
score=score,
**extras,
),
llm_results,
)


async def map_fxn_summary(**kwargs) -> tuple[Context | None, list[LLMResult]]:
    """Create a summary Context, retrying once on bad JSON before abandoning.

    Delegates to `_map_fxn_summary`. On a first `LLMBadContextJSONError`, the
    failure is passed back in as `_prior_attempt` so the LLM can see what went
    wrong. On a second failure the context is abandoned: we log and return
    `None` plus the accumulated LLM results (so cost tracking still works).

    Raises:
        ValueError: If the internal-only `_prior_attempt` kwarg is supplied.
    """
    if "_prior_attempt" in kwargs:
        raise ValueError(
            "_prior_attempt is reserved for internal use only, don't specify it."
        )
    try:
        return await _map_fxn_summary(**kwargs)
    except LLMBadContextJSONError as first_failure:
        # Retry once, informing the LLM of the earlier failure's message.
        # NOTE: the retry stays nested in this handler so that a second
        # failure's logged traceback keeps the implicit exception chain.
        try:
            return await _map_fxn_summary(**kwargs, _prior_attempt=first_failure)
        except LLMBadContextJSONError as second_failure:
            logger.exception(
                "Failed twice to create a context, abandoning it."
                " Your model may not be capable of supporting JSON output"
                " or our parsing technique could use some work. Try"
                " a different model or specify `Settings(prompts={'use_json': False})`."
                " Or, feel free to just ignore this message, as many contexts are"
                " concurrently made and we're not attached to any one given context."
            )
            # Surface the accumulated results so callers can still add tokens.
            return None, second_failure.llm_results
2 changes: 1 addition & 1 deletion src/paperqa/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,7 @@ async def aget_evidence(
for r in llm_results:
session.add_tokens(r)

session.contexts += [r for r, _ in results]
session.contexts += [c for c, _ in results if c is not None]
return session

def query(
Expand Down