Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion src/paperqa/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@

# For reference, here's Docling's image description prompt:
# https://github.com/docling-project/docling/blob/v2.55.1/docling/datamodel/pipeline_options.py#L214-L216
media_enrichment_prompt_template = (
individual_media_enrichment_prompt_template = (
"You are analyzing an image or table from a scientific document."
" Provide a detailed description that will be used to answer questions about its content."
" Focus on key elements, data, relationships, and scientific insights visible in the image."
Expand Down Expand Up @@ -198,3 +198,21 @@
"\n\n{context_text}Describe the media," # Allow for empty context_text
" or if uncertain on a description please state why:"
)
# Prompt template for describing a full-page screenshot of a scientific document.
# It is assembled from three paragraphs separated by blank lines; the only
# format placeholder is {context_text}, which may be filled with an empty string.
full_page_enrichment_prompt_template = "\n\n".join(
    (
        # Paragraph 1: task framing and what the description should capture.
        (
            "You are analyzing a screenshot of a page from a scientific document."
            " Provide a detailed description"
            " that will be used to answer questions about its content."
            " Focus on key elements, data, relationships,"
            " and scientific insights visible in the image."
            " It's especially important to document referential information"
            " such as figure/table numbers, labels, plot colors, or legends."
        ),
        # Paragraph 2: caveats about the co-located text, page margins,
        # and garbled screenshots.
        (
            "Text co-located with the screenshot may be associated"
            " with other page's content and unrelated,"
            " so do not just blindly quote referential information."
            " To restate,"
            " the co-located text is several pages of content,"
            " so only use aspects relevant to the accompanying screenshot."
            " Do not feel the need to extensively document entities"
            " in the margins such as journal logos, display type,"
            " margin boxes, or PDF design elements."
            " If the screenshot is garbled due to a bad screenshot,"
            " describe the screenshot"
            " as garbled, state why it's considered garbled."
        ),
        # Paragraph 3: optional context followed by the final instruction.
        # No separator follows the placeholder, so an empty context_text
        # leaves the instruction at the start of the paragraph.
        (
            "{context_text}"
            "Describe the screenshot,"
            " or if uncertain on a description please state why:"
        ),
    )
)
4 changes: 2 additions & 2 deletions src/paperqa/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
default_system_prompt,
env_reset_prompt,
env_system_prompt,
media_enrichment_prompt_template,
individual_media_enrichment_prompt_template,
qa_prompt,
select_paper_prompt,
structured_citation_prompt,
Expand Down Expand Up @@ -343,7 +343,7 @@ class ParsingSettings(BaseModel):
),
)
enrichment_prompt: str = Field(
default=media_enrichment_prompt_template,
default=individual_media_enrichment_prompt_template,
description="Prompt template for enriching media.",
)

Expand Down
Loading