Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/paperqa/configs/clinical_trials.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
},
"parsing": {
"use_doc_details": true,
"chunk_size": 9000,
"overlap": 750
"reader_config": {
"chunk_chars": 9000,
"overlap": 750
}
}
}
9 changes: 5 additions & 4 deletions src/paperqa/configs/contracrow.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@
"answer_filter_extra_background": false
},
"parsing": {
"chunk_size": 7000,
"use_doc_details": true,
"overlap": 250,
"reader_config": {
"chunk_chars": 7000,
"overlap": 250
},
"citation_prompt": "Provide the citation for the following text in MLA Format. Do not write an introductory sentence. If reporting date accessed, the current year is 2024\n\n{text}\n\nCitation:",
"structured_citation_prompt": "Extract the title, authors, and doi as a JSON from this MLA citation. If any field can not be found, return it as null. Use title, authors, and doi as keys, author's value should be a list of authors. {citation}\n\nCitation JSON:",
"disable_doc_valid_check": false,
"chunking_algorithm": "simple_overlap"
"disable_doc_valid_check": false
},
"prompts": {
"summary": "Summarize the excerpt below to help answer a question.\n\nExcerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}\n\nDo not directly answer the question, instead summarize to give evidence to help answer the question. Stay detailed; report specific numbers, equations, or direct quotes (marked with quotation marks). Reply \"Not applicable\" if the excerpt is irrelevant. At the end of your response, provide an integer score from 1-10 on a newline indicating relevance to question. Do not explain your score.\n\nRelevant Information Summary ({summary_length}):",
Expand Down
6 changes: 4 additions & 2 deletions src/paperqa/configs/high_quality.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
},
"parsing": {
"use_doc_details": true,
"chunk_size": 7000,
"overlap": 250
"reader_config": {
"chunk_chars": 7000,
"overlap": 250
}
}
}
6 changes: 4 additions & 2 deletions src/paperqa/configs/openreview.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
"return_paper_metadata": false
},
"parsing": {
"chunk_size": 3000000,
"use_doc_details": false
"use_doc_details": false,
"reader_config": {
"chunk_chars": 3000000
}
}
}
6 changes: 4 additions & 2 deletions src/paperqa/configs/search_only_clinical_trials.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
},
"parsing": {
"use_doc_details": true,
"chunk_size": 9000,
"overlap": 750
"reader_config": {
"chunk_chars": 9000,
"overlap": 750
}
}
}
6 changes: 4 additions & 2 deletions src/paperqa/configs/tier2_limits.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
},
"parsing": {
"use_doc_details": true,
"chunk_size": 7000,
"overlap": 250
"reader_config": {
"chunk_chars": 7000,
"overlap": 250
}
},
"prompts": {
"use_json": true
Expand Down
6 changes: 4 additions & 2 deletions src/paperqa/configs/tier3_limits.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
},
"parsing": {
"use_doc_details": true,
"chunk_size": 7000,
"overlap": 250
"reader_config": {
"chunk_chars": 7000,
"overlap": 250
}
},
"prompts": {
"use_json": true
Expand Down
6 changes: 4 additions & 2 deletions src/paperqa/configs/tier4_limits.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
},
"parsing": {
"use_doc_details": true,
"chunk_size": 7000,
"overlap": 250
"reader_config": {
"chunk_chars": 7000,
"overlap": 250
}
},
"prompts": {
"use_json": true
Expand Down
6 changes: 4 additions & 2 deletions src/paperqa/configs/tier5_limits.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
},
"parsing": {
"use_doc_details": true,
"chunk_size": 7000,
"overlap": 250
"reader_config": {
"chunk_chars": 7000,
"overlap": 250
}
},
"prompts": {
"use_json": true
Expand Down
9 changes: 5 additions & 4 deletions src/paperqa/configs/wikicrow.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@
"answer_filter_extra_background": false
},
"parsing": {
"chunk_size": 7000,
"use_doc_details": true,
"overlap": 1750,
"reader_config": {
"chunk_chars": 7000,
"overlap": 1750
},
"citation_prompt": "Provide the citation for the following text in MLA Format. Do not write an introductory sentence. If reporting date accessed, the current year is 2024\n\n{text}\n\nCitation:",
"structured_citation_prompt": "Extract the title, authors, and doi as a JSON from this MLA citation. If any field can not be found, return it as null. Use title, authors, and doi as keys, author's value should be a list of authors. {citation}\n\nCitation JSON:",
"disable_doc_valid_check": false,
"chunking_algorithm": "simple_overlap"
"disable_doc_valid_check": false
},
"prompts": {
"summary": "Summarize the excerpt below to help answer a question.\n\nExcerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}\n\nDo not directly answer the question, instead summarize to give evidence to help answer the question. Stay detailed; report specific numbers, equations, or direct quotes (marked with quotation marks). Reply \"Not applicable\" if the excerpt is irrelevant. At the end of your response, provide an integer score from 1-10 on a newline indicating relevance to question. Do not explain your score.\n\nRelevant Information Summary ({summary_length}):",
Expand Down
14 changes: 14 additions & 0 deletions tests/test_configs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import importlib.resources
import os
import pathlib
from unittest.mock import patch
Expand All @@ -6,6 +7,7 @@
from pydantic import ValidationError
from pytest_subtests import SubTests

import paperqa.configs
from paperqa.prompts import citation_prompt
from paperqa.settings import (
AgentSettings,
Expand Down Expand Up @@ -205,3 +207,15 @@ def test_citation_prompt_current_year():
f"Citation prompt should contain '{expected_year_text}' but got:"
f" {citation_prompt}"
)


def test_validity_of_bundled_configs(subtests: SubTests) -> None:
    """Check that every JSON config bundled in ``paperqa.configs`` loads.

    Each ``*.json`` resource in the ``paperqa.configs`` package is passed by
    name (file name without the ``.json`` suffix) to ``get_settings``, inside
    its own subtest labelled with that name, and the result must be a
    ``Settings`` instance.
    """
    # Sort by name so the subtests run in a stable order regardless of the
    # order in which the resource directory is listed.
    config_names = sorted(
        resource.name.removesuffix(".json")
        for resource in importlib.resources.files(paperqa.configs).iterdir()
        if resource.name.endswith(".json")
    )
    # Guard against a vacuous pass: with zero discovered configs the loop
    # below would never run and the test would succeed without checking
    # anything.
    assert config_names, "No bundled JSON configs were found in paperqa.configs"
    for config_name in config_names:
        with subtests.test(msg=config_name):
            settings = get_settings(config_name)
            assert isinstance(settings, Settings)
Loading