From c2de78964fa1487a86af7c5e5b647014910c5d2a Mon Sep 17 00:00:00 2001
From: James Braza <jamesbraza@gmail.com>
Date: Mon, 22 Dec 2025 09:39:58 -0500
Subject: [PATCH 1/2] Created a test to check for invalid configs

---
 tests/test_configs.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/test_configs.py b/tests/test_configs.py
index 90b9c34c8..a45c576b7 100644
--- a/tests/test_configs.py
+++ b/tests/test_configs.py
@@ -1,3 +1,4 @@
+import importlib.resources
 import os
 import pathlib
 from unittest.mock import patch
@@ -6,6 +7,7 @@
 from pydantic import ValidationError
 from pytest_subtests import SubTests
 
+import paperqa.configs
 from paperqa.prompts import citation_prompt
 from paperqa.settings import (
     AgentSettings,
@@ -205,3 +207,15 @@ def test_citation_prompt_current_year():
         f"Citation prompt should contain '{expected_year_text}' but got:"
         f" {citation_prompt}"
     )
+
+
+def test_validity_of_bundled_configs(subtests: SubTests) -> None:
+    """Check get_settings returns Settings for each *.json in paperqa.configs."""
+    # Each config is checked in its own subtest, labelled with the config name.
+    bundled = importlib.resources.files(paperqa.configs)
+    names = [p.name for p in bundled.iterdir() if p.name.endswith(".json")]
+    for file_name in names:
+        stem = file_name.removesuffix(".json")
+        with subtests.test(msg=stem):
+            loaded = get_settings(stem)
+            assert isinstance(loaded, Settings)

From 30540fa3b48c1d57f3d470dec0d3f6d1485ff3f7 Mon Sep 17 00:00:00 2001
From: James Braza <jamesbraza@gmail.com>
Date: Mon, 22 Dec 2025 09:42:21 -0500
Subject: [PATCH 2/2] Updated configs for paper-qa==2025.12.17 as needed

---
 src/paperqa/configs/clinical_trials.json             | 6 ++++--
 src/paperqa/configs/contracrow.json                  | 9 +++++----
 src/paperqa/configs/high_quality.json                | 6 ++++--
 src/paperqa/configs/openreview.json                  | 6 ++++--
 src/paperqa/configs/search_only_clinical_trials.json | 6 ++++--
 src/paperqa/configs/tier2_limits.json                | 6 ++++--
 src/paperqa/configs/tier3_limits.json                | 6 ++++--
 src/paperqa/configs/tier4_limits.json                | 6 ++++--
 src/paperqa/configs/tier5_limits.json                | 6 ++++--
 src/paperqa/configs/wikicrow.json                    | 9 +++++----
 10 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/src/paperqa/configs/clinical_trials.json b/src/paperqa/configs/clinical_trials.json
index f7235b4b3..bbfa56b2f 100644
--- a/src/paperqa/configs/clinical_trials.json
+++ b/src/paperqa/configs/clinical_trials.json
@@ -15,7 +15,9 @@
   },
   "parsing": {
     "use_doc_details": true,
-    "chunk_size": 9000,
-    "overlap": 750
+    "reader_config": {
+      "chunk_chars": 9000,
+      "overlap": 750
+    }
   }
 }
diff --git a/src/paperqa/configs/contracrow.json b/src/paperqa/configs/contracrow.json
index 3985ec5e8..47d58935d 100644
--- a/src/paperqa/configs/contracrow.json
+++ b/src/paperqa/configs/contracrow.json
@@ -20,13 +20,14 @@
     "answer_filter_extra_background": false
   },
   "parsing": {
-    "chunk_size": 7000,
     "use_doc_details": true,
-    "overlap": 250,
+    "reader_config": {
+      "chunk_chars": 7000,
+      "overlap": 250
+    },
     "citation_prompt": "Provide the citation for the following text in MLA Format. Do not write an introductory sentence. If reporting date accessed, the current year is 2024\n\n{text}\n\nCitation:",
     "structured_citation_prompt": "Extract the title, authors, and doi as a JSON from this MLA citation. If any field can not be found, return it as null. Use title, authors, and doi as keys, author's value should be a list of authors. {citation}\n\nCitation JSON:",
-    "disable_doc_valid_check": false,
-    "chunking_algorithm": "simple_overlap"
+    "disable_doc_valid_check": false
   },
   "prompts": {
     "summary": "Summarize the excerpt below to help answer a question.\n\nExcerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}\n\nDo not directly answer the question, instead summarize to give evidence to help answer the question. Stay detailed; report specific numbers, equations, or direct quotes (marked with quotation marks). Reply \"Not applicable\" if the excerpt is irrelevant. At the end of your response, provide an integer score from 1-10 on a newline indicating relevance to question. Do not explain your score.\n\nRelevant Information Summary ({summary_length}):",
diff --git a/src/paperqa/configs/high_quality.json b/src/paperqa/configs/high_quality.json
index 1fac7788b..16cc0ff29 100644
--- a/src/paperqa/configs/high_quality.json
+++ b/src/paperqa/configs/high_quality.json
@@ -6,7 +6,9 @@
   },
   "parsing": {
     "use_doc_details": true,
-    "chunk_size": 7000,
-    "overlap": 250
+    "reader_config": {
+      "chunk_chars": 7000,
+      "overlap": 250
+    }
   }
 }
diff --git a/src/paperqa/configs/openreview.json b/src/paperqa/configs/openreview.json
index 0bf1bc491..562e7d458 100644
--- a/src/paperqa/configs/openreview.json
+++ b/src/paperqa/configs/openreview.json
@@ -30,7 +30,9 @@
     "return_paper_metadata": false
   },
   "parsing": {
-    "chunk_size": 3000000,
-    "use_doc_details": false
+    "use_doc_details": false,
+    "reader_config": {
+      "chunk_chars": 3000000
+    }
   }
 }
diff --git a/src/paperqa/configs/search_only_clinical_trials.json b/src/paperqa/configs/search_only_clinical_trials.json
index d6c105b1a..1b04d473a 100644
--- a/src/paperqa/configs/search_only_clinical_trials.json
+++ b/src/paperqa/configs/search_only_clinical_trials.json
@@ -14,7 +14,9 @@
   },
   "parsing": {
     "use_doc_details": true,
-    "chunk_size": 9000,
-    "overlap": 750
+    "reader_config": {
+      "chunk_chars": 9000,
+      "overlap": 750
+    }
   }
 }
diff --git a/src/paperqa/configs/tier2_limits.json b/src/paperqa/configs/tier2_limits.json
index 238974c93..7ac9dfbe9 100644
--- a/src/paperqa/configs/tier2_limits.json
+++ b/src/paperqa/configs/tier2_limits.json
@@ -6,8 +6,10 @@
   },
   "parsing": {
     "use_doc_details": true,
-    "chunk_size": 7000,
-    "overlap": 250
+    "reader_config": {
+      "chunk_chars": 7000,
+      "overlap": 250
+    }
   },
   "prompts": {
     "use_json": true
diff --git a/src/paperqa/configs/tier3_limits.json b/src/paperqa/configs/tier3_limits.json
index 4c21bda97..eaed70ce0 100644
--- a/src/paperqa/configs/tier3_limits.json
+++ b/src/paperqa/configs/tier3_limits.json
@@ -6,8 +6,10 @@
   },
   "parsing": {
     "use_doc_details": true,
-    "chunk_size": 7000,
-    "overlap": 250
+    "reader_config": {
+      "chunk_chars": 7000,
+      "overlap": 250
+    }
   },
   "prompts": {
     "use_json": true
diff --git a/src/paperqa/configs/tier4_limits.json b/src/paperqa/configs/tier4_limits.json
index ddc6879d3..c4b199aff 100644
--- a/src/paperqa/configs/tier4_limits.json
+++ b/src/paperqa/configs/tier4_limits.json
@@ -6,8 +6,10 @@
   },
   "parsing": {
     "use_doc_details": true,
-    "chunk_size": 7000,
-    "overlap": 250
+    "reader_config": {
+      "chunk_chars": 7000,
+      "overlap": 250
+    }
   },
   "prompts": {
     "use_json": true
diff --git a/src/paperqa/configs/tier5_limits.json b/src/paperqa/configs/tier5_limits.json
index 2b15880fe..a7ea5566c 100644
--- a/src/paperqa/configs/tier5_limits.json
+++ b/src/paperqa/configs/tier5_limits.json
@@ -6,8 +6,10 @@
   },
   "parsing": {
     "use_doc_details": true,
-    "chunk_size": 7000,
-    "overlap": 250
+    "reader_config": {
+      "chunk_chars": 7000,
+      "overlap": 250
+    }
   },
   "prompts": {
     "use_json": true
diff --git a/src/paperqa/configs/wikicrow.json b/src/paperqa/configs/wikicrow.json
index d1ba4f753..38b4de4f8 100644
--- a/src/paperqa/configs/wikicrow.json
+++ b/src/paperqa/configs/wikicrow.json
@@ -20,13 +20,14 @@
     "answer_filter_extra_background": false
   },
   "parsing": {
-    "chunk_size": 7000,
     "use_doc_details": true,
-    "overlap": 1750,
+    "reader_config": {
+      "chunk_chars": 7000,
+      "overlap": 1750
+    },
     "citation_prompt": "Provide the citation for the following text in MLA Format. Do not write an introductory sentence. If reporting date accessed, the current year is 2024\n\n{text}\n\nCitation:",
     "structured_citation_prompt": "Extract the title, authors, and doi as a JSON from this MLA citation. If any field can not be found, return it as null. Use title, authors, and doi as keys, author's value should be a list of authors. {citation}\n\nCitation JSON:",
-    "disable_doc_valid_check": false,
-    "chunking_algorithm": "simple_overlap"
+    "disable_doc_valid_check": false
   },
   "prompts": {
     "summary": "Summarize the excerpt below to help answer a question.\n\nExcerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}\n\nDo not directly answer the question, instead summarize to give evidence to help answer the question. Stay detailed; report specific numbers, equations, or direct quotes (marked with quotation marks). Reply \"Not applicable\" if the excerpt is irrelevant. At the end of your response, provide an integer score from 1-10 on a newline indicating relevance to question. Do not explain your score.\n\nRelevant Information Summary ({summary_length}):",